diff --git a/.clang-tidy b/.clang-tidy index 49a9f8458..eccdfe06e 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -33,17 +33,16 @@ Checks: "*, -readability-static-accessed-through-instance, -misc-unused-parameters, -hicpp-multiway-paths-covered, + -cert-err58-cpp, google-readability-avoid-underscore-in-googletest-name, google-upgrade-googletest-case, -bugprone-implicit-widening-of-multiplication-result, -bugprone-narrowing-conversions, - -cert-dcl59-cpp, -cert-env33-c, -cert-err33-c, -cert-err34-c, - -cert-err58-cpp, -cert-msc32-c, -cert-msc51-cpp, -clang-analyzer-core.CallAndMessage, @@ -80,7 +79,6 @@ Checks: "*, -cppcoreguidelines-pro-type-vararg, -cppcoreguidelines-special-member-functions, -cppcoreguidelines-virtual-class-destructor, - -google-build-namespaces, -google-explicit-constructor, -google-global-names-in-headers, -google-readability-casting, @@ -95,7 +93,6 @@ Checks: "*, -hicpp-no-malloc, -hicpp-signed-bitwise, -hicpp-special-member-functions, - -hicpp-use-auto, -hicpp-use-equals-default, -hicpp-use-noexcept, -hicpp-use-nullptr, @@ -110,11 +107,8 @@ Checks: "*, -misc-non-private-member-variables-in-classes, -modernize-avoid-c-arrays, -modernize-deprecated-headers, - -modernize-loop-convert, -modernize-macro-to-enum, -modernize-redundant-void-arg, - -modernize-use-auto, - -modernize-use-default-member-init, -modernize-use-equals-default, -modernize-use-nodiscard, -modernize-use-noexcept, @@ -122,11 +116,6 @@ Checks: "*, -modernize-use-override, -modernize-use-using, -openmp-use-default-none, - -performance-faster-string-find, - -performance-for-range-copy, - -performance-inefficient-vector-operation, - -performance-unnecessary-value-param, - -readability-const-return-type, -readability-convert-member-functions-to-static, -readability-delete-null-pointer, -readability-duplicate-include, @@ -142,7 +131,6 @@ Checks: "*, -readability-non-const-parameter, -readability-redundant-control-flow, -readability-redundant-preprocessor, - 
-readability-simplify-boolean-expr, -readability-suspicious-call-argument" WarningsAsErrors: '' # More paths can be ignored by modifying this so that it looks like '^((?!/PATH/ONE/|/PATH/TWO/).)*$' diff --git a/.gitignore b/.gitignore index 7ce01c9e9..1f1fa3018 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ bin/* *.a a.out *.dSYM +__pycache__ # Makefiles # ############# @@ -39,11 +40,8 @@ data out.* o.* run - - disk.* - # Logs and databases # ###################### *.err @@ -67,7 +65,6 @@ disk.* # OS generated files # ###################### .DS_Store - .remote-sync.json .remote-sync_macos.json ._* @@ -85,4 +82,3 @@ Thumbs.db ############################# docs/doxygen/build docs/sphinx/build - diff --git a/Jenkinsfile b/Jenkinsfile index f974e3da7..d699732cb 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -105,10 +105,10 @@ pipeline sh ''' printf '=%.0s' {1..100} printf "\n" - cat tidy_results_cpp.log + cat tidy_results_cpp_${CHOLLA_MAKE_TYPE}.log printf '=%.0s' {1..100} printf "\n" - cat tidy_results_gpu.log + cat tidy_results_gpu_${CHOLLA_MAKE_TYPE}.log printf '=%.0s' {1..100} printf "\n" ''' diff --git a/Makefile b/Makefile index e5b7a56e3..868f2c5bc 100644 --- a/Makefile +++ b/Makefile @@ -205,10 +205,10 @@ tidy: # - --warnings-as-errors= Upgrade all warnings to error, good for CI clang-tidy --verify-config @echo -e - (time clang-tidy $(CLANG_TIDY_ARGS) $(CPPFILES_TIDY) -- $(DFLAGS) $(CXXFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_cpp.log 2>&1 & \ - (time clang-tidy $(CLANG_TIDY_ARGS) $(GPUFILES_TIDY) -- $(DFLAGS) $(GPUFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_gpu.log 2>&1 & \ + (time clang-tidy $(CLANG_TIDY_ARGS) $(CPPFILES_TIDY) -- $(DFLAGS) $(CXXFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_cpp_$(TYPE).log 2>&1 & \ + (time clang-tidy $(CLANG_TIDY_ARGS) $(GPUFILES_TIDY) -- $(DFLAGS) $(GPUFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_gpu_$(TYPE).log 2>&1 & \ for i in 1 2; do wait -n; done - @echo -e "\nResults from 
clang-tidy are available in the 'tidy_results_cpp.log' and 'tidy_results_gpu.log' files." + @echo -e "\nResults from clang-tidy are available in the 'tidy_results_cpp_$(TYPE).log' and 'tidy_results_gpu_$(TYPE).log' files." clean: rm -f $(CLEAN_OBJS) diff --git a/builds/make.type.mhd b/builds/make.type.mhd index 486ba2547..2c6cbf68d 100644 --- a/builds/make.type.mhd +++ b/builds/make.type.mhd @@ -9,9 +9,7 @@ MPI_GPU ?= DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -# Currently MHD only supports PCM reconstruction. Higher order reconstruction -# methods will be added later -DFLAGS += -DPCM +DFLAGS += -DPPMC DFLAGS += -DHLLD DFLAGS += -DMHD @@ -34,7 +32,7 @@ DFLAGS += -DTEMPERATURE_FLOOR # Apply the cooling in the GPU from precomputed tables # DFLAGS += -DCOOLING_GPU -#Measure the Timing of the different stages +# Measure the Timing of the different stages DFLAGS += -DCPU_TIME DFLAGS += $(OUTPUT) @@ -49,7 +47,7 @@ DFLAGS += $(MPI_GPU) # used on scientific runs # Do CUDA error checking -DFLAGS += -DCUDA_ERROR_CHECK +# DFLAGS += -DCUDA_ERROR_CHECK # Limit the number of steps to evolve. 
# DFLAGS += -DN_STEPS_LIMIT=1000 diff --git a/builds/setup.frontier.cce.sh b/builds/setup.frontier.cce.sh index 4a22344d2..afb251680 100755 --- a/builds/setup.frontier.cce.sh +++ b/builds/setup.frontier.cce.sh @@ -15,3 +15,4 @@ export MPICH_GPU_SUPPORT_ENABLED=1 export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH} export CHOLLA_ENVSET=1 +export ROCFFT_RTC_CACHE_PATH=/dev/null diff --git a/cholla-tests-data b/cholla-tests-data index d6202baad..dcd73ff52 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit d6202baadc9eaac6dce5ec4060f1f3fda8abdf1f +Subproject commit dcd73ff52b9027627b247c6d888bcdb56840c85e diff --git a/examples/3D/mhd_blast.txt b/examples/3D/mhd_blast.txt index f60997c7e..5d078f674 100644 --- a/examples/3D/mhd_blast.txt +++ b/examples/3D/mhd_blast.txt @@ -1,27 +1,27 @@ # # Parameter File for the MHD Blast wavelength -# See [Gardiner & Stone 2008](https://arxiv.org/abs/0712.2634) for details. +# See [Stone & Gardiner 2009](https://ui.adsabs.harvard.edu/abs/2009NewA...14..139S/abstract) for details. # ################################################ # number of grid cells in the x dimension -nx=128 +nx=200 # number of grid cells in the y dimension -ny=128 +ny=300 # number of grid cells in the z dimension -nz=128 +nz=200 # final output time -tout=0.02 +tout=0.2 # time interval for output -outstep=0.02 +outstep=0.2 # name of initial conditions init=MHD_Spherical_Blast # domain properties xmin=-0.5 -ymin=-0.5 +ymin=-0.75 zmin=-0.5 xlen=1.0 -ylen=1.0 +ylen=1.5 zlen=1.0 # type of boundary conditions xl_bcnd=1 @@ -45,17 +45,17 @@ vy=0.0 # velocity in the z direction vz=0.0 # initial pressure outside the blast zone -P=1.0 -# initial pressure inside the blast zone -P_blast=100.0 +P=0.1 +# initial pressure inside the blast zone. Note that the paper says this should be 100, that is a typo +P_blast=10.0 # The radius of the blast zone -radius=0.125 -# magnetic field in the x direction. 
Equal to 10/sqrt(2) -Bx=7.0710678118654746 -# magnetic field in the y direction -By=0.0 -# magnetic field in the z direction. Equal to 10/sqrt(2) -Bz=7.0710678118654746 +radius=0.1 +# magnetic field in the x direction. Equal to 1/sqrt(2) +Bx=0.70710678118654746 +# magnetic field in the y direction. Equal to 1/sqrt(2) +By=0.70710678118654746 +# magnetic field in the z direction +Bz=0.0 # value of gamma gamma=1.666666666666667 diff --git a/examples/3D/mhd_contact_wave.txt b/examples/3D/mhd_contact_wave.txt index 9250bba5a..0ff7e7989 100644 --- a/examples/3D/mhd_contact_wave.txt +++ b/examples/3D/mhd_contact_wave.txt @@ -57,7 +57,7 @@ By=1.5 # magnetic field in the z direction Bz=0 # amplitude of perturbing oscillations -A=1e-1 +A=1e-6 # value of gamma gamma=1.666666666666667 # The right eigenvectors to set the wave properly diff --git a/python_scripts/cat.py b/python_scripts/cat.py new file mode 100755 index 000000000..dc840c570 --- /dev/null +++ b/python_scripts/cat.py @@ -0,0 +1,406 @@ +# Utils for concat cholla output + +import h5py +import numpy as np +import os + +verbose = True + +def parse(argv): + # Determine prefix + if 'h5' in argv: + preprefix = argv.split('.h5')[0] + prefix = preprefix +'.h5' + + else: + prefix = './{}.h5'.format(argv) + + # Check existing + firstfile = prefix+'.0' + if not os.path.isfile(firstfile): + print(firstfile,' is missing') + exit() + + # Set dirnames + dnamein = os.path.dirname(firstfile)+'/' + dnameout = os.path.dirname(firstfile) + '/' + return dnamein,dnameout + +def hydro(n,dnamein,dnameout,double=True): + """ + n: integer, output number of file + dnamein: string, directory name of input files, should include '/' at end or leave blank for current directory + dnameout: string, directory name of output files, should include '/' at end or leave blank for current directory + double: optional bool, double precision (float64) if True, single precision (float32) if False + + Reads files of form dnamein{n}.h5.{rank}, looping over 
rank, outputting to file dnameout{n}.h5. + """ + + fileout = h5py.File(dnameout+str(n)+'.h5', 'a') + + i = -1 + # loops over all files + while True: + i += 1 + + fileinname = dnamein+str(n)+'.h5.'+str(i) + + if not os.path.isfile(fileinname): + break + print('Load:',fileinname,flush=True) + + # open the input file for reading + filein = h5py.File(fileinname,'r') + + # read in the header data from the input file + head = filein.attrs + + # if it's the first input file, write the header attributes + # and create the datasets in the output file + if (i == 0): + nx = head['dims'][0] + ny = head['dims'][1] + nz = head['dims'][2] + nxl = head['dims_local'][0] + nyl = head['dims_local'][1] + nzl = head['dims_local'][2] + fileout.attrs['dims'] = [nx, ny, nz] + fileout.attrs['gamma'] = [head['gamma'][0]] + fileout.attrs['t'] = [head['t'][0]] + fileout.attrs['dt'] = [head['dt'][0]] + fileout.attrs['n_step'] = [head['n_step'][0]] + + units = ['time_unit', 'mass_unit', 'length_unit', 'energy_unit', 'velocity_unit', 'densit\ +y_unit'] + for unit in units: + fileout.attrs[unit] = [head[unit][0]] + keys = list(filein.keys()) + #['density','momentum_x','momentum_y','momentum_z','Energy','GasEnergy','scalar0'] + + for key in keys: + if key not in fileout: + # WARNING: If you don't set dataset dtype it will default to 32-bit, but CHOLLA likes to be 64-bit + if double: + dtype = filein[key].dtype + else: + dtype = None + if nz > 1: + fileout.create_dataset(key, (nx, ny, nz), chunks=(nxl,nyl,nzl), dtype=dtype) + elif ny > 1: + fileout.create_dataset(key, (nx, ny), chunks=(nxl,nyl), dtype=dtype) + elif nx > 1: + fileout.create_dataset(key, (nx,), chunks=(nxl,), dtype=dtype) + #fileout.create_dataset(key, (nx, ny, nz)) + + # write data from individual processor file to + # correct location in concatenated file + nxl = head['dims_local'][0] + nyl = head['dims_local'][1] + nzl = head['dims_local'][2] + xs = head['offset'][0] + ys = head['offset'][1] + zs = head['offset'][2] + for key in 
keys: + if key in filein: + if nz > 1: + fileout[key][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein[key] + elif ny > 1: + fileout[key][xs:xs+nxl,ys:ys+nyl] = filein[key] + elif nx > 1: + fileout[key][xs:xs+nxl] = filein[key] + filein.close() + + # end loop over all files + fileout.close() + + +def projection(n,dnamein,dnameout): + """ + n: integer, output number of file + dnamein: string, directory name of input files, should include '/' at end or leave blank for current directory + dnameout: string, directory name of output files, should include '/' at end or leave blank for current directory + double: optional bool, double precision (float64) if True, single precision (float32) if False + + Reads files of form dnamein{n}.h5.{rank}, looping over rank, outputting to file dnameout{n}.h5. + """ + + # open the output file for writing + fileout = h5py.File(dnameout+str(n)+'_proj.h5', 'w') + i = -1 + while True: + i += 1 + + fileinname = dnamein+str(n)+'_proj.h5.'+str(i) + + if not os.path.isfile(fileinname): + break + + if verbose: + print(fileinname) + # open the input file for reading + filein = h5py.File(fileinname,'r') + # read in the header data from the input file + head = filein.attrs + + # if it's the first input file, write the header attributes + # and create the datasets in the output file + if (i == 0): + nx = head['dims'][0] + ny = head['dims'][1] + nz = head['dims'][2] + fileout.attrs['dims'] = [nx, ny, nz] + fileout.attrs['gamma'] = [head['gamma'][0]] + fileout.attrs['t'] = [head['t'][0]] + fileout.attrs['dt'] = [head['dt'][0]] + fileout.attrs['n_step'] = [head['n_step'][0]] + + dxy = np.zeros((nx,ny)) + dxz = np.zeros((nx,nz)) + Txy = np.zeros((nx,ny)) + Txz = np.zeros((nx,nz)) + + # write data from individual processor file to + # correct location in concatenated file + nxl = head['dims_local'][0] + nyl = head['dims_local'][1] + nzl = head['dims_local'][2] + xs = head['offset'][0] + ys = head['offset'][1] + zs = head['offset'][2] + + dxy[xs:xs+nxl,ys:ys+nyl] 
+= filein['d_xy'] + dxz[xs:xs+nxl,zs:zs+nzl] += filein['d_xz'] + Txy[xs:xs+nxl,ys:ys+nyl] += filein['T_xy'] + Txz[xs:xs+nxl,zs:zs+nzl] += filein['T_xz'] + + filein.close() + + # write out the new datasets + fileout.create_dataset('d_xy', data=dxy) + fileout.create_dataset('d_xz', data=dxz) + fileout.create_dataset('T_xy', data=Txy) + fileout.create_dataset('T_xz', data=Txz) + + fileout.close() + return + +def slice(n,dnamein,dnameout): + """ + n: integer, output number of file + dnamein: string, directory name of input files, should include '/' at end or leave blank for current directory + dnameout: string, directory name of output files, should include '/' at end or leave blank for current directory + double: optional bool, double precision (float64) if True, single precision (float32) if False + + Reads files of form dnamein{n}_slice.h5.{rank}, looping over rank, outputting to file dnameout{n}_slice.h5. + """ + + # open the output file for writing + fileout = h5py.File(dnameout+str(n)+'_slice.h5', 'w') + + i = -1 + while True: + # loop over files for a given output time + i += 1 + + fileinname = dnamein+str(n)+'_slice.h5.'+str(i) + if not os.path.isfile(fileinname): + break + + if verbose: + print(fileinname) + # open the input file for reading + filein = h5py.File(fileinname,'r') + # read in the header data from the input file + head = filein.attrs + + # Detect DE + DE = 'GE_xy' in filein + SCALAR = 'scalar_xy' in filein + + # if it's the first input file, write the header attributes + # and create the datasets in the output file + if (i == 0): + gamma = head['gamma'] + t = head['t'] + dt = head['dt'] + n_step = head['n_step'] + nx = head['dims'][0] + ny = head['dims'][1] + nz = head['dims'][2] + fileout.attrs['gamma'] = gamma + fileout.attrs['t'] = t + fileout.attrs['dt'] = dt + fileout.attrs['n_step'] = n_step + fileout.attrs['dims'] = [nx, ny, nz] + + d_xy = np.zeros((nx,ny)) + d_xz = np.zeros((nx,nz)) + d_yz = np.zeros((ny,nz)) + mx_xy = np.zeros((nx,ny)) + 
mx_xz = np.zeros((nx,nz)) + mx_yz = np.zeros((ny,nz)) + my_xy = np.zeros((nx,ny)) + my_xz = np.zeros((nx,nz)) + my_yz = np.zeros((ny,nz)) + mz_xy = np.zeros((nx,ny)) + mz_xz = np.zeros((nx,nz)) + mz_yz = np.zeros((ny,nz)) + E_xy = np.zeros((nx,ny)) + E_xz = np.zeros((nx,nz)) + E_yz = np.zeros((ny,nz)) + if DE: + GE_xy = np.zeros((nx,ny)) + GE_xz = np.zeros((nx,nz)) + GE_yz = np.zeros((ny,nz)) + if SCALAR: + scalar_xy = np.zeros((nx,ny)) + scalar_xz = np.zeros((nx,nz)) + scalar_yz = np.zeros((ny,nz)) + + # write data from individual processor file to + # correct location in concatenated file + nxl = head['dims_local'][0] + nyl = head['dims_local'][1] + nzl = head['dims_local'][2] + xs = head['offset'][0] + ys = head['offset'][1] + zs = head['offset'][2] + + d_xy[xs:xs+nxl,ys:ys+nyl] += filein['d_xy'] + d_xz[xs:xs+nxl,zs:zs+nzl] += filein['d_xz'] + d_yz[ys:ys+nyl,zs:zs+nzl] += filein['d_yz'] + mx_xy[xs:xs+nxl,ys:ys+nyl] += filein['mx_xy'] + mx_xz[xs:xs+nxl,zs:zs+nzl] += filein['mx_xz'] + mx_yz[ys:ys+nyl,zs:zs+nzl] += filein['mx_yz'] + my_xy[xs:xs+nxl,ys:ys+nyl] += filein['my_xy'] + my_xz[xs:xs+nxl,zs:zs+nzl] += filein['my_xz'] + my_yz[ys:ys+nyl,zs:zs+nzl] += filein['my_yz'] + mz_xy[xs:xs+nxl,ys:ys+nyl] += filein['mz_xy'] + mz_xz[xs:xs+nxl,zs:zs+nzl] += filein['mz_xz'] + mz_yz[ys:ys+nyl,zs:zs+nzl] += filein['mz_yz'] + E_xy[xs:xs+nxl,ys:ys+nyl] += filein['E_xy'] + E_xz[xs:xs+nxl,zs:zs+nzl] += filein['E_xz'] + E_yz[ys:ys+nyl,zs:zs+nzl] += filein['E_yz'] + if DE: + GE_xy[xs:xs+nxl,ys:ys+nyl] += filein['GE_xy'] + GE_xz[xs:xs+nxl,zs:zs+nzl] += filein['GE_xz'] + GE_yz[ys:ys+nyl,zs:zs+nzl] += filein['GE_yz'] + if SCALAR: + scalar_xy[xs:xs+nxl,ys:ys+nyl] += filein['scalar_xy'] + scalar_xz[xs:xs+nxl,zs:zs+nzl] += filein['scalar_xz'] + scalar_yz[ys:ys+nyl,zs:zs+nzl] += filein['scalar_yz'] + + filein.close() + + # wrte out the new datasets + fileout.create_dataset('d_xy', data=d_xy) + fileout.create_dataset('d_xz', data=d_xz) + fileout.create_dataset('d_yz', data=d_yz) + 
fileout.create_dataset('mx_xy', data=mx_xy) + fileout.create_dataset('mx_xz', data=mx_xz) + fileout.create_dataset('mx_yz', data=mx_yz) + fileout.create_dataset('my_xy', data=my_xy) + fileout.create_dataset('my_xz', data=my_xz) + fileout.create_dataset('my_yz', data=my_yz) + fileout.create_dataset('mz_xy', data=mz_xy) + fileout.create_dataset('mz_xz', data=mz_xz) + fileout.create_dataset('mz_yz', data=mz_yz) + fileout.create_dataset('E_xy', data=E_xy) + fileout.create_dataset('E_xz', data=E_xz) + fileout.create_dataset('E_yz', data=E_yz) + if DE: + fileout.create_dataset('GE_xy', data=GE_xy) + fileout.create_dataset('GE_xz', data=GE_xz) + fileout.create_dataset('GE_yz', data=GE_yz) + if SCALAR: + fileout.create_dataset('scalar_xy', data=scalar_xy) + fileout.create_dataset('scalar_xz', data=scalar_xz) + fileout.create_dataset('scalar_yz', data=scalar_yz) + + fileout.close() + return + +def rot_proj(n,dnamein,dnameout): + """ + n: integer, output number of file + dnamein: string, directory name of input files, should include '/' at end or leave blank for current directory + dnameout: string, directory name of output files, should include '/' at end or leave blank for current directory + double: optional bool, double precision (float64) if True, single precision (float32) if False + + Reads files of form dnamein{n}_rot_proj.h5.{rank}, looping over rank, outputting to file dnameout{n}_rot_proj.h5. 
+ """ + + fileout = h5py.File(dnameout+str(n)+'_rot_proj.h5', 'w') + i = -1 + + while True: + # loop over files for a given output time + i += 1 + fileinname = dnamein+str(n)+'_rot_proj.h5.'+str(i) + if not os.path.isfile(fileinname): + break + + if verbose: + print(fileinname) + + filein = h5py.File(fileinname,'r') + head = filein.attrs + # if it's the first input file, write the header attributes + # and create the arrays to hold the output data + if (i == 0): + + nxr = int(head['nxr']) + nzr = int(head['nzr']) + Lx = head['Lx'] + Lz = head['Lz'] + delta = head['delta'] + theta = head['theta'] + phi = head['phi'] + gamma = head['gamma'] + t = head['t'] + dt = head['dt'] + n_step = head['n_step'] + fileout.attrs['nxr'] = nxr + fileout.attrs['nzr'] = nzr + fileout.attrs['Lx'] = Lx + fileout.attrs['Lz'] = Lz + fileout.attrs['delta'] = delta + fileout.attrs['theta'] = theta + fileout.attrs['phi'] = phi + fileout.attrs['gamma'] = gamma + fileout.attrs['t'] = t + fileout.attrs['dt'] = dt + fileout.attrs['n_step'] = n_step + + d_xzr = np.zeros((nxr, nzr)) + vx_xzr = np.zeros((nxr, nzr)) + vy_xzr = np.zeros((nxr, nzr)) + vz_xzr = np.zeros((nxr, nzr)) + T_xzr = np.zeros((nxr, nzr)) + + # end first input file + + # write data from individual processor file to + # correct location in concatenated file + nx_min = int(head['nx_min']) + nx_max = int(head['nx_max']) + nz_min = int(head['nz_min']) + nz_max = int(head['nz_max']) + + d_xzr[nx_min:nx_max,nz_min:nz_max] += filein['d_xzr'][:] + vx_xzr[nx_min:nx_max,nz_min:nz_max] += filein['vx_xzr'][:] + vy_xzr[nx_min:nx_max,nz_min:nz_max] += filein['vy_xzr'][:] + vz_xzr[nx_min:nx_max,nz_min:nz_max] += filein['vz_xzr'][:] + T_xzr[nx_min:nx_max,nz_min:nz_max] += filein['T_xzr'][:] + + filein.close() + # end while loop + + # write out the new datasets + fileout.create_dataset("d_xzr", data=d_xzr) + fileout.create_dataset("vx_xzr", data=vx_xzr) + fileout.create_dataset("vy_xzr", data=vy_xzr) + fileout.create_dataset("vz_xzr", 
data=vz_xzr) + fileout.create_dataset("T_xzr", data=T_xzr) + + fileout.close() diff --git a/python_scripts/cat_dset_3D.py b/python_scripts/cat_dset_3D.py index 5ac71a612..4cff6dc9a 100755 --- a/python_scripts/cat_dset_3D.py +++ b/python_scripts/cat_dset_3D.py @@ -1,86 +1,141 @@ #!/usr/bin/env python3 -# Example file for concatenating 3D hdf5 datasets +""" +Python script for concatenating 3D hdf5 datasets. Includes a CLI for concatenating Cholla HDF5 datasets and can be +imported into other scripts where the `concat_3d` function can be used to concatenate the datasets. + +Generally the easiest way to import this script is to add the `python_scripts` directory to your python path in your +script like this: +``` +import sys +sys.path.append('/PATH/TO/CHOLLA/python_scripts') +import cat_dset_3D +``` +""" import h5py import numpy as np +import argparse +import pathlib + +def main(): + """This function handles the CLI argument parsing and is only intended to be used when this script is invoked from the + command line. If you're importing this file then use the `concat_3d` function directly. 
+ """ + # Argument handling + cli = argparse.ArgumentParser() + # Required Arguments + cli.add_argument('-s', '--start_num', type=int, required=True, help='The first output step to concatenate') + cli.add_argument('-e', '--end_num', type=int, required=True, help='The last output step to concatenate') + cli.add_argument('-n', '--num_processes', type=int, required=True, help='The number of processes that were used') + # Optional Arguments + cli.add_argument('-i', '--input_dir', type=pathlib.Path, default=pathlib.Path.cwd(), help='The input directory.') + cli.add_argument('-o', '--output_dir', type=pathlib.Path, default=pathlib.Path.cwd(), help='The output directory.') + args = cli.parse_args() + + # Perform the concatenation + concat_3d(start_num=args.start_num, + end_num=args.end_num, + num_processes=args.num_processes, + input_dir=args.input_dir, + output_dir=args.output_dir) + + +# ====================================================================================================================== +def concat_3d(start_num: int, + end_num: int, + num_processes: int, + input_dir: pathlib.Path = pathlib.Path.cwd(), + output_dir: pathlib.Path = pathlib.Path.cwd()): + """Concatenate 3D HDF5 Cholla datasets. i.e. take the single files generated per process and concatenate them into a + single, large file. All outputs from start_num to end_num will be concatenated. + + Args: + start_num (int): The first output step to concatenate + end_num (int): The last output step to concatenate + num_processes (int): The number of processes that were used + input_dir (pathlib.Path, optional): The input directory. Defaults to pathlib.Path.cwd(). + output_dir (pathlib.Path, optional): The output directory. Defaults to pathlib.Path.cwd(). 
+ """ + + # Error checking + assert start_num >= 0, 'start_num must be greater than or equal to 0' + assert end_num >= 0, 'end_num must be greater than or equal to 0' + assert start_num <= end_num, 'end_num should be greater than or equal to start_num' + assert num_processes > 1, 'num_processes must be greater than 1' + + # loop over outputs + for n in range(start_num, end_num+1): -ns = 0 -ne = 0 -n_proc = 16 # number of processors that did the calculations -istart = 0*n_proc -iend = 1*n_proc -dnamein = './hdf5/raw/' -dnameout = './hdf5/' - -# loop over outputs -for n in range(ns, ne+1): - - # loop over files for a given output - for i in range(istart, iend): - - # open the output file for writing (don't overwrite if exists) - fileout = h5py.File(dnameout+str(n)+'.h5', 'a') - # open the input file for reading - filein = h5py.File(dnamein+str(n)+'.h5.'+str(i), 'r') - # read in the header data from the input file - head = filein.attrs - - # if it's the first input file, write the header attributes - # and create the datasets in the output file - if (i == 0): - nx = head['dims'][0] - ny = head['dims'][1] - nz = head['dims'][2] - fileout.attrs['dims'] = [nx, ny, nz] - fileout.attrs['gamma'] = [head['gamma'][0]] - fileout.attrs['t'] = [head['t'][0]] - fileout.attrs['dt'] = [head['dt'][0]] - fileout.attrs['n_step'] = [head['n_step'][0]] - - units = ['time_unit', 'mass_unit', 'length_unit', 'energy_unit', 'velocity_unit', 'density_unit'] - for unit in units: - fileout.attrs[unit] = [head[unit][0]] - - d = fileout.create_dataset("density", (nx, ny, nz), chunks=True, dtype=filein['density'].dtype) - mx = fileout.create_dataset("momentum_x", (nx, ny, nz), chunks=True, dtype=filein['momentum_x'].dtype) - my = fileout.create_dataset("momentum_y", (nx, ny, nz), chunks=True, dtype=filein['momentum_y'].dtype) - mz = fileout.create_dataset("momentum_z", (nx, ny, nz), chunks=True, dtype=filein['momentum_z'].dtype) - E = fileout.create_dataset("Energy", (nx, ny, nz), chunks=True, 
dtype=filein['Energy'].dtype) + # loop over files for a given output + for i in range(0, num_processes): + + # open the output file for writing (don't overwrite if exists) + fileout = h5py.File(output_dir / f'{n}.h5', 'a') + # open the input file for reading + filein = h5py.File(input_dir / f'{n}.h5.{i}', 'r') + # read in the header data from the input file + head = filein.attrs + + # if it's the first input file, write the header attributes + # and create the datasets in the output file + if (i == 0): + nx = head['dims'][0] + ny = head['dims'][1] + nz = head['dims'][2] + fileout.attrs['dims'] = [nx, ny, nz] + fileout.attrs['gamma'] = [head['gamma'][0]] + fileout.attrs['t'] = [head['t'][0]] + fileout.attrs['dt'] = [head['dt'][0]] + fileout.attrs['n_step'] = [head['n_step'][0]] + + units = ['time_unit', 'mass_unit', 'length_unit', 'energy_unit', 'velocity_unit', 'density_unit'] + for unit in units: + fileout.attrs[unit] = [head[unit][0]] + + d = fileout.create_dataset("density", (nx, ny, nz), chunks=True, dtype=filein['density'].dtype) + mx = fileout.create_dataset("momentum_x", (nx, ny, nz), chunks=True, dtype=filein['momentum_x'].dtype) + my = fileout.create_dataset("momentum_y", (nx, ny, nz), chunks=True, dtype=filein['momentum_y'].dtype) + mz = fileout.create_dataset("momentum_z", (nx, ny, nz), chunks=True, dtype=filein['momentum_z'].dtype) + E = fileout.create_dataset("Energy", (nx, ny, nz), chunks=True, dtype=filein['Energy'].dtype) + try: + GE = fileout.create_dataset("GasEnergy", (nx, ny, nz), chunks=True, dtype=filein['GasEnergy'].dtype) + except KeyError: + print('No Dual energy data present'); + try: + bx = fileout.create_dataset("magnetic_x", (nx+1, ny, nz), chunks=True, dtype=filein['magnetic_x'].dtype) + by = fileout.create_dataset("magnetic_y", (nx, ny+1, nz), chunks=True, dtype=filein['magnetic_y'].dtype) + bz = fileout.create_dataset("magnetic_z", (nx, ny, nz+1), chunks=True, dtype=filein['magnetic_z'].dtype) + except KeyError: + print('No magnetic 
field data present'); + + # write data from individual processor file to + # correct location in concatenated file + nxl = head['dims_local'][0] + nyl = head['dims_local'][1] + nzl = head['dims_local'][2] + xs = head['offset'][0] + ys = head['offset'][1] + zs = head['offset'][2] + fileout['density'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['density'] + fileout['momentum_x'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_x'] + fileout['momentum_y'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_y'] + fileout['momentum_z'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_z'] + fileout['Energy'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['Energy'] try: - GE = fileout.create_dataset("GasEnergy", (nx, ny, nz), chunks=True, dtype=filein['GasEnergy'].dtype) + fileout['GasEnergy'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['GasEnergy'] except KeyError: - print('No Dual energy data present'); + print('No Dual energy data present'); try: - bx = fileout.create_dataset("magnetic_x", (nx+1, ny, nz), chunks=True, dtype=filein['magnetic_x'].dtype) - by = fileout.create_dataset("magnetic_y", (nx, ny+1, nz), chunks=True, dtype=filein['magnetic_y'].dtype) - bz = fileout.create_dataset("magnetic_z", (nx, ny, nz+1), chunks=True, dtype=filein['magnetic_z'].dtype) + fileout['magnetic_x'][xs:xs+nxl+1, ys:ys+nyl, zs:zs+nzl] = filein['magnetic_x'] + fileout['magnetic_y'][xs:xs+nxl, ys:ys+nyl+1, zs:zs+nzl] = filein['magnetic_y'] + fileout['magnetic_z'][xs:xs+nxl, ys:ys+nyl, zs:zs+nzl+1] = filein['magnetic_z'] except KeyError: - print('No magnetic field data present'); - - # write data from individual processor file to - # correct location in concatenated file - nxl = head['dims_local'][0] - nyl = head['dims_local'][1] - nzl = head['dims_local'][2] - xs = head['offset'][0] - ys = head['offset'][1] - zs = head['offset'][2] - fileout['density'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['density'] - fileout['momentum_x'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_x'] - 
fileout['momentum_y'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_y'] - fileout['momentum_z'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_z'] - fileout['Energy'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['Energy'] - try: - fileout['GasEnergy'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['GasEnergy'] - except KeyError: - print('No Dual energy data present'); - try: - fileout['magnetic_x'][xs:xs+nxl+1, ys:ys+nyl, zs:zs+nzl] = filein['magnetic_x'] - fileout['magnetic_y'][xs:xs+nxl, ys:ys+nyl+1, zs:zs+nzl] = filein['magnetic_y'] - fileout['magnetic_z'][xs:xs+nxl, ys:ys+nyl, zs:zs+nzl+1] = filein['magnetic_z'] - except KeyError: - print('No magnetic field data present'); - - filein.close() - - fileout.close() + print('No magnetic field data present'); + + filein.close() + + fileout.close() +# ====================================================================================================================== + +if __name__ == '__main__': + main() diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu index b0969c455..bbecf1935 100644 --- a/src/dust/dust_cuda.cu +++ b/src/dust/dust_cuda.cu @@ -44,18 +44,18 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g // get a global thread ID int blockId = blockIdx.x + blockIdx.y * gridDim.x; int id = threadIdx.x + blockId * blockDim.x; - int zid = id / (nx * ny); - int yid = (id - zid * nx * ny) / nx; - int xid = id - zid * nx * ny - yid * nx; + int id_z = id / (nx * ny); + int id_y = (id - id_z * nx * ny) / nx; + int id_x = id - id_z * nx * ny - id_y * nx; // define physics variables - Real d_gas, d_dust; // fluid mass densities - Real n; // gas number density - Real mu = 0.6; // mean molecular weight - Real T, E, P; // temperature, energy, pressure - Real vx, vy, vz; // velocities + Real density_gas, density_dust; // fluid mass densities + Real number_density; // gas number density + Real mu = 0.6; // mean molecular weight + Real temperature, energy, pressure; // temperature, energy, 
pressure + Real velocity_x, velocity_y, velocity_z; // velocities #ifdef DE - Real ge; + Real energy_gas; #endif // DE // define integration variables @@ -64,82 +64,84 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g Real dd_max = 0.01; // allowable percentage of dust density increase Real dt_sub; // refined timestep - if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { + if (id_x >= is && id_x < ie && id_y >= js && id_y < je && id_z >= ks && id_z < ke) { // get conserved quanitites - d_gas = dev_conserved[id + n_cells * grid_enum::density]; - d_dust = dev_conserved[id + n_cells * grid_enum::dust_density]; - E = dev_conserved[id + n_cells * grid_enum::Energy]; + density_gas = dev_conserved[id + n_cells * grid_enum::density]; + density_dust = dev_conserved[id + n_cells * grid_enum::dust_density]; + energy = dev_conserved[id + n_cells * grid_enum::Energy]; // convert mass density to number density - n = d_gas * DENSITY_UNIT / (mu * MP); + number_density = density_gas * DENSITY_UNIT / (mu * MP); - if (E < 0.0 || E != E) { + if (energy < 0.0 || energy != energy) { return; } // get conserved quanitites - vx = dev_conserved[id + n_cells * grid_enum::momentum_x] / d_gas; - vy = dev_conserved[id + n_cells * grid_enum::momentum_y] / d_gas; - vz = dev_conserved[id + n_cells * grid_enum::momentum_z] / d_gas; + velocity_x = dev_conserved[id + n_cells * grid_enum::momentum_x] / density_gas; + velocity_y = dev_conserved[id + n_cells * grid_enum::momentum_y] / density_gas; + velocity_z = dev_conserved[id + n_cells * grid_enum::momentum_z] / density_gas; #ifdef DE - ge = dev_conserved[id + n_cells * grid_enum::GasEnergy] / d_gas; - ge = fmax(ge, (Real)TINY_NUMBER); + energy_gas = dev_conserved[id + n_cells * grid_enum::GasEnergy] / density_gas; + energy_gas = fmax(energy_gas, (Real)TINY_NUMBER); #endif // DE // calculate physical quantities - P = hydro_utilities::Calc_Pressure_Primitive(E, d_gas, vx, vy, vz, gamma); + pressure 
= hydro_utilities::Calc_Pressure_Primitive(energy, density_gas, velocity_x, velocity_y, velocity_z, gamma); - Real T_init; - T_init = hydro_utilities::Calc_Temp(P, n); + Real temperature_init; + temperature_init = hydro_utilities::Calc_Temp(pressure, number_density); #ifdef DE - T_init = hydro_utilities::Calc_Temp_DE(d_gas, ge, gamma, n); + temperature_init = hydro_utilities::Calc_Temp_DE(density_gas, energy_gas, gamma, number_density); #endif // DE // if dual energy is turned on use temp from total internal energy - T = T_init; + temperature = temperature_init; - Real tau_sp = calc_tau_sp(n, T) / TIME_UNIT; // sputtering timescale, kyr (sim units) + Real tau_sp = + Calc_Sputtering_Timescale(number_density, temperature) / TIME_UNIT; // sputtering timescale, kyr (sim units) - dd_dt = calc_dd_dt(d_dust, tau_sp); // rate of change in dust density at current timestep - dd = dd_dt * dt; // change in dust density at current timestep + dd_dt = Calc_dd_dt(density_dust, tau_sp); // rate of change in dust density at current timestep + dd = dd_dt * dt; // change in dust density at current timestep // ensure that dust density is not changing too rapidly - while (dd / d_dust > dd_max) { - dt_sub = dd_max * d_dust / dd_dt; - d_dust += dt_sub * dd_dt; + while (dd / density_dust > dd_max) { + dt_sub = dd_max * density_dust / dd_dt; + density_dust += dt_sub * dd_dt; dt -= dt_sub; - dd_dt = calc_dd_dt(d_dust, tau_sp); + dd_dt = Calc_dd_dt(density_dust, tau_sp); dd = dt * dd_dt; } // update dust density - d_dust += dd; + density_dust += dd; - dev_conserved[id + n_cells * grid_enum::dust_density] = d_dust; + dev_conserved[id + n_cells * grid_enum::dust_density] = density_dust; #ifdef DE - dev_conserved[id + n_cells * grid_enum::GasEnergy] = d_dust * ge; + dev_conserved[id + n_cells * grid_enum::GasEnergy] = density_dust * energy_gas; #endif } } -// McKinnon et al. (2017) -__device__ __host__ Real calc_tau_sp(Real n, Real T) +// McKinnon et al. 
(2017) sputtering timescale +__device__ __host__ Real Calc_Sputtering_Timescale(Real number_density, Real temperature) { - Real YR_IN_S = 3.154e7; - Real a1 = 1; // dust grain size in units of 0.1 micrometers - Real d0 = n / (6e-4); // gas density in units of 10^-27 g/cm^3 - Real T_0 = 2e6; // K - Real omega = 2.5; - Real A = 0.17e9 * YR_IN_S; // 0.17 Gyr in s + Real grain_radius = 1; // dust grain size in units of 0.1 micrometers + Real temperature_0 = 2e6; // temp above which the sputtering rate is ~constant in K + Real omega = 2.5; // controls the low-temperature scaling of the sputtering rate + Real A = 5.3618e15; // 0.17 Gyr in s - Real tau_sp = A * (a1 / d0) * (pow(T_0 / T, omega) + 1); // sputtering timescale, s + number_density /= (6e-4); // gas number density in units of 10^-27 g/cm^3 + + // sputtering timescale, s + Real tau_sp = A * (grain_radius / number_density) * (pow(temperature_0 / temperature, omega) + 1); return tau_sp; } -// McKinnon et al. (2017) -__device__ __host__ Real calc_dd_dt(Real d_dust, Real tau_sp) { return -d_dust / (tau_sp / 3); } +// McKinnon et al. (2017) sputtering model +__device__ __host__ Real Calc_dd_dt(Real density_dust, Real tau_sp) { return -density_dust / (tau_sp / 3); } #endif // DUST diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h index aab4c7db4..fb72007ac 100644 --- a/src/dust/dust_cuda.h +++ b/src/dust/dust_cuda.h @@ -48,22 +48,22 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g /*! * \brief Compute the sputtering timescale based on a cell's density and temperature. * - * \param[in] n Gas number density in cm^-3 - * \param[in] T Gas temperature in K + * \param[in] number_density Gas number density in cm^-3 + * \param[in] temperature Gas temperature in K * * \return Real Sputtering timescale in seconds (McKinnon et al. 2017) */ -__device__ __host__ Real calc_tau_sp(Real n, Real T); +__device__ __host__ Real Calc_Sputtering_Timescale(Real number_density, Real temperature); /*! 
* \brief Compute the rate of change in dust density based on the current dust density and sputtering timescale. * - * \param[in] d_dust Dust mass density in M_sun/kpc^3 + * \param[in] density_dust Dust mass density in M_sun/kpc^3 * \param[in] tau_sp Sputtering timescale in kyr * * \return Real Dust density rate of change (McKinnon et al. 2017) */ -__device__ __host__ Real calc_dd_dt(Real d_dust, Real tau_sp); +__device__ __host__ Real Calc_dd_dt(Real density_dust, Real tau_sp); #endif // DUST_CUDA_H #endif // DUST \ No newline at end of file diff --git a/src/dust/dust_cuda_tests.cpp b/src/dust/dust_cuda_tests.cpp index 03bd8111f..8790c1f4a 100644 --- a/src/dust/dust_cuda_tests.cpp +++ b/src/dust/dust_cuda_tests.cpp @@ -25,13 +25,12 @@ TEST(tDUSTTestSputteringTimescale, CorrectInputExpectCorrectOutput) // test suite name, test name { // Parameters - Real YR_IN_S = 3.154e7; - Real const k_test_n = 1; - Real const k_test_T = pow(10, 5.0); + Real YR_IN_S = 3.154e7; + Real const k_test_number_density = 1; + Real const k_test_temperature = pow(10, 5.0); + Real const k_fiducial_num = 182565146.96398282; - Real const k_fiducial_num = 182565146.96398282; - - Real test_num = calc_tau_sp(k_test_n, k_test_T) / YR_IN_S; // yr + Real test_num = Calc_Sputtering_Timescale(k_test_number_density, k_test_temperature) / YR_IN_S; // yr double abs_diff; int64_t ulps_diff; @@ -50,13 +49,12 @@ TEST(tDUSTTestSputteringGrowthRate, CorrectInputExpectCorrectOutput) // test suite name, test name { // Parameters - Real YR_IN_S = 3.154e7; - Real const k_test_tau_sp = 0.17e6; // kyr - Real const k_test_d_dust = 1e-26 / DENSITY_UNIT; // sim units - - Real const k_fiducial_num = -2.6073835738056728; + Real YR_IN_S = 3.154e7; + Real const k_test_tau_sp = 0.17e6; // kyr + Real const k_test_density_dust = 1e-26 / DENSITY_UNIT; // sim units + Real const k_fiducial_num = -2.6073835738056728; - Real test_num = calc_dd_dt(k_test_d_dust, k_test_tau_sp); + Real test_num = Calc_dd_dt(k_test_density_dust, 
k_test_tau_sp); double abs_diff; int64_t ulps_diff; diff --git a/src/global/global.cpp b/src/global/global.cpp index a47f9e78b..a4c697d3c 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -97,6 +97,7 @@ char *trim(char *s) return s; } +// NOLINTNEXTLINE(cert-err58-cpp) const std::set optionalParams = { "flag_delta", "ddelta_dt", "n_delta", "Lz", "Lx", "phi", "theta", "delta", "nzr", "nxr", "H0", "Omega_M", "Omega_L", "Init_redshift", @@ -107,8 +108,9 @@ const std::set optionalParams = { * "warnings" in output. */ int is_param_valid(const char *param_name) { - for (auto it = optionalParams.begin(); it != optionalParams.end(); ++it) { - if (strcmp(param_name, *it) == 0) { + // for (auto optionalParam = optionalParams.begin(); optionalParam != optionalParams.end(); ++optionalParam) { + for (const auto *optionalParam : optionalParams) { + if (strcmp(param_name, optionalParam) == 0) { return 1; } } @@ -363,6 +365,8 @@ void parse_param(char *name, char *value, struct parameters *parms) parms->radius = atof(value); } else if (strcmp(name, "P_blast") == 0) { parms->P_blast = atof(value); + } else if (strcmp(name, "wave_length") == 0) { + parms->wave_length = atof(value); #ifdef PARTICLES } else if (strcmp(name, "prng_seed") == 0) { parms->prng_seed = atoi(value); diff --git a/src/global/global.h b/src/global/global.h index 17e7d7b73..b037c931d 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -265,6 +265,7 @@ struct parameters { Real polarization = 0; Real radius = 0; Real P_blast = 0; + Real wave_length = 1.0; #ifdef PARTICLES // The random seed for particle simulations. With the default of 0 then a // machine dependent seed will be generated. 
diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index 1f4a08f7f..70eb749c9 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -200,8 +200,8 @@ Real Grav3D::Get_Average_Density() Get_OMP_Grid_Indxs(nz_local, n_omp_procs, omp_id, &g_start, &g_end); dens_sum_all[omp_id] = Get_Average_Density_function(g_start, g_end); } - for (int i = 0; i < N_OMP_THREADS; i++) { - dens_sum += dens_sum_all[i]; + for (Real dens_sum_all_element : dens_sum_all) { + dens_sum += dens_sum_all_element; } #endif diff --git a/src/gravity/paris/PoissonZero3DBlockedGPU.cu b/src/gravity/paris/PoissonZero3DBlockedGPU.cu index 4b9e74e4c..5ad31406e 100644 --- a/src/gravity/paris/PoissonZero3DBlockedGPU.cu +++ b/src/gravity/paris/PoissonZero3DBlockedGPU.cu @@ -131,9 +131,9 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou assert(density); assert(potential); - double *const ua = potential; - double *const ub = density; - cufftDoubleComplex *const uc = reinterpret_cast(ub); + double *const ua = potential; + double *const ub = density; + auto *const uc = reinterpret_cast(ub); const double ddi = ddi_; const double ddj = ddj_; diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index 50c55126d..eca473fdb 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -151,12 +151,6 @@ int Grid3D::Check_Custom_Boundary(int *flags, struct parameters P) } for (int i = 0; i < 6; i++) { - if (!((flags[i] >= 0) && (flags[i] <= 5))) { - chprintf( - "Invalid boundary conditions. 
Must select between 1 (periodic), 2 " - "(reflective), 3 (transmissive), 4 (custom), 5 (mpi).\n"); - chexit(-1); - } if (flags[i] == 4) { /*custom boundaries*/ return 1; diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index dac2795c8..9010da354 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -72,14 +72,8 @@ Grid3D::Grid3D(void) #endif #ifdef MHD - // Set the number of ghost cells high enough for MHD - if (H.n_ghost < 3) { - chprintf( - "Insufficient number of ghost cells for MHD. H.n_ghost was %i, setting " - "to 3.\n", - H.n_ghost); - H.n_ghost = 3; - } + // Set the number of ghost cells high enough for MHD. MHD needs one extra for the left most face + H.n_ghost++; #endif // MHD } @@ -271,11 +265,7 @@ void Grid3D::Initialize(struct parameters *P) #endif #ifdef COSMOLOGY - if (P->scale_outputs_file[0] == '\0') { - H.OUTPUT_SCALE_FACOR = false; - } else { - H.OUTPUT_SCALE_FACOR = true; - } + H.OUTPUT_SCALE_FACOR = not P->scale_outputs_file[0] == '\0'; #endif H.Output_Initial = true; @@ -300,6 +290,9 @@ void Grid3D::AllocateMemory(void) #ifdef BASIC_SCALAR C.basic_scalar = &(C.host[H.n_cells * grid_enum::basic_scalar]); #endif + #ifdef DUST + C.dust_density = &(C.host[H.n_cells * grid_enum::dust_density]); + #endif #endif // SCALAR #ifdef MHD C.magnetic_x = &(C.host[grid_enum::magnetic_x * H.n_cells]); @@ -323,6 +316,9 @@ void Grid3D::AllocateMemory(void) #ifdef BASIC_SCALAR C.d_basic_scalar = &(C.device[H.n_cells * grid_enum::basic_scalar]); #endif + #ifdef DUST + C.d_dust_density = &(C.device[H.n_cells * grid_enum::dust_density]); + #endif #endif // SCALAR #ifdef MHD C.d_magnetic_x = &(C.device[(grid_enum::magnetic_x)*H.n_cells]); @@ -426,6 +422,10 @@ Real Grid3D::Update_Grid(void) U_floor /= Cosmo.v_0_gas * Cosmo.v_0_gas / Cosmo.current_a / Cosmo.current_a; #endif +#ifdef CPU_TIME + Timer.Hydro_Integrator.Start(); +#endif // CPU_TIME + // Run the hydro integrator on the grid if (H.nx > 1 && H.ny == 1 && H.nz == 1) // 1D { @@ -468,11 +468,22 @@ 
Real Grid3D::Update_Grid(void) chexit(-1); } +#ifdef CPU_TIME + Timer.Hydro_Integrator.End(); +#endif // CPU_TIME + #ifdef CUDA #ifdef COOLING_GPU + #ifdef CPU_TIME + Timer.Cooling_GPU.Start(); + #endif // ==Apply Cooling from cooling/cooling_cuda.h== Cooling_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama); + #ifdef CPU_TIME + Timer.Cooling_GPU.End(); + #endif + #endif // COOLING_GPU #ifdef DUST @@ -558,11 +569,11 @@ Real Grid3D::Update_Hydro_Grid() #ifdef COOLING_GRACKLE #ifdef CPU_TIME - Timer.Cooling.Start(); + Timer.Cooling_Grackle.Start(); #endif // CPU_TIME Do_Cooling_Step_Grackle(); #ifdef CPU_TIME - Timer.Cooling.End(); + Timer.Cooling_Grackle.End(); #endif // CPU_TIME #endif // COOLING_GRACKLE diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index 6e9b7e5bc..e679415d9 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -360,6 +360,12 @@ class Grid3D */ Real *basic_scalar; #endif + #ifdef DUST + /*! \var dust_density + * \brief Array containing the dust densities. + */ + Real *dust_density; + #endif #endif // SCALAR #ifdef MHD @@ -405,8 +411,8 @@ class Grid3D /*! pointer to conserved variable on device */ Real *device; - Real *d_density, *d_momentum_x, *d_momentum_y, *d_momentum_z, *d_Energy, *d_scalar, *d_basic_scalar, *d_magnetic_x, - *d_magnetic_y, *d_magnetic_z, *d_GasEnergy; + Real *d_density, *d_momentum_x, *d_momentum_y, *d_momentum_z, *d_Energy, *d_scalar, *d_basic_scalar, + *d_dust_density, *d_magnetic_x, *d_magnetic_y, *d_magnetic_z, *d_GasEnergy; /*! pointer to gravitational potential on device */ Real *d_Grav_potential; @@ -529,54 +535,41 @@ class Grid3D * \brief Free the memory for the density array. */ void FreeMemory(void); - /*! \fn void Constant(Real rho, Real vx, Real vy, Real vz, Real P) - * \brief Constant gas properties. */ - void Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real By, Real Bz); + /*! + * \brief Constant gas properties. + * + * \param[in] P the parameters struct. 
+ */ + void Constant(parameters const &P); - /*! \fn void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) - * \brief Sine wave perturbation. */ - void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A); + /*! + * \brief Sine wave perturbation. + * + * \param[in] P the parameters struct. + */ + void Sound_Wave(parameters const &P); /*! * \brief Initialize the grid with a simple linear wave. * - * \param[in] rho The background density - * \param[in] vx The background velocity in the X-direction - * \param[in] vy The background velocity in the Y-direction - * \param[in] vz The background velocity in the Z-direction - * \param[in] P The background pressure - * \param[in] A The amplitude of the wave - * \param[in] Bx The background magnetic field in the X-direction - * \param[in] By The background magnetic field in the Y-direction - * \param[in] Bz The background magnetic field in the Z-direction - * \param[in] rEigenVec_rho The right eigenvector component for the density - * \param[in] rEigenVec_MomentumX The right eigenvector component for the - * velocity in the X-direction \param[in] rEigenVec_MomentumY The right - * eigenvector component for the velocity in the Y-direction \param[in] - * rEigenVec_MomentumZ The right eigenvector component for the velocity in the - * Z-direction \param[in] rEigenVec_E The right eigenvector component for the - * energy \param[in] rEigenVec_Bx The right eigenvector component for the - * magnetic field in the X-direction \param[in] rEigenVec_By The right - * eigenvector component for the magnetic field in the Y-direction \param[in] - * rEigenVec_Bz The right eigenvector component for the magnetic field in the - * Z-direction \param[in] pitch The pitch angle of the linear wave \param[in] - * yaw The yaw angle of the linear wave + * \param[in] P the parameters struct. + */ + void Linear_Wave(parameters const &P); + + /*! 
+ * \brief Square wave density perturbation with amplitude A*rho in pressure + * equilibrium. + * + * \param[in] P the parameters struct. + */ + void Square_Wave(parameters const &P); + + /*! + * \brief Initialize the grid with a Riemann problem. + * + * \param[in] P the parameters struct. */ - void Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, Real Bx, Real By, Real Bz, Real rEigenVec_rho, - Real rEigenVec_MomentumX, Real rEigenVec_MomentumY, Real rEigenVec_MomentumZ, Real rEigenVec_E, - Real rEigenVec_Bx, Real rEigenVec_By, Real rEigenVec_Bz, Real pitch, Real yaw); - - /*! \fn void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) - * \brief Square wave density perturbation with amplitude A*rho in pressure - * equilibrium. */ - void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A); - - /*! \fn void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, - Real Bx_l, Real By_l, Real Bz_l, Real rho_r, Real vx_r, Real vy_r, Real vz_r, - Real P_r, Real Bx_r, Real By_r, Real Bz_r, Real diaph) - * \brief Initialize the grid with a Riemann problem. */ - void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, Real By_l, Real Bz_l, Real rho_r, - Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, Real diaph); + void Riemann(parameters const &P); /*! \fn void Shu_Osher() * \brief Initialize the grid with the Shu-Osher shock tube problem. 
See diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 86f478faf..38967e4b7 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -19,6 +19,7 @@ #include "../io/io.h" #include "../mpi/mpi_routines.h" #include "../utils/error_handling.h" +#include "../utils/hydro_utilities.h" #include "../utils/math_utilities.h" #include "../utils/mhd_utilities.h" @@ -31,18 +32,15 @@ void Grid3D::Set_Initial_Conditions(parameters P) Set_Gammas(P.gamma); if (strcmp(P.init, "Constant") == 0) { - Constant(P.rho, P.vx, P.vy, P.vz, P.P, P.Bx, P.By, P.Bz); + Constant(P); } else if (strcmp(P.init, "Sound_Wave") == 0) { - Sound_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A); + Sound_Wave(P); } else if (strcmp(P.init, "Linear_Wave") == 0) { - Linear_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A, P.Bx, P.By, P.Bz, P.rEigenVec_rho, P.rEigenVec_MomentumX, - P.rEigenVec_MomentumY, P.rEigenVec_MomentumZ, P.rEigenVec_E, P.rEigenVec_Bx, P.rEigenVec_By, - P.rEigenVec_Bz, P.pitch, P.yaw); + Linear_Wave(P); } else if (strcmp(P.init, "Square_Wave") == 0) { - Square_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A); + Square_Wave(P); } else if (strcmp(P.init, "Riemann") == 0) { - Riemann(P.rho_l, P.vx_l, P.vy_l, P.vz_l, P.P_l, P.Bx_l, P.By_l, P.Bz_l, P.rho_r, P.vx_r, P.vy_r, P.vz_r, P.P_r, - P.Bx_r, P.By_r, P.Bz_r, P.diaph); + Riemann(P); } else if (strcmp(P.init, "Shu_Osher") == 0) { Shu_Osher(); } else if (strcmp(P.init, "Blast_1D") == 0) { @@ -179,7 +177,7 @@ void Grid3D::Set_Domain_Properties(struct parameters P) /*! \fn void Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real * By, Real Bz) \brief Constant gas properties. 
*/ -void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real By, Real Bz) +void Grid3D::Constant(parameters const &P) { int i, j, k, id; int istart, jstart, kstart, iend, jend, kend; @@ -214,26 +212,26 @@ void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real // Set the magnetic field including the rightmost ghost cell on the // left side which is really the left face of the first grid cell #ifdef MHD - C.magnetic_x[id] = Bx; - C.magnetic_y[id] = By; - C.magnetic_z[id] = Bz; + C.magnetic_x[id] = P.Bx; + C.magnetic_y[id] = P.By; + C.magnetic_z[id] = P.Bz; #endif // MHD // Exclude the rightmost ghost cell on the "left" side if ((k >= kstart) and (j >= jstart) and (i >= istart)) { // set constant initial states - C.density[id] = rho; - C.momentum_x[id] = rho * vx; - C.momentum_y[id] = rho * vy; - C.momentum_z[id] = rho * vz; - C.Energy[id] = P / (gama - 1.0) + 0.5 * rho * (vx * vx + vy * vy + vz * vz); + C.density[id] = P.rho; + C.momentum_x[id] = P.rho * P.vx; + C.momentum_y[id] = P.rho * P.vy; + C.momentum_z[id] = P.rho * P.vz; + C.Energy[id] = P.P / (gama - 1.0) + 0.5 * P.rho * (P.vx * P.vx + P.vy * P.vy + P.vz * P.vz); #ifdef DE - C.GasEnergy[id] = P / (gama - 1.0); + C.GasEnergy[id] = P.P / (gama - 1.0); #endif // DE } if (i == istart && j == jstart && k == kstart) { - n = rho * DENSITY_UNIT / (mu * MP); - T = P * PRESSURE_UNIT / (n * KB); + n = P.rho * DENSITY_UNIT / (mu * MP); + T = P.P * PRESSURE_UNIT / (n * KB); printf("Initial n = %e, T = %e\n", n, T); } } @@ -243,7 +241,7 @@ void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real /*! \fn void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) * \brief Sine wave perturbation. 
*/ -void Grid3D::Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) +void Grid3D::Sound_Wave(parameters const &P) { int i, j, k, id; int istart, jstart, kstart, iend, jend, kend; @@ -277,22 +275,22 @@ void Grid3D::Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) Get_Position(i, j, k, &x_pos, &y_pos, &z_pos); // set constant initial states - C.density[id] = rho; - C.momentum_x[id] = rho * vx; - C.momentum_y[id] = rho * vy; - C.momentum_z[id] = rho * vz; - C.Energy[id] = P / (gama - 1.0) + 0.5 * rho * (vx * vx + vy * vy + vz * vz); + C.density[id] = P.rho; + C.momentum_x[id] = P.rho * P.vx; + C.momentum_y[id] = P.rho * P.vy; + C.momentum_z[id] = P.rho * P.vz; + C.Energy[id] = P.P / (gama - 1.0) + 0.5 * P.rho * (P.vx * P.vx + P.vy * P.vy + P.vz * P.vz); // add small-amplitude perturbations - C.density[id] = C.density[id] + A * sin(2.0 * M_PI * x_pos); - C.momentum_x[id] = C.momentum_x[id] + A * sin(2.0 * M_PI * x_pos); - C.momentum_y[id] = C.momentum_y[id] + A * sin(2.0 * M_PI * x_pos); - C.momentum_z[id] = C.momentum_z[id] + A * sin(2.0 * M_PI * x_pos); - C.Energy[id] = C.Energy[id] + A * (1.5) * sin(2 * M_PI * x_pos); + C.density[id] = C.density[id] + P.A * sin(2.0 * M_PI * x_pos); + C.momentum_x[id] = C.momentum_x[id] + P.A * sin(2.0 * M_PI * x_pos); + C.momentum_y[id] = C.momentum_y[id] + P.A * sin(2.0 * M_PI * x_pos); + C.momentum_z[id] = C.momentum_z[id] + P.A * sin(2.0 * M_PI * x_pos); + C.Energy[id] = C.Energy[id] + P.A * (1.5) * sin(2 * M_PI * x_pos); #ifdef DE - C.GasEnergy[id] = P / (gama - 1.0); + C.GasEnergy[id] = P.P / (gama - 1.0); #endif // DE #ifdef DE - C.GasEnergy[id] = P / (gama - 1.0); + C.GasEnergy[id] = P.P / (gama - 1.0); #endif // DE } } @@ -301,48 +299,132 @@ void Grid3D::Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) /*! \fn void Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) * \brief Sine wave perturbation. 
*/ -void Grid3D::Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, Real Bx, Real By, Real Bz, - Real rEigenVec_rho, Real rEigenVec_MomentumX, Real rEigenVec_MomentumY, - Real rEigenVec_MomentumZ, Real rEigenVec_E, Real rEigenVec_Bx, Real rEigenVec_By, - Real rEigenVec_Bz, Real pitch, Real yaw) +void Grid3D::Linear_Wave(parameters const &P) { - auto [stagger, junk1, junk2] = math_utils::rotateCoords(H.dx / 2, H.dy / 2, H.dz / 2, pitch, yaw); + // Compute any test parameters needed + // ================================== + // Angles + Real const sin_yaw = std::sin(P.yaw); + Real const cos_yaw = std::cos(P.yaw); + Real const sin_pitch = std::sin(P.pitch); + Real const cos_pitch = std::cos(P.pitch); + + Real const wavenumber = 2.0 * M_PI / P.wave_length; // the angular wave number k + +#ifdef MHD + // TODO: This method of setting the magnetic fields via the vector potential should work but instead leads to small + // TODO: errors in the magnetic field that tend to amplify over time until the solution diverges. I don't know why + // TODO: that is the case and can't figure out the reason. Without this we can't run linear waves at an angle to the + // TODO: grid. 
+ // // Compute the vector potential + // // ============================ + // std::vector vectorPotential(3 * H.n_cells, 0); + + // // lambda function for computing the vector potential + // auto Compute_Vector_Potential = [&](Real const &x_loc, Real const &y_loc, Real const &z_loc) { + // // The "_rot" variables are the rotated version + // Real const x_rot = x_loc * cos_pitch * cos_yaw + y_loc * cos_pitch * sin_yaw + z_loc * sin_pitch; + // Real const y_rot = -x_loc * sin_yaw + y_loc * cos_yaw; + + // Real const a_y = P.Bz * x_rot - (P.A * P.rEigenVec_Bz / wavenumber) * std::cos(wavenumber * x_rot); + // Real const a_z = -P.By * x_rot + (P.A * P.rEigenVec_By / wavenumber) * std::cos(wavenumber * x_rot) + P.Bx * + // y_rot; + + // return std::make_pair(a_y, a_z); + // }; + + // for (size_t k = 0; k < H.nz; k++) { + // for (size_t j = 0; j < H.ny; j++) { + // for (size_t i = 0; i < H.nx; i++) { + // // Get cell index + // size_t const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + // Real x, y, z; + // Get_Position(i, j, k, &x, &y, &z); + + // auto vectorPot = Compute_Vector_Potential(x, y + H.dy / 2., z + H.dz / 2.); + // vectorPotential.at(id + 0 * H.n_cells) = -vectorPot.first * sin_yaw - vectorPot.second * sin_pitch * cos_yaw; + + // vectorPot = Compute_Vector_Potential(x + H.dx / 2., y, z + H.dz / 2.); + // vectorPotential.at(id + 1 * H.n_cells) = vectorPot.first * cos_yaw - vectorPot.second * sin_pitch * sin_yaw; + + // vectorPot = Compute_Vector_Potential(x + H.dx / 2., y + H.dy / 2., z); + // vectorPotential.at(id + 2 * H.n_cells) = vectorPot.second * cos_pitch; + // } + // } + // } + + // // Compute the magnetic field from the vector potential + // // ==================================================== + // mhd::utils::Init_Magnetic_Field_With_Vector_Potential(H, C, vectorPotential); + + Real shift = H.dx; + size_t dir = 0; + if (sin_yaw == 1.0) { + shift = H.dy; + dir = 1; + } else if (sin_pitch == 1.0) { + shift = H.dz; + dir = 2; + } // 
set initial values of conserved variables for (int k = H.n_ghost; k < H.nz - H.n_ghost; k++) { for (int j = H.n_ghost; j < H.ny - H.n_ghost; j++) { for (int i = H.n_ghost; i < H.nx - H.n_ghost; i++) { - // Rotate the indices - auto [i_rot, j_rot, k_rot] = math_utils::rotateCoords(i, j, k, pitch, yaw); + // get cell index + size_t const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + // get cell-centered position + Real x_pos, y_pos, z_pos; + Get_Position(i, j, k, &x_pos, &y_pos, &z_pos); + Real const x_pos_rot = cos_pitch * (x_pos * cos_yaw + y_pos * sin_yaw) + z_pos * sin_pitch; + + Real const sine_x = std::sin(x_pos_rot * wavenumber); + + Real bx = P.Bx + P.A * P.rEigenVec_Bx * sine_x; + Real by = P.By + P.A * P.rEigenVec_By * sine_x; + Real bz = P.Bz + P.A * P.rEigenVec_Bz * sine_x; + C.magnetic_x[id] = bx * cos_pitch * cos_yaw - by * sin_yaw - bz * sin_pitch * cos_yaw; + C.magnetic_y[id] = bx * cos_pitch * sin_yaw + by * cos_yaw - bz * sin_pitch * sin_yaw; + C.magnetic_z[id] = bx * sin_pitch + bz * cos_pitch; + } + } + } +#endif // MHD + + // Compute the hydro variables + // =========================== + for (size_t k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) { + for (size_t j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) { + for (size_t i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) { // get cell index - int id = i + j * H.nx + k * H.nx * H.ny; + size_t const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); // get cell-centered position Real x_pos, y_pos, z_pos; - Get_Position(i_rot, j_rot, k_rot, &x_pos, &y_pos, &z_pos); + Get_Position(i, j, k, &x_pos, &y_pos, &z_pos); + Real const x_pos_rot = cos_pitch * (x_pos * cos_yaw + y_pos * sin_yaw) + z_pos * sin_pitch; - // set constant initial states. 
Note that mhd::utils::computeEnergy - // computes the hydro energy if MHD is turned off - Real sine_wave = std::sin(2.0 * M_PI * x_pos); + Real const sine_x = std::sin(x_pos_rot * wavenumber); - C.density[id] = rho; - C.momentum_x[id] = rho * vx; - C.momentum_y[id] = rho * vy; - C.momentum_z[id] = rho * vz; - C.Energy[id] = mhd::utils::computeEnergy(P, rho, vx, vy, vz, Bx, By, Bz, gama); - // add small-amplitude perturbations - C.density[id] += A * rEigenVec_rho * sine_wave; - C.momentum_x[id] += A * rEigenVec_MomentumX * sine_wave; - C.momentum_y[id] += A * rEigenVec_MomentumY * sine_wave; - C.momentum_z[id] += A * rEigenVec_MomentumZ * sine_wave; - C.Energy[id] += A * rEigenVec_E * sine_wave; + // Density + C.density[id] = P.rho + P.A * P.rEigenVec_rho * sine_x; + // Momenta + Real mx = P.rho * P.vx + P.A * P.rEigenVec_MomentumX * sine_x; + Real my = P.A * P.rEigenVec_MomentumY * sine_x; + Real mz = P.A * P.rEigenVec_MomentumZ * sine_x; + + C.momentum_x[id] = mx * cos_pitch * cos_yaw - my * sin_yaw - mz * sin_pitch * cos_yaw; + C.momentum_y[id] = mx * cos_pitch * sin_yaw + my * cos_yaw - mz * sin_pitch * sin_yaw; + C.momentum_z[id] = mx * sin_pitch + mz * cos_pitch; + + // Energy + C.Energy[id] = P.P / (P.gamma - 1.0) + 0.5 * P.rho * P.vx * P.vx + P.A * sine_x * P.rEigenVec_E; #ifdef MHD - sine_wave = std::sin(2.0 * M_PI * (x_pos + stagger)); - C.magnetic_x[id] = Bx + A * rEigenVec_Bx * sine_wave; - C.magnetic_y[id] = By + A * rEigenVec_By * sine_wave; - C.magnetic_z[id] = Bz + A * rEigenVec_Bz * sine_wave; + C.Energy[id] += 0.5 * (P.Bx * P.Bx + P.By * P.By + P.Bz * P.Bz); #endif // MHD } } @@ -352,7 +434,7 @@ void Grid3D::Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A, Re /*! \fn void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) * \brief Square wave density perturbation with amplitude A*rho in pressure * equilibrium. 
*/ -void Grid3D::Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) +void Grid3D::Square_Wave(parameters const &P) { int i, j, k, id; int istart, jstart, kstart, iend, jend, kend; @@ -385,15 +467,15 @@ void Grid3D::Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) // get cell-centered position Get_Position(i, j, k, &x_pos, &y_pos, &z_pos); - C.density[id] = rho; + C.density[id] = P.rho; // C.momentum_x[id] = 0.0; - C.momentum_x[id] = rho * vx; - C.momentum_y[id] = rho * vy; - C.momentum_z[id] = rho * vz; + C.momentum_x[id] = P.rho * P.vx; + C.momentum_y[id] = P.rho * P.vy; + C.momentum_z[id] = P.rho * P.vz; // C.momentum_z[id] = rho_l * v_l; - C.Energy[id] = P / (gama - 1.0) + 0.5 * rho * (vx * vx + vy * vy + vz * vz); + C.Energy[id] = P.P / (gama - 1.0) + 0.5 * P.rho * (P.vx * P.vx + P.vy * P.vy + P.vz * P.vz); #ifdef DE - C.GasEnergy[id] = P / (gama - 1.0); + C.GasEnergy[id] = P.P / (gama - 1.0); #endif #ifdef SCALAR #ifdef BASIC_SCALAR @@ -401,13 +483,13 @@ void Grid3D::Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) #endif #endif if (x_pos > 0.25 * H.xdglobal && x_pos < 0.75 * H.xdglobal) { - C.density[id] = rho * A; - C.momentum_x[id] = rho * A * vx; - C.momentum_y[id] = rho * A * vy; - C.momentum_z[id] = rho * A * vz; - C.Energy[id] = P / (gama - 1.0) + 0.5 * rho * A * (vx * vx + vy * vy + vz * vz); + C.density[id] = P.rho * P.A; + C.momentum_x[id] = P.rho * P.A * P.vx; + C.momentum_y[id] = P.rho * P.A * P.vy; + C.momentum_z[id] = P.rho * P.A * P.vz; + C.Energy[id] = P.P / (gama - 1.0) + 0.5 * P.rho * P.A * (P.vx * P.vx + P.vy * P.vy + P.vz * P.vz); #ifdef DE - C.GasEnergy[id] = P / (gama - 1.0); + C.GasEnergy[id] = P.P / (gama - 1.0); #endif #ifdef SCALAR #ifdef BASIC_SCALAR @@ -424,25 +506,20 @@ void Grid3D::Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) Bx_l, Real By_l, Real Bz_l, Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, Real diaph) * \brief 
Initialize the grid with a Riemann problem. */ -void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, Real By_l, Real Bz_l, Real rho_r, - Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, Real diaph) +void Grid3D::Riemann(parameters const &P) { - int i, j, k, id; - int istart, jstart, kstart, iend, jend, kend; - Real x_pos, y_pos, z_pos; - Real v, P, cs; - - istart = H.n_ghost; - iend = H.nx - H.n_ghost; + size_t const istart = H.n_ghost - 1; + size_t const iend = H.nx - H.n_ghost; + size_t jstart, kstart, jend, kend; if (H.ny > 1) { - jstart = H.n_ghost; + jstart = H.n_ghost - 1; jend = H.ny - H.n_ghost; } else { jstart = 0; jend = H.ny; } if (H.nz > 1) { - kstart = H.n_ghost; + kstart = H.n_ghost - 1; kend = H.nz - H.n_ghost; } else { kstart = 0; @@ -450,58 +527,62 @@ void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real } // set initial values of conserved variables - for (k = kstart - 1; k < kend; k++) { - for (j = jstart - 1; j < jend; j++) { - for (i = istart - 1; i < iend; i++) { + for (size_t k = kstart; k < kend; k++) { + for (size_t j = jstart; j < jend; j++) { + for (size_t i = istart; i < iend; i++) { // get cell index - id = i + j * H.nx + k * H.nx * H.ny; + size_t const id = i + j * H.nx + k * H.nx * H.ny; // get cell-centered position + Real x_pos, y_pos, z_pos; Get_Position(i, j, k, &x_pos, &y_pos, &z_pos); #ifdef MHD // Set the magnetic field including the rightmost ghost cell on the // left side which is really the left face of the first grid cell - if (x_pos < diaph) { - C.magnetic_x[id] = Bx_l; - C.magnetic_y[id] = By_l; - C.magnetic_z[id] = Bz_l; + // WARNING: Only correct in 3-D + if (x_pos < P.diaph) { + C.magnetic_x[id] = P.Bx_l; + C.magnetic_y[id] = P.By_l; + C.magnetic_z[id] = P.Bz_l; } else { - C.magnetic_x[id] = Bx_r; - C.magnetic_y[id] = By_r; - C.magnetic_z[id] = Bz_r; + C.magnetic_x[id] = P.Bx_r; + C.magnetic_y[id] = P.By_r; + C.magnetic_z[id] = 
P.Bz_r; } #endif // MHD // Exclude the rightmost ghost cell on the "left" side if ((k >= kstart) and (j >= jstart) and (i >= istart)) { - if (x_pos < diaph) { - C.density[id] = rho_l; - C.momentum_x[id] = rho_l * vx_l; - C.momentum_y[id] = rho_l * vy_l; - C.momentum_z[id] = rho_l * vz_l; - C.Energy[id] = mhd::utils::computeEnergy(P_l, rho_l, vx_l, vy_l, vz_l, Bx_l, By_l, Bz_l, gama); + if (x_pos < P.diaph) { + C.density[id] = P.rho_l; + C.momentum_x[id] = P.rho_l * P.vx_l; + C.momentum_y[id] = P.rho_l * P.vy_l; + C.momentum_z[id] = P.rho_l * P.vz_l; + C.Energy[id] = hydro_utilities::Calc_Energy_Primitive(P.P_l, P.rho_l, P.vx_l, P.vy_l, P.vz_l, gama, P.Bx_l, + P.By_l, P.Bz_l); #ifdef SCALAR #ifdef BASIC_SCALAR - C.basic_scalar[id] = 1.0 * rho_l; + C.basic_scalar[id] = 1.0 * P.rho_l; #endif #endif // SCALAR #ifdef DE - C.GasEnergy[id] = P_l / (gama - 1.0); + C.GasEnergy[id] = P.P_l / (gama - 1.0); #endif // DE } else { - C.density[id] = rho_r; - C.momentum_x[id] = rho_r * vx_r; - C.momentum_y[id] = rho_r * vy_r; - C.momentum_z[id] = rho_r * vz_r; - C.Energy[id] = mhd::utils::computeEnergy(P_r, rho_r, vx_r, vy_r, vz_r, Bx_r, By_r, Bz_r, gama); + C.density[id] = P.rho_r; + C.momentum_x[id] = P.rho_r * P.vx_r; + C.momentum_y[id] = P.rho_r * P.vy_r; + C.momentum_z[id] = P.rho_r * P.vz_r; + C.Energy[id] = hydro_utilities::Calc_Energy_Primitive(P.P_r, P.rho_r, P.vx_r, P.vy_r, P.vz_r, gama, P.Bx_r, + P.By_r, P.Bz_r); #ifdef SCALAR #ifdef BASIC_SCALAR - C.basic_scalar[id] = 0.0 * rho_r; + C.basic_scalar[id] = 0.0 * P.rho_r; #endif #endif // SCALAR #ifdef DE - C.GasEnergy[id] = P_r / (gama - 1.0); + C.GasEnergy[id] = P.P_r / (gama - 1.0); #endif // DE } } @@ -1309,8 +1390,8 @@ void Grid3D::Clouds() C.GasEnergy[id] = p_bg / (gama - 1.0); #endif #ifdef SCALAR - #ifdef BASIC_SCALAR - C.basic_scalar[id] = C.density[id] * 0.0; + #ifdef DUST + C.host[id + H.n_cells * grid_enum::dust_density] = 0.0; #endif #endif // add clouds @@ -1693,9 +1774,9 @@ void 
Grid3D::Circularly_Polarized_Alfven_Wave(struct parameters const P) // Compute the Energy auto const magnetic_centered = mhd::utils::cellCenteredMagneticFields(C.host, id, i, j, k, H.n_cells, H.nx, H.ny); - Real const energy = mhd::utils::computeEnergy( - pressure, density, momentum_x_rot / density, momentum_y_rot / density, momentum_z_rot / density, - magnetic_centered.x, magnetic_centered.y, magnetic_centered.z, ::gama); + Real const energy = hydro_utilities::Calc_Energy_Conserved(pressure, density, momentum_x_rot, momentum_y_rot, + momentum_z_rot, ::gama, magnetic_centered.x, + magnetic_centered.y, magnetic_centered.z); // Final assignment C.density[id] = density; @@ -1766,9 +1847,9 @@ void Grid3D::Advecting_Field_Loop(struct parameters const P) C.momentum_x[id] = P.rho * P.vx; C.momentum_y[id] = P.rho * P.vy; C.momentum_z[id] = P.rho * P.vz; - C.Energy[id] = mhd::utils::computeEnergy(P.P, P.rho, C.momentum_x[id] / P.rho, C.momentum_y[id] / P.rho, - C.momentum_z[id] / P.rho, magnetic_centered.x, magnetic_centered.y, - magnetic_centered.z, ::gama); + C.Energy[id] = hydro_utilities::Calc_Energy_Conserved(P.P, P.rho, C.momentum_x[id], C.momentum_y[id], + C.momentum_z[id], ::gama, magnetic_centered.x, + magnetic_centered.y, magnetic_centered.z); } } } @@ -1820,16 +1901,16 @@ void Grid3D::MHD_Spherical_Blast(struct parameters const P) mhd::utils::cellCenteredMagneticFields(C.host, id, i, j, k, H.n_cells, H.nx, H.ny); // Set the field(s) that do depend on pressure. 
That's just energy - Real radius = std::hypot(x, y, z); + Real const radius = std::hypot(x, y, z); + Real pressure; if (radius < P.radius) { - C.Energy[id] = mhd::utils::computeEnergy( - P.P_blast, C.density[id], C.momentum_x[id] / C.density[id], C.momentum_y[id] / C.density[id], - C.momentum_z[id] / C.density[id], magnetic_centered.x, magnetic_centered.y, magnetic_centered.z, ::gama); + pressure = P.P_blast; } else { - C.Energy[id] = mhd::utils::computeEnergy( - P.P, C.density[id], C.momentum_x[id] / C.density[id], C.momentum_y[id] / C.density[id], - C.momentum_z[id] / C.density[id], magnetic_centered.x, magnetic_centered.y, magnetic_centered.z, ::gama); + pressure = P.P; } + C.Energy[id] = hydro_utilities::Calc_Energy_Conserved( + pressure, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], ::gama, magnetic_centered.x, + magnetic_centered.y, magnetic_centered.z); } } } @@ -1857,8 +1938,8 @@ void Grid3D::Orszag_Tang_Vortex() Get_Position(i, j, k, &x, &y, &z); // Z vector potential - vectorPotential.at(id + 2 * H.n_cells) = (magnetic_background / 4.0 * M_PI) * std::cos(4.0 * M_PI * x) - - (magnetic_background / 2.0 * M_PI) * std::cos(2.0 * M_PI * y); + vectorPotential.at(id + 2 * H.n_cells) = + magnetic_background / (4.0 * M_PI) * (std::cos(4.0 * M_PI * x) + 2.0 * std::cos(2.0 * M_PI * y)); } } } @@ -1886,11 +1967,11 @@ void Grid3D::Orszag_Tang_Vortex() C.momentum_x[id] = density_background * velocity_background * std::sin(2.0 * M_PI * y); C.momentum_y[id] = -density_background * velocity_background * std::sin(2.0 * M_PI * x); C.momentum_z[id] = 0.0; - C.Energy[id] = mhd::utils::computeEnergy(pressure_background, C.density[id], C.momentum_x[id] / C.density[id], - C.momentum_y[id] / C.density[id], C.momentum_z[id] / C.density[id], - magnetic_centered.x, magnetic_centered.y, magnetic_centered.z, ::gama); + C.Energy[id] = hydro_utilities::Calc_Energy_Conserved( + pressure_background, C.density[id], C.momentum_x[id], C.momentum_y[id], 
C.momentum_z[id], ::gama, + magnetic_centered.x, magnetic_centered.y, magnetic_centered.z); } } } } -#endif // MHD \ No newline at end of file +#endif // MHD diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index f53836fb7..1f4b91fd3 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -421,7 +421,7 @@ __device__ __host__ Real mhdInverseCrossingTime(Real const &E, Real const &d, Re Real const &gamma) { // Compute the gas pressure and fast magnetosonic speed - Real gasP = mhd::utils::computeGasPressure(E, d, vx * d, vy * d, vz * d, avgBx, avgBy, avgBz, gamma); + Real gasP = hydro_utilities::Calc_Pressure_Primitive(E, d, vx, vy, vz, gamma, avgBx, avgBy, avgBz); Real cf = mhd::utils::fastMagnetosonicSpeed(d, gasP, avgBx, avgBy, avgBz, gamma); // Find maximum inverse crossing time in the cell (i.e. minimum crossing time) @@ -787,8 +787,8 @@ __global__ void Partial_Update_Advected_Internal_Energy_3D(Real *dev_conserved, E_kin = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz); #ifdef MHD // Add the magnetic energy - auto [centeredBx, centeredBy, centeredBz] = mhd::utils::cellCenteredMagneticFields( - dev_conserved, id, xid, yid, zid, n_cells, nx, ny) E_kin += mhd::utils::computeMagneticEnergy(magX, magY, magZ); + auto magnetic_centered = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); + E_kin += mhd::utils::computeMagneticEnergy(magnetic_centered.x, magnetic_centered.y, magnetic_centered.z); #endif // MHD P = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma); P = fmax(P, (Real)TINY_NUMBER); diff --git a/src/integrators/VL_1D_cuda.cu b/src/integrators/VL_1D_cuda.cu index 99463f927..57c1a9c65 100644 --- a/src/integrators/VL_1D_cuda.cu +++ b/src/integrators/VL_1D_cuda.cu @@ -93,8 +93,8 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea n_fields); #endif #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, 
dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, - gama, 0, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, + n_fields); #endif #ifdef PLMP hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, @@ -105,8 +105,7 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea gama, 0, n_fields); #endif #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, - gama, 0, n_fields); + hipLaunchKernelGGL(PPMC_VL, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0); #endif CudaCheckError(); diff --git a/src/integrators/VL_2D_cuda.cu b/src/integrators/VL_2D_cuda.cu index 79d410033..27677f61d 100644 --- a/src/integrators/VL_2D_cuda.cu +++ b/src/integrators/VL_2D_cuda.cu @@ -103,10 +103,10 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of dt, gama, 1, n_fields); #endif #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, - dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, - dt, gama, 1, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, + 0, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, + 1, n_fields); #endif #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, @@ -115,10 +115,8 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of dt, gama, 1, n_fields); #endif // PPMP #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, 
dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, - dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, - dt, gama, 1, n_fields); + hipLaunchKernelGGL(PPMC_VL, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0); + hipLaunchKernelGGL(PPMC_VL, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, gama, 1); #endif // PPMC CudaCheckError(); diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 3fed3a9bf..6ccc814c2 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -218,12 +218,12 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int dt, gama, 2, n_fields); #endif // PLMP #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, - dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, - dt, gama, 1, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, - dt, gama, 2, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, + 0, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, + 1, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, + 2, n_fields); #endif // PLMC #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, @@ -234,12 +234,9 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int dt, gama, 2, n_fields); #endif // PPMP #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, 
dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, - dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, - dt, gama, 1, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, - dt, gama, 2, n_fields); + hipLaunchKernelGGL(PPMC_VL, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0); + hipLaunchKernelGGL(PPMC_VL, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, gama, 1); + hipLaunchKernelGGL(PPMC_VL, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, gama, 2); #endif // PPMC CudaCheckError(); @@ -401,8 +398,8 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de #ifdef MHD // Add the magnetic energy auto const [centeredBx, centeredBy, centeredBz] = - mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny) E_kin += - mhd::utils::computeMagneticEnergy(centeredBx, centeredBy, centeredBz); + mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); + E_kin += mhd::utils::computeMagneticEnergy(centeredBx, centeredBy, centeredBz); #endif // MHD P = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma); P = fmax(P, (Real)TINY_NUMBER); diff --git a/src/integrators/simple_1D_cuda.cu b/src/integrators/simple_1D_cuda.cu index 8e622b85c..3be5ba40a 100644 --- a/src/integrators/simple_1D_cuda.cu +++ b/src/integrators/simple_1D_cuda.cu @@ -66,8 +66,8 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, CudaCheckError(); #endif #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, - 0, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, + 
n_fields); CudaCheckError(); #endif #ifdef PPMP @@ -76,8 +76,7 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, CudaCheckError(); #endif #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, - 0, n_fields); + hipLaunchKernelGGL(PPMC_CTU, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); CudaCheckError(); #endif diff --git a/src/integrators/simple_2D_cuda.cu b/src/integrators/simple_2D_cuda.cu index bf75e97cc..b9d11b180 100644 --- a/src/integrators/simple_2D_cuda.cu +++ b/src/integrators/simple_2D_cuda.cu @@ -67,10 +67,10 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int gama, 1, n_fields); #endif #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, - gama, 0, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, - gama, 1, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1, + n_fields); #endif #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, @@ -79,10 +79,8 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int gama, 1, n_fields); #endif #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, - gama, 0, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, - gama, 1, n_fields); + hipLaunchKernelGGL(PPMC_CTU, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); + 
hipLaunchKernelGGL(PPMC_CTU, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1); #endif CudaCheckError(); diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index 4a24bb362..3c8dc13e9 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -100,12 +100,12 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, gama, 2, n_fields); #endif // PLMP #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, - gama, 0, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, - gama, 1, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, - gama, 2, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1, + n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, 2, + n_fields); #endif #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, @@ -116,12 +116,9 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, gama, 2, n_fields); #endif // PPMP #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, - gama, 0, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, - gama, 1, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, - gama, 2, n_fields); + 
hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); + hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1); + hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, 2); CudaCheckError(); #endif // PPMC diff --git a/src/io/io.cpp b/src/io/io.cpp index 34c0d3438..420982309 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -190,7 +190,7 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) #elif defined HDF5 filename += ".h5"; #else - strcat(filename, ".txt"); + filename += ".txt"; if (G.H.nx * G.H.ny * G.H.nz > 1000) printf("Ascii outputs only recommended for small problems!\n"); #endif #ifdef MPI_CHOLLA @@ -240,7 +240,7 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) #else // open the file for txt writes FILE *out; - out = fopen(filename, "w"); + out = fopen(filename.data(), "w"); if (out == NULL) { printf("Error opening output file.\n"); exit(-1); @@ -259,6 +259,7 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) void OutputFloat32(Grid3D &G, struct parameters P, int nfile) { +#ifdef HDF5 Header H = G.H; // Do nothing in 1-D and 2-D case if (H.ny_real == 1) { @@ -276,9 +277,9 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) std::string filename(P.outdir); filename += std::to_string(nfile); filename += ".float32.h5"; -#ifdef MPI_CHOLLA + #ifdef MPI_CHOLLA filename += "." + std::to_string(procID); -#endif + #endif // MPI_CHOLLA // create hdf5 file hid_t file_id; /* file identifier */ @@ -301,65 +302,64 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) // Need a larger device buffer for MHD. In the future, if other fields need // a larger device buffer, choose the maximum of the sizes. 
If the buffer is // too large, it does not cause bugs (Oct 6 2022) -#ifdef MHD + #ifdef MHD buffer_size = (nx_dset + 1) * (ny_dset + 1) * (nz_dset + 1); -#else + #else buffer_size = nx_dset * ny_dset * nz_dset; -#endif + #endif // MHD // Using static DeviceVector here automatically allocates the buffer the // first time it is needed It persists until program exit, and then calls // Free upon destruction cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; - float *device_dataset_buffer = device_dataset_vector.data(); - float *dataset_buffer = (float *)malloc(buffer_size * sizeof(float)); + auto *dataset_buffer = (float *)malloc(buffer_size * sizeof(float)); if (P.out_float32_density > 0) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, - G.C.d_density, "/density"); + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_density, "/density"); } if (P.out_float32_momentum_x > 0) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, - G.C.d_momentum_x, "/momentum_x"); + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_momentum_x, "/momentum_x"); } if (P.out_float32_momentum_y > 0) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, - G.C.d_momentum_y, "/momentum_y"); + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_momentum_y, "/momentum_y"); } if (P.out_float32_momentum_z > 0) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, - G.C.d_momentum_z, "/momentum_z"); + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, 
dataset_buffer, + device_dataset_vector.data(), G.C.d_momentum_z, "/momentum_z"); } if (P.out_float32_Energy > 0) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, - G.C.d_Energy, "/Energy"); + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_Energy, "/Energy"); } -#ifdef DE + #ifdef DE if (P.out_float32_GasEnergy > 0) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, - G.C.d_GasEnergy, "/GasEnergy"); + WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_GasEnergy, "/GasEnergy"); } -#endif // DE -#ifdef MHD + #endif // DE + #ifdef MHD // TODO (by Alwin, for anyone) : Repair output format if needed and remove these chprintfs when appropriate if (P.out_float32_magnetic_x > 0) { chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, - device_dataset_buffer, G.C.d_magnetic_x, "/magnetic_x"); + device_dataset_vector.data(), G.C.d_magnetic_x, "/magnetic_x"); } if (P.out_float32_magnetic_y > 0) { chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, - device_dataset_buffer, G.C.d_magnetic_y, "/magnetic_y"); + device_dataset_vector.data(), G.C.d_magnetic_y, "/magnetic_y"); } if (P.out_float32_magnetic_z > 0) { chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); WriteHDF5Field3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, - device_dataset_buffer, G.C.d_magnetic_z, "/magnetic_z"); + device_dataset_vector.data(), 
G.C.d_magnetic_z, "/magnetic_z"); } -#endif + #endif // MHD free(dataset_buffer); @@ -371,6 +371,7 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) // close the file status = H5Fclose(file_id); +#endif // HDF5 } /* Output a projection of the grid data to file. */ @@ -1370,13 +1371,13 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #ifdef OUTPUT_ENERGY output_energy = true; #else // not OUTPUT_ENERGY - output_energy = false; + output_energy = false; #endif // OUTPUT_ENERGY #ifdef OUTPUT_MOMENTUM output_momentum = true; #else // not OUTPUT_MOMENTUM - output_momentum = false; + output_momentum = false; #endif // OUTPUT_MOMENTUM #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) @@ -1409,29 +1410,33 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) size_t buffer_size = nx_dset * ny_dset * nz_dset; #endif cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; - Real *device_dataset_buffer = device_dataset_vector.data(); - dataset_buffer = (Real *)malloc(buffer_size * sizeof(Real)); + dataset_buffer = (Real *)malloc(buffer_size * sizeof(Real)); // Start writing fields - Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_density, "/density"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_density, "/density"); if (output_momentum || H.Output_Complete_Data) { - Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_x, "/momentum_x"); - Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_y, "/momentum_y"); - Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_z, "/momentum_z"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_momentum_x, "/momentum_x"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_momentum_y, "/momentum_y"); + Write_Grid_HDF5_Field_GPU(H, file_id, 
dataset_buffer, device_dataset_vector.data(), C.d_momentum_z, "/momentum_z"); } if (output_energy || H.Output_Complete_Data) { - Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_Energy, "/Energy"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_Energy, "/Energy"); #ifdef DE - Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_GasEnergy, "/GasEnergy"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_GasEnergy, "/GasEnergy"); #endif } #ifdef SCALAR #ifdef BASIC_SCALAR - Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_buffer, C.d_basic_scalar, "/scalar0"); - #endif + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_basic_scalar, "/scalar0"); + #endif // BASIC_SCALAR + + #ifdef DUST + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_dust_density, + "/dust_density"); + #endif // DUST #ifdef OUTPUT_CHEMISTRY #ifdef CHEMISTRY_GPU @@ -1480,18 +1485,18 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) #if defined(GRAVITY) && defined(OUTPUT_POTENTIAL) Write_Generic_HDF5_Field_GPU(Grav.nx_local + 2 * N_GHOST_POTENTIAL, Grav.ny_local + 2 * N_GHOST_POTENTIAL, Grav.nz_local + 2 * N_GHOST_POTENTIAL, Grav.nx_local, Grav.ny_local, Grav.nz_local, - N_GHOST_POTENTIAL, file_id, dataset_buffer, device_dataset_buffer, Grav.F.potential_d, - "/grav_potential"); + N_GHOST_POTENTIAL, file_id, dataset_buffer, device_dataset_vector.data(), + Grav.F.potential_d, "/grav_potential"); #endif // GRAVITY and OUTPUT_POTENTIAL #ifdef MHD if (H.Output_Complete_Data) { WriteHDF5Field3D(H.nx, H.ny, H.nx_real + 1, H.ny_real, H.nz_real, H.n_ghost, file_id, dataset_buffer, - device_dataset_buffer, C.d_magnetic_x, "/magnetic_x", 0); + device_dataset_vector.data(), C.d_magnetic_x, "/magnetic_x", 0); WriteHDF5Field3D(H.nx, H.ny, H.nx_real, H.ny_real + 1, H.nz_real, 
H.n_ghost, file_id, dataset_buffer, - device_dataset_buffer, C.d_magnetic_y, "/magnetic_y", 1); + device_dataset_vector.data(), C.d_magnetic_y, "/magnetic_y", 1); WriteHDF5Field3D(H.nx, H.ny, H.nx_real, H.ny_real, H.nz_real + 1, H.n_ghost, file_id, dataset_buffer, - device_dataset_buffer, C.d_magnetic_z, "/magnetic_z", 2); + device_dataset_vector.data(), C.d_magnetic_z, "/magnetic_z", 2); } #endif // MHD } @@ -1512,6 +1517,10 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) Real *dataset_buffer_Txy, *dataset_buffer_Txz; herr_t status; Real dxy, dxz, Txy, Txz, n, T; + #ifdef DUST + Real dust_xy, dust_xz; + Real *dataset_buffer_dust_xy, *dataset_buffer_dust_xz; + #endif n = T = 0; Real mu = 0.6; @@ -1526,6 +1535,10 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) dataset_buffer_dxz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); dataset_buffer_Txy = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); dataset_buffer_Txz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + #ifdef DUST + dataset_buffer_dust_xy = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + dataset_buffer_dust_xz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + #endif // Create the data space for the datasets dims[0] = nx_dset; @@ -1539,11 +1552,17 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) for (i = 0; i < H.nx_real; i++) { dxy = 0; Txy = 0; + #ifdef DUST + dust_xy = 0; + #endif // for each xy element, sum over the z column for (k = 0; k < H.nz_real; k++) { id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; // sum density dxy += C.density[id] * H.dz; + #ifdef DUST + dust_xy += C.dust_density[id] * H.dz; + #endif // calculate number density n = C.density[id] * DENSITY_UNIT / (mu * MP); // calculate temperature @@ -1562,6 +1581,9 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) buf_id = j + i * H.ny_real; dataset_buffer_dxy[buf_id] = dxy; dataset_buffer_Txy[buf_id] = Txy; + #ifdef DUST + dataset_buffer_dust_xy[buf_id] = 
dust_xy; + #endif } } @@ -1570,11 +1592,17 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) for (i = 0; i < H.nx_real; i++) { dxz = 0; Txz = 0; + #ifdef DUST + dust_xz = 0; + #endif // for each xz element, sum over the y column for (j = 0; j < H.ny_real; j++) { id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; // sum density dxz += C.density[id] * H.dy; + #ifdef DUST + dust_xz += C.dust_density[id] * H.dy; + #endif // calculate number density n = C.density[id] * DENSITY_UNIT / (mu * MP); // calculate temperature @@ -1593,6 +1621,9 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) buf_id = k + i * H.nz_real; dataset_buffer_dxz[buf_id] = dxz; dataset_buffer_Txz[buf_id] = Txz; + #ifdef DUST + dataset_buffer_dust_xz[buf_id] = dust_xz; + #endif } } @@ -1600,7 +1631,11 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_dxy, "/d_xy"); status = Write_HDF5_Dataset(file_id, dataspace_xz_id, dataset_buffer_dxz, "/d_xz"); status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txy, "/T_xy"); - status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txz, "/T_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_xz_id, dataset_buffer_Txz, "/T_xz"); + #ifdef DUST + status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_dust_xy, "/d_dust_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_xz_id, dataset_buffer_dust_xz, "/d_dust_xz"); + #endif // Free the dataspace ids status = H5Sclose(dataspace_xz_id); @@ -1613,6 +1648,10 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id) free(dataset_buffer_dxz); free(dataset_buffer_Txy); free(dataset_buffer_Txz); + #ifdef DUST + free(dataset_buffer_dust_xy); + free(dataset_buffer_dust_xz); + #endif // DUST } #endif // HDF5 @@ -2383,7 +2422,11 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P) #ifdef BASIC_SCALAR Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.scalar, 
"/scalar0"); - #endif + #endif // BASIC_SCALAR + + #ifdef DUST + Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.dust_density, "/dust_density"); + #endif // DUST #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HI_density, "/HI_density"); diff --git a/src/io/io_parallel.cpp b/src/io/io_parallel.cpp index 1063bfbf8..70a97e974 100644 --- a/src/io/io_parallel.cpp +++ b/src/io/io_parallel.cpp @@ -71,12 +71,15 @@ void Grid3D::Read_Grid_Cat(struct parameters P) exit(0); } - // TODO (written by Alwin, for anyone to do) : Need to consider how or whether to read attributes. - // even without read gamma from file, it is set in initial_conditions.cpp - // if I do not set t or n_step it is set to 0 in grid/grid3D.cpp - // This should be okay to start with. - // Choosing not to read attributes is because - // Parallel-reading attributes can be slow without collective calls. + // TODO (written by Alwin, for anyone to do) : + // Consider using collective calls if this part is slow at scale + hid_t attribute_id; + attribute_id = H5Aopen(file_id, "t", H5P_DEFAULT); + status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &H.t); + status = H5Aclose(attribute_id); + attribute_id = H5Aopen(file_id, "n_step", H5P_DEFAULT); + status = H5Aread(attribute_id, H5T_NATIVE_INT, &H.n_step); + status = H5Aclose(attribute_id); // Offsets are global variables from mpi_routines.h hsize_t offset[3]; @@ -109,6 +112,9 @@ void Grid3D::Read_Grid_Cat(struct parameters P) #ifdef BASIC_SCALAR Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.basic_scalar, "/scalar0"); #endif + #ifdef DUST + Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.dust_density, "/dust_density"); + #endif #endif // TODO (Alwin) : add scalar stuff diff --git a/src/main.cpp b/src/main.cpp index ae299c024..b33ee8eba 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -80,7 +80,16 @@ int main(int argc, char *argv[]) "Parameter values: nx = %d, 
ny = %d, nz = %d, tout = %f, init = %s, " "boundaries = %d %d %d %d %d %d\n", P.nx, P.ny, P.nz, P.tout, P.init, P.xl_bcnd, P.xu_bcnd, P.yl_bcnd, P.yu_bcnd, P.zl_bcnd, P.zu_bcnd); + + bool is_restart = false; if (strcmp(P.init, "Read_Grid") == 0) { + is_restart = true; + } + if (strcmp(P.init, "Read_Grid_Cat") == 0) { + is_restart = true; + } + + if (is_restart) { chprintf("Input directory: %s\n", P.indir); } chprintf("Output directory: %s\n", P.outdir); @@ -107,8 +116,8 @@ int main(int argc, char *argv[]) chprintf("Setting initial conditions...\n"); G.Set_Initial_Conditions(P); chprintf("Initial conditions set.\n"); - // set main variables for Read_Grid initial conditions - if (strcmp(P.init, "Read_Grid") == 0) { + // set main variables for Read_Grid and Read_Grid_Cat initial conditions + if (is_restart) { outtime += G.H.t; nfile = P.nfile; } @@ -192,7 +201,7 @@ int main(int argc, char *argv[]) chprintf("Nstep = %d Simulation time = %f\n", G.H.n_step, G.H.t); #ifdef OUTPUT - if (strcmp(P.init, "Read_Grid") != 0 || G.H.Output_Now) { + if (!is_restart || G.H.Output_Now) { // write the initial conditions to file chprintf("Writing initial conditions to file...\n"); WriteData(G, P, nfile); diff --git a/src/main_tests.cpp b/src/main_tests.cpp index 3be97f3eb..ee58fbd06 100644 --- a/src/main_tests.cpp +++ b/src/main_tests.cpp @@ -128,17 +128,8 @@ int main(int argc, char **argv) globalMpiLauncher.init("mpirun -np"); } - if (input.cmdOptionExists("--runCholla=false")) { - globalRunCholla = false; - } else { - globalRunCholla = true; - } - - if (input.cmdOptionExists("--compareSystemTestResults=false")) { - globalCompareSystemTestResults = false; - } else { - globalCompareSystemTestResults = true; - } + globalRunCholla = not input.cmdOptionExists("--runCholla=false"); + globalCompareSystemTestResults = not input.cmdOptionExists("--compareSystemTestResults=false"); // Run test and return result return RUN_ALL_TESTS(); diff --git a/src/mhd/ct_electric_fields_tests.cu 
b/src/mhd/ct_electric_fields_tests.cu index a57f8afe2..afbaada66 100644 --- a/src/mhd/ct_electric_fields_tests.cu +++ b/src/mhd/ct_electric_fields_tests.cu @@ -39,17 +39,14 @@ class tMHDCalculateCTElectricFields : public ::testing::Test * */ tMHDCalculateCTElectricFields() - : nx(2), - ny(nx), - nz(nx), - n_cells(nx * ny * nz), + : n_cells(nx * ny * nz), fluxX(n_cells * (grid_enum::num_flux_fields)), fluxY(n_cells * (grid_enum::num_flux_fields)), fluxZ(n_cells * (grid_enum::num_flux_fields)), grid(n_cells * (grid_enum::num_fields)), testCTElectricFields(n_cells * 3, -999.), fiducialData(n_cells * 3, -999.), - dimGrid((n_cells + TPB - 1), 1, 1), + dimGrid((n_cells + TPB - 1) / TPB, 1, 1), dimBlock(TPB, 1, 1) { // Allocate device arrays @@ -71,7 +68,7 @@ class tMHDCalculateCTElectricFields : public ::testing::Test protected: // Initialize the test grid and other state variables - size_t const nx, ny, nz; + size_t const nx = 2, ny = nx, nz = nx; size_t const n_cells; // Launch Parameters diff --git a/src/mhd/magnetic_divergence_tests.cu b/src/mhd/magnetic_divergence_tests.cu index 9393c2498..c1c44a9a7 100644 --- a/src/mhd/magnetic_divergence_tests.cu +++ b/src/mhd/magnetic_divergence_tests.cu @@ -46,8 +46,8 @@ TEST(tMHDGrid3DcheckMagneticDivergence, CorrectInputExpectCorrectOutput) std::vector host_grid(G.H.n_cells * G.H.n_fields); std::mt19937 prng(1); std::uniform_real_distribution doubleRand(1, 5); - for (size_t i = 0; i < host_grid.size(); i++) { - host_grid.at(i) = doubleRand(prng) / 1E15; + for (double& host_data : host_grid) { + host_data = doubleRand(prng) / 1E15; } // Allocating and copying to device diff --git a/src/mhd/magnetic_update_tests.cu b/src/mhd/magnetic_update_tests.cu index 79dc81db7..9b78a8f5d 100644 --- a/src/mhd/magnetic_update_tests.cu +++ b/src/mhd/magnetic_update_tests.cu @@ -34,19 +34,12 @@ class tMHDUpdateMagneticField3D : public ::testing::Test * */ tMHDUpdateMagneticField3D() - : nx(3), - ny(nx), - nz(nx), - n_cells(nx * ny * nz), - 
dt(3.2), - dx(2.5), - dy(2.5), - dz(2.5), + : n_cells(nx * ny * nz), sourceGrid(n_cells * (grid_enum::num_fields)), destinationGrid(n_cells * (grid_enum::num_fields), -999.), ctElectricFields(n_cells * 3), fiducialData(n_cells * (grid_enum::num_fields), -999.), - dimGrid((n_cells + TPB - 1), 1, 1), + dimGrid((n_cells + TPB - 1) / TPB, 1, 1), dimBlock(TPB, 1, 1) { // Allocate device arrays @@ -64,9 +57,9 @@ class tMHDUpdateMagneticField3D : public ::testing::Test protected: // Initialize the test grid and other state variables - size_t const nx, ny, nz; + size_t const nx = 3, ny = nx, nz = nx; size_t const n_cells; - Real const dt, dx, dy, dz; + Real const dt = 3.2, dx = 2.5, dy = dx, dz = dx; // Launch Parameters dim3 const dimGrid; // How many blocks in the grid diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index 0250080ea..513d29056 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -827,6 +827,24 @@ int greatest_prime_factor(int n) return np; } +/* + tile MPI processes in a block arrangement for the 3D case + */ +void TileBlockDecomposition3D(int number, int &np_x, int &np_y, int &np_z) +{ + int dims[3] = {1, 1, 1}; + size_t index = 0; + while (number > 1) { + int gpf = greatest_prime_factor(number); + number /= gpf; + dims[index % 3] *= gpf; + index += 1; + } + np_x = dims[0]; + np_y = dims[1]; + np_z = dims[2]; +} + /*tile MPI processes in a block arrangement*/ void TileBlockDecomposition(void) { @@ -867,43 +885,7 @@ void TileBlockDecomposition(void) return; } - /*base decomposition on whether n_gpf==2*/ - if (n_gpf != 2) { - /*we are in 3-d, so split remainder evenly*/ - np_x = n_gpf; - n_gpf = greatest_prime_factor(nproc / n_gpf); - if (n_gpf != 2) { - /*the next greatest prime is odd, so just split*/ - np_y = n_gpf; - np_z = nproc / (np_x * np_y); - } else { - /*increase ny, nz round-robin*/ - while (np_x * np_y * np_z < nproc) { - np_y *= 2; - if (np_x * np_y * np_z == nproc) { - break; - } - np_z *= 2; - } - } - } 
else { - /*nproc is a power of 2*/ - /*we are in 3-d, so split remainder evenly*/ - - /*increase nx, ny, nz round-robin*/ - while (np_x * np_y * np_z < nproc) { - np_x *= 2; - if (np_x * np_y * np_z == nproc) { - break; - } - np_y *= 2; - if (np_x * np_y * np_z == nproc) { - break; - } - np_z *= 2; - } - } - + TileBlockDecomposition3D(nproc, np_x, np_y, np_z); // reorder x, y, z int n_tmp; diff --git a/src/particles/density_CIC_gpu.cu b/src/particles/density_CIC_gpu.cu index 86ddd7e36..c33544046 100644 --- a/src/particles/density_CIC_gpu.cu +++ b/src/particles/density_CIC_gpu.cu @@ -155,7 +155,7 @@ void Particles_3D::Get_Density_CIC_GPU_function(part_int_t n_local, Real particl Real *pos_y_dev, Real *pos_z_dev, Real *mass_dev) { // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu index 286ae92e3..75bf1f5e8 100644 --- a/src/particles/feedback_CIC_gpu.cu +++ b/src/particles/feedback_CIC_gpu.cu @@ -38,8 +38,8 @@ int snr_n; #ifndef O_HIP __device__ double atomicMax(double* address, double val) { - unsigned long long int* address_as_ull = (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; + auto* address_as_ull = (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; do { assumed = old; old = atomicCAS(address_as_ull, assumed, __double_as_longlong(fmax(val, __longlong_as_double(assumed)))); @@ -133,7 +133,7 @@ void supernova::initState(struct parameters* P, part_int_t n_local, Real allocat n_states = n_local * allocation_factor; cudaMalloc((void**)&randStates, n_states * sizeof(feedback_prng_t)); - int ngrid = (n_states + TPB_FEEDBACK - 1) / TPB_FEEDBACK; + int ngrid = (n_states - 1) / TPB_FEEDBACK + 1; dim3 grid(ngrid); dim3 
block(TPB_FEEDBACK); diff --git a/src/particles/gravity_CIC_gpu.cu b/src/particles/gravity_CIC_gpu.cu index 19aee8941..e66eb928e 100644 --- a/src/particles/gravity_CIC_gpu.cu +++ b/src/particles/gravity_CIC_gpu.cu @@ -273,7 +273,7 @@ void Particles_3D::Get_Gravity_CIC_GPU_function(part_int_t n_local, int nx_local Real *gravity_z_dev) { // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index f2b56f62c..e8ac74dbe 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -516,7 +516,7 @@ void Particles_3D::Initialize_Sphere(struct parameters *P) Real *temp_mass = (Real *)malloc(particles_array_size * sizeof(Real)); #endif #ifdef PARTICLE_IDS - part_int_t *temp_id = (part_int_t *)malloc(particles_array_size * sizeof(part_int_t)); + auto *temp_id = (part_int_t *)malloc(particles_array_size * sizeof(part_int_t)); #endif chprintf(" Allocated GPU memory for particle data\n"); diff --git a/src/particles/particles_3D_gpu.cu b/src/particles/particles_3D_gpu.cu index fc5210f77..0a4915d5c 100644 --- a/src/particles/particles_3D_gpu.cu +++ b/src/particles/particles_3D_gpu.cu @@ -74,7 +74,7 @@ void __global__ Copy_Device_to_Device_Kernel(T *src_array_dev, T *dst_array_dev, template void Copy_Device_to_Device(T *src_array_dev, T *dst_array_dev, part_int_t size) { - int ngrid = (size + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (size - 1) / TPB_PARTICLES + 1; dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB_PARTICLES, 1, 1); hipLaunchKernelGGL(Copy_Device_to_Device_Kernel, dim1dGrid, dim1dBlock, 0, 0, src_array_dev, dst_array_dev, size); @@ -186,7 +186,7 @@ __global__ void Set_Particles_Array_Real_Kernel(Real value, Real *array_dev, par void Particles_3D::Set_Particles_Array_Real(Real value, 
Real *array_dev, part_int_t size) { // set values for GPU kernels - int ngrid = (size + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (size - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block diff --git a/src/particles/particles_dynamics.cpp b/src/particles/particles_dynamics.cpp index 9baba2cc5..977fd936c 100644 --- a/src/particles/particles_dynamics.cpp +++ b/src/particles/particles_dynamics.cpp @@ -63,7 +63,7 @@ Real Grid3D::Calc_Particles_dt() Real Grid3D::Calc_Particles_dt_GPU() { // set values for GPU kernels - int ngrid = (Particles.n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (Particles.n_local - 1) / TPB_PARTICLES + 1; if (ngrid > Particles.G.size_blocks_array) { chprintf(" Error: particles dt_array too small\n"); diff --git a/src/particles/particles_dynamics_gpu.cu b/src/particles/particles_dynamics_gpu.cu index d0552abe6..665be8ff3 100644 --- a/src/particles/particles_dynamics_gpu.cu +++ b/src/particles/particles_dynamics_gpu.cu @@ -80,7 +80,7 @@ Real Particles_3D::Calc_Particles_dt_GPU_function(int ngrid, part_int_t n_partic Real *dti_array_dev) { // // set values for GPU kernels - // int ngrid = (Particles.n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + // int ngrid = (Particles.n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -150,7 +150,7 @@ void Particles_3D::Advance_Particles_KDK_Step1_GPU_function(part_int_t n_local, Real *grav_y_dev, Real *grav_z_dev) { // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -169,7 +169,7 @@ void Particles_3D::Advance_Particles_KDK_Step2_GPU_function(part_int_t n_local, Real *grav_y_dev, Real *grav_z_dev) { // set values for GPU kernels - int ngrid = (n_local + 
TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -265,7 +265,7 @@ void Particles_3D::Advance_Particles_KDK_Step1_Cosmo_GPU_function(part_int_t n_l Real Omega_K) { // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -288,7 +288,7 @@ void Particles_3D::Advance_Particles_KDK_Step2_Cosmo_GPU_function(part_int_t n_l Real Omega_K) { // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 787449f21..41a5ae505 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -2,661 +2,296 @@ * \brief Definitions of the piecewise linear reconstruction functions with limiting applied in the characteristic variables, as described in Stone et al., 2008. */ -#ifdef CUDA - #ifdef PLMC - #include +#include - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../reconstruction/plmc_cuda.h" - #include "../utils/gpu.hpp" +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../reconstruction/plmc_cuda.h" +#include "../reconstruction/reconstruction.h" +#include "../utils/cuda_utilities.h" +#include "../utils/gpu.hpp" - #ifdef DE // PRESSURE_DE - #include "../utils/hydro_utilities.h" - #endif // DE +#ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" +#endif // DE /*! 
\fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real - *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real + *dev_bounds_R, int nx, int ny, int nz, Real dx, Real dt, Real gamma, int dir) * \brief When passed a stencil of conserved variables, returns the left and right boundary values for the interface calculated using plm. */ -__global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) +__global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, + int ny, int nz, Real dx, Real dt, Real gamma, int dir, int n_fields) { - int n_cells = nx * ny * nz; - int o1, o2, o3; - if (dir == 0) { - o1 = 1; - o2 = 2; - o3 = 3; - } - if (dir == 1) { - o1 = 2; - o2 = 3; - o3 = 1; - } - if (dir == 2) { - o1 = 3; - o2 = 1; - o3 = 2; + // get a thread ID + int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; + int xid, yid, zid; + cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); + + // Ensure that we are only operating on cells that will be used + if (reconstruction::Thread_Guard<2>(nx, ny, nz, xid, yid, zid)) { + return; } - // declare primitive variables for each stencil - // these will be placed into registers for each thread - Real d_i, vx_i, vy_i, vz_i, p_i; - Real d_imo, vx_imo, vy_imo, vz_imo, p_imo; - Real d_ipo, vx_ipo, vy_ipo, vz_ipo, p_ipo; - - // declare other variables to be used - Real a_i; - Real del_d_L, del_vx_L, del_vy_L, del_vz_L, del_p_L; - Real del_d_R, del_vx_R, del_vy_R, del_vz_R, del_p_R; - Real del_d_C, del_vx_C, del_vy_C, del_vz_C, del_p_C; - Real del_d_G, del_vx_G, del_vy_G, del_vz_G, del_p_G; - Real del_a_0_L, del_a_1_L, del_a_2_L, del_a_3_L, del_a_4_L; - Real del_a_0_R, del_a_1_R, del_a_2_R, del_a_3_R, del_a_4_R; - Real del_a_0_C, del_a_1_C, del_a_2_C, del_a_3_C, del_a_4_C; - Real del_a_0_G, del_a_1_G, 
del_a_2_G, del_a_3_G, del_a_4_G; - Real del_a_0_m, del_a_1_m, del_a_2_m, del_a_3_m, del_a_4_m; - Real lim_slope_a, lim_slope_b; - Real del_d_m_i, del_vx_m_i, del_vy_m_i, del_vz_m_i, del_p_m_i; - Real d_L_iph, vx_L_iph, vy_L_iph, vz_L_iph, p_L_iph; - Real d_R_imh, vx_R_imh, vy_R_imh, vz_R_imh, p_R_imh; - Real C; - #ifndef VL - Real dtodx = dt / dx; - Real lambda_m, lambda_0, lambda_p; - Real qx; - Real lamdiff; - Real sum_0, sum_1, sum_2, sum_3, sum_4; - #endif // not VL - #ifdef DE - Real ge_i, ge_imo, ge_ipo; - Real del_ge_L, del_ge_R, del_ge_C, del_ge_G; - Real del_ge_m_i; - Real ge_L_iph, ge_R_imh; - Real E, E_kin, dge; - #ifndef VL - Real sum_ge; - #endif // CTU - #endif // DE - #ifdef SCALAR - Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS]; - Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], del_scalar_G[NSCALARS]; - Real del_scalar_m_i[NSCALARS]; - Real scalar_L_iph[NSCALARS], scalar_R_imh[NSCALARS]; - #ifndef VL - Real sum_scalar[NSCALARS]; - #endif // CTU - #endif // SCALAR + // Compute the total number of cells + int const n_cells = nx * ny * nz; - // get a thread ID - int blockId = blockIdx.x + blockIdx.y * gridDim.x; - int tid = threadIdx.x + blockId * blockDim.x; - int id; - int zid = tid / (nx * ny); - int yid = (tid - zid * nx * ny) / nx; - int xid = tid - zid * nx * ny - yid * nx; - - int xs, xe, ys, ye, zs, ze; - if (dir == 0) { - xs = 1; - xe = nx - 2; - ys = 0; - ye = ny; - zs = 0; - ze = nz; - } - if (dir == 1) { - xs = 0; - xe = nx; - ys = 1; - ye = ny - 2; - zs = 0; - ze = nz; - } - if (dir == 2) { - xs = 0; - xe = nx; - ys = 0; - ye = ny; - zs = 1; - ze = nz - 2; + // Set the field indices for the various directions + int o1, o2, o3; + switch (dir) { + case 0: + o1 = grid_enum::momentum_x; + o2 = grid_enum::momentum_y; + o3 = grid_enum::momentum_z; + break; + case 1: + o1 = grid_enum::momentum_y; + o2 = grid_enum::momentum_z; + o3 = grid_enum::momentum_x; + break; + case 2: + o1 = 
grid_enum::momentum_z; + o2 = grid_enum::momentum_x; + o3 = grid_enum::momentum_y; + break; } - if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) { - // load the 3-cell stencil into registers - // cell i - id = xid + yid * nx + zid * nx * ny; - d_i = dev_conserved[id]; - vx_i = dev_conserved[o1 * n_cells + id] / d_i; - vy_i = dev_conserved[o2 * n_cells + id] / d_i; - vz_i = dev_conserved[o3 * n_cells + id] / d_i; - #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else // not DE - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); - #endif // PRESSURE_DE - p_i = fmax(p_i, (Real)TINY_NUMBER); - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_i[i] = dev_conserved[(5 + i) * n_cells + id] / d_i; - } - #endif // SCALAR - #ifdef DE - ge_i = dge / d_i; - #endif // DE - // cell i-1 - if (dir == 0) id = xid - 1 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid - 1) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid - 1) * nx * ny; - d_imo = dev_conserved[id]; - vx_imo = dev_conserved[o1 * n_cells + id] / d_imo; - vy_imo = dev_conserved[o2 * n_cells + id] / d_imo; - vz_imo = dev_conserved[o3 * n_cells + id] / d_imo; - #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else // not DE - p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * - (gamma - 1.0); - #endif // PRESSURE_DE - p_imo = fmax(p_imo, (Real)TINY_NUMBER); - #ifdef SCALAR - for (int i = 0; i < NSCALARS; 
i++) { - scalar_imo[i] = dev_conserved[(5 + i) * n_cells + id] / d_imo; - } - #endif // SCALAR - #ifdef DE - ge_imo = dge / d_imo; - #endif // DE - // cell i+1 - if (dir == 0) id = xid + 1 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid + 1) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid + 1) * nx * ny; - d_ipo = dev_conserved[id]; - vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo; - vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo; - vz_ipo = dev_conserved[o3 * n_cells + id] / d_ipo; - #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else // not DE - p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * - (gamma - 1.0); - #endif // PRESSURE_DE - p_ipo = fmax(p_ipo, (Real)TINY_NUMBER); - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_ipo[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipo; - } - #endif // SCALAR - #ifdef DE - ge_ipo = dge / d_ipo; - #endif // DE - - // calculate the adiabatic sound speed in cell i - a_i = sqrt(gamma * p_i / d_i); - - // Compute the eigenvalues of the linearized equations in the - // primitive variables using the cell-centered primitive variables - #ifndef VL - lambda_m = vx_i - a_i; - lambda_0 = vx_i; - lambda_p = vx_i + a_i; - #endif // VL - - // Compute the left, right, centered, and van Leer differences of the - // primitive variables Note that here L and R refer to locations relative to - // the cell center - - // left - del_d_L = d_i - d_imo; - del_vx_L = vx_i - vx_imo; - del_vy_L = vy_i - vy_imo; - del_vz_L = vz_i - vz_imo; - del_p_L = p_i - p_imo; - - // right - del_d_R = d_ipo - d_i; - del_vx_R = vx_ipo - vx_i; - del_vy_R = vy_ipo - vy_i; - del_vz_R = vz_ipo - vz_i; - del_p_R = p_ipo - p_i; - - // 
centered - del_d_C = 0.5 * (d_ipo - d_imo); - del_vx_C = 0.5 * (vx_ipo - vx_imo); - del_vy_C = 0.5 * (vy_ipo - vy_imo); - del_vz_C = 0.5 * (vz_ipo - vz_imo); - del_p_C = 0.5 * (p_ipo - p_imo); - - // Van Leer - if (del_d_L * del_d_R > 0.0) { - del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); - } else { - del_d_G = 0.0; - } - if (del_vx_L * del_vx_R > 0.0) { - del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); - } else { - del_vx_G = 0.0; - } - if (del_vy_L * del_vy_R > 0.0) { - del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); - } else { - del_vy_G = 0.0; - } - if (del_vz_L * del_vz_R > 0.0) { - del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); - } else { - del_vz_G = 0.0; - } - if (del_p_L * del_p_R > 0.0) { - del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); - } else { - del_p_G = 0.0; - } - - #ifdef DE - del_ge_L = ge_i - ge_imo; - del_ge_R = ge_ipo - ge_i; - del_ge_C = 0.5 * (ge_ipo - ge_imo); - if (del_ge_L * del_ge_R > 0.0) { - del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); - } else { - del_ge_G = 0.0; - } - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_scalar_L[i] = scalar_i[i] - scalar_imo[i]; - del_scalar_R[i] = scalar_ipo[i] - scalar_i[i]; - del_scalar_C[i] = 0.5 * (scalar_ipo[i] - scalar_imo[i]); - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); - } else { - del_scalar_G[i] = 0.0; - } - } - #endif // SCALAR - - // Project the left, right, centered and van Leer differences onto the - // characteristic variables Stone Eqn 37 (del_a are differences in - // characteristic variables, see Stone for notation) Use the eigenvectors - // given in Stone 2008, Appendix A - del_a_0_L = -d_i * del_vx_L / (2 * a_i) + del_p_L / (2 * a_i * a_i); - del_a_1_L = del_d_L - del_p_L / (a_i * a_i); - del_a_2_L = del_vy_L; - del_a_3_L = del_vz_L; - del_a_4_L = d_i * del_vx_L / (2 * a_i) + 
del_p_L / (2 * a_i * a_i); - - del_a_0_R = -d_i * del_vx_R / (2 * a_i) + del_p_R / (2 * a_i * a_i); - del_a_1_R = del_d_R - del_p_R / (a_i * a_i); - del_a_2_R = del_vy_R; - del_a_3_R = del_vz_R; - del_a_4_R = d_i * del_vx_R / (2 * a_i) + del_p_R / (2 * a_i * a_i); - - del_a_0_C = -d_i * del_vx_C / (2 * a_i) + del_p_C / (2 * a_i * a_i); - del_a_1_C = del_d_C - del_p_C / (a_i * a_i); - del_a_2_C = del_vy_C; - del_a_3_C = del_vz_C; - del_a_4_C = d_i * del_vx_C / (2 * a_i) + del_p_C / (2 * a_i * a_i); - - del_a_0_G = -d_i * del_vx_G / (2 * a_i) + del_p_G / (2 * a_i * a_i); - del_a_1_G = del_d_G - del_p_G / (a_i * a_i); - del_a_2_G = del_vy_G; - del_a_3_G = del_vz_G; - del_a_4_G = d_i * del_vx_G / (2 * a_i) + del_p_G / (2 * a_i * a_i); - - // Apply monotonicity constraints to the differences in the characteristic - // variables - - del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; - - if (del_a_0_L * del_a_0_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); - lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin(2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_1_L * del_a_1_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); - lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin(2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_2_L * del_a_2_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); - lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = sgn_CUDA(del_a_2_C) * fmin(2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_3_L * del_a_3_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); - lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin(2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_4_L * del_a_4_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); - lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * 
fmin(2.0 * lim_slope_a, lim_slope_b); - } - #ifdef DE - del_ge_m_i = 0.0; - if (del_ge_L * del_ge_R > 0.0) { - lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); - lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_i = sgn_CUDA(del_ge_C) * fmin(2.0 * lim_slope_a, lim_slope_b); - } - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_scalar_m_i[i] = 0.0; - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * fmin(2.0 * lim_slope_a, lim_slope_b); - } - } - #endif // SCALAR - - // Project the monotonized difference in the characteristic variables back - // onto the primitive variables Stone Eqn 39 - del_d_m_i = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_i = -a_i * del_a_0_m / d_i + a_i * del_a_4_m / d_i; - del_vy_m_i = del_a_2_m; - del_vz_m_i = del_a_3_m; - del_p_m_i = a_i * a_i * del_a_0_m + a_i * a_i * del_a_4_m; - - // Compute the left and right interface values using the monotonized - // difference in the primitive variables - - d_R_imh = d_i - 0.5 * del_d_m_i; - vx_R_imh = vx_i - 0.5 * del_vx_m_i; - vy_R_imh = vy_i - 0.5 * del_vy_m_i; - vz_R_imh = vz_i - 0.5 * del_vz_m_i; - p_R_imh = p_i - 0.5 * del_p_m_i; - - d_L_iph = d_i + 0.5 * del_d_m_i; - vx_L_iph = vx_i + 0.5 * del_vx_m_i; - vy_L_iph = vy_i + 0.5 * del_vy_m_i; - vz_L_iph = vz_i + 0.5 * del_vz_m_i; - p_L_iph = p_i + 0.5 * del_p_m_i; - - #ifdef DE - ge_R_imh = ge_i - 0.5 * del_ge_m_i; - ge_L_iph = ge_i + 0.5 * del_ge_m_i; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_R_imh[i] = scalar_i[i] - 0.5 * del_scalar_m_i[i]; - scalar_L_iph[i] = scalar_i[i] + 0.5 * del_scalar_m_i[i]; - } - #endif // SCALAR - - C = d_R_imh + d_L_iph; - d_R_imh = fmax(fmin(d_i, d_imo), d_R_imh); - d_R_imh = fmin(fmax(d_i, d_imo), d_R_imh); - d_L_iph = C - d_R_imh; - d_L_iph = 
fmax(fmin(d_i, d_ipo), d_L_iph); - d_L_iph = fmin(fmax(d_i, d_ipo), d_L_iph); - d_R_imh = C - d_L_iph; - - C = vx_R_imh + vx_L_iph; - vx_R_imh = fmax(fmin(vx_i, vx_imo), vx_R_imh); - vx_R_imh = fmin(fmax(vx_i, vx_imo), vx_R_imh); - vx_L_iph = C - vx_R_imh; - vx_L_iph = fmax(fmin(vx_i, vx_ipo), vx_L_iph); - vx_L_iph = fmin(fmax(vx_i, vx_ipo), vx_L_iph); - vx_R_imh = C - vx_L_iph; - - C = vy_R_imh + vy_L_iph; - vy_R_imh = fmax(fmin(vy_i, vy_imo), vy_R_imh); - vy_R_imh = fmin(fmax(vy_i, vy_imo), vy_R_imh); - vy_L_iph = C - vy_R_imh; - vy_L_iph = fmax(fmin(vy_i, vy_ipo), vy_L_iph); - vy_L_iph = fmin(fmax(vy_i, vy_ipo), vy_L_iph); - vy_R_imh = C - vy_L_iph; - - C = vz_R_imh + vz_L_iph; - vz_R_imh = fmax(fmin(vz_i, vz_imo), vz_R_imh); - vz_R_imh = fmin(fmax(vz_i, vz_imo), vz_R_imh); - vz_L_iph = C - vz_R_imh; - vz_L_iph = fmax(fmin(vz_i, vz_ipo), vz_L_iph); - vz_L_iph = fmin(fmax(vz_i, vz_ipo), vz_L_iph); - vz_R_imh = C - vz_L_iph; - - C = p_R_imh + p_L_iph; - p_R_imh = fmax(fmin(p_i, p_imo), p_R_imh); - p_R_imh = fmin(fmax(p_i, p_imo), p_R_imh); - p_L_iph = C - p_R_imh; - p_L_iph = fmax(fmin(p_i, p_ipo), p_L_iph); - p_L_iph = fmin(fmax(p_i, p_ipo), p_L_iph); - p_R_imh = C - p_L_iph; - - del_d_m_i = d_L_iph - d_R_imh; - del_vx_m_i = vx_L_iph - vx_R_imh; - del_vy_m_i = vy_L_iph - vy_R_imh; - del_vz_m_i = vz_L_iph - vz_R_imh; - del_p_m_i = p_L_iph - p_R_imh; - - #ifdef DE - C = ge_R_imh + ge_L_iph; - ge_R_imh = fmax(fmin(ge_i, ge_imo), ge_R_imh); - ge_R_imh = fmin(fmax(ge_i, ge_imo), ge_R_imh); - ge_L_iph = C - ge_R_imh; - ge_L_iph = fmax(fmin(ge_i, ge_ipo), ge_L_iph); - ge_L_iph = fmin(fmax(ge_i, ge_ipo), ge_L_iph); - ge_R_imh = C - ge_L_iph; - del_ge_m_i = ge_L_iph - ge_R_imh; - #endif // DE - - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - C = scalar_R_imh[i] + scalar_L_iph[i]; - scalar_R_imh[i] = fmax(fmin(scalar_i[i], scalar_imo[i]), scalar_R_imh[i]); - scalar_R_imh[i] = fmin(fmax(scalar_i[i], scalar_imo[i]), scalar_R_imh[i]); - scalar_L_iph[i] = C - 
scalar_R_imh[i]; - scalar_L_iph[i] = fmax(fmin(scalar_i[i], scalar_ipo[i]), scalar_L_iph[i]); - scalar_L_iph[i] = fmin(fmax(scalar_i[i], scalar_ipo[i]), scalar_L_iph[i]); - scalar_R_imh[i] = C - scalar_L_iph[i]; - del_scalar_m_i[i] = scalar_L_iph[i] - scalar_R_imh[i]; - } - #endif // SCALAR - - #ifndef VL - // Integrate linear interpolation function over domain of dependence - // defined by max(min) eigenvalue - qx = -0.5 * fmin(lambda_m, 0.0) * dtodx; - d_R_imh = d_R_imh + qx * del_d_m_i; - vx_R_imh = vx_R_imh + qx * del_vx_m_i; - vy_R_imh = vy_R_imh + qx * del_vy_m_i; - vz_R_imh = vz_R_imh + qx * del_vz_m_i; - p_R_imh = p_R_imh + qx * del_p_m_i; - - qx = 0.5 * fmax(lambda_p, 0.0) * dtodx; - d_L_iph = d_L_iph - qx * del_d_m_i; - vx_L_iph = vx_L_iph - qx * del_vx_m_i; - vy_L_iph = vy_L_iph - qx * del_vy_m_i; - vz_L_iph = vz_L_iph - qx * del_vz_m_i; - p_L_iph = p_L_iph - qx * del_p_m_i; - - #ifdef DE - ge_R_imh = ge_R_imh + qx * del_ge_m_i; - ge_L_iph = ge_L_iph - qx * del_ge_m_i; - #endif // DE - - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_R_imh[i] = scalar_R_imh[i] + qx * del_scalar_m_i[i]; - scalar_L_iph[i] = scalar_L_iph[i] - qx * del_scalar_m_i[i]; - } - #endif // SCALAR + // load the 3-cell stencil into registers + // cell i + reconstruction::Primitive const cell_i = + reconstruction::Load_Data(dev_conserved, xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma); + + // cell i-1. The equality checks the direction and will subtract one from the correct direction + reconstruction::Primitive const cell_imo = reconstruction::Load_Data( + dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); + + // cell i+1. 
The equality checks the direction and add one to the correct direction + reconstruction::Primitive const cell_ipo = reconstruction::Load_Data( + dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); + + // calculate the adiabatic sound speed in cell i + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; + +// Compute the eigenvectors +#ifdef MHD + reconstruction::EigenVecs const eigenvectors = + reconstruction::Compute_Eigenvectors(cell_i, sound_speed, sound_speed_squared, gamma); +#else + reconstruction::EigenVecs eigenvectors; +#endif // MHD + + // Compute the left, right, centered, and van Leer differences of the + // primitive variables Note that here L and R refer to locations relative to + // the cell center + + // left + reconstruction::Primitive const del_L = reconstruction::Compute_Slope(cell_imo, cell_i); + + // right + reconstruction::Primitive const del_R = reconstruction::Compute_Slope(cell_i, cell_ipo); + + // centered + reconstruction::Primitive const del_C = reconstruction::Compute_Slope(cell_imo, cell_ipo, 0.5); + + // Van Leer + reconstruction::Primitive const del_G = reconstruction::Van_Leer_Slope(del_L, del_R); + + // Project the left, right, centered and van Leer differences onto the + // characteristic variables Stone Eqn 37 (del_a are differences in + // characteristic variables, see Stone for notation) Use the eigenvectors + // given in Stone 2008, Appendix A + reconstruction::Characteristic const del_a_L = + reconstruction::Primitive_To_Characteristic(cell_i, del_L, eigenvectors, sound_speed, sound_speed_squared, gamma); + + reconstruction::Characteristic const del_a_R = + reconstruction::Primitive_To_Characteristic(cell_i, del_R, eigenvectors, sound_speed, sound_speed_squared, gamma); + + reconstruction::Characteristic const del_a_C = + 
reconstruction::Primitive_To_Characteristic(cell_i, del_C, eigenvectors, sound_speed, sound_speed_squared, gamma); + + reconstruction::Characteristic const del_a_G = + reconstruction::Primitive_To_Characteristic(cell_i, del_G, eigenvectors, sound_speed, sound_speed_squared, gamma); + + // Apply monotonicity constraints to the differences in the characteristic variables and project the monotonized + // difference in the characteristic variables back onto the primitive variables Stone Eqn 39 + reconstruction::Primitive del_m_i = reconstruction::Monotonize_Characteristic_Return_Primitive( + cell_i, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvectors, sound_speed, + sound_speed_squared, gamma); + + // Compute the left and right interface values using the monotonized difference in the primitive variables + reconstruction::Primitive interface_L_iph = reconstruction::Calc_Interface_Linear(cell_i, del_m_i, 1.0); + reconstruction::Primitive interface_R_imh = reconstruction::Calc_Interface_Linear(cell_i, del_m_i, -1.0); + +#ifndef VL + + Real const dtodx = dt / dx; + + // Compute the eigenvalues of the linearized equations in the + // primitive variables using the cell-centered primitive variables + Real const lambda_m = cell_i.velocity_x - sound_speed; + Real const lambda_0 = cell_i.velocity_x; + Real const lambda_p = cell_i.velocity_x + sound_speed; + + // Integrate linear interpolation function over domain of dependence + // defined by max(min) eigenvalue + Real qx = -0.5 * fmin(lambda_m, 0.0) * dtodx; + interface_R_imh.density = interface_R_imh.density + qx * del_m_i.density; + interface_R_imh.velocity_x = interface_R_imh.velocity_x + qx * del_m_i.velocity_x; + interface_R_imh.velocity_y = interface_R_imh.velocity_y + qx * del_m_i.velocity_y; + interface_R_imh.velocity_z = interface_R_imh.velocity_z + qx * del_m_i.velocity_z; + interface_R_imh.pressure = interface_R_imh.pressure + qx * del_m_i.pressure; + + qx = 0.5 * fmax(lambda_p, 0.0) * dtodx; 
+ interface_L_iph.density = interface_L_iph.density - qx * del_m_i.density; + interface_L_iph.velocity_x = interface_L_iph.velocity_x - qx * del_m_i.velocity_x; + interface_L_iph.velocity_y = interface_L_iph.velocity_y - qx * del_m_i.velocity_y; + interface_L_iph.velocity_z = interface_L_iph.velocity_z - qx * del_m_i.velocity_z; + interface_L_iph.pressure = interface_L_iph.pressure - qx * del_m_i.pressure; + + #ifdef DE + interface_R_imh.gas_energy = interface_R_imh.gas_energy + qx * del_m_i.gas_energy; + interface_L_iph.gas_energy = interface_L_iph.gas_energy - qx * del_m_i.gas_energy; + #endif // DE + + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_R_imh.scalar[i] = interface_R_imh.scalar[i] + qx * del_m_i.scalar[i]; + interface_L_iph.scalar[i] = interface_L_iph.scalar[i] - qx * del_m_i.scalar[i]; + } + #endif // SCALAR - // Perform the characteristic tracing - // Stone Eqns 42 & 43 + // Perform the characteristic tracing + // Stone Eqns 42 & 43 - // left-hand interface value, i+1/2 - sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0; - #ifdef DE - sum_ge = 0; - #endif // DE - #ifdef SCALAR + // left-hand interface value, i+1/2 + Real sum_0 = 0.0, sum_1 = 0.0, sum_2 = 0.0, sum_3 = 0.0, sum_4 = 0.0; + #ifdef DE + Real sum_ge = 0; + #endif // DE + #ifdef SCALAR + Real sum_scalar[NSCALARS]; + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] = 0.0; + } + #endif // SCALAR + if (lambda_m >= 0) { + Real lamdiff = lambda_p - lambda_m; + + sum_0 += lamdiff * + (-cell_i.density * del_m_i.velocity_x / (2 * sound_speed) + del_m_i.pressure / (2 * sound_speed_squared)); + sum_1 += lamdiff * (del_m_i.velocity_x / 2.0 - del_m_i.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (-cell_i.density * del_m_i.velocity_x * sound_speed / 2.0 + del_m_i.pressure / 2.0); + } + if (lambda_0 >= 0) { + Real lamdiff = lambda_p - lambda_0; + + sum_0 += lamdiff * (del_m_i.density - del_m_i.pressure / (sound_speed_squared)); + sum_2 += lamdiff * 
del_m_i.velocity_y; + sum_3 += lamdiff * del_m_i.velocity_z; + #ifdef DE + sum_ge += lamdiff * del_m_i.gas_energy; + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] = 0.0; + sum_scalar[i] += lamdiff * del_m_i.scalar[i]; } - #endif // SCALAR - if (lambda_m >= 0) { - lamdiff = lambda_p - lambda_m; - - sum_0 += lamdiff * (-d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 - del_p_m_i / (2 * a_i * d_i)); - sum_4 += lamdiff * (-d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); - } - if (lambda_0 >= 0) { - lamdiff = lambda_p - lambda_0; - - sum_0 += lamdiff * (del_d_m_i - del_p_m_i / (a_i * a_i)); - sum_2 += lamdiff * del_vy_m_i; - sum_3 += lamdiff * del_vz_m_i; - #ifdef DE - sum_ge += lamdiff * del_ge_m_i; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += lamdiff * del_scalar_m_i[i]; - } - #endif // SCALAR - } - if (lambda_p >= 0) { - lamdiff = lambda_p - lambda_p; + #endif // SCALAR + } + if (lambda_p >= 0) { + Real lamdiff = lambda_p - lambda_p; - sum_0 += lamdiff * (d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * a_i * d_i)); - sum_4 += lamdiff * (d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); - } + sum_0 += lamdiff * + (cell_i.density * del_m_i.velocity_x / (2 * sound_speed) + del_m_i.pressure / (2 * sound_speed_squared)); + sum_1 += lamdiff * (del_m_i.velocity_x / 2.0 + del_m_i.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (cell_i.density * del_m_i.velocity_x * sound_speed / 2.0 + del_m_i.pressure / 2.0); + } - // add the corrections to the initial guesses for the interface values - d_L_iph += 0.5 * dtodx * sum_0; - vx_L_iph += 0.5 * dtodx * sum_1; - vy_L_iph += 0.5 * dtodx * sum_2; - vz_L_iph += 0.5 * dtodx * sum_3; - p_L_iph += 0.5 * dtodx * sum_4; - #ifdef DE - ge_L_iph += 0.5 * dtodx * sum_ge; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < 
NSCALARS; i++) { - scalar_L_iph[i] += 0.5 * dtodx * sum_scalar[i]; - } - #endif // SCALAR - - // right-hand interface value, i-1/2 - sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0; - #ifdef DE - sum_ge = 0; - #endif // DE - #ifdef SCALAR + // add the corrections to the initial guesses for the interface values + interface_L_iph.density += 0.5 * dtodx * sum_0; + interface_L_iph.velocity_x += 0.5 * dtodx * sum_1; + interface_L_iph.velocity_y += 0.5 * dtodx * sum_2; + interface_L_iph.velocity_z += 0.5 * dtodx * sum_3; + interface_L_iph.pressure += 0.5 * dtodx * sum_4; + #ifdef DE + interface_L_iph.gas_energy += 0.5 * dtodx * sum_ge; + #endif // DE + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_L_iph.scalar[i] += 0.5 * dtodx * sum_scalar[i]; + } + #endif // SCALAR + + // right-hand interface value, i-1/2 + sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0; + #ifdef DE + sum_ge = 0; + #endif // DE + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] = 0; + } + #endif // SCALAR + if (lambda_m <= 0) { + Real lamdiff = lambda_m - lambda_m; + + sum_0 += lamdiff * + (-cell_i.density * del_m_i.velocity_x / (2 * sound_speed) + del_m_i.pressure / (2 * sound_speed_squared)); + sum_1 += lamdiff * (del_m_i.velocity_x / 2.0 - del_m_i.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (-cell_i.density * del_m_i.velocity_x * sound_speed / 2.0 + del_m_i.pressure / 2.0); + } + if (lambda_0 <= 0) { + Real lamdiff = lambda_m - lambda_0; + + sum_0 += lamdiff * (del_m_i.density - del_m_i.pressure / (sound_speed_squared)); + sum_2 += lamdiff * del_m_i.velocity_y; + sum_3 += lamdiff * del_m_i.velocity_z; + #ifdef DE + sum_ge += lamdiff * del_m_i.gas_energy; + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] = 0; - } - #endif // SCALAR - if (lambda_m <= 0) { - lamdiff = lambda_m - lambda_m; - - sum_0 += lamdiff * (-d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 - 
del_p_m_i / (2 * a_i * d_i)); - sum_4 += lamdiff * (-d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); - } - if (lambda_0 <= 0) { - lamdiff = lambda_m - lambda_0; - - sum_0 += lamdiff * (del_d_m_i - del_p_m_i / (a_i * a_i)); - sum_2 += lamdiff * del_vy_m_i; - sum_3 += lamdiff * del_vz_m_i; - #ifdef DE - sum_ge += lamdiff * del_ge_m_i; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += lamdiff * del_scalar_m_i[i]; - } - #endif // SCALAR + sum_scalar[i] += lamdiff * del_m_i.scalar[i]; } - if (lambda_p <= 0) { - lamdiff = lambda_m - lambda_p; + #endif // SCALAR + } + if (lambda_p <= 0) { + Real lamdiff = lambda_m - lambda_p; - sum_0 += lamdiff * (d_i * del_vx_m_i / (2 * a_i) + del_p_m_i / (2 * a_i * a_i)); - sum_1 += lamdiff * (del_vx_m_i / 2.0 + del_p_m_i / (2 * a_i * d_i)); - sum_4 += lamdiff * (d_i * del_vx_m_i * a_i / 2.0 + del_p_m_i / 2.0); - } + sum_0 += lamdiff * + (cell_i.density * del_m_i.velocity_x / (2 * sound_speed) + del_m_i.pressure / (2 * sound_speed_squared)); + sum_1 += lamdiff * (del_m_i.velocity_x / 2.0 + del_m_i.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (cell_i.density * del_m_i.velocity_x * sound_speed / 2.0 + del_m_i.pressure / 2.0); + } - // add the corrections - d_R_imh += 0.5 * dtodx * sum_0; - vx_R_imh += 0.5 * dtodx * sum_1; - vy_R_imh += 0.5 * dtodx * sum_2; - vz_R_imh += 0.5 * dtodx * sum_3; - p_R_imh += 0.5 * dtodx * sum_4; - #ifdef DE - ge_R_imh += 0.5 * dtodx * sum_ge; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_R_imh[i] += 0.5 * dtodx * sum_scalar[i]; - } - #endif // SCALAR - #endif // CTU - - // apply minimum constraints - d_R_imh = fmax(d_R_imh, (Real)TINY_NUMBER); - d_L_iph = fmax(d_L_iph, (Real)TINY_NUMBER); - p_R_imh = fmax(p_R_imh, (Real)TINY_NUMBER); - p_L_iph = fmax(p_L_iph, (Real)TINY_NUMBER); - - // Convert the left and right states in the primitive to the conserved - // variables send final values back from kernel bounds_R 
refers to the right - // side of the i-1/2 interface - if (dir == 0) id = xid - 1 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid - 1) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid - 1) * nx * ny; - dev_bounds_R[id] = d_R_imh; - dev_bounds_R[o1 * n_cells + id] = d_R_imh * vx_R_imh; - dev_bounds_R[o2 * n_cells + id] = d_R_imh * vy_R_imh; - dev_bounds_R[o3 * n_cells + id] = d_R_imh * vz_R_imh; - dev_bounds_R[4 * n_cells + id] = - (p_R_imh / (gamma - 1.0)) + 0.5 * d_R_imh * (vx_R_imh * vx_R_imh + vy_R_imh * vy_R_imh + vz_R_imh * vz_R_imh); - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dev_bounds_R[(5 + i) * n_cells + id] = d_R_imh * scalar_R_imh[i]; - } - #endif // SCALAR - #ifdef DE - dev_bounds_R[(n_fields - 1) * n_cells + id] = d_R_imh * ge_R_imh; - #endif // DE - // bounds_L refers to the left side of the i+1/2 interface - id = xid + yid * nx + zid * nx * ny; - dev_bounds_L[id] = d_L_iph; - dev_bounds_L[o1 * n_cells + id] = d_L_iph * vx_L_iph; - dev_bounds_L[o2 * n_cells + id] = d_L_iph * vy_L_iph; - dev_bounds_L[o3 * n_cells + id] = d_L_iph * vz_L_iph; - dev_bounds_L[4 * n_cells + id] = - (p_L_iph / (gamma - 1.0)) + 0.5 * d_L_iph * (vx_L_iph * vx_L_iph + vy_L_iph * vy_L_iph + vz_L_iph * vz_L_iph); - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dev_bounds_L[(5 + i) * n_cells + id] = d_L_iph * scalar_L_iph[i]; - } - #endif // SCALAR - #ifdef DE - dev_bounds_L[(n_fields - 1) * n_cells + id] = d_L_iph * ge_L_iph; - #endif // DE + // add the corrections + interface_R_imh.density += 0.5 * dtodx * sum_0; + interface_R_imh.velocity_x += 0.5 * dtodx * sum_1; + interface_R_imh.velocity_y += 0.5 * dtodx * sum_2; + interface_R_imh.velocity_z += 0.5 * dtodx * sum_3; + interface_R_imh.pressure += 0.5 * dtodx * sum_4; + #ifdef DE + interface_R_imh.gas_energy += 0.5 * dtodx * sum_ge; + #endif // DE + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_R_imh.scalar[i] += 0.5 * dtodx * sum_scalar[i]; } + #endif // 
SCALAR +#endif // CTU + + // apply minimum constraints + interface_R_imh.density = fmax(interface_R_imh.density, (Real)TINY_NUMBER); + interface_L_iph.density = fmax(interface_L_iph.density, (Real)TINY_NUMBER); + interface_R_imh.pressure = fmax(interface_R_imh.pressure, (Real)TINY_NUMBER); + interface_L_iph.pressure = fmax(interface_L_iph.pressure, (Real)TINY_NUMBER); + + // Convert the left and right states in the primitive to the conserved variables send final values back from kernel + // bounds_R refers to the right side of the i-1/2 interface + size_t id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + reconstruction::Write_Data(interface_L_iph, dev_bounds_L, dev_conserved, id, n_cells, o1, o2, o3, gamma); + + id = cuda_utilities::compute1DIndex(xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny); + reconstruction::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma); } - - #endif // PLMC -#endif // CUDA diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 36c707354..c2d25df84 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -2,22 +2,20 @@ * \brief Declarations of the cuda plm kernels, characteristic reconstruction * version. */ -#ifdef CUDA - #ifdef PLMC +#ifndef PLMC_CUDA_H +#define PLMC_CUDA_H - #ifndef PLMC_CUDA_H - #define PLMC_CUDA_H - - #include "../global/global.h" +#include "../global/global.h" +#include "../grid/grid_enum.h" +#include "../utils/hydro_utilities.h" +#include "../utils/mhd_utilities.h" /*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir) * \brief When passed a stencil of conserved variables, returns the left and right boundary values for the interface calculated using plm. 
*/ -__global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields); +__global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, + int ny, int nz, Real dx, Real dt, Real gamma, int dir, int n_fields); - #endif // PLMC_CUDA_H - #endif // PLMC -#endif // CUDA +#endif // PLMC_CUDA_H diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu new file mode 100644 index 000000000..3616d2d0a --- /dev/null +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -0,0 +1,280 @@ +/*! + * \file plmc_cuda_tests.cu + * \brief Tests for the contents of plmc_cuda.h and plmc_cuda.cu + * + */ + +// STL Includes +#include +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include + +#include "../global/global.h" +#include "../io/io.h" +#include "../reconstruction/plmc_cuda.h" +#include "../utils/DeviceVector.h" +#include "../utils/hydro_utilities.h" +#include "../utils/testing_utilities.h" + +TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) +{ + // Set up PRNG to use + std::mt19937_64 prng(42); + std::uniform_real_distribution doubleRand(0.1, 5); + + // Mock up needed information + size_t const nx = 5; + size_t const ny = 4; + size_t const nz = 4; + size_t const n_fields = 5; + double const dx = doubleRand(prng); + double const dt = doubleRand(prng); + double const gamma = 5.0 / 3.0; + + // Setup host grid. 
Fill host grid with random values and randomly assign maximum value + std::vector host_grid(nx * ny * nz * n_fields); + for (Real &val : host_grid) { + val = doubleRand(prng); + } + + // Allocating and copying to device + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + dev_grid.cpyHostToDevice(host_grid); + + // Fiducial Data + std::vector> fiducial_interface_left = {{{26, 2.1584359129984056}, + {27, 0.70033864721549188}, + {106, 2.2476363309467553}, + {107, 3.0633780053857027}, + {186, 2.2245934101106259}, + {187, 2.1015872413794123}, + {266, 2.1263341057778309}, + {267, 3.9675148506537838}, + {346, 3.3640057502842691}, + {347, 21.091316282933843}}, + {{21, 0.72430827309279655}, + {37, 0.19457128219588618}, + {101, 5.4739527659741896}, + {117, 4.4286255636679313}, + {181, 0.12703829036056602}, + {197, 2.2851440769830953}, + {261, 1.5337035731959561}, + {277, 2.697375839048191}, + {341, 22.319601655044117}, + {357, 82.515887983144168}}, + {{25, 2.2863650183226212}, + {29, 1.686415421301841}, + {105, 0.72340346106443465}, + {109, 5.4713687086831388}, + {185, 3.929100145230096}, + {189, 4.9166140516911483}, + {265, 0.95177493689267167}, + {269, 0.46056494878491938}, + {345, 3.6886096301452787}, + {349, 16.105488797582133}}}; + std::vector> fiducial_interface_right = {{{25, 3.8877922383184833}, + {26, 0.70033864721549188}, + {105, 1.5947787943675635}, + {106, 3.0633780053857027}, + {185, 4.0069556576401011}, + {186, 2.1015872413794123}, + {265, 1.7883678016935785}, + {266, 3.9675148506537838}, + {345, 2.8032969746372527}, + {346, 21.091316282933843}}, + {{17, 0.43265217076853835}, + {33, 0.19457128219588618}, + {97, 3.2697645945288754}, + {113, 4.4286255636679313}, + {177, 0.07588397666718491}, + {193, 2.2851440769830953}, + {257, 0.91612950577699748}, + {273, 2.697375839048191}, + {337, 13.332201861384396}, + {353, 82.515887983144168}}, + {{5, 2.2863650183226212}, + {9, 1.686415421301841}, + {85, 0.72340346106443465}, + {89, 1.7792505446336098}, + {165, 
5.3997753452111859}, + {169, 1.4379190463124139}, + {245, 0.95177493689267167}, + {249, 0.46056494878491938}, + {325, 6.6889498465051407}, + {329, 1.6145084086614281}}}; + + // Loop over different directions + for (size_t direction = 0; direction < 3; direction++) { + // Assign the shape + size_t nx_rot, ny_rot, nz_rot; + switch (direction) { + case 0: + nx_rot = nx; + ny_rot = ny; + nz_rot = nz; + break; + case 1: + nx_rot = ny; + ny_rot = nz; + nz_rot = nx; + break; + case 2: + nx_rot = nz; + ny_rot = nx; + nz_rot = ny; + break; + } + + // Allocate device buffers + cuda_utilities::DeviceVector dev_interface_left(host_grid.size(), true); + cuda_utilities::DeviceVector dev_interface_right(host_grid.size(), true); + + // Launch kernel + hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma, direction, n_fields); + CudaCheckError(); + CHECK(cudaDeviceSynchronize()); + + // Perform Comparison + for (size_t i = 0; i < host_grid.size(); i++) { + // Check the left interface + double test_val = dev_interface_left.at(i); + double fiducial_val = + (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) + ? 0.0 + : fiducial_interface_left.at(direction)[i]; + + testingUtilities::checkResults( + fiducial_val, test_val, + "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + + // Check the right interface + test_val = dev_interface_right.at(i); + fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) + ? 
0.0 + : fiducial_interface_right.at(direction)[i]; + + testingUtilities::checkResults( + fiducial_val, test_val, + "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + } + } +} + +TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) +{ + // Set up PRNG to use + std::mt19937_64 prng(42); + std::uniform_real_distribution doubleRand(0.1, 5); + + // Mock up needed information + size_t const nx = 4, ny = nx, nz = nx; + size_t const n_fields = 8; + size_t const n_cells_grid = nx * ny * nz * n_fields; + size_t const n_cells_interface = nx * ny * nz * (n_fields - 1); + double const dx = doubleRand(prng); + double const dt = doubleRand(prng); + double const gamma = 5.0 / 3.0; + + // Setup host grid. Fill host grid with random values and randomly assign maximum value + std::vector host_grid(n_cells_grid); + for (Real &val : host_grid) { + val = doubleRand(prng); + } + + // Allocating and copying to device + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + dev_grid.cpyHostToDevice(host_grid); + + // Fiducial Data + std::vector> fiducial_interface_left = {{{21, 0.59023012197434721}, + {85, 3.0043379408547275}, + {149, 2.6320759184913625}, + {213, 0.9487867623146744}, + {277, 18.551193003661723}, + {341, 1.8587936590169301}, + {405, 2.1583975283044725}}, + {{21, 0.73640639402573249}, + {85, 3.3462413154443715}, + {149, 2.1945584994458125}, + {213, 0.67418839414138987}, + {277, 16.909618487528142}, + {341, 2.1533768050263267}, + {405, 1.6994195863331925}}, + {{21, 0.25340904981266843}, + {85, 2.0441984720128734}, + {149, 1.9959059157695584}, + {213, 0.45377591914009824}, + {277, 23.677832869261188}, + {341, 1.5437923271692418}, + {405, 1.8141353672443383}}}; + std::vector> fiducial_interface_right = {{{20, 0.59023012197434721}, + {84, 3.0043379408547275}, + {148, 2.6320759184913625}, + {212, 0.9487867623146744}, + {276, 22.111134849009044}, + {340, 1.8587936590169301}, + {404, 2.1583975283044725}}, + { + {17, 
0.44405384992296193}, + {81, 2.5027813113931279}, + {145, 2.6371119205792346}, + {209, 1.0210845222961809}, + {273, 21.360010722689488}, + {337, 2.1634182515826184}, + {401, 1.7073441775673177}, + }, + { + {5, 0.92705119413602599}, + {69, 1.9592598982258778}, + {133, 0.96653490574340428}, + {197, 1.3203867992383289}, + {261, 8.0057564947791793}, + {325, 1.8629714367312684}, + {389, 1.9034519507895218}, + }}; + + // Loop over different directions + for (size_t direction = 0; direction < 3; direction++) { + // Allocate device buffers + cuda_utilities::DeviceVector dev_interface_left(n_cells_interface, true); + cuda_utilities::DeviceVector dev_interface_right(n_cells_interface, true); + + // Launch kernel + hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, direction, n_fields); + CudaCheckError(); + CHECK(cudaDeviceSynchronize()); + + // Perform Comparison + for (size_t i = 0; i < dev_interface_right.size(); i++) { + // Check the left interface + double test_val = dev_interface_left.at(i); + double fiducial_val = + (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) + ? 0.0 + : fiducial_interface_left.at(direction)[i]; + + testingUtilities::checkResults( + fiducial_val, test_val, + "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + + // Check the right interface + test_val = dev_interface_right.at(i); + fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) + ? 
0.0 + : fiducial_interface_right.at(direction)[i]; + + testingUtilities::checkResults( + fiducial_val, test_val, + "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + } + } +} diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 10c6a788d..4db993d70 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -1,1262 +1,696 @@ /*! \file ppmc_cuda.cu * \brief Functions definitions for the ppm kernels, using characteristic tracing. Written following Stone et al. 2008. */ -#ifdef CUDA - #ifdef PPMC - #include +#include - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../reconstruction/ppmc_cuda.h" - #include "../utils/gpu.hpp" - #include "../utils/hydro_utilities.h" +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../reconstruction/ppmc_cuda.h" +#include "../reconstruction/reconstruction.h" +#include "../utils/gpu.hpp" +#include "../utils/hydro_utilities.h" - #ifdef DE // PRESSURE_DE - #include "../utils/hydro_utilities.h" - #endif +#ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" +#endif -/*! \fn void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real - *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real - gamma, int dir, int n_fields) +// ===================================================================================================================== +/*! * \brief When passed a stencil of conserved variables, returns the left and right boundary values for the interface calculated using ppm. 
*/ -__global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) +__global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, + Real dt, Real gamma, int dir) { - int n_cells = nx * ny * nz; - int o1, o2, o3; - if (dir == 0) { - o1 = 1; - o2 = 2; - o3 = 3; - } - if (dir == 1) { - o1 = 2; - o2 = 3; - o3 = 1; - } - if (dir == 2) { - o1 = 3; - o2 = 1; - o3 = 2; + // get a thread ID + int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; + int xid, yid, zid; + cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); + + if (reconstruction::Thread_Guard<3>(nx, ny, nz, xid, yid, zid)) { + return; } - // declare primitive variables for each stencil - // these will be placed into registers for each thread - Real d_i, vx_i, vy_i, vz_i, p_i; - Real d_imo, vx_imo, vy_imo, vz_imo, p_imo; - Real d_ipo, vx_ipo, vy_ipo, vz_ipo, p_ipo; - Real d_imt, vx_imt, vy_imt, vz_imt, p_imt; - Real d_ipt, vx_ipt, vy_ipt, vz_ipt, p_ipt; - - // declare other variables to be used - Real a; - Real del_d_L, del_vx_L, del_vy_L, del_vz_L, del_p_L; - Real del_d_R, del_vx_R, del_vy_R, del_vz_R, del_p_R; - Real del_d_C, del_vx_C, del_vy_C, del_vz_C, del_p_C; - Real del_d_G, del_vx_G, del_vy_G, del_vz_G, del_p_G; - Real del_a_0_L, del_a_1_L, del_a_2_L, del_a_3_L, del_a_4_L; - Real del_a_0_R, del_a_1_R, del_a_2_R, del_a_3_R, del_a_4_R; - Real del_a_0_C, del_a_1_C, del_a_2_C, del_a_3_C, del_a_4_C; - Real del_a_0_G, del_a_1_G, del_a_2_G, del_a_3_G, del_a_4_G; - Real del_a_0_m, del_a_1_m, del_a_2_m, del_a_3_m, del_a_4_m; - Real lim_slope_a, lim_slope_b; - Real del_d_m_imo, del_vx_m_imo, del_vy_m_imo, del_vz_m_imo, del_p_m_imo; - Real del_d_m_i, del_vx_m_i, del_vy_m_i, del_vz_m_i, del_p_m_i; - Real del_d_m_ipo, del_vx_m_ipo, del_vy_m_ipo, del_vz_m_ipo, del_p_m_ipo; - Real d_L, vx_L, vy_L, vz_L, p_L; - Real d_R, vx_R, 
vy_R, vz_R, p_R; - - // #ifdef CTU - #ifndef VL - Real dtodx = dt / dx; - Real d_6, vx_6, vy_6, vz_6, p_6; - Real lambda_m, lambda_0, lambda_p; - Real lambda_max, lambda_min; - Real A, B, C, D; - Real chi_1, chi_2, chi_3, chi_4, chi_5; - Real sum_1, sum_2, sum_3, sum_4, sum_5; - #endif // VL - - #ifdef DE - Real ge_i, ge_imo, ge_ipo, ge_imt, ge_ipt; - Real del_ge_L, del_ge_R, del_ge_C, del_ge_G; - Real del_ge_m_imo, del_ge_m_i, del_ge_m_ipo; - Real ge_L, ge_R; - Real E_kin, E, dge; - // #ifdef CTU - #ifndef VL - Real chi_ge, sum_ge, ge_6; - #endif // VL - #endif // DE - #ifdef SCALAR - Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS], scalar_imt[NSCALARS], scalar_ipt[NSCALARS]; - Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], del_scalar_G[NSCALARS]; - Real del_scalar_m_imo[NSCALARS], del_scalar_m_i[NSCALARS], del_scalar_m_ipo[NSCALARS]; - Real scalar_L[NSCALARS], scalar_R[NSCALARS]; - // #ifdef CTU - #ifndef VL - Real chi_scalar[NSCALARS], sum_scalar[NSCALARS], scalar_6[NSCALARS]; - #endif // VL - #endif // SCALAR + // Compute the total number of cells + int const n_cells = nx * ny * nz; - // get a thread ID - int blockId = blockIdx.x + blockIdx.y * gridDim.x; - int tid = threadIdx.x + blockId * blockDim.x; - int id; - int zid = tid / (nx * ny); - int yid = (tid - zid * nx * ny) / nx; - int xid = tid - zid * nx * ny - yid * nx; - - int xs, xe, ys, ye, zs, ze; - if (dir == 0) { - xs = 2; - xe = nx - 3; - ys = 0; - ye = ny; - zs = 0; - ze = nz; - } - if (dir == 1) { - xs = 0; - xe = nx; - ys = 2; - ye = ny - 3; - zs = 0; - ze = nz; - } - if (dir == 2) { - xs = 0; - xe = nx; - ys = 0; - ye = ny; - zs = 2; - ze = nz - 3; + // Set the field indices for the various directions + int o1, o2, o3; + switch (dir) { + case 0: + o1 = grid_enum::momentum_x; + o2 = grid_enum::momentum_y; + o3 = grid_enum::momentum_z; + break; + case 1: + o1 = grid_enum::momentum_y; + o2 = grid_enum::momentum_z; + o3 = grid_enum::momentum_x; + 
break; + case 2: + o1 = grid_enum::momentum_z; + o2 = grid_enum::momentum_x; + o3 = grid_enum::momentum_y; + break; } - if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) { - // load the 5-cell stencil into registers - // cell i - id = xid + yid * nx + zid * nx * ny; - d_i = dev_conserved[id]; - vx_i = dev_conserved[o1 * n_cells + id] / d_i; - vy_i = dev_conserved[o2 * n_cells + id] / d_i; - vz_i = dev_conserved[o3 * n_cells + id] / d_i; - #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else // not DE - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); - #endif // PRESSURE_DE - p_i = fmax(p_i, (Real)TINY_NUMBER); - #ifdef DE - ge_i = dge / d_i; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_i[i] = dev_conserved[(5 + i) * n_cells + id] / d_i; - } - #endif // SCALAR - // cell i-1 - if (dir == 0) { - id = xid - 1 + yid * nx + zid * nx * ny; - } - if (dir == 1) { - id = xid + (yid - 1) * nx + zid * nx * ny; - } - if (dir == 2) { - id = xid + yid * nx + (zid - 1) * nx * ny; - } - d_imo = dev_conserved[id]; - vx_imo = dev_conserved[o1 * n_cells + id] / d_imo; - vy_imo = dev_conserved[o2 * n_cells + id] / d_imo; - vz_imo = dev_conserved[o3 * n_cells + id] / d_imo; - #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else // not DE - p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * - (gamma - 1.0); - #endif // PRESSURE_DE - p_imo = fmax(p_imo, (Real)TINY_NUMBER); 
- #ifdef DE - ge_imo = dge / d_imo; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_imo[i] = dev_conserved[(5 + i) * n_cells + id] / d_imo; - } - #endif // SCALAR - // cell i+1 - if (dir == 0) { - id = xid + 1 + yid * nx + zid * nx * ny; - } - if (dir == 1) { - id = xid + (yid + 1) * nx + zid * nx * ny; - } - if (dir == 2) { - id = xid + yid * nx + (zid + 1) * nx * ny; - } - d_ipo = dev_conserved[id]; - vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo; - vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo; - vz_ipo = dev_conserved[o3 * n_cells + id] / d_ipo; - #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else // not DE - p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * - (gamma - 1.0); - #endif // PRESSURE_DE - p_ipo = fmax(p_ipo, (Real)TINY_NUMBER); - #ifdef DE - ge_ipo = dge / d_ipo; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_ipo[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipo; - } - #endif // SCALAR - // cell i-2 - if (dir == 0) { - id = xid - 2 + yid * nx + zid * nx * ny; - } - if (dir == 1) { - id = xid + (yid - 2) * nx + zid * nx * ny; - } - if (dir == 2) { - id = xid + yid * nx + (zid - 2) * nx * ny; - } - d_imt = dev_conserved[id]; - vx_imt = dev_conserved[o1 * n_cells + id] / d_imt; - vy_imt = dev_conserved[o2 * n_cells + id] / d_imt; - vz_imt = dev_conserved[o3 * n_cells + id] / d_imt; - #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_imt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else // not DE - p_imt = (dev_conserved[4 
* n_cells + id] - 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt)) * - (gamma - 1.0); - #endif // PRESSURE_DE - p_imt = fmax(p_imt, (Real)TINY_NUMBER); - #ifdef DE - ge_imt = dge / d_imt; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_imt[i] = dev_conserved[(5 + i) * n_cells + id] / d_imt; - } - #endif // SCALAR - // cell i+2 - if (dir == 0) { - id = xid + 2 + yid * nx + zid * nx * ny; - } - if (dir == 1) { - id = xid + (yid + 2) * nx + zid * nx * ny; - } - if (dir == 2) { - id = xid + yid * nx + (zid + 2) * nx * ny; - } - d_ipt = dev_conserved[id]; - vx_ipt = dev_conserved[o1 * n_cells + id] / d_ipt; - vy_ipt = dev_conserved[o2 * n_cells + id] / d_ipt; - vz_ipt = dev_conserved[o3 * n_cells + id] / d_ipt; - #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_ipt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else // not DE - p_ipt = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt)) * - (gamma - 1.0); - #endif // PRESSURE_DE - p_ipt = fmax(p_ipt, (Real)TINY_NUMBER); - #ifdef DE - ge_ipt = dge / d_ipt; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_ipt[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipt; - } - #endif // SCALAR - - // printf("%d %d %d %f %f %f %f %f\n", xid, yid, zid, d_i, vx_i, vy_i, vz_i, - // p_i); - - // Steps 2 - 5 are repeated for cell i-1, i, and i+1 - // Step 2 - Compute the left, right, centered, and van Leer differences of - // the primitive variables - // Note that here L and R refer to locations relative to the cell - // center Stone Eqn 36 - - // calculate the adiabatic sound speed in cell imo - a = sqrt(gamma * p_imo / d_imo); - - // left - del_d_L = d_imo - d_imt; - del_vx_L = vx_imo - vx_imt; - del_vy_L = vy_imo - vy_imt; - del_vz_L 
= vz_imo - vz_imt; - del_p_L = p_imo - p_imt; - - // right - del_d_R = d_i - d_imo; - del_vx_R = vx_i - vx_imo; - del_vy_R = vy_i - vy_imo; - del_vz_R = vz_i - vz_imo; - del_p_R = p_i - p_imo; - - // centered - del_d_C = 0.5 * (d_i - d_imt); - del_vx_C = 0.5 * (vx_i - vx_imt); - del_vy_C = 0.5 * (vy_i - vy_imt); - del_vz_C = 0.5 * (vz_i - vz_imt); - del_p_C = 0.5 * (p_i - p_imt); - - // Van Leer - if (del_d_L * del_d_R > 0.0) { - del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); - } else { - del_d_G = 0.0; - } - if (del_vx_L * del_vx_R > 0.0) { - del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); - } else { - del_vx_G = 0.0; - } - if (del_vy_L * del_vy_R > 0.0) { - del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); - } else { - del_vy_G = 0.0; - } - if (del_vz_L * del_vz_R > 0.0) { - del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); - } else { - del_vz_G = 0.0; - } - if (del_p_L * del_p_R > 0.0) { - del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); - } else { - del_p_G = 0.0; - } + // load the 5-cell stencil into registers + // cell i + reconstruction::Primitive const cell_i = + reconstruction::Load_Data(dev_conserved, xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma); - #ifdef DE - del_ge_L = ge_imo - ge_imt; - del_ge_R = ge_i - ge_imo; - del_ge_C = 0.5 * (ge_i - ge_imt); - if (del_ge_L * del_ge_R > 0.0) { - del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); - } else { - del_ge_G = 0.0; - } - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_scalar_L[i] = scalar_imo[i] - scalar_imt[i]; - del_scalar_R[i] = scalar_i[i] - scalar_imo[i]; - del_scalar_C[i] = 0.5 * (scalar_i[i] - scalar_imt[i]); - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); - } else { - del_scalar_G[i] = 0.0; - } - } - #endif // SCALAR - - // Step 3 - Project the left, right, centered and van Leer differences onto - // 
the characteristic variables - // Stone Eqn 37 (del_a are differences in characteristic variables, - // see Stone for notation) Use the eigenvectors given in Stone - // 2008, Appendix A - - del_a_0_L = -0.5 * d_imo * del_vx_L / a + 0.5 * del_p_L / (a * a); - del_a_1_L = del_d_L - del_p_L / (a * a); - del_a_2_L = del_vy_L; - del_a_3_L = del_vz_L; - del_a_4_L = 0.5 * d_imo * del_vx_L / a + 0.5 * del_p_L / (a * a); - - del_a_0_R = -0.5 * d_imo * del_vx_R / a + 0.5 * del_p_R / (a * a); - del_a_1_R = del_d_R - del_p_R / (a * a); - del_a_2_R = del_vy_R; - del_a_3_R = del_vz_R; - del_a_4_R = 0.5 * d_imo * del_vx_R / a + 0.5 * del_p_R / (a * a); - - del_a_0_C = -0.5 * d_imo * del_vx_C / a + 0.5 * del_p_C / (a * a); - del_a_1_C = del_d_C - del_p_C / (a * a); - del_a_2_C = del_vy_C; - del_a_3_C = del_vz_C; - del_a_4_C = 0.5 * d_imo * del_vx_C / a + 0.5 * del_p_C / (a * a); - - del_a_0_G = -0.5 * d_imo * del_vx_G / a + 0.5 * del_p_G / (a * a); - del_a_1_G = del_d_G - del_p_G / (a * a); - del_a_2_G = del_vy_G; - del_a_3_G = del_vz_G; - del_a_4_G = 0.5 * d_imo * del_vx_G / a + 0.5 * del_p_G / (a * a); - - // Step 4 - Apply monotonicity constraints to the differences in the - // characteristic variables - // Stone Eqn 38 - - del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; - - if (del_a_0_L * del_a_0_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); - lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_1_L * del_a_1_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); - lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_2_L * del_a_2_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); - lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - 
if (del_a_3_L * del_a_3_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); - lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_4_L * del_a_4_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); - lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - #ifdef DE - if (del_ge_L * del_ge_R > 0.0) { - lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); - lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_imo = sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else { - del_ge_m_imo = 0.0; - } - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_imo[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else { - del_scalar_m_imo[i] = 0.0; - } - } - #endif // SCALAR - - // Step 5 - Project the monotonized difference in the characteristic - // variables back onto the - // primitive variables - // Stone Eqn 39 - - del_d_m_imo = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_imo = -a * del_a_0_m / d_imo + a * del_a_4_m / d_imo; - del_vy_m_imo = del_a_2_m; - del_vz_m_imo = del_a_3_m; - del_p_m_imo = a * a * del_a_0_m + a * a * del_a_4_m; - - // Step 2 - Compute the left, right, centered, and van Leer differences of - // the primitive variables - // Note that here L and R refer to locations relative to the cell - // center Stone Eqn 36 - - // calculate the adiabatic sound speed in cell i - a = sqrt(gamma * p_i / d_i); - - // left - del_d_L = d_i - d_imo; - del_vx_L = vx_i - vx_imo; - del_vy_L = vy_i - vy_imo; - del_vz_L = vz_i - vz_imo; - del_p_L = p_i - p_imo; - - // right - del_d_R = d_ipo - d_i; 
- del_vx_R = vx_ipo - vx_i; - del_vy_R = vy_ipo - vy_i; - del_vz_R = vz_ipo - vz_i; - del_p_R = p_ipo - p_i; - - // centered - del_d_C = 0.5 * (d_ipo - d_imo); - del_vx_C = 0.5 * (vx_ipo - vx_imo); - del_vy_C = 0.5 * (vy_ipo - vy_imo); - del_vz_C = 0.5 * (vz_ipo - vz_imo); - del_p_C = 0.5 * (p_ipo - p_imo); - - // van Leer - if (del_d_L * del_d_R > 0.0) { - del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); - } else { - del_d_G = 0.0; - } - if (del_vx_L * del_vx_R > 0.0) { - del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); - } else { - del_vx_G = 0.0; - } - if (del_vy_L * del_vy_R > 0.0) { - del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); - } else { - del_vy_G = 0.0; - } - if (del_vz_L * del_vz_R > 0.0) { - del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); - } else { - del_vz_G = 0.0; - } - if (del_p_L * del_p_R > 0.0) { - del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); - } else { - del_p_G = 0.0; - } + // cell i-1. The equality checks check the direction and subtracts one from the direction + // im1 stands for "i minus 1" + reconstruction::Primitive const cell_im1 = reconstruction::Load_Data( + dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); - #ifdef DE - del_ge_L = ge_i - ge_imo; - del_ge_R = ge_ipo - ge_i; - del_ge_C = 0.5 * (ge_ipo - ge_imo); - if (del_ge_L * del_ge_R > 0.0) { - del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); - } else { - del_ge_G = 0.0; - } - #endif // DE + // cell i+1. 
The equality checks check the direction and adds one to the direction + // ip1 stands for "i plus 1" + reconstruction::Primitive const cell_ip1 = reconstruction::Load_Data( + dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_scalar_L[i] = scalar_i[i] - scalar_imo[i]; - del_scalar_R[i] = scalar_ipo[i] - scalar_i[i]; - del_scalar_C[i] = 0.5 * (scalar_ipo[i] - scalar_imo[i]); - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); - } else { - del_scalar_G[i] = 0.0; - } - } - #endif // SCALAR - - // Step 3 - Project the left, right, centered, and van Leer differences onto - // the characteristic variables - // Stone Eqn 37 (del_a are differences in characteristic variables, - // see Stone for notation) Use the eigenvectors given in Stone - // 2008, Appendix A - - del_a_0_L = -0.5 * d_i * del_vx_L / a + 0.5 * del_p_L / (a * a); - del_a_1_L = del_d_L - del_p_L / (a * a); - del_a_2_L = del_vy_L; - del_a_3_L = del_vz_L; - del_a_4_L = 0.5 * d_i * del_vx_L / a + 0.5 * del_p_L / (a * a); - - del_a_0_R = -0.5 * d_i * del_vx_R / a + 0.5 * del_p_R / (a * a); - del_a_1_R = del_d_R - del_p_R / (a * a); - del_a_2_R = del_vy_R; - del_a_3_R = del_vz_R; - del_a_4_R = 0.5 * d_i * del_vx_R / a + 0.5 * del_p_R / (a * a); - - del_a_0_C = -0.5 * d_i * del_vx_C / a + 0.5 * del_p_C / (a * a); - del_a_1_C = del_d_C - del_p_C / (a * a); - del_a_2_C = del_vy_C; - del_a_3_C = del_vz_C; - del_a_4_C = 0.5 * d_i * del_vx_C / a + 0.5 * del_p_C / (a * a); - - del_a_0_G = -0.5 * d_i * del_vx_G / a + 0.5 * del_p_G / (a * a); - del_a_1_G = del_d_G - del_p_G / (a * a); - del_a_2_G = del_vy_G; - del_a_3_G = del_vz_G; - del_a_4_G = 0.5 * d_i * del_vx_G / a + 0.5 * del_p_G / (a * a); - - // Step 4 - Apply monotonicity constraints to the differences in the - // characteristic variables - // 
Stone Eqn 38 - - del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; - - if (del_a_0_L * del_a_0_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); - lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_1_L * del_a_1_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); - lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_2_L * del_a_2_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); - lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_3_L * del_a_3_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); - lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_4_L * del_a_4_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); - lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - #ifdef DE - if (del_ge_L * del_ge_R > 0.0) { - lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); - lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_i = sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else { - del_ge_m_i = 0.0; - } - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else { - del_scalar_m_i[i] = 0.0; - } - } - #endif // SCALAR - - // Step 5 - Project the monotonized difference in the 
characteristic - // variables back onto the - // primitive variables - // Stone Eqn 39 - - del_d_m_i = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_i = -a * del_a_0_m / d_i + a * del_a_4_m / d_i; - del_vy_m_i = del_a_2_m; - del_vz_m_i = del_a_3_m; - del_p_m_i = a * a * del_a_0_m + a * a * del_a_4_m; - - // Step 2 - Compute the left, right, centered, and van Leer differences of - // the primitive variables - // Note that here L and R refer to locations relative to the cell - // center Stone Eqn 36 - - // calculate the adiabatic sound speed in cell ipo - a = sqrt(gamma * p_ipo / d_ipo); - - // left - del_d_L = d_ipo - d_i; - del_vx_L = vx_ipo - vx_i; - del_vy_L = vy_ipo - vy_i; - del_vz_L = vz_ipo - vz_i; - del_p_L = p_ipo - p_i; - - // right - del_d_R = d_ipt - d_ipo; - del_vx_R = vx_ipt - vx_ipo; - del_vy_R = vy_ipt - vy_ipo; - del_vz_R = vz_ipt - vz_ipo; - del_p_R = p_ipt - p_ipo; - - // centered - del_d_C = 0.5 * (d_ipt - d_i); - del_vx_C = 0.5 * (vx_ipt - vx_i); - del_vy_C = 0.5 * (vy_ipt - vy_i); - del_vz_C = 0.5 * (vz_ipt - vz_i); - del_p_C = 0.5 * (p_ipt - p_i); - - // van Leer - if (del_d_L * del_d_R > 0.0) { - del_d_G = 2.0 * del_d_L * del_d_R / (del_d_L + del_d_R); - } else { - del_d_G = 0.0; - } - if (del_vx_L * del_vx_R > 0.0) { - del_vx_G = 2.0 * del_vx_L * del_vx_R / (del_vx_L + del_vx_R); - } else { - del_vx_G = 0.0; - } - if (del_vy_L * del_vy_R > 0.0) { - del_vy_G = 2.0 * del_vy_L * del_vy_R / (del_vy_L + del_vy_R); - } else { - del_vy_G = 0.0; - } - if (del_vz_L * del_vz_R > 0.0) { - del_vz_G = 2.0 * del_vz_L * del_vz_R / (del_vz_L + del_vz_R); - } else { - del_vz_G = 0.0; - } - if (del_p_L * del_p_R > 0.0) { - del_p_G = 2.0 * del_p_L * del_p_R / (del_p_L + del_p_R); - } else { - del_p_G = 0.0; - } + // cell i-2. 
The equality checks check the direction and subtracts two from the direction + // im2 stands for "i minus 2" + reconstruction::Primitive const cell_im2 = + reconstruction::Load_Data(dev_conserved, xid - 2 * int(dir == 0), yid - 2 * int(dir == 1), + zid - 2 * int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); - #ifdef DE - del_ge_L = ge_ipo - ge_i; - del_ge_R = ge_ipt - ge_ipo; - del_ge_C = 0.5 * (ge_ipt - ge_i); - if (del_ge_L * del_ge_R > 0.0) { - del_ge_G = 2.0 * del_ge_L * del_ge_R / (del_ge_L + del_ge_R); - } else { - del_ge_G = 0.0; - } - #endif // DE + // cell i+2. The equality checks check the direction and adds two to the direction + // ip2 stands for "i plus 2" + reconstruction::Primitive const cell_ip2 = + reconstruction::Load_Data(dev_conserved, xid + 2 * int(dir == 0), yid + 2 * int(dir == 1), + zid + 2 * int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_scalar_L[i] = scalar_ipo[i] - scalar_i[i]; - del_scalar_R[i] = scalar_ipt[i] - scalar_ipo[i]; - del_scalar_C[i] = 0.5 * (scalar_ipt[i] - scalar_i[i]); - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - del_scalar_G[i] = 2.0 * del_scalar_L[i] * del_scalar_R[i] / (del_scalar_L[i] + del_scalar_R[i]); - } else { - del_scalar_G[i] = 0.0; - } - } - #endif // SCALAR - - // Step 3 - Project the left, right, centered, and van Leer differences onto - // the characteristic variables - // Stone Eqn 37 (del_a are differences in characteristic variables, - // see Stone for notation) Use the eigenvectors given in Stone - // 2008, Appendix A - - del_a_0_L = -0.5 * d_ipo * del_vx_L / a + 0.5 * del_p_L / (a * a); - del_a_1_L = del_d_L - del_p_L / (a * a); - del_a_2_L = del_vy_L; - del_a_3_L = del_vz_L; - del_a_4_L = 0.5 * d_ipo * del_vx_L / a + 0.5 * del_p_L / (a * a); - - del_a_0_R = -0.5 * d_ipo * del_vx_R / a + 0.5 * del_p_R / (a * a); - del_a_1_R = del_d_R - del_p_R / (a * a); - del_a_2_R = del_vy_R; - del_a_3_R = del_vz_R; - del_a_4_R = 0.5 * d_ipo * 
del_vx_R / a + 0.5 * del_p_R / (a * a); - - del_a_0_C = -0.5 * d_ipo * del_vx_C / a + 0.5 * del_p_C / (a * a); - del_a_1_C = del_d_C - del_p_C / (a * a); - del_a_2_C = del_vy_C; - del_a_3_C = del_vz_C; - del_a_4_C = 0.5 * d_ipo * del_vx_C / a + 0.5 * del_p_C / (a * a); - - del_a_0_G = -0.5 * d_ipo * del_vx_G / a + 0.5 * del_p_G / (a * a); - del_a_1_G = del_d_G - del_p_G / (a * a); - del_a_2_G = del_vy_G; - del_a_3_G = del_vz_G; - del_a_4_G = 0.5 * d_ipo * del_vx_G / a + 0.5 * del_p_G / (a * a); - - // Step 4 - Apply monotonicity constraints to the differences in the - // characteristic variables - // Stone Eqn 38 - - del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; - - if (del_a_0_L * del_a_0_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); - lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_1_L * del_a_1_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); - lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_2_L * del_a_2_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); - lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_3_L * del_a_3_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); - lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - if (del_a_4_L * del_a_4_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); - lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } - #ifdef DE - if (del_ge_L * del_ge_R > 0.0) { - lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); - lim_slope_b = fmin(fabs(del_ge_C), 
fabs(del_ge_G)); - del_ge_m_ipo = sgn_CUDA(del_ge_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else { - del_ge_m_ipo = 0.0; - } - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - if (del_scalar_L[i] * del_scalar_R[i] > 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_ipo[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - } else { - del_scalar_m_ipo[i] = 0.0; - } - } - #endif // SCALAR - - // Step 5 - Project the monotonized difference in the characteristic - // variables back onto the - // primitive variables - // Stone Eqn 39 - - del_d_m_ipo = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_ipo = -a * del_a_0_m / d_ipo + a * del_a_4_m / d_ipo; - del_vy_m_ipo = del_a_2_m; - del_vz_m_ipo = del_a_3_m; - del_p_m_ipo = a * a * del_a_0_m + a * a * del_a_4_m; - - // Step 6 - Use parabolic interpolation to compute values at the left and - // right of each cell center - // Here, the subscripts L and R refer to the left and right side of - // the ith cell center Stone Eqn 46 - - d_L = 0.5 * (d_i + d_imo) - (del_d_m_i - del_d_m_imo) / 6.0; - vx_L = 0.5 * (vx_i + vx_imo) - (del_vx_m_i - del_vx_m_imo) / 6.0; - vy_L = 0.5 * (vy_i + vy_imo) - (del_vy_m_i - del_vy_m_imo) / 6.0; - vz_L = 0.5 * (vz_i + vz_imo) - (del_vz_m_i - del_vz_m_imo) / 6.0; - p_L = 0.5 * (p_i + p_imo) - (del_p_m_i - del_p_m_imo) / 6.0; - - d_R = 0.5 * (d_ipo + d_i) - (del_d_m_ipo - del_d_m_i) / 6.0; - vx_R = 0.5 * (vx_ipo + vx_i) - (del_vx_m_ipo - del_vx_m_i) / 6.0; - vy_R = 0.5 * (vy_ipo + vy_i) - (del_vy_m_ipo - del_vy_m_i) / 6.0; - vz_R = 0.5 * (vz_ipo + vz_i) - (del_vz_m_ipo - del_vz_m_i) / 6.0; - p_R = 0.5 * (p_ipo + p_i) - (del_p_m_ipo - del_p_m_i) / 6.0; - - #ifdef DE - ge_L = 0.5 * (ge_i + ge_imo) - (del_ge_m_i - del_ge_m_imo) / 6.0; - ge_R = 0.5 * (ge_ipo + ge_i) - (del_ge_m_ipo - del_ge_m_i) / 6.0; - #endif // DE - #ifdef SCALAR - for 
(int i = 0; i < NSCALARS; i++) { - scalar_L[i] = 0.5 * (scalar_i[i] + scalar_imo[i]) - (del_scalar_m_i[i] - del_scalar_m_imo[i]) / 6.0; - scalar_R[i] = 0.5 * (scalar_ipo[i] + scalar_i[i]) - (del_scalar_m_ipo[i] - del_scalar_m_i[i]) / 6.0; - } - #endif // SCALAR + // Steps 2 - 5 are repeated for cell i-1, i, and i+1 - // Step 7 - Apply further monotonicity constraints to ensure the values on - // the left and right side - // of cell center lie between neighboring cell-centered values - // Stone Eqns 47 - 53 + // =============== + // Cell i-1 slopes + // =============== - if ((d_R - d_i) * (d_i - d_L) <= 0) { - d_L = d_R = d_i; - } - if ((vx_R - vx_i) * (vx_i - vx_L) <= 0) { - vx_L = vx_R = vx_i; - } - if ((vy_R - vy_i) * (vy_i - vy_L) <= 0) { - vy_L = vy_R = vy_i; - } - if ((vz_R - vz_i) * (vz_i - vz_L) <= 0) { - vz_L = vz_R = vz_i; - } - if ((p_R - p_i) * (p_i - p_L) <= 0) { - p_L = p_R = p_i; - } + // calculate the adiabatic sound speed in cell im1 + Real sound_speed = hydro_utilities::Calc_Sound_Speed(cell_im1.pressure, cell_im1.density, gamma); + // this isn't actually used and the compiler should optimize it away but since this is the only reconstruction + // function that won't use it it was easier to add it here as an unused variable + reconstruction::EigenVecs eigenvector; - if (6.0 * (d_R - d_L) * (d_i - 0.5 * (d_L + d_R)) > (d_R - d_L) * (d_R - d_L)) { - d_L = 3.0 * d_i - 2.0 * d_R; - } - if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) > (vx_R - vx_L) * (vx_R - vx_L)) { - vx_L = 3.0 * vx_i - 2.0 * vx_R; - } - if (6.0 * (vy_R - vy_L) * (vy_i - 0.5 * (vy_L + vy_R)) > (vy_R - vy_L) * (vy_R - vy_L)) { - vy_L = 3.0 * vy_i - 2.0 * vy_R; - } - if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) > (vz_R - vz_L) * (vz_R - vz_L)) { - vz_L = 3.0 * vz_i - 2.0 * vz_R; - } - if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) > (p_R - p_L) * (p_R - p_L)) { - p_L = 3.0 * p_i - 2.0 * p_R; - } + // Step 2 - Compute the left, right, centered, and van Leer 
differences of the primitive variables. Note that here L + // and R refer to locations relative to the cell center Stone Eqn 36 - if (6.0 * (d_R - d_L) * (d_i - 0.5 * (d_L + d_R)) < -(d_R - d_L) * (d_R - d_L)) { - d_R = 3.0 * d_i - 2.0 * d_L; - } - if (6.0 * (vx_R - vx_L) * (vx_i - 0.5 * (vx_L + vx_R)) < -(vx_R - vx_L) * (vx_R - vx_L)) { - vx_R = 3.0 * vx_i - 2.0 * vx_L; - } - if (6.0 * (vy_R - vy_L) * (vy_i - 0.5 * (vy_L + vy_R)) < -(vy_R - vy_L) * (vy_R - vy_L)) { - vy_R = 3.0 * vy_i - 2.0 * vy_L; - } - if (6.0 * (vz_R - vz_L) * (vz_i - 0.5 * (vz_L + vz_R)) < -(vz_R - vz_L) * (vz_R - vz_L)) { - vz_R = 3.0 * vz_i - 2.0 * vz_L; - } - if (6.0 * (p_R - p_L) * (p_i - 0.5 * (p_L + p_R)) < -(p_R - p_L) * (p_R - p_L)) { - p_R = 3.0 * p_i - 2.0 * p_L; - } + // left + reconstruction::Primitive del_L = reconstruction::Compute_Slope(cell_im2, cell_im1); - d_L = fmax(fmin(d_i, d_imo), d_L); - d_L = fmin(fmax(d_i, d_imo), d_L); - d_R = fmax(fmin(d_i, d_ipo), d_R); - d_R = fmin(fmax(d_i, d_ipo), d_R); - vx_L = fmax(fmin(vx_i, vx_imo), vx_L); - vx_L = fmin(fmax(vx_i, vx_imo), vx_L); - vx_R = fmax(fmin(vx_i, vx_ipo), vx_R); - vx_R = fmin(fmax(vx_i, vx_ipo), vx_R); - vy_L = fmax(fmin(vy_i, vy_imo), vy_L); - vy_L = fmin(fmax(vy_i, vy_imo), vy_L); - vy_R = fmax(fmin(vy_i, vy_ipo), vy_R); - vy_R = fmin(fmax(vy_i, vy_ipo), vy_R); - vz_L = fmax(fmin(vz_i, vz_imo), vz_L); - vz_L = fmin(fmax(vz_i, vz_imo), vz_L); - vz_R = fmax(fmin(vz_i, vz_ipo), vz_R); - vz_R = fmin(fmax(vz_i, vz_ipo), vz_R); - p_L = fmax(fmin(p_i, p_imo), p_L); - p_L = fmin(fmax(p_i, p_imo), p_L); - p_R = fmax(fmin(p_i, p_ipo), p_R); - p_R = fmin(fmax(p_i, p_ipo), p_R); - - #ifdef DE - if ((ge_R - ge_i) * (ge_i - ge_L) <= 0) { - ge_L = ge_R = ge_i; - } - if (6.0 * (ge_R - ge_L) * (ge_i - 0.5 * (ge_L + ge_R)) > (ge_R - ge_L) * (ge_R - ge_L)) { - ge_L = 3.0 * ge_i - 2.0 * ge_R; - } - if (6.0 * (ge_R - ge_L) * (ge_i - 0.5 * (ge_L + ge_R)) < -(ge_R - ge_L) * (ge_R - ge_L)) { - ge_R = 3.0 * ge_i - 2.0 * ge_L; - } - ge_L = 
fmax(fmin(ge_i, ge_imo), ge_L); - ge_L = fmin(fmax(ge_i, ge_imo), ge_L); - ge_R = fmax(fmin(ge_i, ge_ipo), ge_R); - ge_R = fmin(fmax(ge_i, ge_ipo), ge_R); - #endif // DE + // right + reconstruction::Primitive del_R = reconstruction::Compute_Slope(cell_im1, cell_i); - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - if ((scalar_R[i] - scalar_i[i]) * (scalar_i[i] - scalar_L[i]) <= 0) { - scalar_L[i] = scalar_R[i] = scalar_i[i]; - } - if (6.0 * (scalar_R[i] - scalar_L[i]) * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])) > - (scalar_R[i] - scalar_L[i]) * (scalar_R[i] - scalar_L[i])) { - scalar_L[i] = 3.0 * scalar_i[i] - 2.0 * scalar_R[i]; - } - if (6.0 * (scalar_R[i] - scalar_L[i]) * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])) < - -(scalar_R[i] - scalar_L[i]) * (scalar_R[i] - scalar_L[i])) { - scalar_R[i] = 3.0 * scalar_i[i] - 2.0 * scalar_L[i]; - } - scalar_L[i] = fmax(fmin(scalar_i[i], scalar_imo[i]), scalar_L[i]); - scalar_L[i] = fmin(fmax(scalar_i[i], scalar_imo[i]), scalar_L[i]); - scalar_R[i] = fmax(fmin(scalar_i[i], scalar_ipo[i]), scalar_R[i]); - scalar_R[i] = fmin(fmax(scalar_i[i], scalar_ipo[i]), scalar_R[i]); - } - #endif // SCALAR + // centered + reconstruction::Primitive del_C = reconstruction::Compute_Slope(cell_im2, cell_i, 0.5); - // #ifdef CTU - #ifndef VL + // Van Leer + reconstruction::Primitive del_G = reconstruction::Van_Leer_Slope(del_L, del_R); - // Step 8 - Compute the coefficients for the monotonized parabolic - // interpolation function - // Stone Eqn 54 + // Step 3 - Project the left, right, centered and van Leer differences onto the + // characteristic variables Stone Eqn 37 (del_a are differences in + // characteristic variables, see Stone for notation) Use the eigenvectors + // given in Stone 2008, Appendix A + reconstruction::Characteristic del_a_L = reconstruction::Primitive_To_Characteristic( + cell_im1, del_L, eigenvector, sound_speed, sound_speed * sound_speed, gamma); - del_d_m_i = d_R - d_L; - del_vx_m_i = vx_R - 
vx_L; - del_vy_m_i = vy_R - vy_L; - del_vz_m_i = vz_R - vz_L; - del_p_m_i = p_R - p_L; + reconstruction::Characteristic del_a_R = reconstruction::Primitive_To_Characteristic( + cell_im1, del_R, eigenvector, sound_speed, sound_speed * sound_speed, gamma); - d_6 = 6.0 * (d_i - 0.5 * (d_L + d_R)); - vx_6 = 6.0 * (vx_i - 0.5 * (vx_L + vx_R)); - vy_6 = 6.0 * (vy_i - 0.5 * (vy_L + vy_R)); - vz_6 = 6.0 * (vz_i - 0.5 * (vz_L + vz_R)); - p_6 = 6.0 * (p_i - 0.5 * (p_L + p_R)); + reconstruction::Characteristic del_a_C = reconstruction::Primitive_To_Characteristic( + cell_im1, del_C, eigenvector, sound_speed, sound_speed * sound_speed, gamma); - #ifdef DE - del_ge_m_i = ge_R - ge_L; - ge_6 = 6.0 * (ge_i - 0.5 * (ge_L + ge_R)); - #endif // DE + reconstruction::Characteristic del_a_G = reconstruction::Primitive_To_Characteristic( + cell_im1, del_G, eigenvector, sound_speed, sound_speed * sound_speed, gamma); - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_scalar_m_i[i] = scalar_R[i] - scalar_L[i]; - scalar_6[i] = 6.0 * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])); - } - #endif // SCALAR - - // Compute the eigenvalues of the linearized equations in the - // primitive variables using the cell-centered primitive variables - - // recalculate the adiabatic sound speed in cell i - a = sqrt(gamma * p_i / d_i); - - lambda_m = vx_i - a; - lambda_0 = vx_i; - lambda_p = vx_i + a; - - // Step 9 - Compute the left and right interface values using monotonized - // parabolic interpolation - // Stone Eqns 55 & 56 - - // largest eigenvalue - lambda_max = fmax(lambda_p, (Real)0); - // smallest eigenvalue - lambda_min = fmin(lambda_m, (Real)0); - - // left interface value, i+1/2 - d_R = d_R - lambda_max * (0.5 * dtodx) * (del_d_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * d_6); - vx_R = vx_R - lambda_max * (0.5 * dtodx) * (del_vx_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vx_6); - vy_R = vy_R - lambda_max * (0.5 * dtodx) * (del_vy_m_i - (1.0 - (2.0 / 3.0) * lambda_max * 
dtodx) * vy_6); - vz_R = vz_R - lambda_max * (0.5 * dtodx) * (del_vz_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vz_6); - p_R = p_R - lambda_max * (0.5 * dtodx) * (del_p_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * p_6); - - // right interface value, i-1/2 - d_L = d_L - lambda_min * (0.5 * dtodx) * (del_d_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * d_6); - vx_L = vx_L - lambda_min * (0.5 * dtodx) * (del_vx_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vx_6); - vy_L = vy_L - lambda_min * (0.5 * dtodx) * (del_vy_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vy_6); - vz_L = vz_L - lambda_min * (0.5 * dtodx) * (del_vz_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vz_6); - p_L = p_L - lambda_min * (0.5 * dtodx) * (del_p_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * p_6); - - #ifdef DE - ge_R = ge_R - lambda_max * (0.5 * dtodx) * (del_ge_m_i - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * ge_6); - ge_L = ge_L - lambda_min * (0.5 * dtodx) * (del_ge_m_i + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * ge_6); - #endif // DE - - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_R[i] = scalar_R[i] - lambda_max * (0.5 * dtodx) * - (del_scalar_m_i[i] - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * scalar_6[i]); - scalar_L[i] = scalar_L[i] - lambda_min * (0.5 * dtodx) * - (del_scalar_m_i[i] + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * scalar_6[i]); - } - #endif // SCALAR - - // Step 10 - Perform the characteristic tracing - // Stone Eqns 57 - 60 - - // left-hand interface value, i+1/2 - sum_1 = 0; - sum_2 = 0; - sum_3 = 0; - sum_4 = 0; - sum_5 = 0; - #ifdef DE - sum_ge = 0; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] = 0; - } - #endif // SCALAR + // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables + // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables + // Stone Eqn 39 + reconstruction::Primitive const 
del_m_im1 = reconstruction::Monotonize_Characteristic_Return_Primitive( + cell_im1, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); - if (lambda_m >= 0) { - A = (0.5 * dtodx) * (lambda_p - lambda_m); - B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_m * lambda_m); + // ============= + // Cell i slopes + // ============= - chi_1 = A * (del_d_m_i - d_6) + B * d_6; - chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; - chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; - chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; - chi_5 = A * (del_p_m_i - p_6) + B * p_6; + // calculate the adiabatic sound speed in cell i + sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); - sum_1 += -0.5 * (d_i * chi_2 / a - chi_5 / (a * a)); - sum_2 += 0.5 * (chi_2 - chi_5 / (a * d_i)); - sum_5 += -0.5 * (d_i * chi_2 * a - chi_5); - } - if (lambda_0 >= 0) { - A = (0.5 * dtodx) * (lambda_p - lambda_0); - B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_0 * lambda_0); - - chi_1 = A * (del_d_m_i - d_6) + B * d_6; - chi_2 = A * (del_vx_m_i - vx_6) + B * vx_6; - chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; - chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; - chi_5 = A * (del_p_m_i - p_6) + B * p_6; - #ifdef DE - chi_ge = A * (del_ge_m_i - ge_6) + B * ge_6; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - chi_scalar[i] = A * (del_scalar_m_i[i] - scalar_6[i]) + B * scalar_6[i]; - } - #endif // SCALAR - - sum_1 += chi_1 - chi_5 / (a * a); - sum_3 += chi_3; - sum_4 += chi_4; - #ifdef DE - sum_ge += chi_ge; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += chi_scalar[i]; - } - #endif // SCALAR - } - if (lambda_p >= 0) { - A = (0.5 * dtodx) * (lambda_p - lambda_p); - B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_p * lambda_p); - - chi_1 = A * (del_d_m_i - d_6) + B * d_6; - chi_2 = A * 
(del_vx_m_i - vx_6) + B * vx_6; - chi_3 = A * (del_vy_m_i - vy_6) + B * vy_6; - chi_4 = A * (del_vz_m_i - vz_6) + B * vz_6; - chi_5 = A * (del_p_m_i - p_6) + B * p_6; - - sum_1 += 0.5 * (d_i * chi_2 / a + chi_5 / (a * a)); - sum_2 += 0.5 * (chi_2 + chi_5 / (a * d_i)); - sum_5 += 0.5 * (d_i * chi_2 * a + chi_5); - } + // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables. Note that here L + // and R refer to locations relative to the cell center Stone Eqn 36 - // add the corrections to the initial guesses for the interface values - d_R += sum_1; - vx_R += sum_2; - vy_R += sum_3; - vz_R += sum_4; - p_R += sum_5; - #ifdef DE - ge_R += sum_ge; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_R[i] += sum_scalar[i]; - } - #endif // SCALAR - - // right-hand interface value, i-1/2 - sum_1 = 0; - sum_2 = 0; - sum_3 = 0; - sum_4 = 0; - sum_5 = 0; - #ifdef DE - sum_ge = 0; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] = 0; - } - #endif // SCALAR - if (lambda_m <= 0) { - C = (0.5 * dtodx) * (lambda_m - lambda_m); - D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_m * lambda_m); - - chi_1 = C * (del_d_m_i + d_6) + D * d_6; - chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; - chi_3 = C * (del_vy_m_i + vy_6) + D * vy_6; - chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; - chi_5 = C * (del_p_m_i + p_6) + D * p_6; - - sum_1 += -0.5 * (d_i * chi_2 / a - chi_5 / (a * a)); - sum_2 += 0.5 * (chi_2 - chi_5 / (a * d_i)); - sum_5 += -0.5 * (d_i * chi_2 * a - chi_5); - } - if (lambda_0 <= 0) { - C = (0.5 * dtodx) * (lambda_m - lambda_0); - D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_0 * lambda_0); - - chi_1 = C * (del_d_m_i + d_6) + D * d_6; - chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; - chi_3 = C * (del_vy_m_i + vy_6) + D * vy_6; - chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; - chi_5 = C * (del_p_m_i + p_6) + D * p_6; - #ifdef DE - chi_ge = C 
* (del_ge_m_i + ge_6) + D * ge_6; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - chi_scalar[i] = C * (del_scalar_m_i[i] + scalar_6[i]) + D * scalar_6[i]; - } - #endif // SCALAR - - sum_1 += chi_1 - chi_5 / (a * a); - sum_3 += chi_3; - sum_4 += chi_4; - #ifdef DE - sum_ge += chi_ge; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += chi_scalar[i]; - } - #endif // SCALAR - } - if (lambda_p <= 0) { - C = (0.5 * dtodx) * (lambda_m - lambda_p); - D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_p * lambda_p); - - chi_1 = C * (del_d_m_i + d_6) + D * d_6; - chi_2 = C * (del_vx_m_i + vx_6) + D * vx_6; - chi_3 = C * (del_vy_m_i + vy_6) + D * vy_6; - chi_4 = C * (del_vz_m_i + vz_6) + D * vz_6; - chi_5 = C * (del_p_m_i + p_6) + D * p_6; - - sum_1 += 0.5 * (d_i * chi_2 / a + chi_5 / (a * a)); - sum_2 += 0.5 * (chi_2 + chi_5 / (a * d_i)); - sum_5 += 0.5 * (d_i * chi_2 * a + chi_5); - } + // left + del_L = reconstruction::Compute_Slope(cell_im1, cell_i); - // add the corrections - d_L += sum_1; - vx_L += sum_2; - vy_L += sum_3; - vz_L += sum_4; - p_L += sum_5; - #ifdef DE - ge_L += sum_ge; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_L[i] += sum_scalar[i]; - } - #endif // SCALAR + // right + del_R = reconstruction::Compute_Slope(cell_i, cell_ip1); - #endif // VL, i.e. 
CTU was used for this section + // centered + del_C = reconstruction::Compute_Slope(cell_im1, cell_ip1, 0.5); - // enforce minimum values - d_L = fmax(d_L, (Real)TINY_NUMBER); - d_R = fmax(d_R, (Real)TINY_NUMBER); - p_L = fmax(p_L, (Real)TINY_NUMBER); - p_R = fmax(p_R, (Real)TINY_NUMBER); + // Van Leer + del_G = reconstruction::Van_Leer_Slope(del_L, del_R); + + // Step 3 - Project the left, right, centered and van Leer differences onto the + // characteristic variables Stone Eqn 37 (del_a are differences in + // characteristic variables, see Stone for notation) Use the eigenvectors + // given in Stone 2008, Appendix A + del_a_L = reconstruction::Primitive_To_Characteristic(cell_i, del_L, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); + + del_a_R = reconstruction::Primitive_To_Characteristic(cell_i, del_R, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); + + del_a_C = reconstruction::Primitive_To_Characteristic(cell_i, del_C, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); + + del_a_G = reconstruction::Primitive_To_Characteristic(cell_i, del_G, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); + + // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables + // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables + // Stone Eqn 39 + reconstruction::Primitive del_m_i = reconstruction::Monotonize_Characteristic_Return_Primitive( + cell_i, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); + + // =============== + // Cell i+1 slopes + // =============== - // Step 11 - Send final values back from kernel + // calculate the adiabatic sound speed in cell ipo + sound_speed = hydro_utilities::Calc_Sound_Speed(cell_ip1.pressure, cell_ip1.density, gamma); - // bounds_R refers to the right side of the i-1/2 interface - if (dir == 0) { - id = xid - 1 + 
yid * nx + zid * nx * ny; - } - if (dir == 1) { - id = xid + (yid - 1) * nx + zid * nx * ny; + // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables. Note that here L + // and R refer to locations relative to the cell center Stone Eqn 36 + + // left + del_L = reconstruction::Compute_Slope(cell_i, cell_ip1); + + // right + del_R = reconstruction::Compute_Slope(cell_ip1, cell_ip2); + + // centered + del_C = reconstruction::Compute_Slope(cell_i, cell_ip2, 0.5); + + // Van Leer + del_G = reconstruction::Van_Leer_Slope(del_L, del_R); + + // Step 3 - Project the left, right, centered and van Leer differences onto the + // characteristic variables Stone Eqn 37 (del_a are differences in + // characteristic variables, see Stone for notation) Use the eigenvectors + // given in Stone 2008, Appendix A + del_a_L = reconstruction::Primitive_To_Characteristic(cell_ip1, del_L, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); + + del_a_R = reconstruction::Primitive_To_Characteristic(cell_ip1, del_R, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); + + del_a_C = reconstruction::Primitive_To_Characteristic(cell_ip1, del_C, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); + + del_a_G = reconstruction::Primitive_To_Characteristic(cell_ip1, del_G, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); + + // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables + // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables + // Stone Eqn 39 + reconstruction::Primitive const del_m_ip1 = reconstruction::Monotonize_Characteristic_Return_Primitive( + cell_ip1, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvector, sound_speed, + sound_speed * sound_speed, gamma); + + // Step 6 - Use parabolic interpolation to compute values at the left and right of each cell center Here, the + // 
subscripts L and R refer to the left and right side of the ith cell center Stone Eqn 46 + reconstruction::Primitive interface_L_iph = + reconstruction::Calc_Interface_Parabolic(cell_ip1, cell_i, del_m_ip1, del_m_i); + + reconstruction::Primitive interface_R_imh = + reconstruction::Calc_Interface_Parabolic(cell_i, cell_im1, del_m_i, del_m_im1); + + // Step 7 - Apply further monotonicity constraints to ensure the values on the left and right side of cell center lie + // between neighboring cell-centered values Stone Eqns 47 - 53 + reconstruction::Monotonize_Parabolic_Interface(cell_i, cell_im1, cell_ip1, interface_L_iph, interface_R_imh); + + // This is the beginning of the characteristic tracing + // Step 8 - Compute the coefficients for the monotonized parabolic + // interpolation function + // Stone Eqn 54 + + del_m_i.density = interface_L_iph.density - interface_R_imh.density; + del_m_i.velocity_x = interface_L_iph.velocity_x - interface_R_imh.velocity_x; + del_m_i.velocity_y = interface_L_iph.velocity_y - interface_R_imh.velocity_y; + del_m_i.velocity_z = interface_L_iph.velocity_z - interface_R_imh.velocity_z; + del_m_i.pressure = interface_L_iph.pressure - interface_R_imh.pressure; + + Real const d_6 = 6.0 * (cell_i.density - 0.5 * (interface_R_imh.density + interface_L_iph.density)); + Real const vx_6 = 6.0 * (cell_i.velocity_x - 0.5 * (interface_R_imh.velocity_x + interface_L_iph.velocity_x)); + Real const vy_6 = 6.0 * (cell_i.velocity_y - 0.5 * (interface_R_imh.velocity_y + interface_L_iph.velocity_y)); + Real const vz_6 = 6.0 * (cell_i.velocity_z - 0.5 * (interface_R_imh.velocity_z + interface_L_iph.velocity_z)); + Real const p_6 = 6.0 * (cell_i.pressure - 0.5 * (interface_R_imh.pressure + interface_L_iph.pressure)); + +#ifdef DE + del_m_i.gas_energy = interface_L_iph.gas_energy - interface_R_imh.gas_energy; + Real const ge_6 = 6.0 * (cell_i.gas_energy - 0.5 * (interface_R_imh.gas_energy + interface_L_iph.gas_energy)); +#endif // DE + +#ifdef SCALAR + Real 
scalar_6[NSCALARS]; + for (int i = 0; i < NSCALARS; i++) { + del_m_i.scalar[i] = interface_L_iph.scalar[i] - interface_R_imh.scalar[i]; + scalar_6[i] = 6.0 * (cell_i.scalar[i] - 0.5 * (interface_R_imh.scalar[i] + interface_L_iph.scalar[i])); + } +#endif // SCALAR + + // Compute the eigenvalues of the linearized equations in the + // primitive variables using the cell-centered primitive variables + + // recalculate the adiabatic sound speed in cell i + sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); + + Real const lambda_m = cell_i.velocity_x - sound_speed; + Real const lambda_0 = cell_i.velocity_x; + Real const lambda_p = cell_i.velocity_x + sound_speed; + + // Step 9 - Compute the left and right interface values using monotonized + // parabolic interpolation + // Stone Eqns 55 & 56 + + // largest eigenvalue + Real const lambda_max = fmax(lambda_p, (Real)0); + // smallest eigenvalue + Real const lambda_min = fmin(lambda_m, (Real)0); + + // left interface value, i+1/2 + Real const dtodx = dt / dx; + interface_L_iph.density = + interface_L_iph.density - + lambda_max * (0.5 * dtodx) * (del_m_i.density - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * d_6); + interface_L_iph.velocity_x = + interface_L_iph.velocity_x - + lambda_max * (0.5 * dtodx) * (del_m_i.velocity_x - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vx_6); + interface_L_iph.velocity_y = + interface_L_iph.velocity_y - + lambda_max * (0.5 * dtodx) * (del_m_i.velocity_y - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vy_6); + interface_L_iph.velocity_z = + interface_L_iph.velocity_z - + lambda_max * (0.5 * dtodx) * (del_m_i.velocity_z - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vz_6); + interface_L_iph.pressure = + interface_L_iph.pressure - + lambda_max * (0.5 * dtodx) * (del_m_i.pressure - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * p_6); + + // right interface value, i-1/2 + interface_R_imh.density = + interface_R_imh.density - + lambda_min * (0.5 * dtodx) * 
(del_m_i.density + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * d_6); + interface_R_imh.velocity_x = + interface_R_imh.velocity_x - + lambda_min * (0.5 * dtodx) * (del_m_i.velocity_x + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vx_6); + interface_R_imh.velocity_y = + interface_R_imh.velocity_y - + lambda_min * (0.5 * dtodx) * (del_m_i.velocity_y + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vy_6); + interface_R_imh.velocity_z = + interface_R_imh.velocity_z - + lambda_min * (0.5 * dtodx) * (del_m_i.velocity_z + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vz_6); + interface_R_imh.pressure = + interface_R_imh.pressure - + lambda_min * (0.5 * dtodx) * (del_m_i.pressure + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * p_6); + +#ifdef DE + interface_L_iph.gas_energy = + interface_L_iph.gas_energy - + lambda_max * (0.5 * dtodx) * (del_m_i.gas_energy - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * ge_6); + interface_R_imh.gas_energy = + interface_R_imh.gas_energy - + lambda_min * (0.5 * dtodx) * (del_m_i.gas_energy + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * ge_6); +#endif // DE + +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_L_iph.scalar[i] = + interface_L_iph.scalar[i] - + lambda_max * (0.5 * dtodx) * (del_m_i.scalar[i] - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * scalar_6[i]); + interface_R_imh.scalar[i] = + interface_R_imh.scalar[i] - + lambda_min * (0.5 * dtodx) * (del_m_i.scalar[i] + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * scalar_6[i]); + } +#endif // SCALAR + + // Step 10 - Perform the characteristic tracing + // Stone Eqns 57 - 60 + + // left-hand interface value, i+1/2 + Real sum_1 = 0, sum_2 = 0, sum_3 = 0, sum_4 = 0, sum_5 = 0; +#ifdef DE + Real sum_ge = 0; + Real chi_ge = 0; +#endif // DE +#ifdef SCALAR + Real chi_scalar[NSCALARS]; + Real sum_scalar[NSCALARS]; + for (Real &val : sum_scalar) { + val = 0; + } +#endif // SCALAR + + if (lambda_m >= 0) { + Real const A = (0.5 * dtodx) * (lambda_p - lambda_m); + Real const B = (1.0 / 3.0) * (dtodx) * 
(dtodx) * (lambda_p * lambda_p - lambda_m * lambda_m); + + Real const chi_1 = A * (del_m_i.density - d_6) + B * d_6; + Real const chi_2 = A * (del_m_i.velocity_x - vx_6) + B * vx_6; + Real const chi_3 = A * (del_m_i.velocity_y - vy_6) + B * vy_6; + Real const chi_4 = A * (del_m_i.velocity_z - vz_6) + B * vz_6; + Real const chi_5 = A * (del_m_i.pressure - p_6) + B * p_6; + + sum_1 += -0.5 * (cell_i.density * chi_2 / sound_speed - chi_5 / (sound_speed * sound_speed)); + sum_2 += 0.5 * (chi_2 - chi_5 / (sound_speed * cell_i.density)); + sum_5 += -0.5 * (cell_i.density * chi_2 * sound_speed - chi_5); + } + if (lambda_0 >= 0) { + Real const A = (0.5 * dtodx) * (lambda_p - lambda_0); + Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_0 * lambda_0); + + Real const chi_1 = A * (del_m_i.density - d_6) + B * d_6; + Real const chi_2 = A * (del_m_i.velocity_x - vx_6) + B * vx_6; + Real const chi_3 = A * (del_m_i.velocity_y - vy_6) + B * vy_6; + Real const chi_4 = A * (del_m_i.velocity_z - vz_6) + B * vz_6; + Real const chi_5 = A * (del_m_i.pressure - p_6) + B * p_6; +#ifdef DE + chi_ge = A * (del_m_i.gas_energy - ge_6) + B * ge_6; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + chi_scalar[i] = A * (del_m_i.scalar[i] - scalar_6[i]) + B * scalar_6[i]; } - if (dir == 2) { - id = xid + yid * nx + (zid - 1) * nx * ny; +#endif // SCALAR + + sum_1 += chi_1 - chi_5 / (sound_speed * sound_speed); + sum_3 += chi_3; + sum_4 += chi_4; +#ifdef DE + sum_ge += chi_ge; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] += chi_scalar[i]; } - dev_bounds_R[id] = d_L; - dev_bounds_R[o1 * n_cells + id] = d_L * vx_L; - dev_bounds_R[o2 * n_cells + id] = d_L * vy_L; - dev_bounds_R[o3 * n_cells + id] = d_L * vz_L; - dev_bounds_R[4 * n_cells + id] = p_L / (gamma - 1.0) + 0.5 * d_L * (vx_L * vx_L + vy_L * vy_L + vz_L * vz_L); - #ifdef SCALAR +#endif // SCALAR + } + if (lambda_p >= 0) { + Real const A = (0.5 * dtodx) * 
(lambda_p - lambda_p); + Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_p * lambda_p); + + Real const chi_1 = A * (del_m_i.density - d_6) + B * d_6; + Real const chi_2 = A * (del_m_i.velocity_x - vx_6) + B * vx_6; + Real const chi_3 = A * (del_m_i.velocity_y - vy_6) + B * vy_6; + Real const chi_4 = A * (del_m_i.velocity_z - vz_6) + B * vz_6; + Real const chi_5 = A * (del_m_i.pressure - p_6) + B * p_6; + + sum_1 += 0.5 * (cell_i.density * chi_2 / sound_speed + chi_5 / (sound_speed * sound_speed)); + sum_2 += 0.5 * (chi_2 + chi_5 / (sound_speed * cell_i.density)); + sum_5 += 0.5 * (cell_i.density * chi_2 * sound_speed + chi_5); + } + + // add the corrections to the initial guesses for the interface values + interface_L_iph.density += sum_1; + interface_L_iph.velocity_x += sum_2; + interface_L_iph.velocity_y += sum_3; + interface_L_iph.velocity_z += sum_4; + interface_L_iph.pressure += sum_5; +#ifdef DE + interface_L_iph.gas_energy += sum_ge; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_L_iph.scalar[i] += sum_scalar[i]; + } +#endif // SCALAR + + // right-hand interface value, i-1/2 + sum_1 = 0; + sum_2 = 0; + sum_3 = 0; + sum_4 = 0; + sum_5 = 0; +#ifdef DE + sum_ge = 0; +#endif // DE +#ifdef SCALAR + for (Real &val : sum_scalar) { + val = 0; + } +#endif // SCALAR + if (lambda_m <= 0) { + Real const C = (0.5 * dtodx) * (lambda_m - lambda_m); + Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_m * lambda_m); + + Real const chi_1 = C * (del_m_i.density + d_6) + D * d_6; + Real const chi_2 = C * (del_m_i.velocity_x + vx_6) + D * vx_6; + Real const chi_3 = C * (del_m_i.velocity_y + vy_6) + D * vy_6; + Real const chi_4 = C * (del_m_i.velocity_z + vz_6) + D * vz_6; + Real const chi_5 = C * (del_m_i.pressure + p_6) + D * p_6; + + sum_1 += -0.5 * (cell_i.density * chi_2 / sound_speed - chi_5 / (sound_speed * sound_speed)); + sum_2 += 0.5 * (chi_2 - chi_5 / (sound_speed * 
cell_i.density)); + sum_5 += -0.5 * (cell_i.density * chi_2 * sound_speed - chi_5); + } + if (lambda_0 <= 0) { + Real const C = (0.5 * dtodx) * (lambda_m - lambda_0); + Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_0 * lambda_0); + + Real const chi_1 = C * (del_m_i.density + d_6) + D * d_6; + Real const chi_2 = C * (del_m_i.velocity_x + vx_6) + D * vx_6; + Real const chi_3 = C * (del_m_i.velocity_y + vy_6) + D * vy_6; + Real const chi_4 = C * (del_m_i.velocity_z + vz_6) + D * vz_6; + Real const chi_5 = C * (del_m_i.pressure + p_6) + D * p_6; +#ifdef DE + chi_ge = C * (del_m_i.gas_energy + ge_6) + D * ge_6; +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - dev_bounds_R[(5 + i) * n_cells + id] = d_L * scalar_L[i]; + chi_scalar[i] = C * (del_m_i.scalar[i] + scalar_6[i]) + D * scalar_6[i]; } - #endif // SCALAR - #ifdef DE - dev_bounds_R[(n_fields - 1) * n_cells + id] = d_L * ge_L; - #endif // DE - // bounds_L refers to the left side of the i+1/2 interface - id = xid + yid * nx + zid * nx * ny; - dev_bounds_L[id] = d_R; - dev_bounds_L[o1 * n_cells + id] = d_R * vx_R; - dev_bounds_L[o2 * n_cells + id] = d_R * vy_R; - dev_bounds_L[o3 * n_cells + id] = d_R * vz_R; - dev_bounds_L[4 * n_cells + id] = p_R / (gamma - 1.0) + 0.5 * d_R * (vx_R * vx_R + vy_R * vy_R + vz_R * vz_R); - #ifdef SCALAR +#endif // SCALAR + + sum_1 += chi_1 - chi_5 / (sound_speed * sound_speed); + sum_3 += chi_3; + sum_4 += chi_4; +#ifdef DE + sum_ge += chi_ge; +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - dev_bounds_L[(5 + i) * n_cells + id] = d_R * scalar_R[i]; + sum_scalar[i] += chi_scalar[i]; } - #endif // SCALAR - #ifdef DE - dev_bounds_L[(n_fields - 1) * n_cells + id] = d_R * ge_R; - #endif // DE +#endif // SCALAR + } + if (lambda_p <= 0) { + Real const C = (0.5 * dtodx) * (lambda_m - lambda_p); + Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_p * lambda_p); + + Real const chi_1 = C * 
(del_m_i.density + d_6) + D * d_6; + Real const chi_2 = C * (del_m_i.velocity_x + vx_6) + D * vx_6; + Real const chi_3 = C * (del_m_i.velocity_y + vy_6) + D * vy_6; + Real const chi_4 = C * (del_m_i.velocity_z + vz_6) + D * vz_6; + Real const chi_5 = C * (del_m_i.pressure + p_6) + D * p_6; + + sum_1 += 0.5 * (cell_i.density * chi_2 / sound_speed + chi_5 / (sound_speed * sound_speed)); + sum_2 += 0.5 * (chi_2 + chi_5 / (sound_speed * cell_i.density)); + sum_5 += 0.5 * (cell_i.density * chi_2 * sound_speed + chi_5); + } + + // add the corrections + interface_R_imh.density += sum_1; + interface_R_imh.velocity_x += sum_2; + interface_R_imh.velocity_y += sum_3; + interface_R_imh.velocity_z += sum_4; + interface_R_imh.pressure += sum_5; +#ifdef DE + interface_R_imh.gas_energy += sum_ge; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_R_imh.scalar[i] += sum_scalar[i]; } +#endif // SCALAR + + // This is the end of the characteristic tracing + + // enforce minimum values + interface_R_imh.density = fmax(interface_R_imh.density, (Real)TINY_NUMBER); + interface_L_iph.density = fmax(interface_L_iph.density, (Real)TINY_NUMBER); + interface_R_imh.pressure = fmax(interface_R_imh.pressure, (Real)TINY_NUMBER); + interface_L_iph.pressure = fmax(interface_L_iph.pressure, (Real)TINY_NUMBER); + + // Step 11 - Send final values back from kernel + + // Convert the left and right states in the primitive to the conserved variables send final values back from kernel + // bounds_R refers to the right side of the i-1/2 interface + size_t id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + reconstruction::Write_Data(interface_L_iph, dev_bounds_L, dev_conserved, id, n_cells, o1, o2, o3, gamma); + + id = cuda_utilities::compute1DIndex(xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny); + reconstruction::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma); } +// 
===================================================================================================================== + +// ===================================================================================================================== +__global__ __launch_bounds__(TPB) void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, + int ny, int nz, Real gamma, int dir) +{ + // get a thread ID + int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; + int xid, yid, zid; + cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); + + // Ensure that we are only operating on cells that will be used + if (reconstruction::Thread_Guard<3>(nx, ny, nz, xid, yid, zid)) { + return; + } - #endif // PPMC -#endif // CUDA + // Compute the total number of cells + int const n_cells = nx * ny * nz; + + // Set the field indices for the various directions + int o1, o2, o3; + switch (dir) { + case 0: + o1 = grid_enum::momentum_x; + o2 = grid_enum::momentum_y; + o3 = grid_enum::momentum_z; + break; + case 1: + o1 = grid_enum::momentum_y; + o2 = grid_enum::momentum_z; + o3 = grid_enum::momentum_x; + break; + case 2: + o1 = grid_enum::momentum_z; + o2 = grid_enum::momentum_x; + o3 = grid_enum::momentum_y; + break; + } + + // load the 5-cell stencil into registers + // cell i + reconstruction::Primitive const cell_i = + reconstruction::Load_Data(dev_conserved, xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma); + + // cell i-1. The equality checks the direction and will subtract one from the correct direction + // im1 stands for "i minus 1" + reconstruction::Primitive const cell_im1 = reconstruction::Load_Data( + dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); + + // cell i+1. 
The equality checks the direction and add one to the correct direction + // ip1 stands for "i plus 1" + reconstruction::Primitive const cell_ip1 = reconstruction::Load_Data( + dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); + + // cell i-2. The equality checks the direction and will subtract two from the correct direction + // im2 stands for "i minus 2" + reconstruction::Primitive const cell_im2 = + reconstruction::Load_Data(dev_conserved, xid - 2 * int(dir == 0), yid - 2 * int(dir == 1), + zid - 2 * int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); + + // cell i+2. The equality checks the direction and add two to the correct direction + // ip2 stands for "i plus 2" + reconstruction::Primitive const cell_ip2 = + reconstruction::Load_Data(dev_conserved, xid + 2 * int(dir == 0), yid + 2 * int(dir == 1), + zid + 2 * int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); + + // Convert to the characteristic variables + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; + +#ifdef MHD + reconstruction::EigenVecs eigenvectors = + reconstruction::Compute_Eigenvectors(cell_i, sound_speed, sound_speed_squared, gamma); +#else + reconstruction::EigenVecs eigenvectors; +#endif // MHD + + // Cell i + reconstruction::Characteristic const cell_i_characteristic = reconstruction::Primitive_To_Characteristic( + cell_i, cell_i, eigenvectors, sound_speed, sound_speed_squared, gamma); + + // Cell i-1 + reconstruction::Characteristic const cell_im1_characteristic = reconstruction::Primitive_To_Characteristic( + cell_i, cell_im1, eigenvectors, sound_speed, sound_speed_squared, gamma); + + // Cell i-2 + reconstruction::Characteristic const cell_im2_characteristic = reconstruction::Primitive_To_Characteristic( + cell_i, cell_im2, eigenvectors, sound_speed, sound_speed_squared, gamma); + + // Cell i+1 + 
reconstruction::Characteristic const cell_ip1_characteristic = reconstruction::Primitive_To_Characteristic( + cell_i, cell_ip1, eigenvectors, sound_speed, sound_speed_squared, gamma); + + // Cell i+2 + reconstruction::Characteristic const cell_ip2_characteristic = reconstruction::Primitive_To_Characteristic( + cell_i, cell_ip2, eigenvectors, sound_speed, sound_speed_squared, gamma); + + // Compute the interface states for each field + reconstruction::Characteristic interface_R_imh_characteristic, interface_L_iph_characteristic; + + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a0, cell_im1_characteristic.a0, cell_i_characteristic.a0, + cell_ip1_characteristic.a0, cell_ip2_characteristic.a0, + interface_L_iph_characteristic.a0, interface_R_imh_characteristic.a0); + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a1, cell_im1_characteristic.a1, cell_i_characteristic.a1, + cell_ip1_characteristic.a1, cell_ip2_characteristic.a1, + interface_L_iph_characteristic.a1, interface_R_imh_characteristic.a1); + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a2, cell_im1_characteristic.a2, cell_i_characteristic.a2, + cell_ip1_characteristic.a2, cell_ip2_characteristic.a2, + interface_L_iph_characteristic.a2, interface_R_imh_characteristic.a2); + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a3, cell_im1_characteristic.a3, cell_i_characteristic.a3, + cell_ip1_characteristic.a3, cell_ip2_characteristic.a3, + interface_L_iph_characteristic.a3, interface_R_imh_characteristic.a3); + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a4, cell_im1_characteristic.a4, cell_i_characteristic.a4, + cell_ip1_characteristic.a4, cell_ip2_characteristic.a4, + interface_L_iph_characteristic.a4, interface_R_imh_characteristic.a4); + +#ifdef MHD + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a5, cell_im1_characteristic.a5, cell_i_characteristic.a5, + cell_ip1_characteristic.a5, cell_ip2_characteristic.a5, + 
interface_L_iph_characteristic.a5, interface_R_imh_characteristic.a5); + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a6, cell_im1_characteristic.a6, cell_i_characteristic.a6, + cell_ip1_characteristic.a6, cell_ip2_characteristic.a6, + interface_L_iph_characteristic.a6, interface_R_imh_characteristic.a6); +#endif // MHD + + // Convert back to primitive variables + reconstruction::Primitive interface_L_iph = reconstruction::Characteristic_To_Primitive( + cell_i, interface_L_iph_characteristic, eigenvectors, sound_speed, sound_speed_squared, gamma); + reconstruction::Primitive interface_R_imh = reconstruction::Characteristic_To_Primitive( + cell_i, interface_R_imh_characteristic, eigenvectors, sound_speed, sound_speed_squared, gamma); + + // Compute the interfaces for the variables that don't have characteristics +#ifdef DE + reconstruction::PPM_Single_Variable(cell_im2.gas_energy, cell_im1.gas_energy, cell_i.gas_energy, cell_ip1.gas_energy, + cell_ip2.gas_energy, interface_L_iph.gas_energy, interface_R_imh.gas_energy); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + reconstruction::PPM_Single_Variable(cell_im2.scalar[i], cell_im1.scalar[i], cell_i.scalar[i], cell_ip1.scalar[i], + cell_ip2.scalar[i], interface_L_iph.scalar[i], interface_R_imh.scalar[i]); + } +#endif // SCALAR + + // enforce minimum values + interface_R_imh.density = fmax(interface_R_imh.density, (Real)TINY_NUMBER); + interface_L_iph.density = fmax(interface_L_iph.density, (Real)TINY_NUMBER); + interface_R_imh.pressure = fmax(interface_R_imh.pressure, (Real)TINY_NUMBER); + interface_L_iph.pressure = fmax(interface_L_iph.pressure, (Real)TINY_NUMBER); + + // Step 11 - Send final values back from kernel + + // Convert the left and right states in the primitive to the conserved variables send final values back from kernel + // bounds_R refers to the right side of the i-1/2 interface + size_t id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + 
reconstruction::Write_Data(interface_L_iph, dev_bounds_L, dev_conserved, id, n_cells, o1, o2, o3, gamma); + + id = cuda_utilities::compute1DIndex(xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny); + reconstruction::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma); +} +// ===================================================================================================================== diff --git a/src/reconstruction/ppmc_cuda.h b/src/reconstruction/ppmc_cuda.h index fc584ffb7..916853874 100644 --- a/src/reconstruction/ppmc_cuda.h +++ b/src/reconstruction/ppmc_cuda.h @@ -1,21 +1,53 @@ /*! \file ppmc_cuda.h * \brief Declarations of the cuda ppm kernels, characteristic reconstruction * version. */ -#ifdef CUDA - #ifdef PPMC - #ifndef PPMC_CUDA_H - #define PPMC_CUDA_H +#ifndef PPMC_CUDA_H +#define PPMC_CUDA_H - #include "../global/global.h" +#include "../global/global.h" -/*! \fn void PPMC(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, - int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir) - * \brief When passed a stencil of conserved variables, returns the left and - right boundary values for the interface calculated using ppm. */ -__global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields); +/*! + * \brief Computes the left and right interface states using PPM with limiting in the characteristic variables and + * characteristic tracing. Used for the CTU and SIMPLE integrators. This uses the PPM method described in + * Stone et al. 2008 "Athena: A New Code for Astrophysical MHD". Fundementally this method relies on a Van Leer limiter + * in the characteristic variables to monotonize the slopes followed by limiting the interface states using the limiter + * from Colella & Woodward 1984. 
+ * + * \param[in] dev_conserved The conserved variable array + * \param[out] dev_bounds_L The array of left interfaces + * \param[out] dev_bounds_R The array of right interfaces + * \param[in] nx The number of cells in the X-direction + * \param[in] ny The number of cells in the Y-direction + * \param[in] nz The number of cells in the Z-direction + * \param[in] dx The length of the cells in the `dir` direction + * \param[in] dt The time step + * \param[in] gamma The adiabatic index + * \param[in] dir The direction to reconstruct. 0=X, 1=Y, 2=Z + */ +__global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, + Real dt, Real gamma, int dir); - #endif // PPMC_CUDA_H - #endif // PPMC -#endif // CUDA +/*! + * \brief Computes the left and right interface states using PPM with limiting in the characteristic variables. Used for + * the VL (Van Leer) integrators. This uses the PPM method described in + * Felker & Stone 2018 "A fourth-order accurate finite volume method for ideal MHD via upwind constrained transport". + * This method computes the 3rd order interface then applies a mixture of monoticity constraints from from Colella & + * Sekora 2008, McCorquodale & Colella 2011, and Colella et al. 2011; for details see the + * `reconstruction::PPM_Single_Variable` function. We found that this newer method and limiters was more stable, less + * oscillatory, and faster than the method described in Stone et al. 2008 which is used in PPMC_CTU. The difference is + * most pronounced in the Brio & Wu shock tube where the PPM oscillations are much smaller using this method. 
+ * + * \param[in] dev_conserved The conserved variable array + * \param[out] dev_bounds_L The array of left interfaces + * \param[out] dev_bounds_R The array of right interfaces + * \param[in] nx The number of cells in the X-direction + * \param[in] ny The number of cells in the Y-direction + * \param[in] nz The number of cells in the Z-direction + * \param[in] gamma The adiabatic index + * \param[in] dir The direction to reconstruct. 0=X, 1=Y, 2=Z + */ +__global__ __launch_bounds__(TPB) void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, + int ny, int nz, Real gamma, int dir); + +#endif // PPMC_CUDA_H diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu new file mode 100644 index 000000000..1c7515ec0 --- /dev/null +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -0,0 +1,259 @@ +/*! + * \file ppmc_cuda_tests.cu + * \brief Tests for the contents of ppmc_cuda.h and ppmc_cuda.cu + * + */ + +// STL Includes +#include +#include +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../global/global.h" +#include "../io/io.h" +#include "../reconstruction/ppmc_cuda.h" +#include "../utils/DeviceVector.h" +#include "../utils/hydro_utilities.h" +#include "../utils/testing_utilities.h" + +TEST(tHYDROPpmcCTUReconstructor, CorrectInputExpectCorrectOutput) +{ + // Set up PRNG to use + std::mt19937_64 prng(42); + std::uniform_real_distribution doubleRand(0.1, 5); + + // Mock up needed information + size_t const nx = 6; + size_t const ny = 6; + size_t const nz = 6; + size_t const n_fields = 5; + double const dx = doubleRand(prng); + double const dt = doubleRand(prng); + double const gamma = 5.0 / 3.0; + + // Setup host grid. 
Fill host grid with random values and randomly assign maximum value + std::vector host_grid(nx * ny * nz * n_fields); + for (double &val : host_grid) { + val = doubleRand(prng); + } + + // Allocating and copying to device + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + dev_grid.cpyHostToDevice(host_grid); + + // Fiducial Data + std::vector> fiducial_interface_left = {{{86, 2.6558981128823214}, + {302, 0.84399195916314151}, + {518, 2.2002498722761787}, + {734, 1.764334292986655}, + {950, 3.3600925565746804}}, + {{86, 2.4950488327292639}, + {302, 0.79287723513518138}, + {518, 1.7614576990062414}, + {734, 1.8238574169157304}, + {950, 3.14294317122161}}, + {{86, 2.6558981128823214}, + {302, 0.84399195916314151}, + {518, 2.0109603398129137}, + {734, 1.764334292986655}, + {950, 3.2100231679403066}}}; + + std::vector> fiducial_interface_right = {{{85, 2.6558981128823214}, + {301, 0.84399195916314151}, + {517, 1.8381070277226794}, + {733, 1.764334292986655}, + {949, 3.0847691079841209}}, + {{80, 3.1281603739188069}, + {296, 0.99406757727427164}, + {512, 1.8732124042412865}, + {728, 1.6489758692176784}, + {944, 2.8820015278590443}}, + {{50, 2.6558981128823214}, + {266, 0.84399195916314151}, + {482, 2.0109603398129137}, + {698, 1.764334292986655}, + {914, 3.2100231679403066}}}; + + // Loop over different directions + for (size_t direction = 0; direction < 3; direction++) { + // Allocate device buffers + cuda_utilities::DeviceVector dev_interface_left(host_grid.size(), true); + cuda_utilities::DeviceVector dev_interface_right(host_grid.size(), true); + + // Launch kernel + hipLaunchKernelGGL(PPMC_CTU, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, direction); + CudaCheckError(); + CHECK(cudaDeviceSynchronize()); + + // Perform Comparison + for (size_t i = 0; i < host_grid.size(); i++) { + // Check the left interface + double test_val = dev_interface_left.at(i); + double fiducial_val = 
+ (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) + ? 0.0 + : fiducial_interface_left.at(direction)[i]; + + testingUtilities::checkResults( + fiducial_val, test_val, + "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + + // Check the right interface + test_val = dev_interface_right.at(i); + fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) + ? 0.0 + : fiducial_interface_right.at(direction)[i]; + + testingUtilities::checkResults( + fiducial_val, test_val, + "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + } + } +} + +TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) +{ +#ifdef DE + /// This test doesn't support Dual Energy. It wouldn't be that hard to add support for DE but the DE parts of the + /// reconstructor (loading and PPM_Single_Variable) are well tested elsewhere so there's no need to add the extra + /// complexity here. + GTEST_SKIP(); +#endif // DE + + // Set up PRNG to use + std::mt19937_64 prng(42); + std::uniform_real_distribution doubleRand(0.1, 5); + + // Mock up needed information + size_t const nx = 6; + size_t const ny = 6; + size_t const nz = 6; + double const gamma = 5.0 / 3.0; +#ifdef MHD + size_t const n_fields = 8; +#else // not MHD + size_t const n_fields = 5; +#endif // MHD + + // Setup host grid. 
Fill host grid with random values and randomly assign maximum value + std::vector host_grid(nx * ny * nz * n_fields); + for (double &val : host_grid) { + val = doubleRand(prng); + } + + // Allocating and copying to device + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + dev_grid.cpyHostToDevice(host_grid); + +// Fiducial Data +#ifdef MHD + std::vector> fiducial_interface_left = {{{86, 3.6926886385390683}, + {302, 2.3022467009220993}, + {518, 2.3207781368125389}, + {734, 2.6544338753333747}, + {950, 11.430630157120799}, + {1166, 0.6428577630032507}, + {1382, 4.1406925096276597}}, + {{86, 3.811691682348938}, + {302, 1.4827993897794758}, + {518, 2.3955690789476871}, + {734, 4.06241130448349}, + {950, 10.552876853630949}, + {1166, 3.5147238706385471}, + {1382, 1.2344879085821312}}, + {{86, 3.1608655959160155}, + {302, 1.5377824007725194}, + {518, 0.41798730655927896}, + {734, 2.2721408530383784}, + {950, 5.6329522765789646}, + {1166, 0.84450832590555991}, + {1382, 1.4279317910797107}}}; + + std::vector> fiducial_interface_right = {{{85, 2.8949509658187838}, + {301, 0.25766140043685887}, + {517, 1.8194165731976308}, + {733, 2.0809921071868756}, + {949, 8.1315538869542046}, + {1165, 0.49708185787322312}, + {1381, 3.2017395511439881}}, + {{80, 2.8600082827930269}, + {296, 0.37343415089084014}, + {512, 1.7974558224423689}, + {728, 0.94369445956099784}, + {944, 7.7011501503138504}, + {1160, 3.5147238706385471}, + {1376, 1.2344879085821312}}, + {{50, 3.1608655959160155}, + {266, 0.32035830490636008}, + {482, 3.1721881746709815}, + {698, 2.2721408530383784}, + {914, 14.017699282483312}, + {1130, 1.5292690020097823}, + {1346, -0.12121484974901264}}}; +#else // not MHD + std::vector> fiducial_interface_left = { + {{86, 4.155160222900312}, {302, 1.1624633361407897}, {518, 1.6379195998743412}, {734, 2.9868746414179093}}, + {{86, 4.1795874335665655}, {302, 2.1094239978455054}, {518, 2.6811988240843849}, {734, 4.2540957888954054}}, + {{86, 2.1772852940944429}, {302, 
0.58167501916840214}, {518, 1.3683785996473696}, {734, 0.40276763592716164}}}; + + std::vector> fiducial_interface_right = {{{54, 3.8655260187947502}, + {85, 2.6637168309565289}, + {301, 0.69483650107094164}, + {517, 2.7558388224532218}, + {733, 1.9147729154830744}}, + {{54, 5.7556871317935459}, + {80, 2.6515032256234021}, + {296, 0.39344537106429511}, + {512, 1.6491544916805785}, + {728, 0.85830485311660487}}, + {{50, 2.8254070932730269}, + {54, 2.1884721760267873}, + {266, 0.75482470285166003}, + {482, 1.7757096932649317}, + {698, 3.6101832818706452}}}; +#endif // MHD + + // Loop over different directions + for (size_t direction = 0; direction < 3; direction++) { + // Allocate device buffers + cuda_utilities::DeviceVector dev_interface_left(nx * ny * nz * (n_fields - 1), true); + cuda_utilities::DeviceVector dev_interface_right(nx * ny * nz * (n_fields - 1), true); + + // Launch kernel + hipLaunchKernelGGL(PPMC_VL, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + dev_interface_right.data(), nx, ny, nz, gamma, direction); + CudaCheckError(); + CHECK(cudaDeviceSynchronize()); + + // Perform Comparison + for (size_t i = 0; i < dev_interface_left.size(); i++) { + // Check the left interface + double test_val = dev_interface_left.at(i); + double fiducial_val = + (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) + ? 0.0 + : fiducial_interface_left.at(direction)[i]; + + testingUtilities::checkResults( + fiducial_val, test_val, + "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + + // Check the right interface + test_val = dev_interface_right.at(i); + fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) + ? 
0.0 + : fiducial_interface_right.at(direction)[i]; + + testingUtilities::checkResults( + fiducial_val, test_val, + "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + } + } +} diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h new file mode 100644 index 000000000..07aae21a6 --- /dev/null +++ b/src/reconstruction/reconstruction.h @@ -0,0 +1,920 @@ +/*! + * \file reconstruction.h + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contain the various structs and device functions needed for interface reconstruction + * + */ + +#pragma once + +// External Includes + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../utils/cuda_utilities.h" +#include "../utils/gpu.hpp" +#include "../utils/hydro_utilities.h" +#include "../utils/mhd_utilities.h" + +/*! + * \brief Namespace to contain various utilities for the interface reconstruction kernels + * + */ +namespace reconstruction +{ +// ===================================================================================================================== +/*! 
+ * \brief A struct for the primitive variables + * + */ +struct Primitive { + // Hydro variables + Real density, velocity_x, velocity_y, velocity_z, pressure; + +#ifdef MHD + // These are all cell centered values + Real magnetic_x, magnetic_y, magnetic_z; +#endif // MHD + +#ifdef DE + Real gas_energy; +#endif // DE + +#ifdef SCALAR + Real scalar[grid_enum::nscalars]; +#endif // SCALAR +}; +// ===================================================================================================================== + +// ===================================================================================================================== +struct EigenVecs { + Real magnetosonic_speed_fast, magnetosonic_speed_slow, magnetosonic_speed_fast_squared, + magnetosonic_speed_slow_squared; + Real alpha_fast, alpha_slow; + Real beta_y, beta_z; + Real n_fs, sign; + /// The non-primed values are used in the conversion from characteristic to primitive variables + Real q_fast, q_slow; + Real a_fast, a_slow; + /// The primed values are used in the conversion from primitive to characteristic variables + Real q_prime_fast, q_prime_slow; + Real a_prime_fast, a_prime_slow; +}; +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief A struct for the characteristic variables + * + */ +struct Characteristic { + // Hydro variables + Real a0, a1, a2, a3, a4; + +#ifdef MHD + Real a5, a6; +#endif // MHD +}; +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Determine if a thread is within the allowed range + * + * \tparam order The order of the reconstruction. 
2 for PLM, 3 for PPM + * \param nx The number of cells in the X-direction + * \param ny The number of cells in the Y-direction + * \param nz The number of cells in the Z-direction + * \param xid The X thread index + * \param yid The Y thread index + * \param zid The Z thread index + * \return true The thread is NOT in the allowed range + * \return false The thread is in the allowed range + */ +template <int order> +bool __device__ __host__ __inline__ Thread_Guard(int const &nx, int const &ny, int const &nz, int const &xid, + int const &yid, int const &zid) +{ + // These checks all make sure that the xid is such that the thread won't try to load any memory that is out of bounds + + // X check + bool out_of_bounds_thread = xid < order - 1 or xid >= nx - order; + + // Y check, only used for 2D and 3D + if (ny > 1) { + out_of_bounds_thread = yid < order - 1 or yid >= ny - order or out_of_bounds_thread; + } + + // z check, only used for 3D + if (nz > 1) { + out_of_bounds_thread = zid < order - 1 or zid >= nz - order or out_of_bounds_thread; + } + // This is needed in the case that nz == 1 to avoid overrun + else { + out_of_bounds_thread = zid >= nz or out_of_bounds_thread; + } + + return out_of_bounds_thread; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*!
+ * \brief Load the data for reconstruction + * + * \param[in] dev_conserved The conserved array + * \param[in] xid The xid of the cell to load data from + * \param[in] yid The yid of the cell to load data from + * \param[in] zid The zid of the cell to load data from + * \param[in] nx Size in the X direction + * \param[in] ny Size in the Y direction + * \param[in] n_cells The total number of cells + * \param[in] o1 Directional parameter; field index of the momentum loaded as velocity_x + * \param[in] o2 Directional parameter; field index of the momentum loaded as velocity_y + * \param[in] o3 Directional parameter; field index of the momentum loaded as velocity_z + * \param[in] gamma The adiabatic index + * \return Primitive The loaded cell data + */ +Primitive __device__ __host__ __inline__ Load_Data(Real const *dev_conserved, size_t const &xid, size_t const &yid, + size_t const &zid, size_t const &nx, size_t const &ny, + size_t const &n_cells, size_t const &o1, size_t const &o2, + size_t const &o3, Real const &gamma) +{ // Compute index + size_t const id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + + // Declare the variable we will return + Primitive loaded_data; + + // Load hydro variables except pressure + loaded_data.density = dev_conserved[grid_enum::density * n_cells + id]; + loaded_data.velocity_x = dev_conserved[o1 * n_cells + id] / loaded_data.density; + loaded_data.velocity_y = dev_conserved[o2 * n_cells + id] / loaded_data.density; + loaded_data.velocity_z = dev_conserved[o3 * n_cells + id] / loaded_data.density; + + // Load MHD variables.
Note that I only need the centered values for the transverse fields except for the initial + // computation of the primitive variables +#ifdef MHD + auto magnetic_centered = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); + switch (o1) { + case grid_enum::momentum_x: + loaded_data.magnetic_x = magnetic_centered.x; + loaded_data.magnetic_y = magnetic_centered.y; + loaded_data.magnetic_z = magnetic_centered.z; + break; + case grid_enum::momentum_y: + loaded_data.magnetic_x = magnetic_centered.y; + loaded_data.magnetic_y = magnetic_centered.z; + loaded_data.magnetic_z = magnetic_centered.x; + break; + case grid_enum::momentum_z: + loaded_data.magnetic_x = magnetic_centered.z; + loaded_data.magnetic_y = magnetic_centered.x; + loaded_data.magnetic_z = magnetic_centered.y; + break; + } +#endif // MHD + +// Load pressure accounting for dual energy if enabled +#ifdef DE // DE + Real const E = dev_conserved[grid_enum::Energy * n_cells + id]; + Real const gas_energy = dev_conserved[grid_enum::GasEnergy * n_cells + id]; + + Real E_non_thermal = hydro_utilities::Calc_Kinetic_Energy_From_Velocity( + loaded_data.density, loaded_data.velocity_x, loaded_data.velocity_y, loaded_data.velocity_z); + + #ifdef MHD + E_non_thermal += mhd::utils::computeMagneticEnergy(magnetic_centered.x, magnetic_centered.y, magnetic_centered.z); + #endif // MHD + + loaded_data.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_non_thermal, gas_energy, gamma); + loaded_data.gas_energy = gas_energy / loaded_data.density; +#else // not DE + #ifdef MHD + loaded_data.pressure = hydro_utilities::Calc_Pressure_Primitive( + dev_conserved[grid_enum::Energy * n_cells + id], loaded_data.density, loaded_data.velocity_x, + loaded_data.velocity_y, loaded_data.velocity_z, gamma, loaded_data.magnetic_x, loaded_data.magnetic_y, + loaded_data.magnetic_z); + #else // not MHD + loaded_data.pressure = hydro_utilities::Calc_Pressure_Primitive( +
dev_conserved[grid_enum::Energy * n_cells + id], loaded_data.density, loaded_data.velocity_x, + loaded_data.velocity_y, loaded_data.velocity_z, gamma); + #endif // MHD +#endif // DE + +#ifdef SCALAR + for (size_t i = 0; i < grid_enum::nscalars; i++) { + loaded_data.scalar[i] = dev_conserved[(grid_enum::scalar + i) * n_cells + id] / loaded_data.density; + } +#endif // SCALAR + + return loaded_data; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Compute a simple slope. Equation is `coef * (right - left)`. + * + * \param[in] left The data with the lower index (on the "left" side) + * \param[in] right The data with the higher index (on the "right" side) + * \param[in] coef The coefficient to multiply the slope by. Defaults to 1.0 + * \return Primitive The slopes. Note that no slope is computed for magnetic_x + */ +Primitive __device__ __host__ __inline__ Compute_Slope(Primitive const &left, Primitive const &right, + Real const &coef = 1.0) +{ + Primitive slopes; + + slopes.density = coef * (right.density - left.density); + slopes.velocity_x = coef * (right.velocity_x - left.velocity_x); + slopes.velocity_y = coef * (right.velocity_y - left.velocity_y); + slopes.velocity_z = coef * (right.velocity_z - left.velocity_z); + slopes.pressure = coef * (right.pressure - left.pressure); + +#ifdef MHD + slopes.magnetic_y = coef * (right.magnetic_y - left.magnetic_y); + slopes.magnetic_z = coef * (right.magnetic_z - left.magnetic_z); +#endif // MHD + +#ifdef DE + slopes.gas_energy = coef * (right.gas_energy - left.gas_energy); +#endif // DE + +#ifdef SCALAR + for (size_t i = 0; i < grid_enum::nscalars; i++) { + slopes.scalar[i] = coef * (right.scalar[i] - left.scalar[i]); + } +#endif // SCALAR + + return slopes; +} +//
===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Compute the Van Leer slope from the left and right slopes: `2*L*R/(L+R)` if `L*R > 0`, otherwise 0 + * + * \param[in] left_slope The left slope + * \param[in] right_slope The right slope + * \return Primitive The Van Leer slope + */ +Primitive __device__ __host__ __inline__ Van_Leer_Slope(Primitive const &left_slope, Primitive const &right_slope) +{ + Primitive vl_slopes; + + auto Calc_Vl_Slope = [](Real const &left, Real const &right) -> Real { + if (left * right > 0.0) { + return 2.0 * left * right / (left + right); + } else { + return 0.0; + } + }; + + vl_slopes.density = Calc_Vl_Slope(left_slope.density, right_slope.density); + vl_slopes.velocity_x = Calc_Vl_Slope(left_slope.velocity_x, right_slope.velocity_x); + vl_slopes.velocity_y = Calc_Vl_Slope(left_slope.velocity_y, right_slope.velocity_y); + vl_slopes.velocity_z = Calc_Vl_Slope(left_slope.velocity_z, right_slope.velocity_z); + vl_slopes.pressure = Calc_Vl_Slope(left_slope.pressure, right_slope.pressure); + +#ifdef MHD + vl_slopes.magnetic_y = Calc_Vl_Slope(left_slope.magnetic_y, right_slope.magnetic_y); + vl_slopes.magnetic_z = Calc_Vl_Slope(left_slope.magnetic_z, right_slope.magnetic_z); +#endif // MHD + +#ifdef DE + vl_slopes.gas_energy = Calc_Vl_Slope(left_slope.gas_energy, right_slope.gas_energy); +#endif // DE + +#ifdef SCALAR + for (size_t i = 0; i < grid_enum::nscalars; i++) { + vl_slopes.scalar[i] = Calc_Vl_Slope(left_slope.scalar[i], right_slope.scalar[i]); + } +#endif // SCALAR + + return vl_slopes; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*!
+ * \brief Compute the eigenvectors in the given cell. Only defined when MHD is enabled + * + * \param[in] primitive The primitive variables in a particular cell + * \param[in] sound_speed The sound speed + * \param[in] sound_speed_squared The sound speed squared + * \param[in] gamma The adiabatic index + * \return EigenVecs The wave speeds and eigenvector coefficients for the cell + */ +#ifdef MHD +EigenVecs __device__ __inline__ Compute_Eigenvectors(Primitive const &primitive, Real const &sound_speed, + Real const &sound_speed_squared, Real const &gamma) +{ + EigenVecs output; + // This is taken from Stone et al. 2008, appendix A. Equation numbers will be quoted as relevant + + // Compute wave speeds and their squares + output.magnetosonic_speed_fast = mhd::utils::fastMagnetosonicSpeed( + primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); + output.magnetosonic_speed_slow = mhd::utils::slowMagnetosonicSpeed( + primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma); + + output.magnetosonic_speed_fast_squared = output.magnetosonic_speed_fast * output.magnetosonic_speed_fast; + output.magnetosonic_speed_slow_squared = output.magnetosonic_speed_slow * output.magnetosonic_speed_slow; + + // Compute Alphas (equation A16) + if (Real const denom = (output.magnetosonic_speed_fast_squared - output.magnetosonic_speed_slow_squared), + numerator_2 = (output.magnetosonic_speed_fast_squared - sound_speed_squared); + denom <= 0.0 or numerator_2 <= 0.0) { + output.alpha_fast = 1.0; + output.alpha_slow = 0.0; + } else if (Real const numerator_1 = (sound_speed_squared - output.magnetosonic_speed_slow_squared); + numerator_1 <= 0.0) { + output.alpha_fast = 0.0; + output.alpha_slow = 1.0; + } else { + output.alpha_fast = sqrt(numerator_1 / denom); + output.alpha_slow = sqrt(numerator_2 / denom); + } + + // Compute Betas (equation A17).
Note that rhypot can return an inf if By and Bz are both zero, the isfinite check + // handles that case by falling back to beta_y = 1 and beta_z = 0 + Real const beta_denom = rhypot(primitive.magnetic_y, primitive.magnetic_z); + output.beta_y = (isfinite(beta_denom)) ? primitive.magnetic_y * beta_denom : 1.0; + output.beta_z = (isfinite(beta_denom)) ? primitive.magnetic_z * beta_denom : 0.0; + + // Compute Q(s) (equation A14) + output.sign = copysign(1.0, primitive.magnetic_x); + output.n_fs = 0.5 / sound_speed_squared; // equation A19 + output.q_prime_fast = output.sign * output.n_fs * output.alpha_fast * output.magnetosonic_speed_fast; + output.q_prime_slow = output.sign * output.n_fs * output.alpha_slow * output.magnetosonic_speed_slow; + output.q_fast = output.sign * output.alpha_fast * output.magnetosonic_speed_fast; + output.q_slow = output.sign * output.alpha_slow * output.magnetosonic_speed_slow; + + // Compute A(s) (equation A15) + output.a_fast = output.alpha_fast * sound_speed * sqrt(primitive.density); + output.a_slow = output.alpha_slow * sound_speed * sqrt(primitive.density); + output.a_prime_fast = 0.5 * output.alpha_fast / (sound_speed * sqrt(primitive.density)); + output.a_prime_slow = 0.5 * output.alpha_slow / (sound_speed * sqrt(primitive.density)); + + return output; +} +#endif // MHD +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Project from the primitive variables slopes to the characteristic variables slopes. Stone Eqn 37.
Use the + * eigenvectors given in Stone 2008, Appendix A + * + * \param[in] primitive The primitive variables + * \param[in] primitive_slope The primitive variables slopes + * \param[in] eigen The eigenvectors + * \param[in] sound_speed The speed of sound + * \param[in] sound_speed_squared The speed of sound squared + * \param[in] gamma The adiabatic index + * \return Characteristic The slopes in the characteristic variables + */ +Characteristic __device__ __inline__ Primitive_To_Characteristic(Primitive const &primitive, + Primitive const &primitive_slope, + EigenVecs const &eigen, Real const &sound_speed, + Real const &sound_speed_squared, Real const &gamma) +{ + Characteristic output; + +#ifdef MHD + // Multiply the slopes by the left eigenvector matrix given in equation 18 + Real const inverse_sqrt_density = rsqrt(primitive.density); + output.a0 = + eigen.n_fs * eigen.alpha_fast * + (primitive_slope.pressure / primitive.density - eigen.magnetosonic_speed_fast * primitive_slope.velocity_x) + + eigen.q_prime_slow * (eigen.beta_y * primitive_slope.velocity_y + eigen.beta_z * primitive_slope.velocity_z) + + eigen.a_prime_slow * (eigen.beta_y * primitive_slope.magnetic_y + eigen.beta_z * primitive_slope.magnetic_z); + + output.a1 = + 0.5 * + (eigen.beta_y * (primitive_slope.magnetic_z * eigen.sign * inverse_sqrt_density + primitive_slope.velocity_z) - + eigen.beta_z * (primitive_slope.magnetic_y * eigen.sign * inverse_sqrt_density + primitive_slope.velocity_y)); + + output.a2 = + eigen.n_fs * eigen.alpha_slow * + (primitive_slope.pressure / primitive.density - eigen.magnetosonic_speed_slow * primitive_slope.velocity_x) - + eigen.q_prime_fast * (eigen.beta_y * primitive_slope.velocity_y + eigen.beta_z * primitive_slope.velocity_z) - + eigen.a_prime_fast * (eigen.beta_y * primitive_slope.magnetic_y + eigen.beta_z * primitive_slope.magnetic_z); + + output.a3 = primitive_slope.density - primitive_slope.pressure / sound_speed_squared; + + output.a4 = + eigen.n_fs * eigen.alpha_slow * +
(primitive_slope.pressure / primitive.density + eigen.magnetosonic_speed_slow * primitive_slope.velocity_x) + + eigen.q_prime_fast * (eigen.beta_y * primitive_slope.velocity_y + eigen.beta_z * primitive_slope.velocity_z) - + eigen.a_prime_fast * (eigen.beta_y * primitive_slope.magnetic_y + eigen.beta_z * primitive_slope.magnetic_z); + output.a5 = + 0.5 * + (eigen.beta_y * (primitive_slope.magnetic_z * eigen.sign * inverse_sqrt_density - primitive_slope.velocity_z) - + eigen.beta_z * (primitive_slope.magnetic_y * eigen.sign * inverse_sqrt_density - primitive_slope.velocity_y)); + + output.a6 = + eigen.n_fs * eigen.alpha_fast * + (primitive_slope.pressure / primitive.density + eigen.magnetosonic_speed_fast * primitive_slope.velocity_x) - + eigen.q_prime_slow * (eigen.beta_y * primitive_slope.velocity_y + eigen.beta_z * primitive_slope.velocity_z) + + eigen.a_prime_slow * (eigen.beta_y * primitive_slope.magnetic_y + eigen.beta_z * primitive_slope.magnetic_z); + +#else // not MHD + output.a0 = -primitive.density * primitive_slope.velocity_x / (2.0 * sound_speed) + + primitive_slope.pressure / (2.0 * sound_speed_squared); + output.a1 = primitive_slope.density - primitive_slope.pressure / (sound_speed_squared); + output.a2 = primitive_slope.velocity_y; + output.a3 = primitive_slope.velocity_z; + output.a4 = primitive.density * primitive_slope.velocity_x / (2.0 * sound_speed) + + primitive_slope.pressure / (2.0 * sound_speed_squared); +#endif // MHD + + return output; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Project from the characteristic variables slopes to the primitive variables slopes. Stone Eqn 39.
Use the + * eigenvectors given in Stone 2008, Appendix A + * + * \param[in] primitive The primitive variables + * \param[in] characteristic_slope The characteristic slopes + * \param[in] eigen The eigenvectors + * \param[in] sound_speed The sound speed + * \param[in] sound_speed_squared The sound speed squared + * \param[in] gamma The adiabatic index + * \return Primitive The slopes in the primitive variables; magnetic_x, gas_energy, and scalar are left unset + */ +Primitive __device__ __host__ __inline__ Characteristic_To_Primitive(Primitive const &primitive, + Characteristic const &characteristic_slope, + EigenVecs const &eigen, Real const &sound_speed, + Real const &sound_speed_squared, Real const &gamma) +{ + Primitive output; +#ifdef MHD + // Multiply the slopes by the right eigenvector matrix given in equation 12 + output.density = primitive.density * (eigen.alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) + + eigen.alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4)) + + characteristic_slope.a3; + output.velocity_x = + eigen.magnetosonic_speed_fast * eigen.alpha_fast * (characteristic_slope.a6 - characteristic_slope.a0) + + eigen.magnetosonic_speed_slow * eigen.alpha_slow * (characteristic_slope.a4 - characteristic_slope.a2); + output.velocity_y = eigen.beta_y * (eigen.q_slow * (characteristic_slope.a0 - characteristic_slope.a6) + + eigen.q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) + + eigen.beta_z * (characteristic_slope.a5 - characteristic_slope.a1); + output.velocity_z = eigen.beta_z * (eigen.q_slow * (characteristic_slope.a0 - characteristic_slope.a6) + + eigen.q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) + + eigen.beta_y * (characteristic_slope.a1 - characteristic_slope.a5); + output.pressure = primitive.density * sound_speed_squared * + (eigen.alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) + + eigen.alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4)); + output.magnetic_y = + eigen.beta_y * (eigen.a_slow *
(characteristic_slope.a0 + characteristic_slope.a6) - + eigen.a_fast * (characteristic_slope.a2 + characteristic_slope.a4)) - + eigen.beta_z * eigen.sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1); + output.magnetic_z = + eigen.beta_z * (eigen.a_slow * (characteristic_slope.a0 + characteristic_slope.a6) - + eigen.a_fast * (characteristic_slope.a2 + characteristic_slope.a4)) + + eigen.beta_y * eigen.sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1); + +#else // not MHD + output.density = characteristic_slope.a0 + characteristic_slope.a1 + characteristic_slope.a4; + output.velocity_x = sound_speed / primitive.density * (characteristic_slope.a4 - characteristic_slope.a0); + output.velocity_y = characteristic_slope.a2; + output.velocity_z = characteristic_slope.a3; + output.pressure = sound_speed_squared * (characteristic_slope.a0 + characteristic_slope.a4); +#endif // MHD + + return output; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*!
+ * \brief Monotonize the characteristic slopes and project back into the primitive slopes + * + * \param[in] primitive The primitive variables + * \param[in] del_L The left primitive slopes + * \param[in] del_R The right primitive slopes + * \param[in] del_C The centered primitive slopes + * \param[in] del_G The Van Leer primitive slopes + * \param[in] del_a_L The left characteristic slopes + * \param[in] del_a_R The right characteristic slopes + * \param[in] del_a_C The centered characteristic slopes + * \param[in] del_a_G The Van Leer characteristic slopes + * \param[in] sound_speed The sound speed + * \param[in] sound_speed_squared The sound speed squared + * \param[in] gamma The adiabatic index + * \return Primitive The Monotonized primitive slopes + */ +Primitive __device__ __inline__ Monotonize_Characteristic_Return_Primitive( + Primitive const &primitive, Primitive const &del_L, Primitive const &del_R, Primitive const &del_C, + Primitive const &del_G, Characteristic const &del_a_L, Characteristic const &del_a_R, Characteristic const &del_a_C, + Characteristic const &del_a_G, EigenVecs const &eigenvectors, Real const &sound_speed, + Real const &sound_speed_squared, Real const &gamma) +{ + // The function that will actually do the monotozation + auto Monotonize = [](Real const &left, Real const &right, Real const ¢ered, Real const &van_leer) -> Real { + if (left * right > 0.0) { + Real const lim_slope_a = 2.0 * fmin(fabs(left), fabs(right)); + Real const lim_slope_b = fmin(fabs(centered), fabs(van_leer)); + return copysign(fmin(lim_slope_a, lim_slope_b), centered); + } else { + return 0.0; + } + }; + + // the monotonized difference in the characteristic variables + Characteristic del_a_m; + + // Monotonize the slopes + del_a_m.a0 = Monotonize(del_a_L.a0, del_a_R.a0, del_a_C.a0, del_a_G.a0); + del_a_m.a1 = Monotonize(del_a_L.a1, del_a_R.a1, del_a_C.a1, del_a_G.a1); + del_a_m.a2 = Monotonize(del_a_L.a2, del_a_R.a2, del_a_C.a2, del_a_G.a2); + del_a_m.a3 = 
Monotonize(del_a_L.a3, del_a_R.a3, del_a_C.a3, del_a_G.a3); + del_a_m.a4 = Monotonize(del_a_L.a4, del_a_R.a4, del_a_C.a4, del_a_G.a4); + +#ifdef MHD + del_a_m.a5 = Monotonize(del_a_L.a5, del_a_R.a5, del_a_C.a5, del_a_G.a5); + del_a_m.a6 = Monotonize(del_a_L.a6, del_a_R.a6, del_a_C.a6, del_a_G.a6); +#endif // MHD + + // Project into the primitive variables. Note the return by reference to preserve the values in the gas_energy and + // scalars + Primitive output = + Characteristic_To_Primitive(primitive, del_a_m, eigenvectors, sound_speed, sound_speed_squared, gamma); + +#ifdef DE + output.gas_energy = Monotonize(del_L.gas_energy, del_R.gas_energy, del_C.gas_energy, del_G.gas_energy); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + output.scalar[i] = Monotonize(del_L.scalar[i], del_R.scalar[i], del_C.scalar[i], del_G.scalar[i]); + } +#endif // SCALAR + + return output; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Monotonize the parabolic interface states + * + * \param[in] cell_i The state in cell i + * \param[in] cell_im1 The state in cell i-1 + * \param[in] cell_ip1 The state in cell i+1 + * \param[in,out] interface_L_iph The left interface state at i+1/2 + * \param[in,out] interface_R_imh The right interface state at i-1/2 + * \return Primitive + */ +void __device__ __host__ __inline__ Monotonize_Parabolic_Interface(Primitive const &cell_i, Primitive const &cell_im1, + Primitive const &cell_ip1, + Primitive &interface_L_iph, + Primitive &interface_R_imh) +{ + // The function that will actually do the monotozation. 
Note the return by reference of the interface state + auto Monotonize = [](Real const &state_i, Real const &state_im1, Real const &state_ip1, Real &interface_L, + Real &interface_R) { + // Some terms we need for the comparisons + Real const term_1 = 6.0 * (interface_L - interface_R) * (state_i - 0.5 * (interface_R + interface_L)); + Real const term_2 = pow(interface_L - interface_R, 2.0); + + // First monotonicity constraint. Equations 47-49 in Stone et al. 2008 + if ((interface_L - state_i) * (state_i - interface_R) <= 0.0) { + interface_L = state_i; + interface_R = state_i; + } + // Second monotonicity constraint. Equations 50 & 51 in Stone et al. 2008 + else if (term_1 > term_2) { + interface_R = 3.0 * state_i - 2.0 * interface_L; + } + // Third monotonicity constraint. Equations 52 & 53 in Stone et al. 2008 + else if (term_1 < -term_2) { + interface_L = 3.0 * state_i - 2.0 * interface_R; + } + + // Bound the interface to lie between adjacent cell centered values + interface_R = fmax(fmin(state_i, state_im1), interface_R); + interface_R = fmin(fmax(state_i, state_im1), interface_R); + interface_L = fmax(fmin(state_i, state_ip1), interface_L); + interface_L = fmin(fmax(state_i, state_ip1), interface_L); + }; + + // Monotonize each interface state + Monotonize(cell_i.density, cell_im1.density, cell_ip1.density, interface_L_iph.density, interface_R_imh.density); + Monotonize(cell_i.velocity_x, cell_im1.velocity_x, cell_ip1.velocity_x, interface_L_iph.velocity_x, + interface_R_imh.velocity_x); + Monotonize(cell_i.velocity_y, cell_im1.velocity_y, cell_ip1.velocity_y, interface_L_iph.velocity_y, + interface_R_imh.velocity_y); + Monotonize(cell_i.velocity_z, cell_im1.velocity_z, cell_ip1.velocity_z, interface_L_iph.velocity_z, + interface_R_imh.velocity_z); + Monotonize(cell_i.pressure, cell_im1.pressure, cell_ip1.pressure, interface_L_iph.pressure, interface_R_imh.pressure); + +#ifdef MHD + Monotonize(cell_i.magnetic_y, cell_im1.magnetic_y, cell_ip1.magnetic_y, 
interface_L_iph.magnetic_y, + interface_R_imh.magnetic_y); + Monotonize(cell_i.magnetic_z, cell_im1.magnetic_z, cell_ip1.magnetic_z, interface_L_iph.magnetic_z, + interface_R_imh.magnetic_z); +#endif // MHD + +#ifdef DE + Monotonize(cell_i.gas_energy, cell_im1.gas_energy, cell_ip1.gas_energy, interface_L_iph.gas_energy, + interface_R_imh.gas_energy); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + Monotonize(cell_i.scalar[i], cell_im1.scalar[i], cell_ip1.scalar[i], interface_L_iph.scalar[i], + interface_R_imh.scalar[i]); + } +#endif // SCALAR +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Compute the interface state from the slope and cell centered state using linear interpolation + * + * \param[in] primitive The cell centered state + * \param[in] slopes The slopes + * \param[in] sign Whether to add or subtract the slope. 
+1 to add it and -1 to subtract it + * \return Primitive The interface state + */ +Primitive __device__ __host__ __inline__ Calc_Interface_Linear(Primitive const &primitive, Primitive const &slopes, + Real const &sign) +{ + Primitive output; + + auto interface = [&sign](Real const &state, Real const &slope) -> Real { return state + sign * 0.5 * slope; }; + + output.density = interface(primitive.density, slopes.density); + output.velocity_x = interface(primitive.velocity_x, slopes.velocity_x); + output.velocity_y = interface(primitive.velocity_y, slopes.velocity_y); + output.velocity_z = interface(primitive.velocity_z, slopes.velocity_z); + output.pressure = interface(primitive.pressure, slopes.pressure); + +#ifdef MHD + output.magnetic_y = interface(primitive.magnetic_y, slopes.magnetic_y); + output.magnetic_z = interface(primitive.magnetic_z, slopes.magnetic_z); +#endif // MHD + +#ifdef DE + output.gas_energy = interface(primitive.gas_energy, slopes.gas_energy); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + output.scalar[i] = interface(primitive.scalar[i], slopes.scalar[i]); + } +#endif // SCALAR + + return output; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! 
+ * \brief Compute the interface state for the CTU version of the reconstructor from the slope and cell centered state + * using parabolic interpolation + * + * \param[in] cell_i The state in cell i + * \param[in] cell_im1 The state in cell i-1 + * \param[in] slopes_i The slopes in cell i + * \param[in] slopes_im1 The slopes in cell i-1 + * \return Primitive The interface state + */ +Primitive __device__ __host__ __inline__ Calc_Interface_Parabolic(Primitive const &cell_i, Primitive const &cell_im1, + Primitive const &slopes_i, + Primitive const &slopes_im1) +{ + Primitive output; + + auto interface = [](Real const &state_i, Real const &state_im1, Real const &slope_i, Real const &slope_im1) -> Real { + return 0.5 * (state_i + state_im1) - (slope_i - slope_im1) / 6.0; + }; + + output.density = interface(cell_i.density, cell_im1.density, slopes_i.density, slopes_im1.density); + output.velocity_x = interface(cell_i.velocity_x, cell_im1.velocity_x, slopes_i.velocity_x, slopes_im1.velocity_x); + output.velocity_y = interface(cell_i.velocity_y, cell_im1.velocity_y, slopes_i.velocity_y, slopes_im1.velocity_y); + output.velocity_z = interface(cell_i.velocity_z, cell_im1.velocity_z, slopes_i.velocity_z, slopes_im1.velocity_z); + output.pressure = interface(cell_i.pressure, cell_im1.pressure, slopes_i.pressure, slopes_im1.pressure); + +#ifdef MHD + output.magnetic_y = interface(cell_i.magnetic_y, cell_im1.magnetic_y, slopes_i.magnetic_y, slopes_im1.magnetic_y); + output.magnetic_z = interface(cell_i.magnetic_z, cell_im1.magnetic_z, slopes_i.magnetic_z, slopes_im1.magnetic_z); +#endif // MHD + +#ifdef DE + output.gas_energy = interface(cell_i.gas_energy, cell_im1.gas_energy, slopes_i.gas_energy, slopes_im1.gas_energy); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + output.scalar[i] = interface(cell_i.scalar[i], cell_im1.scalar[i], slopes_i.scalar[i], slopes_im1.scalar[i]); + } +#endif // SCALAR + + return output; +} +// 
===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Compute the PPM interface state for a given field/stencil. + * + * \details This method is heavily based on the implementation in Athena++. See the following papers for details + * - K. Felker & J. Stone, "A fourth-order accurate finite volume method for ideal MHD via upwind constrained + * transport", JCP, 375, (2018) + * - P. Colella & P. Woodward, "The Piecewise Parabolic Method (PPM) for Gas-Dynamical Simulations", JCP, 54, 174 + * (1984) + * - P. Colella & M. Sekora, "A limiter for PPM that preserves accuracy at smooth extrema", JCP, 227, 7069 (2008) + * - P. McCorquodale & P. Colella, "A high-order finite-volume method for conservation laws on locally refined + * grids", CAMCoS, 6, 1 (2011) + * - P. Colella, M.R. Dorr, J. Hittinger, D. 
Martin, "High-order, finite-volume methods in mapped coordinates", JCP, + * 230, 2952 (2011) + * + * \param[in] cell_im2 The value of the field/stencil at i-2 + * \param[in] cell_im1 The value of the field/stencil at i-1 + * \param[in] cell_i The value of the field/stencil at i + * \param[in] cell_ip1 The value of the field/stencil at i+1 + * \param[in] cell_ip2 The value of the field/stencil at i+2 + * \param[out] interface_L_iph The left interface at the i+1/2 face + * \param[out] interface_R_imh The right interface at the i-1/2 face + */ +void __device__ __host__ __inline__ PPM_Single_Variable(Real const &cell_im2, Real const &cell_im1, Real const &cell_i, + Real const &cell_ip1, Real const &cell_ip2, + Real &interface_L_iph, Real &interface_R_imh) +{ + // Let's start by setting up some things that we'll need later + + // Colella & Sekora 2008 constant used in second derivative limiter + Real const C2 = 1.25; + + // This lambda function is used for limiting the interfaces + auto limit_interface = [&C2](Real const &cell_i, Real const &cell_im1, Real const &interface, Real const &slope_2nd_i, + Real const &slope_2nd_im1) -> Real { + // Colella et al. 2011 eq. 85b. + // 85a is slope_2nd_im1 and 85c is slope_2nd_i + Real slope_2nd_centered = 3.0 * (cell_im1 + cell_i - 2.0 * interface); + + Real limited_slope = 0.0; + if (SIGN(slope_2nd_centered) == SIGN(slope_2nd_im1) and SIGN(slope_2nd_centered) == SIGN(slope_2nd_i)) { + limited_slope = SIGN(slope_2nd_centered) * + fmin(C2 * abs(slope_2nd_im1), fmin(C2 * abs(slope_2nd_i), abs(slope_2nd_centered))); + } + + // Colella et al. 2011 eq. 
84a & 84b + Real const diff_left = interface - cell_im1; + Real const diff_right = cell_i - interface; + if (diff_left * diff_right < 0.0) { + // Local extrema detected at the interface + return 0.5 * (cell_im1 + cell_i) - limited_slope / 6.0; + } else { + return interface; + } + }; + + // Now that the setup is done we can start computing the interface states + + // Compute average slopes + Real const slope_left = (cell_i - cell_im1); + Real const slope_right = (cell_ip1 - cell_i); + Real const slope_avg_im1 = 0.5 * slope_left + 0.5 * (cell_im1 - cell_im2); + Real const slope_avg_i = 0.5 * slope_right + 0.5 * slope_left; + Real const slope_avg_ip1 = 0.5 * (cell_ip2 - cell_ip1) + 0.5 * slope_right; + + // Approximate interface average at i-1/2 and i+1/2 using PPM + // P. Colella & P. Woodward 1984 eq. 1.6 + interface_R_imh = 0.5 * (cell_im1 + cell_i) + (slope_avg_im1 - slope_avg_i) / 6.0; + interface_L_iph = 0.5 * (cell_i + cell_ip1) + (slope_avg_i - slope_avg_ip1) / 6.0; + + // Limit interpolated interface states (Colella et al. 2011 section 4.3.1) + + // Approximate second derivative at interfaces for smooth extrema preservation + // Colella et al. 
2011 eq 85a + Real const slope_2nd_im1 = cell_im2 + cell_i - 2.0 * cell_im1; + Real const slope_2nd_i = cell_im1 + cell_ip1 - 2.0 * cell_i; + Real const slope_2nd_ip1 = cell_i + cell_ip2 - 2.0 * cell_ip1; + + interface_R_imh = limit_interface(cell_i, cell_im1, interface_R_imh, slope_2nd_i, slope_2nd_im1); + interface_L_iph = limit_interface(cell_ip1, cell_i, interface_L_iph, slope_2nd_ip1, slope_2nd_i); + + // Compute cell-centered difference stencils (McCorquodale & Colella 2011 section 2.4.1) + + // Apply Colella & Sekora limiters to parabolic interpolant + Real slope_2nd_face = 6.0 * (interface_R_imh + interface_L_iph - 2.0 * cell_i); + + Real slope_2nd_limited = 0.0; + if (SIGN(slope_2nd_im1) == SIGN(slope_2nd_i) and SIGN(slope_2nd_im1) == SIGN(slope_2nd_ip1) and + SIGN(slope_2nd_im1) == SIGN(slope_2nd_face)) { + // Extrema is smooth + // Colella & Sekora eq. 22 + slope_2nd_limited = SIGN(slope_2nd_face) * fmin(fmin(C2 * abs(slope_2nd_im1), C2 * abs(slope_2nd_i)), + fmin(C2 * abs(slope_2nd_ip1), abs(slope_2nd_face))); + } + + // Check if 2nd derivative is close to roundoff error + Real cell_max = fmax(abs(cell_im2), abs(cell_im1)); + cell_max = fmax(cell_max, abs(cell_i)); + cell_max = fmax(cell_max, abs(cell_ip1)); + cell_max = fmax(cell_max, abs(cell_ip2)); + + // If this condition is true then the limiter is not sensitive to roundoff and we use the limited ratio + // McCorquodale & Colella 2011 eq. 27 + Real const rho = (abs(slope_2nd_face) > (1.0e-12) * cell_max) ? slope_2nd_limited / slope_2nd_face : 0.0; + + // Colella & Sekora eq. 25 + Real slope_face_left = cell_i - interface_R_imh; + Real slope_face_right = interface_L_iph - cell_i; + + // Check for local extrema + if ((slope_face_left * slope_face_right) <= 0.0 or ((cell_ip1 - cell_i) * (cell_i - cell_im1)) <= 0.0) { + // Extrema detected + // Check if relative change in limited 2nd deriv is > roundoff + if (rho <= (1.0 - (1.0e-12))) { + // Limit smooth extrema + // Colella & Sekora eq. 
23 + interface_R_imh = cell_i - rho * slope_face_left; + interface_L_iph = cell_i + rho * slope_face_right; + } + } else { + // No extrema detected + // Overshoot i-1/2,R / i,(-) state + if (abs(slope_face_left) >= 2.0 * abs(slope_face_right)) { + interface_R_imh = cell_i - 2.0 * slope_face_right; + } + // Overshoot i+1/2,L / i,(+) state + if (abs(slope_face_right) >= 2.0 * abs(slope_face_left)) { + interface_L_iph = cell_i + 2.0 * slope_face_left; + } + } +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Write the interface data to the appropriate arrays + * + * \param[in] interface_state The interface state to write + * \param[out] dev_interface The interface array + * \param[in] dev_conserved The conserved variables + * \param[in] id The cell id to write to + * \param[in] n_cells The total number of cells + * \param[in] o1 Directional parameter + * \param[in] o2 Directional parameter + * \param[in] o3 Directional parameter + * \param[in] gamma The adiabatic index + */ +void __device__ __host__ __inline__ Write_Data(Primitive const &interface_state, Real *dev_interface, + Real const *dev_conserved, size_t const &id, size_t const &n_cells, + size_t const &o1, size_t const &o2, size_t const &o3, Real const &gamma) +{ + // Write out density and momentum + dev_interface[grid_enum::density * n_cells + id] = interface_state.density; + dev_interface[o1 * n_cells + id] = interface_state.density * interface_state.velocity_x; + dev_interface[o2 * n_cells + id] = interface_state.density * interface_state.velocity_y; + dev_interface[o3 * n_cells + id] = interface_state.density * interface_state.velocity_z; + +#ifdef MHD + // Write the Y and Z interface states and load the X magnetic face needed to compute the energy + Real magnetic_x; + switch (o1) { + case 
grid_enum::momentum_x: + dev_interface[grid_enum::Q_x_magnetic_y * n_cells + id] = interface_state.magnetic_y; + dev_interface[grid_enum::Q_x_magnetic_z * n_cells + id] = interface_state.magnetic_z; + magnetic_x = dev_conserved[grid_enum::magnetic_x * n_cells + id]; + break; + case grid_enum::momentum_y: + dev_interface[grid_enum::Q_y_magnetic_z * n_cells + id] = interface_state.magnetic_y; + dev_interface[grid_enum::Q_y_magnetic_x * n_cells + id] = interface_state.magnetic_z; + magnetic_x = dev_conserved[grid_enum::magnetic_y * n_cells + id]; + break; + case grid_enum::momentum_z: + dev_interface[grid_enum::Q_z_magnetic_x * n_cells + id] = interface_state.magnetic_y; + dev_interface[grid_enum::Q_z_magnetic_y * n_cells + id] = interface_state.magnetic_z; + magnetic_x = dev_conserved[grid_enum::magnetic_z * n_cells + id]; + break; + } + + // Compute the MHD energy + dev_interface[grid_enum::Energy * n_cells + id] = hydro_utilities::Calc_Energy_Primitive( + interface_state.pressure, interface_state.density, interface_state.velocity_x, interface_state.velocity_y, + interface_state.velocity_z, gamma, magnetic_x, interface_state.magnetic_y, interface_state.magnetic_z); +#else // not MHD + // Compute the hydro energy + dev_interface[grid_enum::Energy * n_cells + id] = hydro_utilities::Calc_Energy_Primitive( + interface_state.pressure, interface_state.density, interface_state.velocity_x, interface_state.velocity_y, + interface_state.velocity_z, gamma); +#endif // MHD + +#ifdef DE + dev_interface[grid_enum::GasEnergy * n_cells + id] = interface_state.density * interface_state.gas_energy; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + dev_interface[(grid_enum::scalar + i) * n_cells + id] = interface_state.density * interface_state.scalar[i]; + } +#endif // SCALAR +} +// ===================================================================================================================== +} // namespace reconstruction diff --git 
a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu new file mode 100644 index 000000000..5f8000bf8 --- /dev/null +++ b/src/reconstruction/reconstruction_tests.cu @@ -0,0 +1,616 @@ +/*! + * \file reconstruction_tests.cu + * \brief Tests for the contents of reconstruction.h + * + */ + +// STL Includes +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../io/io.h" +#include "../reconstruction/reconstruction.h" +#include "../utils/DeviceVector.h" +#include "../utils/cuda_utilities.h" +#include "../utils/gpu.hpp" +#include "../utils/testing_utilities.h" + +#ifdef MHD +__global__ void test_prim_2_char(reconstruction::Primitive const primitive, + reconstruction::Primitive const primitive_slope, + reconstruction::EigenVecs const eigenvectors, Real const gamma, Real const sound_speed, + Real const sound_speed_squared, reconstruction::Characteristic *characteristic_slope) +{ + *characteristic_slope = reconstruction::Primitive_To_Characteristic(primitive, primitive_slope, eigenvectors, + sound_speed, sound_speed_squared, gamma); +} + +__global__ void test_char_2_prim(reconstruction::Primitive const primitive, + reconstruction::Characteristic const characteristic_slope, + reconstruction::EigenVecs const eigenvectors, Real const gamma, Real const sound_speed, + Real const sound_speed_squared, reconstruction::Primitive *primitive_slope) +{ + *primitive_slope = reconstruction::Characteristic_To_Primitive(primitive, characteristic_slope, eigenvectors, + sound_speed, sound_speed_squared, gamma); +} + +__global__ void test_compute_eigenvectors(reconstruction::Primitive const primitive, Real const sound_speed, + Real const sound_speed_squared, Real const gamma, + reconstruction::EigenVecs *eigenvectors) +{ + *eigenvectors = reconstruction::Compute_Eigenvectors(primitive, sound_speed, 
sound_speed_squared, gamma); +} + +TEST(tMHDReconstructionPrimitive2Characteristic, CorrectInputExpectCorrectOutput) +{ + // Test parameters + Real const &gamma = 5. / 3.; + reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive const primitive_slope{9, 10, 11, 12, 13, 14, 15, 16}; + reconstruction::EigenVecs const eigenvectors{ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + }; + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; + + // Run test + cuda_utilities::DeviceVector dev_results(1); + hipLaunchKernelGGL(test_prim_2_char, 1, 1, 0, 0, primitive, primitive_slope, eigenvectors, gamma, sound_speed, + sound_speed_squared, dev_results.data()); + CudaCheckError(); + cudaDeviceSynchronize(); + reconstruction::Characteristic const host_results = dev_results.at(0); + + // Check results + reconstruction::Characteristic const fiducial_results{-40327, 110, -132678, 7.4400000000000004, 98864, 98, 103549}; + testingUtilities::checkResults(fiducial_results.a0, host_results.a0, "a0"); + testingUtilities::checkResults(fiducial_results.a1, host_results.a1, "a1"); + testingUtilities::checkResults(fiducial_results.a2, host_results.a2, "a2"); + testingUtilities::checkResults(fiducial_results.a3, host_results.a3, "a3"); + testingUtilities::checkResults(fiducial_results.a4, host_results.a4, "a4"); + testingUtilities::checkResults(fiducial_results.a5, host_results.a5, "a5"); + testingUtilities::checkResults(fiducial_results.a6, host_results.a6, "a6"); +} + +TEST(tMHDReconstructionCharacteristic2Primitive, CorrectInputExpectCorrectOutput) +{ + // Test parameters + Real const &gamma = 5. 
/ 3.; + reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Characteristic const characteristic_slope{17, 18, 19, 20, 21, 22, 23}; + reconstruction::EigenVecs const eigenvectors{ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + }; + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; + + // Run test + cuda_utilities::DeviceVector dev_results(1); + hipLaunchKernelGGL(test_char_2_prim, 1, 1, 0, 0, primitive, characteristic_slope, eigenvectors, gamma, sound_speed, + sound_speed_squared, dev_results.data()); + CudaCheckError(); + cudaDeviceSynchronize(); + reconstruction::Primitive const host_results = dev_results.at(0); + + // Check results + reconstruction::Primitive const fiducial_results{1740, 2934, -2526, -2828, 14333.333333333338, 0.0, -24040, 24880}; + testingUtilities::checkResults(fiducial_results.density, host_results.density, "density"); + testingUtilities::checkResults(fiducial_results.velocity_x, host_results.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_results.velocity_y, host_results.velocity_y, "velocity_y", 1.34E-14); + testingUtilities::checkResults(fiducial_results.velocity_z, host_results.velocity_z, "velocity_z", 1.6E-14); + testingUtilities::checkResults(fiducial_results.pressure, host_results.pressure, "pressure"); + testingUtilities::checkResults(fiducial_results.magnetic_y, host_results.magnetic_y, "magnetic_y"); + testingUtilities::checkResults(fiducial_results.magnetic_z, host_results.magnetic_z, "magnetic_z"); +} + +TEST(tMHDReconstructionComputeEigenvectors, CorrectInputExpectCorrectOutput) +{ + // Test parameters + Real const &gamma = 5. 
/ 3.; + reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Characteristic const characteristic_slope{17, 18, 19, 20, 21, 22, 23}; + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; + + // Run test + cuda_utilities::DeviceVector dev_results(1); + hipLaunchKernelGGL(test_compute_eigenvectors, 1, 1, 0, 0, primitive, sound_speed, sound_speed_squared, gamma, + dev_results.data()); + CudaCheckError(); + cudaDeviceSynchronize(); + reconstruction::EigenVecs const host_results = dev_results.at(0); + // std::cout << to_string_exact(host_results.magnetosonic_speed_fast) << ","; + // std::cout << to_string_exact(host_results.magnetosonic_speed_slow) << ","; + // std::cout << to_string_exact(host_results.magnetosonic_speed_fast_squared) << ","; + // std::cout << to_string_exact(host_results.magnetosonic_speed_slow_squared) << ","; + // std::cout << to_string_exact(host_results.alpha_fast) << ","; + // std::cout << to_string_exact(host_results.alpha_slow) << ","; + // std::cout << to_string_exact(host_results.beta_y) << ","; + // std::cout << to_string_exact(host_results.beta_z) << ","; + // std::cout << to_string_exact(host_results.n_fs) << ","; + // std::cout << to_string_exact(host_results.sign) << ","; + // std::cout << to_string_exact(host_results.q_fast) << ","; + // std::cout << to_string_exact(host_results.q_slow) << ","; + // std::cout << to_string_exact(host_results.a_fast) << ","; + // std::cout << to_string_exact(host_results.a_slow) << ","; + // std::cout << to_string_exact(host_results.q_prime_fast) << ","; + // std::cout << to_string_exact(host_results.q_prime_slow) << ","; + // std::cout << to_string_exact(host_results.a_prime_fast) << ","; + // std::cout << to_string_exact(host_results.a_prime_slow) << "," << std::endl; + // Check results + reconstruction::EigenVecs const fiducial_results{ + 12.466068627219666, 
1.3894122191714398, 155.40286701855041, 1.9304663147829049, 0.20425471836256681, + 0.97891777490585408, 0.65850460786851805, 0.75257669470687782, 0.059999999999999984, 1, + 2.546253336541183, 1.3601203180183106, 0.58963258314939582, 2.825892204282022, 0.15277520019247093, + 0.081607219081098623, 0.03537795498896374, 0.1695535322569213}; + testingUtilities::checkResults(fiducial_results.magnetosonic_speed_fast, host_results.magnetosonic_speed_fast, + "magnetosonic_speed_fast"); + testingUtilities::checkResults(fiducial_results.magnetosonic_speed_slow, host_results.magnetosonic_speed_slow, + "magnetosonic_speed_slow"); + testingUtilities::checkResults(fiducial_results.magnetosonic_speed_fast_squared, + host_results.magnetosonic_speed_fast_squared, "magnetosonic_speed_fast_squared"); + testingUtilities::checkResults(fiducial_results.magnetosonic_speed_slow_squared, + host_results.magnetosonic_speed_slow_squared, "magnetosonic_speed_slow_squared"); + testingUtilities::checkResults(fiducial_results.alpha_fast, host_results.alpha_fast, "alpha_fast"); + testingUtilities::checkResults(fiducial_results.alpha_slow, host_results.alpha_slow, "alpha_slow"); + testingUtilities::checkResults(fiducial_results.beta_y, host_results.beta_y, "beta_y"); + testingUtilities::checkResults(fiducial_results.beta_z, host_results.beta_z, "beta_z"); + testingUtilities::checkResults(fiducial_results.n_fs, host_results.n_fs, "n_fs"); + testingUtilities::checkResults(fiducial_results.sign, host_results.sign, "sign"); + testingUtilities::checkResults(fiducial_results.q_fast, host_results.q_fast, "q_fast"); + testingUtilities::checkResults(fiducial_results.q_slow, host_results.q_slow, "q_slow"); + testingUtilities::checkResults(fiducial_results.a_fast, host_results.a_fast, "a_fast"); + testingUtilities::checkResults(fiducial_results.a_slow, host_results.a_slow, "a_slow"); + testingUtilities::checkResults(fiducial_results.q_prime_fast, host_results.q_prime_fast, "q_prime_fast"); + 
testingUtilities::checkResults(fiducial_results.q_prime_slow, host_results.q_prime_slow, "q_prime_slow"); + testingUtilities::checkResults(fiducial_results.a_prime_fast, host_results.a_prime_fast, "a_prime_fast"); + testingUtilities::checkResults(fiducial_results.a_prime_slow, host_results.a_prime_slow, "a_prime_slow"); +} +#endif // MHD + +TEST(tALLReconstructionThreadGuard, CorrectInputExpectCorrectOutput) +{ + // Test parameters + int const order = 3; + int const nx = 6; + int const ny = 6; + int const nz = 6; + + // fiducial data + std::vector fiducial_vals(nx * ny * nz, 1); + fiducial_vals.at(86) = 0; + + // loop through all values of the indices and check them + for (int xid = 0; xid < nx; xid++) { + for (int yid = 0; yid < ny; yid++) { + for (int zid = 0; zid < nz; zid++) { + // Get the test value + bool test_val = reconstruction::Thread_Guard(nx, ny, nz, xid, yid, zid); + + // Compare + int id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + ASSERT_EQ(test_val, fiducial_vals.at(id)) + << "Test value not equal to fiducial value at id = " << id << std::endl; + } + } + } +} + +TEST(tALLReconstructionLoadData, CorrectInputExpectCorrectOutput) +{ + // Set up test and mock up grid + size_t const nx = 3, ny = 3, nz = 3; + size_t const n_cells = nx * ny * nz; + size_t const xid = 1, yid = 1, zid = 1; + size_t const o1 = grid_enum::momentum_x, o2 = grid_enum::momentum_y, o3 = grid_enum::momentum_z; + Real const gamma = 5. 
/ 3.; + + std::vector conserved(n_cells * grid_enum::num_fields); + std::iota(conserved.begin(), conserved.end(), 0.0); + + // Up the energy part of the grid to avoid negative pressure + for (size_t i = grid_enum::Energy * n_cells; i < (grid_enum::Energy + 1) * n_cells; i++) { + conserved.at(i) *= 5.0E2; + } + + // Get test data + auto const test_data = reconstruction::Load_Data(conserved.data(), xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma); + +// Check results +#ifdef MHD + reconstruction::Primitive const fiducial_data{ + 13, 3.0769230769230771, 5.1538461538461542, 7.2307692307692308, 9662.3910256410272, 147.5, 173.5, 197.5}; + testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); + testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testingUtilities::checkResults(fiducial_data.magnetic_x, test_data.magnetic_x, "magnetic_x"); + testingUtilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testingUtilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); +#else // MHD + reconstruction::Primitive fiducial_data{13, 3.0769230769230771, 5.1538461538461542, 7.2307692307692308, + 39950.641025641031}; + #ifdef DE + fiducial_data.pressure = 34274.282506448195; + #endif // DE + testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); + testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + 
testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); +#endif // MHD +} + +TEST(tALLReconstructionComputeSlope, CorrectInputExpectCorrectOutput) +{ +// Setup input data +#ifdef MHD + reconstruction::Primitive left{6, 7, 8, 9, 10, 11, 12, 13}; + reconstruction::Primitive right{1, 2, 3, 4, 5, 6, 7, 8}; +#else // MHD + reconstruction::Primitive left{6, 7, 8, 9, 10}; + reconstruction::Primitive right{1, 2, 3, 4, 5}; +#endif // MHD + Real const coef = 0.5; + + // Get test data + auto test_data = reconstruction::Compute_Slope(left, right, coef); + + // Check results +#ifdef MHD + Real const fiducial_data = -2.5; + testingUtilities::checkResults(fiducial_data, test_data.density, "density"); + testingUtilities::checkResults(fiducial_data, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data, test_data.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data, test_data.pressure, "pressure"); + testingUtilities::checkResults(fiducial_data, test_data.magnetic_y, "magnetic_y"); + testingUtilities::checkResults(fiducial_data, test_data.magnetic_z, "magnetic_z"); +#else // MHD + Real const fiducial_data = -2.5; + testingUtilities::checkResults(fiducial_data, test_data.density, "density"); + testingUtilities::checkResults(fiducial_data, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data, test_data.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data, test_data.pressure, "pressure"); +#endif // MHD +} + +TEST(tALLReconstructionVanLeerSlope, CorrectInputExpectCorrectOutput) +{ +// Setup input data +#ifdef MHD + reconstruction::Primitive left{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive right{6, 7, 8, 9, 10, 11, 12, 13}; +#else // MHD + reconstruction::Primitive left{1, 2, 
3, 4, 5}; + reconstruction::Primitive right{6, 7, 8, 9, 10}; +#endif // MHD + + // Get test data + auto test_data = reconstruction::Van_Leer_Slope(left, right); + + // Check results +#ifdef MHD + reconstruction::Primitive const fiducial_data{1.7142857142857142, 3.1111111111111112, 4.3636363636363633, + 5.5384615384615383, 6.666666666666667, 0, + 8.8421052631578956, 9.9047619047619051}; + testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); + testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testingUtilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testingUtilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); +#else // MHD + reconstruction::Primitive const fiducial_data{1.7142857142857142, 3.1111111111111112, 4.3636363636363633, + 5.5384615384615383, 6.666666666666667}; + testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); + testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); +#endif // MHD +} + +__global__ void test_monotize_characteristic_return_primitive( + reconstruction::Primitive const primitive, reconstruction::Primitive const del_L, + reconstruction::Primitive const del_R, reconstruction::Primitive const del_C, reconstruction::Primitive const del_G, + reconstruction::Characteristic 
const del_a_L, reconstruction::Characteristic const del_a_R, + reconstruction::Characteristic const del_a_C, reconstruction::Characteristic const del_a_G, + reconstruction::EigenVecs const eigenvectors, Real const sound_speed, Real const sound_speed_squared, + Real const gamma, reconstruction::Primitive *monotonized_slope) +{ + *monotonized_slope = reconstruction::Monotonize_Characteristic_Return_Primitive( + primitive, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvectors, sound_speed, + sound_speed_squared, gamma); +} + +TEST(tALLReconstructionMonotonizeCharacteristicReturnPrimitive, CorrectInputExpectCorrectOutput) +{ +#ifdef MHD + reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive const del_L{9, 10, 11, 12, 13, 14, 15, 16}; + reconstruction::Primitive const del_R{17, 18, 19, 20, 21, 22, 23, 24}; + reconstruction::Primitive const del_C{25, 26, 27, 28, 29, 30, 31, 32}; + reconstruction::Primitive const del_G{33, 34, 35, 36, 37, 38, 39, 40}; + reconstruction::Characteristic const del_a_L{41, 42, 43, 44, 45, 46, 47}; + reconstruction::Characteristic const del_a_R{48, 49, 50, 51, 52, 53, 54}; + reconstruction::Characteristic const del_a_C{55, 56, 57, 58, 59, 60, 61}; + reconstruction::Characteristic const del_a_G{62, 64, 65, 66, 67, 68, 69}; +#else // MHD + reconstruction::Primitive const primitive{1, 2, 3, 4, 5}; + reconstruction::Primitive const del_L{9, 10, 11, 12, 13}; + reconstruction::Primitive const del_R{17, 18, 19, 20, 21}; + reconstruction::Primitive const del_C{25, 26, 27, 28, 29}; + reconstruction::Primitive const del_G{33, 34, 35, 36, 37}; + reconstruction::Characteristic const del_a_L{41, 42, 43, 44, 45}; + reconstruction::Characteristic const del_a_R{48, 49, 50, 51, 52}; + reconstruction::Characteristic const del_a_C{55, 56, 57, 58, 59}; + reconstruction::Characteristic const del_a_G{62, 64, 65, 66, 67}; +#endif // MHD + Real const sound_speed = 17.0, sound_speed_squared = sound_speed 
* sound_speed; + Real const gamma = 5. / 3.; + reconstruction::EigenVecs const eigenvectors{ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + }; + + // Get test data + cuda_utilities::DeviceVector dev_results(1); + hipLaunchKernelGGL(test_monotize_characteristic_return_primitive, 1, 1, 0, 0, primitive, del_L, del_R, del_C, del_G, + del_a_L, del_a_R, del_a_C, del_a_G, eigenvectors, sound_speed, sound_speed_squared, gamma, + dev_results.data()); + CudaCheckError(); + cudaDeviceSynchronize(); + reconstruction::Primitive const host_results = dev_results.at(0); + + // Check results +#ifdef MHD + reconstruction::Primitive const fiducial_data{5046, 2934, -2526, -2828, 1441532, 0.0, -69716, 72152}; + testingUtilities::checkResults(fiducial_data.density, host_results.density, "density"); + testingUtilities::checkResults(fiducial_data.velocity_x, host_results.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, host_results.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, host_results.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data.pressure, host_results.pressure, "pressure"); + testingUtilities::checkResults(fiducial_data.magnetic_y, host_results.magnetic_y, "magnetic_y"); + testingUtilities::checkResults(fiducial_data.magnetic_z, host_results.magnetic_z, "magnetic_z"); +#else // MHD + reconstruction::Primitive const fiducial_data{170, 68, 57, 58, 32946}; + testingUtilities::checkResults(fiducial_data.density, host_results.density, "density"); + testingUtilities::checkResults(fiducial_data.velocity_x, host_results.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, host_results.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, host_results.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data.pressure, host_results.pressure, "pressure"); +#endif // MHD +} + 
+TEST(tHYDROReconstructionMonotizeParabolicInterface, CorrectInputExpectCorrectOutput) +{ + // Input Data + + reconstruction::Primitive const cell_i{1.4708046701, 9.5021020181, 3.7123503442, 4.6476103466, 3.7096802847}; + reconstruction::Primitive const cell_im1{3.9547588941, 3.1552319951, 3.0209247624, 9.5841013261, 2.2945188332}; + reconstruction::Primitive const cell_ip1{5.1973323534, 6.9132613767, 1.8397298636, 5.341960387, 9.093498542}; + reconstruction::Primitive interface_L_iph{6.7787324804, 9.5389820358, 9.8522754567, 7.8305142852, 2.450533435}; + reconstruction::Primitive interface_R_imh{4.8015193892, 5.9124263972, 8.7513040382, 8.3659359773, 1.339777121}; + + // Get test data + reconstruction::Monotonize_Parabolic_Interface(cell_i, cell_im1, cell_ip1, interface_L_iph, interface_R_imh); + + // Check results + reconstruction::Primitive const fiducial_interface_L{1.4708046700999999, 9.5021020181000004, 3.7123503441999999, + 4.6476103465999996, 3.7096802847000001}; + reconstruction::Primitive const fiducial_interface_R{1.4708046700999999, 9.428341982700001, 3.7123503441999999, + 4.6476103465999996, 3.7096802847000001}; + testingUtilities::checkResults(fiducial_interface_L.density, interface_L_iph.density, "density"); + testingUtilities::checkResults(fiducial_interface_L.velocity_x, interface_L_iph.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_interface_L.velocity_y, interface_L_iph.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_interface_L.velocity_z, interface_L_iph.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_interface_L.pressure, interface_L_iph.pressure, "pressure"); + + testingUtilities::checkResults(fiducial_interface_R.density, interface_R_imh.density, "density"); + testingUtilities::checkResults(fiducial_interface_R.velocity_x, interface_R_imh.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_interface_R.velocity_y, interface_R_imh.velocity_y, "velocity_y"); + 
testingUtilities::checkResults(fiducial_interface_R.velocity_z, interface_R_imh.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_interface_R.pressure, interface_R_imh.pressure, "pressure"); +} + +TEST(tALLReconstructionCalcInterfaceLinear, CorrectInputExpectCorrectOutput) +{ + // Setup input data +#ifdef MHD + reconstruction::Primitive left{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive right{6, 7, 8, 9, 10, 11, 12, 13}; +#else // MHD + reconstruction::Primitive left{1, 2, 3, 4, 5}; + reconstruction::Primitive right{6, 7, 8, 9, 10}; +#endif // MHD + Real const coef = 0.5; + + // Get test data + auto test_data = reconstruction::Calc_Interface_Linear(left, right, coef); + + // Check results +#ifdef MHD + reconstruction::Primitive const fiducial_data{2.5, 3.75, 5, 6.25, 7.5, 0, 10, 11.25}; + testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); + testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testingUtilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testingUtilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); +#else // MHD + reconstruction::Primitive const fiducial_data{2.5, 3.75, 5, 6.25, 7.5}; + testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); + testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + 
testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); +#endif // MHD +} + +TEST(tALLReconstructionCalcInterfaceParabolic, CorrectInputExpectCorrectOutput) +{ + // Setup input data +#ifdef MHD + reconstruction::Primitive cell_i{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive cell_im1{6, 7, 8, 9, 10, 11, 12, 13}; + reconstruction::Primitive slopes_i{14, 15, 16, 17, 18, 19, 20, 21}; + reconstruction::Primitive slopes_im1{22, 23, 24, 25, 26, 27, 28, 29}; +#else // MHD + reconstruction::Primitive cell_i{1, 2, 3, 4, 5}; + reconstruction::Primitive cell_im1{6, 7, 8, 9, 10}; + reconstruction::Primitive slopes_i{14, 15, 16, 17, 18}; + reconstruction::Primitive slopes_im1{22, 23, 24, 25, 26}; +#endif // MHD + + // Get test data + auto test_data = reconstruction::Calc_Interface_Parabolic(cell_i, cell_im1, slopes_i, slopes_im1); + + // Check results +#ifdef MHD + reconstruction::Primitive const fiducial_data{4.833333333333333, 5.833333333333333, 6.833333333333333, + 7.833333333333333, 8.8333333333333339, 0.0, + 10.833333333333334, 11.833333333333334}; + testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); + testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); + testingUtilities::checkResults(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testingUtilities::checkResults(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); +#else // MHD + reconstruction::Primitive const fiducial_data{4.833333333333333, 5.833333333333333, 6.833333333333333, + 7.833333333333333, 8.8333333333333339}; + testingUtilities::checkResults(fiducial_data.density, test_data.density, "density"); 
+ testingUtilities::checkResults(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testingUtilities::checkResults(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testingUtilities::checkResults(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testingUtilities::checkResults(fiducial_data.pressure, test_data.pressure, "pressure"); +#endif // MHD +} + +TEST(tALLReconstructionPPMSingleVariable, CorrectInputExpectCorrectOutput) +{ + // Set up PRNG to use + std::mt19937_64 prng(42); + std::uniform_real_distribution doubleRand(-100, 100); + + // Set up testing parameters + size_t const n_tests = 100; + std::vector input_data(n_tests * 5); + for (double &val : input_data) { + val = doubleRand(prng); + } + + std::vector fiducial_left_interface{ + 50.429040149605328, -40.625142952817804, 37.054257344499717, -55.796322960572695, -14.949021655598202, + -10.760611497035882, 71.107183338735751, -29.453314279116661, 7.38606168778702, -23.210826670297152, + -85.15197822983292, 18.98804944849401, 64.754272117396766, 4.5584678980835918, 45.81912726561103, + 58.769584663215738, 47.626531326553447, 9.3792919223901166, 47.06767164062336, -53.975231802858218, + -81.51278133300454, -74.554960772880221, 96.420244795844823, 37.498528618937456, -41.370881014041672, + -41.817524439980467, 58.391560533135817, -85.991024651293131, -12.674113472365306, 30.421304081280084, + 43.700175645941769, 58.342347077360131, -31.574197692184548, 98.151410701129635, -9.4994975790183389, + -87.49117921577357, -94.449608348937488, 79.849643090061676, 93.096197902468759, -64.374502025066192, + 82.037247010307937, -60.629868182203786, -41.343090531127039, -75.449850543801574, -82.52313028208863, + 19.871484181185011, -22.253989777496159, 86.943333900988137, -83.887344220269938, 73.270857190511975, + 84.784625452008811, -27.929776508530765, -9.6992610428405612, -65.233676045197072, -88.498474065470134, + 47.637114710282589, -69.50911815749248, -69.848254012650372, 
-7.4520009269431711, 90.887158278825865, + -50.671539065300863, 13.424189957034622, 80.237684918029572, 32.454734198410179, 66.84741286999801, + 24.53669768915492, -67.195147776790975, 72.277527112459907, -46.094192444366435, -99.915875366345205, + 32.244024128018054, -95.648868731550635, 17.922876720365402, -86.334093878928797, -16.580223524066724, + 39.48244113577249, 64.203567686297504, 23.62791013796798, 59.620571575902432, 41.0983082454959, + -30.533954819557593, -23.149979553301478, -54.098849622102691, -45.577469823900444, 33.284499908516068, + -39.186662569988762, 76.266375356625161, -51.650172854435624, -68.894636301310584, 98.410134045837452, + -49.167117951549066, 78.440749922366507, 51.390453104722326, 3.1993391287610393, 43.749856317813453, + -81.399433434996496, 88.385686355761862, 78.242223440453444, 27.539590130937498, -6.9781781598207147, + }; + std::vector fiducial_right_interface{ + 50.429040149605328, 4.4043935241855703, 37.054257344499717, 23.707343328192596, -14.949021655598202, + -10.760611497035882, 8.367260859616664, 8.5357943668839624, 7.38606168778702, -23.210826670297152, + -85.15197822983292, 18.98804944849401, 64.754272117396766, 4.5584678980835918, 45.81912726561103, + 58.769584663215738, 47.626531326553447, 23.370742401854159, 47.06767164062336, -53.975231802858218, + -81.51278133300454, -74.554960772880221, 75.572387546643355, 61.339053128914685, -41.370881014041672, + -41.817524439980467, 58.391560533135817, -85.991024651293131, -36.626332669233776, 30.421304081280084, + 20.637382412674096, 58.342347077360131, -79.757902483702381, 98.151410701129635, -9.4994975790183389, + -87.49117921577357, -39.384192078363533, 79.849643090061676, 93.096197902468759, -64.374502025066192, + 82.037247010307937, -20.951323678824952, 46.927431599533087, -75.449850543801574, -54.603894223278004, + -59.419110050353098, -22.253989777496159, 86.943333900988137, -83.887344220269938, 73.270857190511975, + 84.784625452008811, -27.929776508530765, 
-9.6992610428405612, -65.233676045197072, -88.498474065470134, + 47.637114710282589, -69.50911815749248, -69.848254012650372, -7.4520009269431711, 90.887158278825865, + -79.086012597191512, -45.713537271527976, 80.237684918029572, -60.666381661910016, 68.727158732184449, + 24.53669768915492, -67.195147776790975, 72.610434112023597, 54.910597945673814, -19.862686571231023, + 32.244024128018054, -95.648868731550635, -34.761757909478987, -86.334093878928797, -16.580223524066724, + 39.48244113577249, 64.203567686297504, 0.77846541072490538, 59.620571575902432, 41.0983082454959, + -2.6491435658297036, -23.149979553301478, -54.098849622102691, -45.577469823900444, 33.284499908516068, + -39.186662569988762, 76.266375356625161, -51.650172854435624, -68.894636301310584, 98.410134045837452, + 30.9954824410611, 78.440749922366507, 51.390453104722326, 70.625792807373429, 43.749856317813453, + -81.399433434996496, 88.385686355761862, 78.242223440453444, 27.539590130937498, -6.9781781598207147, + }; + + // Run n_tests iterations of the loop choosing random numbers to put into the interface state computation and checking + // the results + for (size_t i = 0; i < n_tests; i++) { + // Run the function + double test_left_interface, test_right_interface; + size_t const idx = 5 * i; + reconstruction::PPM_Single_Variable(input_data[idx], input_data[idx + 1], input_data[idx + 2], input_data[idx + 3], + input_data[idx + 4], test_left_interface, test_right_interface); + + // Compare results + testingUtilities::checkResults(fiducial_left_interface.at(i), test_left_interface, "left i+1/2 interface"); + testingUtilities::checkResults(fiducial_right_interface.at(i), test_right_interface, "right i-1/2 interface"); + } +} + +TEST(tALLReconstructionWriteData, CorrectInputExpectCorrectOutput) +{ + // Set up test and mock up grid +#ifdef MHD + reconstruction::Primitive interface { + 1, 2, 3, 4, 5, 6, 7, 8 + }; +#else // MHD + reconstruction::Primitive interface { + 6, 7, 8, 9, 10 + }; +#endif // 
MHD + size_t const nx = 3, ny = 3, nz = 3; + size_t const n_cells = nx * ny * nz; + size_t const xid = 1, yid = 1, zid = 1; + size_t const id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + size_t const o1 = grid_enum::momentum_x, o2 = grid_enum::momentum_y, o3 = grid_enum::momentum_z; + Real const gamma = 5. / 3.; + + std::vector conserved(n_cells * grid_enum::num_fields); + std::vector interface_arr(n_cells * grid_enum::num_fields); + + // Get test data + reconstruction::Write_Data(interface, interface_arr.data(), conserved.data(), id, n_cells, o1, o2, o3, gamma); + +// Fiducial Data +#ifdef MHD + std::unordered_map fiducial_interface = {{13, 1}, {40, 2}, {67, 3}, {94, 4}, + {121, 78.5}, {148, 7}, {175, 8}}; +#else // MHD + std::unordered_map fiducial_interface = {{13, 6}, {40, 42}, {67, 48}, {94, 54}, {121, 597}}; +#endif // MHD + + // Perform Comparison + for (size_t i = 0; i < interface_arr.size(); i++) { + // Check the interface + double test_val = interface_arr.at(i); + double fiducial_val = (fiducial_interface.find(i) == fiducial_interface.end()) ? 
0.0 : fiducial_interface[i]; + + testingUtilities::checkResults(fiducial_val, test_val, "Interface at i=" + std::to_string(i)); + } +} diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index c962325a7..b7c1fdff0 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -35,7 +35,7 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const Real const gamma, int const direction, int const n_fields) { // get a thread index - int threadId = threadIdx.x + blockIdx.x * blockDim.x; + int const threadId = threadIdx.x + blockIdx.x * blockDim.x; // Thread guard to avoid overrun if (threadId >= n_cells) { @@ -44,20 +44,22 @@ __global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const // Offsets & indices int o1, o2, o3; - if (direction == 0) { - o1 = grid_enum::momentum_x; - o2 = grid_enum::momentum_y; - o3 = grid_enum::momentum_z; - } - if (direction == 1) { - o1 = grid_enum::momentum_y; - o2 = grid_enum::momentum_z; - o3 = grid_enum::momentum_x; - } - if (direction == 2) { - o1 = grid_enum::momentum_z; - o2 = grid_enum::momentum_x; - o3 = grid_enum::momentum_y; + switch (direction) { + case 0: + o1 = grid_enum::momentum_x; + o2 = grid_enum::momentum_y; + o3 = grid_enum::momentum_z; + break; + case 1: + o1 = grid_enum::momentum_y; + o2 = grid_enum::momentum_z; + o3 = grid_enum::momentum_x; + break; + case 2: + o1 = grid_enum::momentum_z; + o2 = grid_enum::momentum_x; + o3 = grid_enum::momentum_y; + break; } // ============================ @@ -200,7 +202,7 @@ __device__ __host__ mhd::_internal::State loadState(Real const *interfaceArr, Re #else // Note that this function does the positive pressure check // internally - state.gasPressure = mhd::utils::computeGasPressure(state, magneticX, gamma); + state.gasPressure = mhd::_internal::Calc_Pressure_Primitive(state, magneticX, gamma); #endif // DE state.totalPressure = diff --git a/src/riemann_solvers/hlld_cuda.h 
b/src/riemann_solvers/hlld_cuda.h index b2311071f..c7363ed65 100644 --- a/src/riemann_solvers/hlld_cuda.h +++ b/src/riemann_solvers/hlld_cuda.h @@ -13,6 +13,7 @@ // Local Includes #include "../global/global.h" +#include "../utils/hydro_utilities.h" #ifdef CUDA /*! @@ -93,8 +94,8 @@ struct DoubleStarState { // pressureDoubleStar = pressureStar // Shared values Real velocityY, velocityZ, magneticY, magneticZ; - // Different values - Real energyL, energyR; + // Different values. Initializing these since one or the other can be uninitializing leading to bad tests + Real energyL = 0.0, energyR = 0.0; }; /*! @@ -253,6 +254,20 @@ __device__ __host__ mhd::_internal::Flux computeDoubleStarFluxes( mhd::_internal::StarState const &starState, mhd::_internal::State const &state, mhd::_internal::Flux const &flux, mhd::_internal::Speeds const &speed, Real const &speedSide, Real const &speedSideStar); +/*! + * \brief Specialization of mhd::utils::computeGasPressure for use in the HLLD solver + * + * \param state The State to compute the gas pressure of + * \param magneticX The X magnetic field + * \param gamma The adiabatic index + * \return Real The gas pressure + */ +inline __host__ __device__ Real Calc_Pressure_Primitive(mhd::_internal::State const &state, Real const &magneticX, + Real const &gamma) +{ + return hydro_utilities::Calc_Pressure_Primitive(state.energy, state.density, state.velocityX, state.velocityY, + state.velocityZ, gamma, magneticX, state.magneticY, state.magneticZ); +} } // namespace _internal } // end namespace mhd #endif // CUDA diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index 4a9a10270..c39e091d1 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -19,6 +19,7 @@ #include "../grid/grid_enum.h" #include "../riemann_solvers/hlld_cuda.h" // Include code to test #include "../utils/gpu.hpp" +#include "../utils/hydro_utilities.h" #include "../utils/mhd_utilities.h" 
#include "../utils/testing_utilities.h" @@ -160,7 +161,7 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test * \param[in] direction Which plane the interface is. 0 = plane normal to * X, 1 = plane normal to Y, 2 = plane normal to Z. Defaults to 0. */ - void checkResults(std::vector fiducialFlux, std::vector scalarFlux, Real thermalEnergyFlux, + void checkResults(std::vector fiducialFlux, std::vector const &scalarFlux, Real thermalEnergyFlux, std::vector const &testFlux, std::string const &customOutput = "", int const &direction = 0) { // Field names @@ -234,12 +235,12 @@ class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test output.at(1) = input.at(1) * input.at(0); // X Velocity to momentum output.at(2) = input.at(2) * input.at(0); // Y Velocity to momentum output.at(3) = input.at(3) * input.at(0); // Z Velocity to momentum - output.at(4) = mhd::utils::computeEnergy(input.at(4), input.at(0), input.at(1), input.at(2), input.at(3), - input.at(5), input.at(6), input.at(7), - gamma); // Pressure to Energy - output.at(5) = input.at(5); // X Magnetic Field - output.at(6) = input.at(6); // Y Magnetic Field - output.at(7) = input.at(7); // Z Magnetic Field + output.at(4) = + hydro_utilities::Calc_Energy_Primitive(input.at(4), input.at(0), input.at(1), input.at(2), input.at(3), gamma, + input.at(5), input.at(6), input.at(7)); // Pressure to Energy + output.at(5) = input.at(5); // X Magnetic Field + output.at(6) = input.at(6); // Y Magnetic Field + output.at(7) = input.at(7); // Z Magnetic Field #ifdef SCALAR std::vector conservedScalar(primitiveScalars.size()); @@ -2170,10 +2171,8 @@ TEST(tMHDHlldInternalDoubleStarState, CorrectInputDegenerateExpectCorrectOutput) testParams const parameters; std::vector fiducialState{ - {1.0519818825796206, 0.68198273634686157, 26.835645069149873, 7.4302316959173442, -999.79694164635089, - 90.44484278669114}, - {0.61418047569879897, 0.71813570322922715, 98.974446283273181, 10.696380763901459, -999.79694164635089, - 
61.33664731346812}}; + {1.0519818825796206, 0.68198273634686157, 26.835645069149873, 7.4302316959173442, 0.0, 90.44484278669114}, + {0.61418047569879897, 0.71813570322922715, 98.974446283273181, 10.696380763901459, 0.0, 61.33664731346812}}; for (size_t i = 0; i < parameters.names.size(); i++) { mhd::_internal::DoubleStarState const testState = @@ -2268,20 +2267,22 @@ TEST(tMHDHlldInternalReturnFluxes, CorrectInputExpectCorrectOutput) for (size_t direction = 0; direction < 1; direction++) { int o1, o2, o3; - if (direction == 0) { - o1 = 1; - o2 = 2; - o3 = 3; - } - if (direction == 1) { - o1 = 2; - o2 = 3; - o3 = 1; - } - if (direction == 2) { - o1 = 3; - o2 = 1; - o3 = 2; + switch (direction) { + case 0: + o1 = 1; + o2 = 2; + o3 = 3; + break; + case 1: + o1 = 2; + o2 = 3; + o3 = 1; + break; + case 2: + o1 = 3; + o2 = 1; + o3 = 2; + break; } std::vector testFluxArray(nFields * n_cells, dummyValue); @@ -2364,20 +2365,22 @@ TEST(tMHDHlldInternalLoadState, CorrectInputExpectCorrectOutput) for (size_t direction = 0; direction < 3; direction++) { int o1, o2, o3; - if (direction == 0) { - o1 = 1; - o2 = 2; - o3 = 3; - } - if (direction == 1) { - o1 = 2; - o2 = 3; - o3 = 1; - } - if (direction == 2) { - o1 = 3; - o2 = 1; - o3 = 2; + switch (direction) { + case 0: + o1 = 1; + o2 = 2; + o3 = 3; + break; + case 1: + o1 = 2; + o2 = 3; + o3 = 1; + break; + case 2: + o1 = 3; + o2 = 1; + o3 = 2; + break; } mhd::_internal::State const testState = mhd::_internal::loadState(interfaceArray.data(), parameters.magneticX.at(0), diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 9d9639f65..288690290 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -26,44 +26,60 @@ * */ /// @{ -class tHYDROSYSTEMSodShockTubeParameterizedMpi : public ::testing::TestWithParam +class tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi : public ::testing::TestWithParam { protected: systemTest::SystemTestRunner 
sodTest; }; -TEST_P(tHYDROSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOutput) +TEST_P(tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOutput) { - // #ifdef MHD - // // Loosen correctness check to account for MHD only having PCM. This is - // // about the error between PCM and PPMP in hydro - // sodTest.setFixedEpsilon(1E-3); +#ifdef MHD + sodTest.setFixedEpsilon(1.0E-4); + + // Don't test the gas energy fields + auto datasetNames = sodTest.getDataSetsToTest(); + datasetNames.erase(std::remove(datasetNames.begin(), datasetNames.end(), "GasEnergy"), datasetNames.end()); - // // Don't test the gas energy fields - // auto datasetNames = sodTest.getDataSetsToTest(); - // datasetNames.erase(std::remove(datasetNames.begin(), datasetNames.end(), "GasEnergy"), datasetNames.end()); + // Set the magnetic fiducial datasets to zero + size_t const size = 64 * 64 * 65; + std::vector const magVec(size, 0); - // // Set the magnetic fiducial datasets to zero - // size_t const size = std::pow(65, 3); - // std::vector const magVec(0, size); + for (const auto *field : {"magnetic_x", "magnetic_y", "magnetic_z"}) { + sodTest.setFiducialData(field, magVec); + datasetNames.emplace_back(field); + } - // for (const auto *field : {"magnetic_x", "magnetic_y", "magnetic_z"}) { - // sodTest.setFiducialData(field, magVec); - // datasetNames.push_back(field); - // } + sodTest.setDataSetsToTest(datasetNames); - // sodTest.setDataSetsToTest(datasetNames); - // #endif // MHD + double const maxAllowedL1Error = 7.0E-3; + double const maxAllowedError = 4.6E-2; +#else + double const maxAllowedL1Error = 9.4E-5; + double const maxAllowedError = 6.4E-4; +#endif // MHD sodTest.numMpiRanks = GetParam(); - sodTest.runTest(); + sodTest.runTest(true, maxAllowedL1Error, maxAllowedError); } -INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, tHYDROSYSTEMSodShockTubeParameterizedMpi, +INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, 
tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi, ::testing::Values(1, 2, 4)); /// @} // ============================================================================= +TEST(tHYDROSYSTEMSodShockTube, OneDimensionalCorrectInputExpectCorrectOutput) +{ + systemTest::SystemTestRunner sodTest; + sodTest.runTest(); +} + +TEST(tHYDROSYSTEMSodShockTube, TwoDimensionalCorrectInputExpectCorrectOutput) +{ + systemTest::SystemTestRunner sodTest; + sodTest.runTest(); +} + TEST(tHYDROtMHDSYSTEMConstant, CorrectInputExpectCorrectOutput) { systemTest::SystemTestRunner testObject(false, false, false); @@ -97,8 +113,15 @@ TEST(tHYDROtMHDSYSTEMSoundWave3D, CorrectInputExpectCorrectOutput) #ifdef MHD // Loosen correctness check to account for MHD only having PCM. This is // about the error between PCM and PPMP in hydro - tolerance = 1E-6; -#endif // MHD + // Check Results. Values based on results in Gardiner & Stone 2008 + #ifdef PCM + tolerance = 1e-6; + #elif defined(PLMC) + tolerance = 1.0E-7; + #elif defined(PPMC) + tolerance = 1.9E-9; + #endif // PCM +#endif // MHD testObject.launchCholla(); @@ -133,9 +156,12 @@ class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithPa #ifdef PCM double const allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 double const allowedError = 4E-7; -#else // PCM - double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 1E-7; +#elif defined(PLMC) + double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1E-7; +#elif defined(PPMC) + double const allowedL1Error = 2.7E-8; // Based on results in Gardiner & Stone 2008 + double const allowedError = 2.7E-8; #endif // PCM void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, diff --git a/examples/3D/Ryu_and_Jones_2a.txt 
b/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_OneDimensionalCorrectInputExpectCorrectOutput.txt similarity index 57% rename from examples/3D/Ryu_and_Jones_2a.txt rename to src/system_tests/input_files/tHYDROSYSTEMSodShockTube_OneDimensionalCorrectInputExpectCorrectOutput.txt index 70a7ef8e9..dd54ff082 100644 --- a/examples/3D/Ryu_and_Jones_2a.txt +++ b/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_OneDimensionalCorrectInputExpectCorrectOutput.txt @@ -1,26 +1,20 @@ # -# Parameter File for 3D Ryu & Jones MHD shock tube 4d. -# Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics: -# Algorithms and Tests for One-Dimensional Flow" -# -# Note: There are many shock tubes in this paper. This settings file is -# specifically for shock tube 4d +# Parameter File for 1D Sod Shock tube # ################################################ # number of grid cells in the x dimension nx=64 # number of grid cells in the y dimension -ny=64 +ny=1 # number of grid cells in the z dimension -nz=64 +nz=1 # final output time tout=0.2 # time interval for output outstep=0.2 # name of initial conditions init=Riemann - # domain properties xmin=0.0 ymin=0.0 @@ -28,7 +22,6 @@ zmin=0.0 xlen=1.0 ylen=1.0 zlen=1.0 - # type of boundary conditions xl_bcnd=3 xu_bcnd=3 @@ -36,39 +29,28 @@ yl_bcnd=3 yu_bcnd=3 zl_bcnd=3 zu_bcnd=3 - # path to output directory outdir=./ ################################################# # Parameters for 1D Riemann problems # density of left state -rho_l=1.08 +rho_l=1.0 # velocity of left state -vx_l=1.2 -vy_l=0.01 -vz_l=0.5 +vx_l=0.0 +vy_l=0.0 +vz_l=0.0 # pressure of left state -P_l=0.95 -# Magnetic field of the left state -Bx_l=0.5641895835477563 -By_l=1.0155412503859613 -Bz_l=0.5641895835477563 - +P_l=1.0 # density of right state -rho_r=1.0 +rho_r=0.1 # velocity of right state vx_r=0.0 vy_r=0.0 vz_r=0.0 # pressure of right state -P_r=1.0 -# Magnetic field of the right state -Bx_r=0.5641895835477563 -By_r=1.1283791670955126 
-Bz_r=0.5641895835477563 - +P_r=0.1 # location of initial discontinuity diaph=0.5 # value of gamma -gamma=1.6666666666666667 +gamma=1.4 diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_SodShockTubeCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_TwoDimensionalCorrectInputExpectCorrectOutput.txt similarity index 95% rename from src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_SodShockTubeCorrectInputExpectCorrectOutput.txt rename to src/system_tests/input_files/tHYDROSYSTEMSodShockTube_TwoDimensionalCorrectInputExpectCorrectOutput.txt index 7246e311f..c89e179be 100644 --- a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_SodShockTubeCorrectInputExpectCorrectOutput.txt +++ b/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_TwoDimensionalCorrectInputExpectCorrectOutput.txt @@ -1,5 +1,5 @@ # -# Parameter File for 3D Sod Shock tube +# Parameter File for 1D Sod Shock tube # ################################################ @@ -8,7 +8,7 @@ nx=64 # number of grid cells in the y dimension ny=64 # number of grid cells in the z dimension -nz=64 +nz=1 # final output time tout=0.2 # time interval for output @@ -54,4 +54,3 @@ P_r=0.1 diaph=0.5 # value of gamma gamma=1.4 - diff --git a/src/system_tests/input_files/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt similarity index 99% rename from src/system_tests/input_files/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt rename to src/system_tests/input_files/tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt index 5e7687d07..efcd912fd 100644 --- a/src/system_tests/input_files/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt +++ 
b/src/system_tests/input_files/tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt @@ -54,4 +54,3 @@ P_r=0.1 diaph=0.5 # value of gamma gamma=1.4 - diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt index 4e70c2993..77ef94b72 100644 --- a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt @@ -1,27 +1,27 @@ # # Parameter File for the MHD Blast wavelength -# See [Gardiner & Stone 2008](https://arxiv.org/abs/0712.2634) for details. +# See [Stone & Gardiner 2009](https://ui.adsabs.harvard.edu/abs/2009NewA...14..139S/abstract) for details. # ################################################ # number of grid cells in the x dimension -nx=64 +nx=50 # number of grid cells in the y dimension -ny=64 +ny=100 # number of grid cells in the z dimension -nz=64 +nz=50 # final output time -tout=0.02 +tout=0.2 # time interval for output -outstep=0.02 +outstep=0.2 # name of initial conditions init=MHD_Spherical_Blast # domain properties xmin=-0.5 -ymin=-0.5 +ymin=-0.75 zmin=-0.5 xlen=1.0 -ylen=1.0 +ylen=1.5 zlen=1.0 # type of boundary conditions xl_bcnd=1 @@ -45,17 +45,17 @@ vy=0.0 # velocity in the z direction vz=0.0 # initial pressure outside the blast zone -P=1.0 -# initial pressure inside the blast zone -P_blast=100.0 +P=0.1 +# initial pressure inside the blast zone. Note that the paper says this should be 100, that is a typo +P_blast=10.0 # The radius of the blast zone -radius=0.125 -# magnetic field in the x direction. Equal to 10/sqrt(2) -Bx=7.0710678118654746 -# magnetic field in the y direction -By=0.0 -# magnetic field in the z direction. 
Equal to 10/sqrt(2) -Bz=7.0710678118654746 +radius=0.1 +# magnetic field in the x direction. Equal to 1/sqrt(2) +Bx=0.70710678118654746 +# magnetic field in the y direction. Equal to 1/sqrt(2) +By=0.70710678118654746 +# magnetic field in the z direction +Bz=0.0 # value of gamma gamma=1.666666666666667 diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput.txt index 07440faa3..a03aef938 100644 --- a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput.txt +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput.txt @@ -1,10 +1,10 @@ # -# Parameter File for 3D Ryu & Jones MHD shock tube 4d. +# Parameter File for 3D Ryu & Jones MHD shock tube 1a. # Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics: # Algorithms and Tests for One-Dimensional Flow" # # Note: There are many shock tubes in this paper. This settings file is -# specifically for shock tube 4d +# specifically for shock tube 1a # ################################################ diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones2aShockTubeCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones2aShockTubeCorrectInputExpectCorrectOutput.txt deleted file mode 100644 index 70a7ef8e9..000000000 --- a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones2aShockTubeCorrectInputExpectCorrectOutput.txt +++ /dev/null @@ -1,74 +0,0 @@ -# -# Parameter File for 3D Ryu & Jones MHD shock tube 4d. -# Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics: -# Algorithms and Tests for One-Dimensional Flow" -# -# Note: There are many shock tubes in this paper. 
This settings file is -# specifically for shock tube 4d -# - -################################################ -# number of grid cells in the x dimension -nx=64 -# number of grid cells in the y dimension -ny=64 -# number of grid cells in the z dimension -nz=64 -# final output time -tout=0.2 -# time interval for output -outstep=0.2 -# name of initial conditions -init=Riemann - -# domain properties -xmin=0.0 -ymin=0.0 -zmin=0.0 -xlen=1.0 -ylen=1.0 -zlen=1.0 - -# type of boundary conditions -xl_bcnd=3 -xu_bcnd=3 -yl_bcnd=3 -yu_bcnd=3 -zl_bcnd=3 -zu_bcnd=3 - -# path to output directory -outdir=./ - -################################################# -# Parameters for 1D Riemann problems -# density of left state -rho_l=1.08 -# velocity of left state -vx_l=1.2 -vy_l=0.01 -vz_l=0.5 -# pressure of left state -P_l=0.95 -# Magnetic field of the left state -Bx_l=0.5641895835477563 -By_l=1.0155412503859613 -Bz_l=0.5641895835477563 - -# density of right state -rho_r=1.0 -# velocity of right state -vx_r=0.0 -vy_r=0.0 -vz_r=0.0 -# pressure of right state -P_r=1.0 -# Magnetic field of the right state -Bx_r=0.5641895835477563 -By_r=1.1283791670955126 -Bz_r=0.5641895835477563 - -# location of initial discontinuity -diaph=0.5 -# value of gamma -gamma=1.6666666666666667 diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp index aaeb2f4e7..30e1d81cf 100644 --- a/src/system_tests/mhd_system_tests.cpp +++ b/src/system_tests/mhd_system_tests.cpp @@ -14,6 +14,7 @@ // Local includes #include "../io/io.h" #include "../system_tests/system_tester.h" +#include "../utils/testing_utilities.h" // ============================================================================= // Test Suite: tMHDSYSTEMLinearWavesParameterizedAngle @@ -32,23 +33,15 @@ class tMHDSYSTEMLinearWavesParameterizedAngle : public ::testing::TestWithParam< protected: systemTest::SystemTestRunner waveTest; - -#ifdef PCM - double const allowedL1Error = 4E-7; // Based on results in Gardiner & 
Stone 2008 - double const allowedError = 4E-7; -#else // PCM - double const allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 - double const allowedError = 1E-7; -#endif // PCM + inline static std::unordered_map high_res_l2norms; void setLaunchParams(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, double const &rEigenVec_E, double const &rEigenVec_Bx, double const &rEigenVec_By, double const &rEigenVec_Bz, double const &pitch, double const &yaw, double const &domain, int const &domain_direction, - double const &vx = 0.0) + double const &vx = 0.0, size_t const &N = 32) { // Constant for all tests - size_t const N = 32; double const gamma = 5. / 3.; double const tOut = 2 * domain / waveSpeed; @@ -74,18 +67,10 @@ class tMHDSYSTEMLinearWavesParameterizedAngle : public ::testing::TestWithParam< case 2: // swap X and Y y_len *= 2; ny *= 2; - std::swap(vx_rot, vy_rot); - std::swap(Bx_rot, By_rot); - std::swap(rEigenVec_Bx_rot, rEigenVec_By_rot); - std::swap(rEigenVec_MomentumX_rot, rEigenVec_MomentumY_rot); break; case 3: // swap X and Z z_len *= 2; nz *= 2; - std::swap(vx_rot, vz_rot); - std::swap(Bx_rot, Bz_rot); - std::swap(rEigenVec_Bx_rot, rEigenVec_Bz_rot); - std::swap(rEigenVec_MomentumX_rot, rEigenVec_MomentumZ_rot); break; default: throw std::invalid_argument("Invalid value of domain_direction given to setLaunchParams"); @@ -162,12 +147,16 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveRightMovingC // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); -// Check Results +// Check Results. 
Values based on results in Gardiner & Stone 2008 #ifdef PCM waveTest.runL1ErrorTest(4.2E-7, 5.4E-7); -#else // PCM - waveTest.runL1ErrorTest(allowedL1Error, allowedError); +#elif defined(PLMC) + waveTest.runL1ErrorTest(6.5E-8, 6.5E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(6.11E-8, 5.5E-8); #endif // PCM + + high_res_l2norms["fast_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) @@ -196,11 +185,13 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveLeftMovingCo // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); -// Check Results +// Check Results. Values based on results in Gardiner & Stone 2008 #ifdef PCM waveTest.runL1ErrorTest(4.2E-7, 5.4E-7); -#else // PCM - waveTest.runL1ErrorTest(allowedL1Error, allowedError); +#elif defined(PLMC) + waveTest.runL1ErrorTest(6.5E-8, 6.5E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(6.1E-8, 5.5E-8); #endif // PCM } @@ -232,8 +223,16 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveRightMovingC // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); - // Check Results - waveTest.runL1ErrorTest(allowedL1Error, allowedError); + // Check Results. 
Values based on results in Gardiner & Stone 2008 +#ifdef PCM + waveTest.runL1ErrorTest(4.E-7, 4.E-7); +#elif defined(PLMC) + waveTest.runL1ErrorTest(2.0E-8, 2.7E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(1.45E-9, 1.3E-9); +#endif // PCM + + high_res_l2norms["slow_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) @@ -262,8 +261,14 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveLeftMovingCo // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); - // Check Results - waveTest.runL1ErrorTest(allowedL1Error, allowedError); + // Check Results. Values based on results in Gardiner & Stone 2008 +#ifdef PCM + waveTest.runL1ErrorTest(4.E-7, 4.E-7); +#elif defined(PLMC) + waveTest.runL1ErrorTest(2.0E-8, 2.7E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(1.45E-9, 1.3E-9); +#endif // PCM } // Alfven Waves Moving Left and Right @@ -293,8 +298,16 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveRightMovingCorrectInpu // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); - // Check Results - waveTest.runL1ErrorTest(allowedL1Error, allowedError); + // Check Results. 
Values based on results in Gardiner & Stone 2008 +#ifdef PCM + waveTest.runL1ErrorTest(4.E-7, 4.E-7); +#elif defined(PLMC) + waveTest.runL1ErrorTest(3.0E-8, 3.0E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(1.95e-09, 2.16e-09); +#endif // PCM + + high_res_l2norms["alfven_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); } TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveLeftMovingCorrectInputExpectCorrectOutput) @@ -322,8 +335,14 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveLeftMovingCorrectInput // Set the number of timesteps waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); - // Check Results - waveTest.runL1ErrorTest(allowedL1Error, allowedError); + // Check Results. Values based on results in Gardiner & Stone 2008 +#ifdef PCM + waveTest.runL1ErrorTest(4.E-7, 4.E-7); +#elif defined(PLMC) + waveTest.runL1ErrorTest(3.0E-8, 3.0E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(1.95e-09, 2.16e-09); +#endif // PCM } // Contact Wave Moving Right @@ -355,11 +374,155 @@ TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveCorrectInputExpect waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); // Check Results +// Check Results. 
Values based on results in Gardiner & Stone 2008 #ifdef PCM - waveTest.runL1ErrorTest(1.35 * allowedL1Error, 1.35 * allowedError); -#else // PCM - waveTest.runL1ErrorTest(allowedL1Error, allowedError); + waveTest.runL1ErrorTest(5.4E-7, 5.4E-7); +#elif defined(PLMC) + waveTest.runL1ErrorTest(3.0E-8, 3.0E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(1.41e-09, 1.5E-09); #endif // PCM + + high_res_l2norms["contact_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveExpectSecondOrderConvergence) +{ + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Specific to this test + double const waveSpeed = 2.; + std::vector const numTimeSteps = {107, 102, 110}; + + double const prefix = 1. / (2 * std::sqrt(5)); + double const rEigenVec_rho = prefix * 2; + double const rEigenVec_MomentumX = prefix * 4; + double const rEigenVec_MomentumY = prefix * -2; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * 4; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 9; + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, 16); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Run the wave + waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); + + // Check the scaling + double const low_res_l2norm = waveTest.getL2Norm(); + testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["fast_" + std::to_string(domain_direction)], "", + 0.17); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecondOrderConvergence) +{ + // Get the test parameters + auto [pitch, yaw, domain, 
domain_direction] = GetParam(); + + // Specific to this test + double const waveSpeed = 0.5; + std::vector const numTimeSteps = {427, 407, 440}; + + double const prefix = 1. / (2 * std::sqrt(5)); + double const rEigenVec_rho = prefix * 4; + double const rEigenVec_MomentumX = prefix * 2; + double const rEigenVec_MomentumY = prefix * 4; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * -2; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 3; + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, 16); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Run the wave + waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); + + // Check the scaling + double const low_res_l2norm = waveTest.getL2Norm(); + testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["slow_" + std::to_string(domain_direction)], "", + 0.17); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConvergence) +{ + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Specific to this test + double const waveSpeed = 1.0; + std::vector const numTimeSteps = {214, 204, 220}; + + double const rEigenVec_rho = 0; + double const rEigenVec_MomentumX = 0; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = -1; + double const rEigenVec_Bx = 0; + double const rEigenVec_By = 0; + double const rEigenVec_Bz = 1; + double const rEigenVec_E = 0; + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 
0.0, 16); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Run the wave + waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); + + // Check the scaling + double const low_res_l2norm = waveTest.getL2Norm(); + testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["alfven_" + std::to_string(domain_direction)], + "", 0.17); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveExpectSecondOrderConvergence) +{ + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Specific to this test + double const waveSpeed = 1.0; + std::vector const numTimeSteps = {321, 310, 327}; + + double const rEigenVec_rho = 1; + double const rEigenVec_MomentumX = 1; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = 0; + double const rEigenVec_Bx = 0; + double const rEigenVec_By = 0; + double const rEigenVec_Bz = 0; + double const rEigenVec_E = 0.5; + double const velocityX = waveSpeed; + + // Set the launch parameters + setLaunchParams(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, rEigenVec_E, + rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, velocityX, 16); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Run the wave + waveTest.runL1ErrorTest(3.0E-8, 4.0E-8); + + // Check the scaling + double const low_res_l2norm = waveTest.getL2Norm(); + testingUtilities::checkResults(4.0, low_res_l2norm / high_res_l2norms["contact_" + std::to_string(domain_direction)], + "", 0.17); } INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMLinearWavesParameterizedAngle, @@ -390,14 +553,6 @@ class tMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithParam(testDims[0] * testDims[1] * testDims[2]); + _L2Norm += L1_error * L1_error; + } + } + + if (compute_L2_norm_only) { + // Check the L2 Norm + _L2Norm = std::sqrt(_L2Norm); + 
EXPECT_LT(_L2Norm, maxAllowedL1Error) << "the norm of the L1 error vector has exceeded the allowed value"; + + // Check the Max Error + EXPECT_LT(maxError, maxAllowedError) << "The maximum error has exceeded the allowed value"; } } // ============================================================================= @@ -238,9 +270,9 @@ void systemTest::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Erro << std::endl; // Loop over the datasets to be tested - double L2Norm = 0; + _L2Norm = 0; double maxError = 0; - for (auto dataSetName : _fiducialDataSetNames) { + for (auto const &dataSetName : _fiducialDataSetNames) { if (dataSetName == "GasEnergy") { continue; } @@ -280,16 +312,16 @@ void systemTest::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Erro } L1_error /= static_cast(initialDims[0] * initialDims[1] * initialDims[2]); - L2Norm += L1_error * L1_error; + _L2Norm += L1_error * L1_error; // Perform the correctness check EXPECT_LT(L1_error, maxAllowedL1Error) << "the L1 error for the " << dataSetName << " data has exceeded the allowed value"; } - // Check the L1 Norm - L2Norm = std::sqrt(L2Norm); - EXPECT_LT(L2Norm, maxAllowedL1Error) << "the norm of the L1 error vector has exceeded the allowed value"; + // Check the L2 Norm + _L2Norm = std::sqrt(_L2Norm); + EXPECT_LT(_L2Norm, maxAllowedL1Error) << "the norm of the L1 error vector has exceeded the allowed value"; // Check the Max Error EXPECT_LT(maxError, maxAllowedError) << "The maximum error has exceeded the allowed value"; @@ -396,10 +428,10 @@ systemTest::SystemTestRunner::SystemTestRunner(bool const &particleData, bool co const ::testing::TestInfo *const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); std::stringstream nameStream; std::string suiteName = test_info->test_suite_name(); - suiteName = suiteName.substr(suiteName.find("/") + 1, suiteName.length()); + suiteName = suiteName.substr(suiteName.find('/') + 1, suiteName.length()); nameStream << suiteName << "_" 
<< test_info->name(); std::string fullTestName = nameStream.str(); - _fullTestFileName = fullTestName.substr(0, fullTestName.find("/")); + _fullTestFileName = fullTestName.substr(0, fullTestName.find('/')); // Generate the input paths. Strip out everything after a "/" since that // probably indicates a parameterized test. @@ -591,7 +623,7 @@ std::vector systemTest::SystemTestRunner::_loadTestParticleData(std::str { // Determine the total number of particles if (_testTotalNumParticles == 0) { - for (auto file : _testParticlesFileVec) { + for (auto const &file : _testParticlesFileVec) { // Open the dataset H5::DataSet const dataSet = file.openDataSet(dataSetName); @@ -647,7 +679,7 @@ std::vector systemTest::SystemTestRunner::_loadTestParticleData(std::str // ============================================================================= std::vector systemTest::SystemTestRunner::_loadFiducialFieldData(std::string const &dataSetName) { - if (_fiducialFileExists) { + if (_fiducialFileExists and (_fiducialDataSets.find(dataSetName) == _fiducialDataSets.end())) { // Open the dataset H5::DataSet const fiducialDataSet = _fiducialFile.openDataSet(dataSetName); diff --git a/src/system_tests/system_tester.h b/src/system_tests/system_tester.h index 1c942a766..20e430a3a 100644 --- a/src/system_tests/system_tester.h +++ b/src/system_tests/system_tester.h @@ -70,7 +70,8 @@ class systemTest::SystemTestRunner * \brief Run the system test that has been set up * */ - void runTest(); + void runTest(bool const &compute_L2_norm_only = false, double const &maxAllowedL1Error = 0.0, + double const &maxAllowedError = 0.0); /*! * \brief Compute the L1 error for each field compared to the initial @@ -105,6 +106,13 @@ class systemTest::SystemTestRunner */ std::string getChollaSettingsFilePath() { return _chollaSettingsPath; }; + /*! + * \brief Get the L2Norm + * + * \return double The L2Norm of the last run test + */ + double getL2Norm() { return _L2Norm; }; + /*! 
* \brief Get the Output Directory object * @@ -303,6 +311,9 @@ class systemTest::SystemTestRunner /// appear to differ from NVIDIA/GCC/XL by roughly 1E-12 double _fixedEpsilon = 5.0E-12; + /// The L2 norm of the error vector + double _L2Norm; + /// Flag to indicate if a fiducial HDF5 data file is being used or a /// programmatically generated H5File object. `true` = use a file, `false` = /// use generated H5File object diff --git a/src/utils/debug_utilities.cu b/src/utils/debug_utilities.cu new file mode 100644 index 000000000..9a1157aca --- /dev/null +++ b/src/utils/debug_utilities.cu @@ -0,0 +1,60 @@ +#include + +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../io/io.h" // provides chprintf +#include "../utils/error_handling.h" // provides chexit + +__global__ void Dump_Values_Kernel(Real* device_array, int array_size, int marker) +{ + int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid >= array_size) { + return; + } + kernel_printf("Dump Values: marker %d tid %d value %g \n", marker, tid, device_array[tid]); +} + +/* + Prints out all values of a device_array + */ +void Dump_Values(Real* device_array, int array_size, int marker) +{ + int ngrid = (array_size + TPB - 1) / TPB; + dim3 dim1dGrid(ngrid, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + hipLaunchKernelGGL(Dump_Values_Kernel, dim1dGrid, dim1dBlock, 0, 0, device_array, array_size, marker); +} + +__global__ void Check_For_Nan_Kernel(Real* device_array, int array_size, int check_num, bool* out_bool) +{ + int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid >= array_size) { + return; + } + if (device_array[tid] == device_array[tid]) { + return; + } + out_bool[0] = true; + kernel_printf("Check_For_Nan_Kernel found Nan Checknum: %d Thread: %d\n", check_num, tid); +} + +/* + Checks a device_array for NaN and prints/exits if found + */ +void Check_For_Nan(Real* device_array, int array_size, int check_num) +{ + bool host_out_bool[1] = {false}; + bool* out_bool; + 
cudaMalloc((void**)&out_bool, sizeof(bool)); + cudaMemcpy(out_bool, host_out_bool, sizeof(bool), cudaMemcpyHostToDevice); + int ngrid = (array_size + TPB - 1) / TPB; + dim3 dim1dGrid(ngrid, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + hipLaunchKernelGGL(Check_For_Nan_Kernel, dim1dGrid, dim1dBlock, 0, 0, device_array, array_size, check_num, out_bool); + cudaMemcpy(host_out_bool, out_bool, sizeof(bool), cudaMemcpyDeviceToHost); + cudaFree(out_bool); + + if (host_out_bool[0]) { + chexit(-1); + } +} diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index da2ea80fe..37e527ca8 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -2,6 +2,7 @@ #include #include +#include #ifdef MPI_CHOLLA #include @@ -49,6 +50,27 @@ void Check_Configuration(parameters const &P) #error "The CUDA macro is required" #endif //! CUDA +// Can only have one integrator enabled +#if ((defined(VL) + defined(CTU) + defined(SIMPLE)) != 1) + #error "Only one integrator can be enabled at a time." +#endif // Only one integrator check + + // Check the boundary conditions + auto Check_Boundary = [](int const &boundary, std::string const &direction) { + bool is_allowed_bc = boundary >= 0 and boundary <= 4; + std::string const error_message = + "WARNING: Possibly invalid boundary conditions for direction: " + direction + + " flag: " + std::to_string(boundary) + + ". Must select between 0 (no boundary), 1 (periodic), 2 (reflective), 3 (transmissive), 4 (custom), 5 (mpi)."; + assert(is_allowed_bc && error_message.c_str()); + }; + Check_Boundary(P.xl_bcnd, "xl_bcnd"); + Check_Boundary(P.xu_bcnd, "xu_bcnd"); + Check_Boundary(P.yl_bcnd, "yl_bcnd"); + Check_Boundary(P.yu_bcnd, "yu_bcnd"); + Check_Boundary(P.zl_bcnd, "zl_bcnd"); + Check_Boundary(P.zu_bcnd, "zu_bcnd"); + // warn if error checking is disabled #ifndef CUDA_ERROR_CHECK #warning "CUDA error checking is disabled. 
Enable it with the CUDA_ERROR_CHECK macro" @@ -79,9 +101,9 @@ void Check_Configuration(parameters const &P) #endif //! HLLD or EXACT or ROE or HLL or HLLC // May only use certain reconstructions - #if !defined(PCM) || defined(PLMP) || defined(PLMC) || defined(PPMC) || defined(PPMP) - #error "MHD only supports PCM reconstruction" - #endif //! PCM or PLMP or PLMC or PPMC or PPMP + #if ((defined(PCM) + defined(PLMC) + defined(PPMC)) != 1) || defined(PLMP) || defined(PPMP) + #error "MHD only supports PCM, PLMC, and PPMC reconstruction" + #endif // Reconstruction check // must have HDF5 #ifndef HDF5 diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index 358d7e352..c0f783e1c 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -14,6 +14,8 @@ #include "../global/global.h" #include "../global/global_cuda.h" #include "../utils/gpu.hpp" +#include "../utils/math_utilities.h" +#include "../utils/mhd_utilities.h" /*! * INDEX OF VARIABLES @@ -30,19 +32,29 @@ namespace hydro_utilities { inline __host__ __device__ Real Calc_Pressure_Primitive(Real const &E, Real const &d, Real const &vx, Real const &vy, - Real const &vz, Real const &gamma) + Real const &vz, Real const &gamma, Real const &magnetic_x = 0.0, + Real const &magnetic_y = 0.0, Real const &magnetic_z = 0.0) { - Real P; - P = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); - P = fmax(P, TINY_NUMBER); - return P; + Real pressure = (E - 0.5 * d * (vx * vx + ((vy * vy) + (vz * vz)))); + +#ifdef MHD + pressure -= mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z); +#endif // MHD + + return fmax((gamma - 1.) 
* pressure, TINY_NUMBER); } inline __host__ __device__ Real Calc_Pressure_Conserved(Real const &E, Real const &d, Real const &mx, Real const &my, - Real const &mz, Real const &gamma) + Real const &mz, Real const &gamma, Real const &magnetic_x = 0.0, + Real const &magnetic_y = 0.0, Real const &magnetic_z = 0.0) { - Real P = (E - 0.5 * (mx * mx + my * my + mz * mz) / d) * (gamma - 1.); - return fmax(P, TINY_NUMBER); + Real pressure = (E - 0.5 * (mx * mx + my * my + mz * mz) / d); + +#ifdef MHD + pressure -= mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z); +#endif // MHD + + return fmax((gamma - 1.) * pressure, TINY_NUMBER); } inline __host__ __device__ Real Calc_Temp(Real const &P, Real const &n) @@ -60,10 +72,33 @@ inline __host__ __device__ Real Calc_Temp_DE(Real const &d, Real const &ge, Real #endif // DE inline __host__ __device__ Real Calc_Energy_Primitive(Real const &P, Real const &d, Real const &vx, Real const &vy, - Real const &vz, Real const &gamma) + Real const &vz, Real const &gamma, Real const &magnetic_x = 0.0, + Real const &magnetic_y = 0.0, Real const &magnetic_z = 0.0) { // Compute and return energy - return (fmax(P, TINY_NUMBER) / (gamma - 1.)) + 0.5 * d * (vx * vx + vy * vy + vz * vz); + Real energy = (fmax(P, TINY_NUMBER) / (gamma - 1.)) + 0.5 * d * (vx * vx + vy * vy + vz * vz); + +#ifdef MHD + energy += mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z); +#endif // MHD + + return energy; +} + +inline __host__ __device__ Real Calc_Energy_Conserved(Real const &P, Real const &d, Real const &momentum_x, + Real const &momentum_y, Real const &momentum_z, Real const &gamma, + Real const &magnetic_x = 0.0, Real const &magnetic_y = 0.0, + Real const &magnetic_z = 0.0) +{ + // Compute and return energy + Real energy = (fmax(P, TINY_NUMBER) / (gamma - 1.)) + + (0.5 / d) * (momentum_x * momentum_x + momentum_y * momentum_y + momentum_z * momentum_z); + +#ifdef MHD + energy += 
mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z); +#endif // MHD + + return energy; } inline __host__ __device__ Real Get_Pressure_From_DE(Real const &E, Real const &U_total, Real const &U_advected, @@ -79,7 +114,8 @@ inline __host__ __device__ Real Get_Pressure_From_DE(Real const &E, Real const & U = U_advected; } P = U * (gamma - 1.0); - return P; + return fmax(P, (Real)TINY_NUMBER); + ; } /*! @@ -112,6 +148,17 @@ inline __host__ __device__ Real Calc_Kinetic_Energy_From_Momentum(Real const &d, return (0.5 / d) * (mx * mx + my * my * mz * mz); } +/*! + * \brief Compute the sound speed in the cell from conserved variables + * + * \param E Energy + * \param d densidy + * \param mx x momentum + * \param my y momentum + * \param mz z momentum + * \param gamma adiabatic index + * \return Real The sound speed + */ inline __host__ __device__ Real Calc_Sound_Speed(Real const &E, Real const &d, Real const &mx, Real const &my, Real const &mz, Real const &gamma) { @@ -119,4 +166,17 @@ inline __host__ __device__ Real Calc_Sound_Speed(Real const &E, Real const &d, R return sqrt(gamma * P / d); } +/*! 
+ * \brief Compute the sound in the cell from primitive variables + * + * \param P + * \param d + * \param gamma + * \return __host__ + */ +inline __host__ __device__ Real Calc_Sound_Speed(Real const &P, Real const &d, Real const &gamma) +{ + return sqrt(gamma * P / d); +} + } // namespace hydro_utilities diff --git a/src/utils/hydro_utilities_tests.cpp b/src/utils/hydro_utilities_tests.cpp index 6c8c37cf1..7bab43b69 100644 --- a/src/utils/hydro_utilities_tests.cpp +++ b/src/utils/hydro_utilities_tests.cpp @@ -50,33 +50,78 @@ struct TestParams { std::vector ge{4.890374019e-10, 1.0756968986e2, 3.8740982372e100}; std::vector U_total{2.389074039e-10, 4.890374019e2, 6.8731436293e100}; std::vector U_advected{1.3847303413e-10, 1.0756968986e2, 1.0882403847e100}; + std::vector pressureTotal{8.1704748693e-100, 2.6084125198e2, 1.8242151369e100}; + std::vector magnetic_x{2.8568843801e-100, 9.2400807786e2, 2.1621115264e100}; + std::vector magnetic_y{9.2900880344e-100, 8.0382409757e2, 6.6499532343e100}; + std::vector magnetic_z{9.5795678229e-100, 3.3284839263e2, 9.2337456649e100}; std::vector names{"Small number case", "Medium number case", "Large number case"}; }; } // namespace -TEST(tHYDROHydroUtilsCalcPressurePrimitive, CorrectInputExpectCorrectOutput) +TEST(tHYDROtMHDHydroUtilsCalcPressurePrimitive, CorrectInputExpectCorrectOutput) { TestParams parameters; - std::vector fiducial_Ps{1e-20, 139983415580.5549, 1.2697896247496674e+301}; +#ifdef MHD + std::vector fiducial_pressure{0, 139982878676.5015, 1.2697896247496674e+301}; +#else // not MHD + std::vector fiducial_pressure{1e-20, 139983415580.5549, 1.2697896247496674e+301}; +#endif // MHD for (size_t i = 0; i < parameters.names.size(); i++) { - Real test_Ps = hydro_utilities::Calc_Pressure_Primitive(parameters.E.at(i), parameters.d.at(i), parameters.vx.at(i), - parameters.vy.at(i), parameters.vz.at(i), parameters.gamma); + Real test_Ps = hydro_utilities::Calc_Pressure_Primitive( + parameters.E.at(i), parameters.d.at(i), 
parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); + testingUtilities::checkResults(fiducial_pressure.at(i), test_Ps, parameters.names.at(i)); } } -TEST(tHYDROHydroUtilsCalcPressureConserved, CorrectInputExpectCorrectOutput) +TEST(tHYDROtMHDHydroUtilsCalcPressureConserved, CorrectInputExpectCorrectOutput) { TestParams parameters; - std::vector fiducial_Ps{1e-20, 139984604373.87094, 1.3965808056866668e+301}; +#ifdef MHD + std::vector fiducial_pressure{0, 139984067469.81754, 1.3965808056866668e+301}; +#else // not MHD + std::vector fiducial_pressure{1e-20, 139984604373.87094, 1.3965808056866668e+301}; +#endif // MHD for (size_t i = 0; i < parameters.names.size(); i++) { - Real test_Ps = hydro_utilities::Calc_Pressure_Conserved(parameters.E.at(i), parameters.d.at(i), parameters.mx.at(i), - parameters.my.at(i), parameters.mz.at(i), parameters.gamma); + Real test_pressure = hydro_utilities::Calc_Pressure_Conserved( + parameters.E.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); + testingUtilities::checkResults(fiducial_pressure.at(i), test_pressure, parameters.names.at(i)); + } +} + +TEST(tHYDROtMHDHydroUtilsCalcPressurePrimitive, NegativePressureExpectAutomaticFix) +{ + TestParams parameters; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_pressure = hydro_utilities::Calc_Pressure_Primitive( + parameters.E.at(i), parameters.d.at(i), 1E4 * parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); + + // 
I'm using the binary equality assertion here since in the case of + // negative pressure the function should return exactly TINY_NUMBER + EXPECT_EQ(TINY_NUMBER, test_pressure) << "Difference in " << parameters.names.at(i) << std::endl; + } +} + +TEST(tHYDROtMHDHydroUtilsCalcPressureConserved, NegativePressureExpectAutomaticFix) +{ + TestParams parameters; + + for (size_t i = 0; i < parameters.names.size() - 1; i++) { + Real test_pressure = hydro_utilities::Calc_Pressure_Conserved( + 1E-10 * parameters.E.at(i), parameters.d.at(i), 1E4 * parameters.mx.at(i), 1E4 * parameters.my.at(i), + 1E4 * parameters.mz.at(i), parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), + parameters.magnetic_z.at(i)); + + // I'm using the binary equality assertion here since in the case of + // negative pressure the function should return exactly TINY_NUMBER + EXPECT_EQ(TINY_NUMBER, test_pressure) << "Difference in " << parameters.names.at(i) << std::endl; } } @@ -107,16 +152,73 @@ TEST(tHYDROHydroUtilsCalcTempDE, CorrectInputExpectCorrectOutput) } #endif // DE -TEST(tHYDROHydroUtilsCalcEnergyPrimitive, CorrectInputExpectCorrectOutput) +TEST(tHYDROtMHDHydroUtilsCalcEnergyPrimitive, CorrectInputExpectCorrectOutput) { TestParams parameters; - std::vector fiducial_Es{3.3366124363499997e-10, 1784507.7619407175, 1.9018677140549926e+300}; +#ifdef MHD + std::vector fiducial_energy{3.3366124363499997e-10, 2589863.8420712831, 1.9018677140549926e+300}; +#else // not MHD + std::vector fiducial_energy{3.3366124363499997e-10, 1784507.7619407175, 1.9018677140549926e+300}; +#endif // MHD for (size_t i = 0; i < parameters.names.size(); i++) { - Real test_Es = hydro_utilities::Calc_Energy_Primitive(parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), - parameters.vy.at(i), parameters.vz.at(i), parameters.gamma); + Real test_Es = hydro_utilities::Calc_Energy_Primitive( + parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), + 
parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); + + testingUtilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); + } +} + +TEST(tHYDROtMHDHydroUtilsCalcEnergyConserved, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; +#ifdef MHD + std::vector fiducial_energy{3.3366124363499997e-10, 806673.86799851817, 6.7079331637514162e+201}; +#else // not MHD + std::vector fiducial_energy{3.3366124363499997e-10, 1317.7878679524658, 1.0389584427972784e+101}; +#endif // MHD + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_Es = hydro_utilities::Calc_Energy_Conserved( + parameters.P.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); + + testingUtilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); + } +} + +TEST(tHYDROtMHDHydroUtilsCalcEnergyPrimitive, NegativePressureExpectAutomaticFix) +{ + TestParams parameters; +#ifdef MHD + std::vector fiducial_energy{1.4999999999999998e-20, 2588562.2478059679, 1.9018677140549926e+300}; +#else // not MHD + std::vector fiducial_energy{0, 1783206.1676754025, 1.9018677140549926e+300}; +#endif // MHD + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_Es = hydro_utilities::Calc_Energy_Primitive( + -parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); + + testingUtilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); + } +} + +TEST(tHYDROtMHDHydroUtilsCalcEnergyConserved, NegativePressureExpectAutomaticFix) +{ + TestParams parameters; +#ifdef MHD + std::vector fiducial_energy{0, 805372.27373320318, 6.7079331637514162e+201}; +#else // not MHD + std::vector 
fiducial_energy{0, 16.193602637465997, 3.0042157852278494e+99}; +#endif // MHD + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_Es = hydro_utilities::Calc_Energy_Conserved( + -parameters.P.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_Es.at(i), test_Es, parameters.names.at(i)); + testingUtilities::checkResults(fiducial_energy.at(i), test_Es, parameters.names.at(i)); } } diff --git a/src/utils/mhd_utilities.cu b/src/utils/mhd_utilities.cu index 110d8d66b..bceb4abc1 100644 --- a/src/utils/mhd_utilities.cu +++ b/src/utils/mhd_utilities.cu @@ -23,14 +23,14 @@ void Init_Magnetic_Field_With_Vector_Potential(Header const &H, Grid3D::Conserve std::vector const &vectorPotential) { // Compute the magnetic field - for (int k = 1; k < H.nz; k++) { - for (int j = 1; j < H.ny; j++) { - for (int i = 1; i < H.nx; i++) { + for (size_t k = 1; k < H.nz; k++) { + for (size_t j = 1; j < H.ny; j++) { + for (size_t i = 1; i < H.nx; i++) { // Get cell index. 
The "xmo" means: X direction Minus One - int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); - int const idxmo = cuda_utilities::compute1DIndex(i - 1, j, k, H.nx, H.ny); - int const idymo = cuda_utilities::compute1DIndex(i, j - 1, k, H.nx, H.ny); - int const idzmo = cuda_utilities::compute1DIndex(i, j, k - 1, H.nx, H.ny); + size_t const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + size_t const idxmo = cuda_utilities::compute1DIndex(i - 1, j, k, H.nx, H.ny); + size_t const idymo = cuda_utilities::compute1DIndex(i, j - 1, k, H.nx, H.ny); + size_t const idzmo = cuda_utilities::compute1DIndex(i, j, k - 1, H.nx, H.ny); C.magnetic_x[id] = (vectorPotential.at(id + 2 * H.n_cells) - vectorPotential.at(idymo + 2 * H.n_cells)) / H.dy - (vectorPotential.at(id + 1 * H.n_cells) - vectorPotential.at(idzmo + 1 * H.n_cells)) / H.dz; diff --git a/src/utils/mhd_utilities.h b/src/utils/mhd_utilities.h index 57bf14549..1a0c91674 100644 --- a/src/utils/mhd_utilities.h +++ b/src/utils/mhd_utilities.h @@ -16,7 +16,6 @@ #include "../global/global.h" #include "../global/global_cuda.h" #include "../grid/grid3D.h" -#include "../riemann_solvers/hlld_cuda.h" #include "../utils/cuda_utilities.h" #include "../utils/gpu.hpp" @@ -79,81 +78,6 @@ inline __host__ __device__ Real computeMagneticEnergy(Real const &magneticX, Rea } // ========================================================================= -// ========================================================================= -/*! - * \brief Compute the energy in a cell. 
If MHD is not defined then simply - * return the hydro only energy - * - * \param[in] pressure The gas pressure - * \param[in] density The density - * \param[in] velocityX Velocity in the x-direction - * \param[in] velocityY Velocity in the y-direction - * \param[in] velocityZ Velocity in the z-direction - * \param[in] magneticX Magnetic field in the x-direction - * \param[in] magneticY Magnetic field in the y-direction - * \param[in] magneticZ Magnetic field in the z-direction - * \param[in] gamma The adiabatic index - * \return Real The energy within a cell - */ -inline __host__ __device__ Real computeEnergy(Real const &pressure, Real const &density, Real const &velocityX, - Real const &velocityY, Real const &velocityZ, Real const &magneticX, - Real const &magneticY, Real const &magneticZ, Real const &gamma) -{ - // Compute and return energy - Real energy = (fmax(pressure, TINY_NUMBER) / (gamma - 1.)) + - 0.5 * density * (velocityX * velocityX + ((velocityY * velocityY) + (velocityZ * velocityZ))); -#ifdef MHD - energy += computeMagneticEnergy(magneticX, magneticY, magneticZ); -#endif // MHD - - return energy; -} -// ========================================================================= - -// ========================================================================= -/*! 
- * \brief Compute the MHD gas pressure in a cell - * - * \param[in] energy The energy - * \param[in] density The density - * \param[in] momentumX Momentum in the x-direction - * \param[in] momentumY Momentum in the y-direction - * \param[in] momentumZ Momentum in the z-direction - * \param[in] magneticX Magnetic field in the x-direction - * \param[in] magneticY Magnetic field in the y-direction - * \param[in] magneticZ Magnetic field in the z-direction - * \param[in] gamma The adiabatic index - * \return Real The gas pressure in a cell - */ -inline __host__ __device__ Real computeGasPressure(Real const &energy, Real const &density, Real const &momentumX, - Real const &momentumY, Real const &momentumZ, Real const &magneticX, - Real const &magneticY, Real const &magneticZ, Real const &gamma) -{ - Real pressure = - (gamma - 1.) * - (energy - 0.5 * (momentumX * momentumX + ((momentumY * momentumY) + (momentumZ * momentumZ))) / density - - computeMagneticEnergy(magneticX, magneticY, magneticZ)); - - return fmax(pressure, TINY_NUMBER); -} - -/*! - * \brief Specialization of mhd::utils::computeGasPressure for use in the HLLD solver - * - * \param state The State to compute the gas pressure of - * \param magneticX The X magnetic field - * \param gamma The adiabatic index - * \return Real The gas pressure - */ -inline __host__ __device__ Real computeGasPressure(mhd::_internal::State const &state, Real const &magneticX, - Real const &gamma) -{ - return mhd::utils::computeGasPressure(state.energy, state.density, state.velocityX * state.density, - state.velocityY * state.density, state.velocityZ * state.density, magneticX, - state.magneticY, state.magneticZ, gamma); -} -// ========================================================================= - // ========================================================================= /*! 
* \brief Compute the MHD thermal energy in a cell diff --git a/src/utils/mhd_utilities_tests.cu b/src/utils/mhd_utilities_tests.cu index 7383ef0e3..980259d28 100644 --- a/src/utils/mhd_utilities_tests.cu +++ b/src/utils/mhd_utilities_tests.cu @@ -46,100 +46,6 @@ struct testParams { } // namespace // ============================================================================= -// ============================================================================= -// Tests for the mhd::utils::computeEnergy function -// ============================================================================= -/*! - * \brief Test the mhd::utils::computeEnergy function with the standard set of - * parameters - * - */ -TEST(tMHDComputeEnergy, CorrectInputExpectCorrectOutput) -{ - testParams parameters; - std::vector fiducialEnergies{3.3366124363499995e-100, 137786230.15630624, 9.2884430880010847e+301}; - - for (size_t i = 0; i < parameters.names.size(); i++) { - Real testEnergy = - mhd::utils::computeEnergy(parameters.pressureGas.at(i), parameters.density.at(i), parameters.velocityX.at(i), - parameters.velocityY.at(i), parameters.velocityZ.at(i), parameters.magneticX.at(i), - parameters.magneticY.at(i), parameters.magneticZ.at(i), parameters.gamma); - - testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, parameters.names.at(i)); - } -} - -/*! 
- * \brief Test the mhd::utils::computeEnergy function with a the standard set of - * parameters except pressure is now negative - * - */ -TEST(tMHDComputeEnergy, NegativePressureExpectAutomaticFix) -{ - testParams parameters; - std::vector fiducialEnergies{3.3366124363499995e-100, 137784928.56204093, 9.2884430880010847e+301}; - - for (size_t i = 0; i < parameters.names.size(); i++) { - Real testEnergy = - mhd::utils::computeEnergy(-parameters.pressureGas.at(i), parameters.density.at(i), parameters.velocityX.at(i), - parameters.velocityY.at(i), parameters.velocityZ.at(i), parameters.magneticX.at(i), - parameters.magneticY.at(i), parameters.magneticZ.at(i), parameters.gamma); - - testingUtilities::checkResults(fiducialEnergies.at(i), testEnergy, parameters.names.at(i)); - } -} -// ============================================================================= -// End of tests for the mhd::utils::computeEnergy function -// ============================================================================= - -// ============================================================================= -// Tests for the mhd::utils::computeGasPressure function -// ============================================================================= -/*! - * \brief Test the mhd::utils::computeGasPressure function with the standard set - * of parameters. 
Energy has been increased to avoid negative pressures - * - */ -TEST(tMHDComputeGasPressure, CorrectInputExpectCorrectOutput) -{ - testParams parameters; - std::vector energyMultiplier{3, 1.0E4, 1.0E105}; - std::vector fiducialGasPressures{1.8586864490415075e-100, 4591434.7663756227, 1.29869419465575e+205}; - - for (size_t i = 0; i < parameters.names.size(); i++) { - Real testGasPressure = mhd::utils::computeGasPressure( - energyMultiplier.at(i) * parameters.energy.at(i), parameters.density.at(i), parameters.momentumX.at(i), - parameters.momentumY.at(i), parameters.momentumZ.at(i), parameters.magneticX.at(i), parameters.magneticY.at(i), - parameters.magneticZ.at(i), parameters.gamma); - - testingUtilities::checkResults(fiducialGasPressures.at(i), testGasPressure, parameters.names.at(i)); - } -} - -/*! - * \brief Test the mhd::utils::computeGasPressure function with a the standard - * set of parameters which produce negative pressures - * - */ -TEST(tMHDComputeGasPressure, NegativePressureExpectAutomaticFix) -{ - testParams parameters; - - for (size_t i = 0; i < parameters.names.size(); i++) { - Real testGasPressure = mhd::utils::computeGasPressure( - parameters.energy.at(i), parameters.density.at(i), parameters.momentumX.at(i), parameters.momentumY.at(i), - parameters.momentumZ.at(i), parameters.magneticX.at(i), parameters.magneticY.at(i), parameters.magneticZ.at(i), - parameters.gamma); - - // I'm using the binary equality assertion here since in the case of - // negative pressure the function should return exactly TINY_NUMBER - EXPECT_EQ(TINY_NUMBER, testGasPressure) << "Difference in " << parameters.names.at(i) << std::endl; - } -} -// ============================================================================= -// End of tests for the mhd::utils::computeGasPressure function -// ============================================================================= - // ============================================================================= // Tests for the 
mhd::utils::computeThermalEnergy function // ============================================================================= diff --git a/src/utils/reduction_utilities_tests.cu b/src/utils/reduction_utilities_tests.cu index d7fdaf3d3..e689e2a5f 100644 --- a/src/utils/reduction_utilities_tests.cu +++ b/src/utils/reduction_utilities_tests.cu @@ -43,8 +43,8 @@ TEST(tALLKernelReduceMax, CorrectInputExpectCorrectOutput) std::mt19937 prng(1); std::uniform_real_distribution doubleRand(-std::abs(maxValue) - 1, std::abs(maxValue) - 1); std::uniform_int_distribution intRand(0, host_grid.size() - 1); - for (size_t i = 0; i < host_grid.size(); i++) { - host_grid.at(i) = doubleRand(prng); + for (Real& host_data : host_grid) { + host_data = doubleRand(prng); } host_grid.at(intRand(prng)) = maxValue; diff --git a/src/utils/testing_utilities.cpp b/src/utils/testing_utilities.cpp index 79dc8d11b..02aaadd68 100644 --- a/src/utils/testing_utilities.cpp +++ b/src/utils/testing_utilities.cpp @@ -71,17 +71,11 @@ bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, int6 // Perform the ULP check which is for numbers far from zero and perform the absolute check which is for numbers near // zero - if (ulpsDiff <= ulpsEpsilon or absoluteDiff <= fixedEpsilon) { - return true; - } - // if the checks don't pass indicate test failure - else { - return false; - } + return ulpsDiff <= ulpsEpsilon or absoluteDiff <= fixedEpsilon; } // ========================================================================= -void wrapperEqual(int i, int j, int k, std::string dataSetName, double test_value, double fid_value, +void wrapperEqual(int i, int j, int k, std::string const &dataSetName, double test_value, double fid_value, double fixedEpsilon = 5.0E-12) { std::string outString; @@ -97,7 +91,7 @@ void wrapperEqual(int i, int j, int k, std::string dataSetName, double test_valu ASSERT_NO_FATAL_FAILURE(checkResults<1>(fid_value, test_value, outString, fixedEpsilon)); } -void 
analyticConstant(systemTest::SystemTestRunner testObject, std::string dataSetName, double value) +void analyticConstant(systemTest::SystemTestRunner testObject, std::string const &dataSetName, double value) { std::vector testDims(3, 1); std::vector testData = testObject.loadTestFieldData(dataSetName, testDims); @@ -112,8 +106,8 @@ void analyticConstant(systemTest::SystemTestRunner testObject, std::string dataS } } -void analyticSine(systemTest::SystemTestRunner testObject, std::string dataSetName, double constant, double amplitude, - double kx, double ky, double kz, double phase, double tolerance) +void analyticSine(systemTest::SystemTestRunner testObject, std::string const &dataSetName, double constant, + double amplitude, double kx, double ky, double kz, double phase, double tolerance) { std::vector testDims(3, 1); std::vector testData = testObject.loadTestFieldData(dataSetName, testDims); diff --git a/src/utils/testing_utilities.h b/src/utils/testing_utilities.h index 12daf0969..7057e01e9 100644 --- a/src/utils/testing_utilities.h +++ b/src/utils/testing_utilities.h @@ -95,13 +95,13 @@ bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, int6 double const &fixedEpsilon = 1E-14, int64_t const &ulpsEpsilon = 4); // ========================================================================= -void wrapperEqual(int i, int j, int k, std::string dataSetName, double test_value, double fid_value, +void wrapperEqual(int i, int j, int k, std::string const &dataSetName, double test_value, double fid_value, double fixedEpsilon); -void analyticConstant(systemTest::SystemTestRunner testObject, std::string dataSetName, double value); +void analyticConstant(systemTest::SystemTestRunner testObject, std::string const &dataSetName, double value); -void analyticSine(systemTest::SystemTestRunner testObject, std::string dataSetName, double constant, double amplitude, - double kx, double ky, double kz, double phase, double tolerance); +void 
analyticSine(systemTest::SystemTestRunner testObject, std::string const &dataSetName, double constant, + double amplitude, double kx, double ky, double kz, double phase, double tolerance); // ========================================================================= /*! @@ -120,7 +120,7 @@ void analyticSine(systemTest::SystemTestRunner testObject, std::string dataSetNa * values are ignored and default behaviour is used */ template -void checkResults(double fiducialNumber, double testNumber, std::string outString, double fixedEpsilon = -999, +void checkResults(double fiducialNumber, double testNumber, std::string const &outString, double fixedEpsilon = -999, int64_t ulpsEpsilon = -999) { // Check for equality and if not equal return difference diff --git a/src/utils/timing_functions.cpp b/src/utils/timing_functions.cpp index 05c0546be..133971b68 100644 --- a/src/utils/timing_functions.cpp +++ b/src/utils/timing_functions.cpp @@ -14,6 +14,7 @@ void OneTime::Start() { + cudaDeviceSynchronize(); if (inactive) { return; } @@ -29,6 +30,7 @@ void OneTime::Subtract(Real time_to_subtract) void OneTime::End() { + cudaDeviceSynchronize(); if (inactive) { return; } @@ -96,6 +98,7 @@ void Time::Initialize() #ifdef PARTICLES &(Calc_dt = OneTime("Calc_dt")), #endif + &(Hydro_Integrator = OneTime("Hydro_Integrator")), &(Hydro = OneTime("Hydro")), &(Boundaries = OneTime("Boundaries")), #ifdef GRAVITY @@ -109,8 +112,11 @@ void Time::Initialize() &(Advance_Part_1 = OneTime("Advance_Part_1")), &(Advance_Part_2 = OneTime("Advance_Part_2")), #endif + #ifdef COOLING_GPU + &(Cooling_GPU = OneTime("Cooling_GPU")), + #endif #ifdef COOLING_GRACKLE - &(Cooling = OneTime("Cooling")), + &(Cooling_Grackle = OneTime("Cooling_Grackle")), #endif #ifdef CHEMISTRY_GPU &(Chemistry = OneTime("Chemistry")), diff --git a/src/utils/timing_functions.h b/src/utils/timing_functions.h index 2438c1595..09e209f8b 100644 --- a/src/utils/timing_functions.h +++ b/src/utils/timing_functions.h @@ -45,6 +45,7 @@ class 
Time OneTime Total; OneTime Calc_dt; + OneTime Hydro_Integrator; OneTime Hydro; OneTime Boundaries; OneTime Grav_Potential; @@ -54,7 +55,8 @@ class Time OneTime Part_Dens_Transf; OneTime Advance_Part_1; OneTime Advance_Part_2; - OneTime Cooling; + OneTime Cooling_GPU; + OneTime Cooling_Grackle; OneTime Chemistry; OneTime Feedback; OneTime FeedbackAnalysis;