diff --git a/.gitmodules b/.gitmodules
index c9a26c699..e69de29bb 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "cholla-tests-data"]
-  path = cholla-tests-data
-  url = https://github.com/cholla-hydro/cholla-tests-data.git
diff --git a/Makefile b/Makefile
index 7ed600340..31e8c6380 100644
--- a/Makefile
+++ b/Makefile
@@ -11,9 +11,9 @@ CUDA_ARCH ?= sm_70

 DIRS := src src/analysis src/chemistry_gpu src/cooling src/cooling_grackle src/cosmology \
         src/cpu src/global src/gravity src/gravity/paris src/grid src/hydro \
-        src/integrators src/io src/main.cpp src/main_tests.cpp src/mhd\
+        src/integrators src/io src/main.cpp src/main_tests.cpp src/mhd \
         src/model src/mpi src/old_cholla src/particles src/reconstruction \
-        src/riemann_solvers src/system_tests src/utils src/dust src/cluster \
+        src/riemann_solvers src/system_tests src/utils src/dust src/cluster src/cloud_tracking \

 SUFFIX ?= .$(TYPE).$(MACHINE)
diff --git a/builds/make.type.dust b/builds/make.type.dust
index 1669a4077..a039eff9b 100644
--- a/builds/make.type.dust
+++ b/builds/make.type.dust
@@ -8,7 +8,7 @@ MPI_GPU ?=
 DFLAGS += -DMPI_CHOLLA
 DFLAGS += -DPRECISION=2
-DFLAGS += -DPLMC
+DFLAGS += -DPPMP
 DFLAGS += -DHLLC

 DFLAGS += -DDE
@@ -24,10 +24,11 @@ DFLAGS += -DSCALAR_FLOOR

 # Define dust macro
 DFLAGS += -DDUST
+DFLAGS += -DOUTFLOW_ANALYSIS
+# DFLAGS += -DCLOUD_TRACKING

 # Apply the cooling in the GPU from precomputed tables
 DFLAGS += -DCOOLING_GPU
-DFLAGS += -DCLOUDY_COOLING

 #Measure the Timing of the different stages
 #DFLAGS += -DCPU_TIME
@@ -37,10 +38,10 @@ DFLAGS += -DPROJECTION

 DFLAGS += $(OUTPUT)

-DFLAGS += -DOUTPUT_ALWAYS
+# DFLAGS += -DOUTPUT_ALWAYS

 #Select if the Hydro Conserved data will reside in the GPU
 #and the MPI transfers are done from the GPU
 #If not specified, MPI_GPU is off by default
 #This is set in the system make.host file
-DFLAGS += $(MPI_GPU)
\ No newline at end of file
+DFLAGS += $(MPI_GPU)
diff --git a/builds/make.type.hydro b/builds/make.type.hydro
index 9e9b1d77c..cc8bbfbe6 100644
--- a/builds/make.type.hydro
+++ b/builds/make.type.hydro
@@ -13,19 +13,26 @@ DFLAGS += -DVL
 endif

 # Apply a density and temperature floor
-DFLAGS += -DDENSITY_FLOOR
-DFLAGS += -DTEMPERATURE_FLOOR
+#DFLAGS += -DDENSITY_FLOOR
+#DFLAGS += -DTEMPERATURE_FLOOR

 # Solve the Gas Internal Energy using a Dual Energy Formalism
-#DFLAGS += -DDE
+DFLAGS += -DDE

 # Apply cooling on the GPU from precomputed tables
 #DFLAGS += -DCOOLING_GPU

+DFLAGS += -DAVERAGE_SLOW_CELLS
+
+# Apply reference frame shift to track cloud
+DFLAGS += -DCLOUD_TRACKING

 # Measure the Timing of the different stages
 #DFLAGS += -DCPU_TIME

 # Select output format
-# Can also add -DSLICES and -DPROJECTIONS
-OUTPUT ?= -DOUTPUT -DHDF5
+# Can also add -DSLICES and -DPROJECTION
+OUTPUT ?= -DOUTPUT -DHDF5
 DFLAGS += $(OUTPUT)
+
+DFLAGS += -DSLICES
+DFLAGS += -DPROJECTION
\ No newline at end of file
diff --git a/builds/make.type.m82 b/builds/make.type.m82
index f5c4ef54b..b3996715f 100644
--- a/builds/make.type.m82
+++ b/builds/make.type.m82
@@ -2,7 +2,9 @@
 #-- separated output flag so that it can be overridden in target-specific
 # for make check
-OUTPUT ?= -DOUTPUT -DHDF5 -DSLICES -DPROJECTION -DROTATED_PROJECTION
+# OUTPUT ?= -DOUTPUT -DHDF5 -DSLICES -DPROJECTION -DROTATED_PROJECTION
+
+OUTPUT ?= -DOUTPUT -DHDF5 -DSLICES

 MPI_GPU ?=

@@ -19,10 +21,14 @@ DFLAGS += -DSTATIC_GRAV

 # EVOLVE SCALAR (INJECTED BY SUPERNOVA)
 DFLAGS += -DSCALAR
-DFLAGS += -DBASIC_SCALAR
+# DFLAGS += -DBASIC_SCALAR
+
+DFLAGS += -DDUST
+# DFLAGS += -DOUTFLOW_ANALYSIS
+DFLAGS += -DSCALAR_FLOOR

 # MW_MODEL vs m82 (default)
-DFLAGS += -DMW_MODEL
+# DFLAGS += -DMW_MODEL

 ifeq ($(findstring cosmology,$(TYPE)),cosmology)
 DFLAGS += -DSIMPLE
diff --git a/builds/make.type.particles b/builds/make.type.particles
index f718f975b..2951f628f 100644
--- a/builds/make.type.particles
+++ b/builds/make.type.particles
@@ -11,11 +11,12 @@ DFLAGS += -DPARTICLES_GPU

 #Solve only an N-Body Simulation (No Hydro, DM Only simulation)
-# DFLAGS += -DONLY_PARTICLES
+DFLAGS += -DONLY_PARTICLES

 # Track Particle IDs and write them to the output files
 DFLAGS += -DPARTICLE_IDS

+DFLAGS += -DCLOUD_TRACKING

 # Track Particle Ages ( Stellar Populations )
diff --git a/src/analysis/feedback_analysis.cpp b/src/analysis/feedback_analysis.cpp
index 3dab7b6da..5fe0e7543 100644
--- a/src/analysis/feedback_analysis.cpp
+++ b/src/analysis/feedback_analysis.cpp
@@ -87,7 +87,7 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion(Grid3D& G)
 #ifdef MPI_CHOLLA
   MPI_Allreduce(&partial_mass, &total_mass, 1, MPI_CHREAL, MPI_SUM, world);
 #else
-  total_mass = partial_mass;
+  total_mass   = partial_mass;
 #endif

   for (k = G.H.n_ghost; k < G.H.nz - G.H.n_ghost; k++) {
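// A minimal sketch of the compile-time gating pattern the make.type.* files
// above rely on: each -D macro appended to DFLAGS switches a code path on at
// build time. CLOUD_TRACKING and OUTFLOW_ANALYSIS are the real macros added
// above; the printf bodies are illustrative stand-ins for the real hooks.
#include <cstdio>

int main()
{
#ifdef CLOUD_TRACKING
  std::printf("frame-shift cloud tracking compiled in\n");  // only with -DCLOUD_TRACKING
#endif
#ifdef OUTFLOW_ANALYSIS
  std::printf("outflow/mass analysis compiled in\n");  // only with -DOUTFLOW_ANALYSIS
#endif
  std::printf("base hydro always compiled\n");
  return 0;
}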
diff --git a/src/analysis/outflow_analysis.cu b/src/analysis/outflow_analysis.cu
new file mode 100644
index 000000000..18ea297e8
--- /dev/null
+++ b/src/analysis/outflow_analysis.cu
@@ -0,0 +1,241 @@
+#ifdef DUST
+  #ifdef OUTFLOW_ANALYSIS
+    // STL includes
+    #include
+    #include
+
+    #include
+    #include
+    #include
+
+    #include "../analysis/outflow_analysis.h"
+    #include "../grid/grid_enum.h"
+    #include "../utils/DeviceVector.h"
+    #include "../utils/cuda_utilities.h"
+    #include "../utils/hydro_utilities.h"
+    #include "../utils/reduction_utilities.h"
+
+void Outflow_Analysis(Real *dev_conserved, int nx, int ny, int nz, Real dx, Real dy, Real dz, int n_ghost,
+                      int n_fields, Real *mass_cloud, Real *mass_dust, Real density_cloud_init)
+{
+  cuda_utilities::AutomaticLaunchParams static const launchParams(Outflow_Analysis_Kernel);
+
+  cuda_utilities::DeviceVector<Real> dev_mass_cloud(1);
+  cuda_utilities::DeviceVector<Real> dev_mass_dust(1);
+
+  hipLaunchKernelGGL(Outflow_Analysis_Kernel, launchParams.get_numBlocks(), launchParams.get_threadsPerBlock(), 0, 0,
+                     dev_conserved, nx, ny, nz, dx, dy, dz, n_ghost, n_fields, dev_mass_cloud.data(),
+                     dev_mass_dust.data(), density_cloud_init);
+  cudaDeviceSynchronize();
+
+  *mass_cloud = dev_mass_cloud[0];
+  *mass_dust  = dev_mass_dust[0];
+}
+
+__global__ void Outflow_Analysis_Kernel(Real *dev_conserved, int nx, int ny, int nz, Real dx, Real dy, Real dz,
+                                        int n_ghost, int n_fields, Real *mass_cloud, Real *mass_dust,
+                                        Real density_cloud_init)
+{
+  int xid, yid, zid, n_cells;
+  n_cells = nx * ny * nz;
+
+  Real density_gas;
+  Real mass_cloud_stride = 0.0;
+  Real density_dust;
+  Real mass_dust_stride = 0.0;
+
+  for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) {
+    cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid);
+    // grid cells
+    if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 &&
+        zid < nz - n_ghost) {
+      density_gas  = dev_conserved[id + n_cells * grid_enum::density];
+      density_dust = dev_conserved[id + n_cells * grid_enum::dust_density];
+
+      mass_dust_stride += density_dust * dx * dy * dz;
+
+      if ((density_gas * DENSITY_UNIT) >= (density_cloud_init / 30)) {
+        mass_cloud_stride += density_gas * dx * dy * dz;
+      }
+    }
+  }
+
+  __syncthreads();
+
+  reduction_utilities::Grid_Reduce_Add(mass_cloud_stride, mass_cloud);
+  reduction_utilities::Grid_Reduce_Add(mass_dust_stride, mass_dust);
+
+  /*
+  // int id, xid, yid, zid, gid;
+
+  // calculate ghost cell ID and i,j,k in GPU grid
+  int id = threadIdx.x + blockIdx.x * blockDim.x;
+
+  int isize, jsize;
+
+  // +x boundary first
+  isize = n_ghost;
+  jsize = ny;
+
+  // not true i,j,k but relative i,j,k in the GPU grid
+  zid = id / (isize * jsize);
+  yid = (id - zid * isize * jsize) / isize;
+  xid = id - zid * isize * jsize - yid * isize;
+
+  // map thread id to ghost cell id
+  xid += nx - n_ghost;  // +x boundary
+  int gid = xid + yid * nx + zid * nx * ny;
+
+  // boundary cells
+  // -x boundary
+  if (xid == n_ghost && xid < nx && yid < ny && zid < nz) {
+    velocity_x  = dev_conserved[gid + n_cells * grid_enum::momentum_x] / density_gas;
+    density_gas = dev_conserved[gid + n_cells * grid_enum::density];
+  #ifdef DUST
+    density_dust = dev_conserved[gid + n_cells * grid_enum::dust_density];
+  #endif
+    if (velocity_x < 0) {
+      if ((density_gas * DENSITY_UNIT) >= (density_cloud_init / 3)) {
+        printf("-x %e\n", density_gas * DENSITY_UNIT);
+        mass_cloud_bndry_stride += density_gas * dx * dy * dz;
+        rate_cloud_stride += density_gas * std::abs(velocity_x) * dx * dx;
+      }
+  #ifdef DUST
+      mass_dust_bndry_stride += density_dust * dx * dy * dz;
+      rate_dust_stride += density_dust * std::abs(velocity_x) * dx * dx;
+  #endif  // DUST
+    }
+  }
+  // +x boundary
+  if (xid == nx - n_ghost && xid < nx && yid < ny && zid < nz) {
+    velocity_x  = dev_conserved[gid + n_cells * grid_enum::momentum_x] / density_gas;
+    density_gas = dev_conserved[gid + n_cells * grid_enum::density];
+  #ifdef DUST
+    density_dust = dev_conserved[gid + n_cells * grid_enum::dust_density];
+  #endif
+    if (velocity_x > 0) {
+      if ((density_gas * DENSITY_UNIT) >= (density_cloud_init / 3)) {
+        printf("+x %e\n", density_gas * DENSITY_UNIT);
+        mass_cloud_bndry_stride += density_gas * dx * dy * dz;
+        rate_cloud_stride += density_gas * std::abs(velocity_x) * dx * dx;
+      }
+  #ifdef DUST
+      mass_dust_bndry_stride += density_dust * dx * dy * dz;
+      rate_dust_stride += density_dust * std::abs(velocity_x) * dx * dx;
+  #endif  // DUST
+    }
+  }
+
+  // +y boundary next
+  isize = nx;
+  jsize = n_ghost;
+  // ksize = nz;
+
+  // not true i,j,k but relative i,j,k
+  zid = id / (isize * jsize);
+  yid = (id - zid * isize * jsize) / isize;
+  xid = id - zid * isize * jsize - yid * isize;
+
+  // map thread id to ghost cell id
+  yid += ny - n_ghost;
+  gid = xid + yid * nx + zid * nx * ny;
+
+  // -y boundary
+  if (xid < nx && yid == n_ghost && yid < ny && zid < nz) {
+    velocity_y  = dev_conserved[gid + n_cells * grid_enum::momentum_y] / density_gas;
+    density_gas = dev_conserved[gid + n_cells * grid_enum::density];
+  #ifdef DUST
+    density_dust = dev_conserved[gid + n_cells * grid_enum::dust_density];
+  #endif
+    if (velocity_y < 0) {
+      if ((density_gas * DENSITY_UNIT) >= (density_cloud_init / 3)) {
+        // printf("-y cloud %e\n", density_gas * DENSITY_UNIT);
+        printf("yid: %d\n", yid);
+        mass_cloud_bndry_stride += density_gas * dx * dy * dz;
+        rate_cloud_stride += density_gas * std::abs(velocity_y) * dx * dx;
+      }
+  #ifdef DUST
+      // printf("-y dust %e\n", density_dust * DENSITY_UNIT);
+      mass_dust_bndry_stride += density_dust * dx * dy * dz;
+      rate_dust_stride += density_dust * std::abs(velocity_y) * dx * dx;
+  #endif  // DUST
+    }
+  }
+  // +y boundary
+  if (xid < nx && yid == ny - n_ghost && yid < ny && zid < nz) {
+    velocity_y  = dev_conserved[gid + n_cells * grid_enum::momentum_y] / density_gas;
+    density_gas = dev_conserved[gid + n_cells * grid_enum::density];
+  #ifdef DUST
+    density_dust = dev_conserved[gid + n_cells * grid_enum::dust_density];
+  #endif
+    if (velocity_y > 0) {
+      if ((density_gas * DENSITY_UNIT) >= (density_cloud_init / 3)) {
+        // printf("+y %e\n", density_gas * DENSITY_UNIT);
+        printf("yid: %d\n", yid);
+        printf("yid: %d\n", ny_real);
+        mass_cloud_bndry_stride += density_gas * dx * dy * dz;
+        rate_cloud_stride += density_gas * std::abs(velocity_y) * dx * dx;
+      }
+  #ifdef DUST
+      mass_dust_bndry_stride += density_dust * dx * dy * dz;
+      rate_dust_stride += density_dust * std::abs(velocity_y) * dx * dx;
+  #endif  // DUST
+    }
+  }
+
+  isize = nx;
+  jsize = ny;
+  // ksize = n_ghost;
+
+  // not true i,j,k but relative i,j,k
+  zid = id / (isize * jsize);
+  yid = (id - zid * isize * jsize) / isize;
+  xid = id - zid * isize * jsize - yid * isize;
+
+  // map thread id to ghost cell id
+  zid += nz - n_ghost;
+  gid = xid + yid * nx + zid * nx * ny;
+
+  // -z boundary
+  if (xid < nx && yid < ny && zid == n_ghost && zid < nz) {
+    velocity_z  = dev_conserved[gid + n_cells * grid_enum::momentum_z] / density_gas;
+    density_gas = dev_conserved[gid + n_cells * grid_enum::density];
+  #ifdef DUST
+    density_dust = dev_conserved[gid + n_cells * grid_enum::dust_density];
+  #endif
+    if (velocity_z < 0) {
+      if ((density_gas * DENSITY_UNIT) >= (density_cloud_init / 3)) {
+        printf("-z %e\n", density_gas * DENSITY_UNIT);
+        mass_cloud_bndry_stride += density_gas * dx * dy * dz;
+        rate_cloud_stride += density_gas * std::abs(velocity_z) * dx * dx;
+      }
+  #ifdef DUST
+      mass_dust_bndry_stride += density_dust * dx * dy * dz;
+      rate_dust_stride += density_dust * std::abs(velocity_z) * dx * dx;
+  #endif  // DUST
+    }
+  }
+  // +z boundary
+  if (xid < nx && yid < ny && zid == nz - n_ghost && zid < nz) {
+    velocity_z  = dev_conserved[gid + n_cells * grid_enum::momentum_z] / density_gas;
+    density_gas = dev_conserved[gid + n_cells * grid_enum::density];
+  #ifdef DUST
+    density_dust = dev_conserved[gid + n_cells * grid_enum::dust_density];
+  #endif
+    if (velocity_z > 0) {
+      if ((density_gas * DENSITY_UNIT) >= (density_cloud_init / 3)) {
+        printf("+z %e\n", density_gas * DENSITY_UNIT);
+        mass_cloud_bndry_stride += density_gas * dx * dy * dz;
+        rate_cloud_stride += density_gas * std::abs(velocity_z) * dx * dx;
+      }
+  #ifdef DUST
+      mass_dust_bndry_stride += density_dust * dx * dy * dz;
+      rate_dust_stride += density_dust * std::abs(velocity_z) * dx * dx;
+  #endif  // DUST
+    }
+  }
+  */
+}
+
+  #endif  // OUTFLOW_ANALYSIS
+#endif  // DUST
diff --git a/src/analysis/outflow_analysis.h b/src/analysis/outflow_analysis.h
new file mode 100644
index 000000000..1f945d4f5
--- /dev/null
+++ b/src/analysis/outflow_analysis.h
@@ -0,0 +1,15 @@
+#ifdef OUTFLOW_ANALYSIS
+  #ifndef OUTFLOW_ANALYSIS_CUDA_H
+    #define OUTFLOW_ANALYSIS_CUDA_H
+
+    #include "../global/global.h"
+    #include "../utils/gpu.hpp"
+
+void Outflow_Analysis(Real *dev_conserved, int nx, int ny, int nz, Real dx, Real dy, Real dz, int n_ghost,
+                      int n_fields, Real *mass_cloud, Real *mass_dust, Real density_cloud_init);
+
+__global__ void Outflow_Analysis_Kernel(Real *dev_conserved, int nx, int ny, int nz, Real dx, Real dy, Real dz,
+                                        int n_ghost, int n_fields, Real *mass_cloud, Real *mass_dust,
+                                        Real density_cloud_init);
+  #endif  // OUTFLOW_ANALYSIS_CUDA_H
+#endif  // OUTFLOW_ANALYSIS
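// A self-contained sketch of the pattern Outflow_Analysis_Kernel uses: a
// grid-stride loop accumulates a per-thread partial sum, then the partials are
// combined into one global total. Assumptions: a plain atomicAdd stands in for
// Cholla's reduction_utilities block/grid reduction (same result, simpler), and
// double atomicAdd needs sm_60+, which the Makefile's sm_70 default satisfies.
#include <cstdio>

#include <cuda_runtime.h>

__global__ void Mass_Sum_Kernel(const double *density, int n_cells, double cell_volume, double *total_mass)
{
  double partial = 0.0;
  // grid-stride loop: covers all cells for any launch size
  for (int id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) {
    partial += density[id] * cell_volume;
  }
  atomicAdd(total_mass, partial);  // combine per-thread partials into the global total
}

int main()
{
  int const n_cells = 1 << 20;
  double *density, *total;
  cudaMallocManaged(&density, n_cells * sizeof(double));
  cudaMallocManaged(&total, sizeof(double));
  for (int i = 0; i < n_cells; i++) {
    density[i] = 1.0;
  }
  *total = 0.0;
  Mass_Sum_Kernel<<<256, 256>>>(density, n_cells, 1.0, total);
  cudaDeviceSynchronize();
  std::printf("total mass = %.0f (expect %d)\n", *total, n_cells);
  cudaFree(density);
  cudaFree(total);
  return 0;
}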
diff --git a/src/chemistry_gpu/chemistry_functions.cpp b/src/chemistry_gpu/chemistry_functions.cpp
index 7999a6d55..65e3af691 100644
--- a/src/chemistry_gpu/chemistry_functions.cpp
+++ b/src/chemistry_gpu/chemistry_functions.cpp
@@ -228,7 +228,7 @@ void Grid3D::Update_Chemistry()
 #ifdef COSMOLOGY
   Chem.H.current_z = Cosmo.current_z;
 #else
-  Chem.H.current_z = 0;
+  Chem.H.current_z   = 0;
 #endif

   Do_Chemistry_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, Chem.H);
diff --git a/src/cloud_tracking/cloud_tracking.cu b/src/cloud_tracking/cloud_tracking.cu
new file mode 100644
index 000000000..424fafb3a
--- /dev/null
+++ b/src/cloud_tracking/cloud_tracking.cu
@@ -0,0 +1,125 @@
+#ifdef CLOUD_TRACKING
+
+  // STL includes
+  #include
+
+  #include
+  #include
+  #include
+
+  #include "../cloud_tracking/cloud_tracking.h"
+  #include "../global/global.h"
+  #include "../global/global_cuda.h"
+  #include "../grid/grid3D.h"
+  #include "../grid/grid_enum.h"
+  #include "../utils/DeviceVector.h"
+  #include "../utils/cuda_utilities.h"
+  #include "../utils/gpu.hpp"
+  #include "../utils/hydro_utilities.h"
+  #include "../utils/reduction_utilities.h"
+
+void Cloud_Velocity_Reduction(Real *dev_conserved, int nx, int ny, int nz, Real dx, Real dy, Real dz, int n_ghost,
+                              int n_fields, Real density_cloud_init, Real density_wind_init, Real *mass_cloud,
+                              Real *integrand_cloud)
+{
+  cuda_utilities::AutomaticLaunchParams static const launchParams(Cloud_Reduction_Kernel);
+
+  cuda_utilities::DeviceVector<Real> dev_mass_cloud(1, true);
+  cuda_utilities::DeviceVector<Real> dev_integrand_cloud(1, true);
+
+  // Initialize host vectors to copy results to
+  std::vector<Real> host_mass_cloud{0};
+  std::vector<Real> host_integrand_cloud{0};
+
+  // .data() gets device vector pointers
+  hipLaunchKernelGGL(Cloud_Reduction_Kernel, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, dev_conserved,
+                     nx, ny, nz, dx, dy, dz, n_ghost, n_fields, density_cloud_init, density_wind_init,
+                     dev_mass_cloud.data(), dev_integrand_cloud.data());
+  cudaDeviceSynchronize();
+
+  // Copy result of reductions from device to host
+  dev_mass_cloud.cpyDeviceToHost(host_mass_cloud);
+  dev_integrand_cloud.cpyDeviceToHost(host_integrand_cloud);
+
+  *mass_cloud      = host_mass_cloud[0];
+  *integrand_cloud = host_integrand_cloud[0];
+}
+
+void Update_Grid_Frame(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields,
+                       Real velocity_x_cloud_avg)
+{
+  cuda_utilities::AutomaticLaunchParams static const launchParams(Frame_Shift_Kernel);
+
+  hipLaunchKernelGGL(Frame_Shift_Kernel, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, dev_conserved, nx,
+                     ny, nz, n_ghost, n_fields, velocity_x_cloud_avg);
+  cudaDeviceSynchronize();
+}
+
+__global__ void Cloud_Reduction_Kernel(Real *dev_conserved, int nx, int ny, int nz, Real dx, Real dy, Real dz,
+                                       int n_ghost, int n_fields, Real density_cloud_init, Real density_wind_init,
+                                       Real *mass_cloud, Real *integrand_cloud)
+{
+  int xid, yid, zid, n_cells;
+  n_cells = nx * ny * nz;
+
+  Real integrand_stride = 0.0;
+  Real mass_stride      = 0.0;
+
+  Real density, velocity_x, mass;
+
+  // Grid stride loop
+  for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) {
+    cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid);
+
+    if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 &&
+        zid < nz - n_ghost) {
+      density    = dev_conserved[id + n_cells * grid_enum::density];
+      velocity_x = dev_conserved[id + n_cells * grid_enum::momentum_x] / density;
+      mass       = density * dx * dy * dz;
+      // if ((density * DENSITY_UNIT) >= (pow(density_cloud_init * density_wind_init, 0.5))) {
+      if ((density * DENSITY_UNIT) >= (density_cloud_init / pow(1e3, 0.5))) {
+        mass_stride += mass;
+        // (Shin et al. (2008) eq. 9)
+        integrand_stride += velocity_x * density * dx * dy * dz;
+      }
+    }
+  }
+  __syncthreads();
+
+  reduction_utilities::Grid_Reduce_Add(mass_stride, mass_cloud);
+  reduction_utilities::Grid_Reduce_Add(integrand_stride, integrand_cloud);
+}
+
+__global__ void Frame_Shift_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields,
+                                   Real velocity_x_cloud_avg)
+{
+  int xid, yid, zid, n_cells;
+  n_cells = nx * ny * nz;
+
+  Real density, velocity_x, velocity_y, velocity_z, energy, energy_internal;
+
+  for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) {
+    cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid);
+
+    if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 &&
+        zid < nz - n_ghost) {
+      density    = dev_conserved[id + n_cells * grid_enum::density];
+      velocity_x = dev_conserved[id + n_cells * grid_enum::momentum_x] / density;
+      velocity_y = dev_conserved[id + n_cells * grid_enum::momentum_y] / density;
+      velocity_z = dev_conserved[id + n_cells * grid_enum::momentum_z] / density;
+      energy     = dev_conserved[id + n_cells * grid_enum::Energy];
+
+      energy_internal = energy - 0.5 * density * (pow(velocity_x, 2) + pow(velocity_y, 2) + pow(velocity_z, 2));
+
+      // Apply frame of reference shift
+      dev_conserved[id + n_cells * grid_enum::momentum_x] = (velocity_x - velocity_x_cloud_avg) * density;
+      dev_conserved[id + n_cells * grid_enum::Energy] =
+          energy_internal +
+          0.5 * density * (pow(velocity_x - velocity_x_cloud_avg, 2) + pow(velocity_y, 2) + pow(velocity_z, 2));
+    }
+  }
+  __syncthreads();
+}
+
+#endif  // CLOUD_TRACKING
\ No newline at end of file
diff --git a/src/cloud_tracking/cloud_tracking.h b/src/cloud_tracking/cloud_tracking.h
new file mode 100644
index 000000000..5a563db52
--- /dev/null
+++ b/src/cloud_tracking/cloud_tracking.h
@@ -0,0 +1,23 @@
+#ifdef CLOUD_TRACKING
+  #ifndef CLOUD_TRACKING_CUDA_H
+    #define CLOUD_TRACKING_CUDA_H
+
+    #include "../global/global.h"
+    #include "../utils/gpu.hpp"
+
+void Cloud_Velocity_Reduction(Real *dev_conserved, int nx, int ny, int nz, Real dx, Real dy, Real dz, int n_ghost,
+                              int n_fields, Real density_cloud_init, Real density_wind_init, Real *mass_cloud,
+                              Real *integrand_cloud);
+
+__global__ void Cloud_Reduction_Kernel(Real *dev_conserved, int nx, int ny, int nz, Real dx, Real dy, Real dz,
+                                       int n_ghost, int n_fields, Real density_cloud_init, Real density_wind_init,
+                                       Real *mass_cloud, Real *integrand_cloud);
+
+void Update_Grid_Frame(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields,
+                       Real velocity_x_cloud_avg);
+
+__global__ void Frame_Shift_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields,
+                                   Real velocity_x_cloud_avg);
+
+  #endif  // CLOUD_TRACKING_CUDA_H
+#endif  // CLOUD_TRACKING
\ No newline at end of file
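// A sketch of the transform Frame_Shift_Kernel applies, in plain C++ so the
// energy bookkeeping is easy to check: subtracting the cloud's mean x-velocity
// must leave the internal energy untouched, so the total energy is rebuilt from
// the preserved internal part plus the new kinetic part. Cell and shift_frame
// are illustrative names, not Cholla API.
#include <cstdio>

struct Cell {
  double d, mx, my, mz, E;  // conserved: density, momenta, total energy
};

void shift_frame(Cell &c, double v_shift)
{
  double const vx = c.mx / c.d, vy = c.my / c.d, vz = c.mz / c.d;
  double const e_internal = c.E - 0.5 * c.d * (vx * vx + vy * vy + vz * vz);
  double const vx_new = vx - v_shift;  // move into the frame co-moving with the cloud
  c.mx = c.d * vx_new;
  c.E  = e_internal + 0.5 * c.d * (vx_new * vx_new + vy * vy + vz * vz);
}

int main()
{
  Cell c{1.0, 2.0, 0.0, 0.0, 5.0};  // vx = 2, internal energy = 3
  shift_frame(c, 2.0);
  std::printf("mx = %.1f, E = %.1f\n", c.mx, c.E);  // mx = 0.0, E = 3.0 (kinetic term gone)
  return 0;
}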
diff --git a/src/cooling/cooling_cuda.cu b/src/cooling/cooling_cuda.cu
index b5c74ec7c..4c24b657e 100644
--- a/src/cooling/cooling_cuda.cu
+++ b/src/cooling/cooling_cuda.cu
@@ -164,7 +164,7 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int
 #ifdef CLOUDY_COOL
     cool = Cloudy_cool(n, T, coolTexObj, heatTexObj);
 #else
-    cool = CIE_cool(n, T);
+    cool       = CIE_cool(n, T);
     // printf("%d %d %d %e %e %e\n", xid, yid, zid, n, T, cool);
 #endif

diff --git a/src/cooling_grackle/cool_grackle.cpp b/src/cooling_grackle/cool_grackle.cpp
index a7f5c36cb..c5f2a8078 100644
--- a/src/cooling_grackle/cool_grackle.cpp
+++ b/src/cooling_grackle/cool_grackle.cpp
@@ -89,7 +89,7 @@ void Cool_GK::Initialize(struct Parameters *P, Cosmology &Cosmo)
   data->metal_cooling = 1;  // metal cooling off
 #else
   chprintf("WARNING: Metal Cooling is Off. \n");
-  data->metal_cooling = 0;  // metal cooling off
+  data->metal_cooling   = 0;  // metal cooling off
 #endif

 #ifdef PARALLEL_OMP
diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu
index 8b72facdf..f5cb6b7b7 100644
--- a/src/dust/dust_cuda.cu
+++ b/src/dust/dust_cuda.cu
@@ -9,6 +9,7 @@
 #ifdef DUST

 // STL includes
+  #include
   #include
   #include

@@ -21,24 +22,34 @@
   #include "../global/global_cuda.h"
   #include "../grid/grid3D.h"
   #include "../grid/grid_enum.h"
+  #include "../utils/DeviceVector.h"
   #include "../utils/cuda_utilities.h"
   #include "../utils/gpu.hpp"
   #include "../utils/hydro_utilities.h"
+  #include "../utils/reduction_utilities.h"

-void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma,
-                 Real grain_radius)
+void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz,
+                 Real dt, Real gamma, Real grain_radius, Real *mass_hot, Real *mass_mixed)
 {
   int n_cells = nx * ny * nz;
   int ngrid   = (n_cells + TPB - 1) / TPB;
   dim3 dim1dGrid(ngrid, 1, 1);
   dim3 dim1dBlock(TPB, 1, 1);
-  hipLaunchKernelGGL(Dust_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gamma,
-                     grain_radius);
+
+  cuda_utilities::DeviceVector<Real> dev_mass_mixed(1, true);
+  cuda_utilities::DeviceVector<Real> dev_mass_hot(1, true);
+
+  hipLaunchKernelGGL(Dust_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dx, dy,
+                     dz, dt, gamma, grain_radius, dev_mass_mixed.data(), dev_mass_hot.data());
   GPU_Error_Check();
+  cudaDeviceSynchronize();
+
+  *mass_mixed = dev_mass_mixed[0];
+  *mass_hot   = dev_mass_hot[0];
 }

-__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt,
-                            Real gamma, Real grain_radius)
+__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy,
+                            Real dz, Real dt, Real gamma, Real grain_radius, Real *mass_hot, Real *mass_mixed)
 {
   // get grid indices
   int n_cells = nx * ny * nz;
@@ -54,11 +65,13 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g
   // define physics variables
   Real density_gas, density_dust;  // fluid mass densities
   Real number_density;             // gas number density
-  Real mu = 0.6;                   // mean molecular weight
+  Real mu              = 0.6;      // mean molecular weight
+  Real sputtered_hot   = 0;
+  Real sputtered_mixed = 0;  // mixed and hot-phase sputtered dust masses

   // define integration variables
   Real dd_dt;          // instantaneous rate of change in dust density
-  Real dd;             // change in dust density at current timestep
+  Real dd     = 0;     // change in dust density at current timestep
   Real dd_max = 0.01;  // allowable percentage of dust density increase
   Real dt_sub;         // refined timestep

@@ -110,8 +123,18 @@ __global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_g
     // update dust density
     density_dust += dd;

+    if (temperature >= 1e6) {
+      sputtered_hot += fabs(dd * dx * dy * dz);
+    } else {
+      sputtered_mixed += fabs(dd * dx * dy * dz);
+    }
+
     dev_conserved[id + n_cells * grid_enum::dust_density] = density_dust;
   }
+  __syncthreads();
+
+  reduction_utilities::Grid_Reduce_Add(sputtered_hot, mass_hot);
+  reduction_utilities::Grid_Reduce_Add(sputtered_mixed, mass_mixed);
 }

 // McKinnon et al. (2017) sputtering timescale
diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h
index 212901e8a..3b52cf96f 100644
--- a/src/dust/dust_cuda.h
+++ b/src/dust/dust_cuda.h
@@ -27,8 +27,8 @@
  * \param[in] dt Simulation timestep
  * \param[in] gamma Specific heat ratio
  */
-void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma,
-                 Real grain_radius);
+void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz,
+                 Real dt, Real gamma, Real grain_radius, Real *mass_hot, Real *mass_mixed);

 /*!
  * \brief Compute the change in dust density for a cell and update its value in dev_conserved.
@@ -43,8 +43,8 @@
  * \param[in] dt Simulation timestep
  * \param[in] gamma Specific heat ratio
  */
-__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt,
-                            Real gamma, Real grain_radius);
+__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy,
+                            Real dz, Real dt, Real gamma, Real grain_radius, Real *mass_hot, Real *mass_mixed);

 /*!
  * \brief Compute the sputtering timescale based on a cell's density and temperature.
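// A sketch of the McKinnon et al. (2017) thermal sputtering model the updated
// dust kernel integrates: tau_sp = 0.17 Gyr * (a / 0.1 um) * (1e-27 g cm^-3 / rho)
// * ((T_0 / T)^omega + 1), with T_0 = 2e6 K and omega = 2.5, and the dust density
// decaying as d(rho_dust)/dt = -rho_dust / (tau_sp / 3). Constants are the
// published fit values; the solver's exact normalization lives in dust_cuda.cu
// and should be taken from there.
#include <cmath>
#include <cstdio>

double tau_sputtering_gyr(double grain_radius, double density_gas, double temperature)
{
  // grain_radius in units of 0.1 um, density_gas in g cm^-3, temperature in K
  double const T_0 = 2.0e6, omega = 2.5, rho_ref = 1.0e-27;
  return 0.17 * grain_radius * (rho_ref / density_gas) * (std::pow(T_0 / temperature, omega) + 1.0);
}

int main()
{
  double const mu = 0.6, MP = 1.672622e-24;  // mean molecular weight, proton mass [g]
  double const n = 1e-2, T = 1e7;            // hot, dilute wind gas
  double const rho = n * mu * MP;            // mass density [g cm^-3]
  double const tau_sp = tau_sputtering_gyr(1.0, rho, T);

  double rho_dust = 1.0, dt = 1e-3;                // arbitrary density units, Gyr
  rho_dust += dt * (-rho_dust / (tau_sp / 3.0));   // one explicit update step
  std::printf("tau_sp = %e Gyr, rho_dust = %f\n", tau_sp, rho_dust);
  return 0;
}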
diff --git a/src/global/global.cpp b/src/global/global.cpp
index 39d4fd178..cabf95b0e 100644
--- a/src/global/global.cpp
+++ b/src/global/global.cpp
@@ -101,10 +101,27 @@ char *Trim(char *s)
 }

 // NOLINTNEXTLINE(cert-err58-cpp)
-const std::set<const char *> optionalParams = {
-    "flag_delta",   "ddelta_dt",   "n_delta",  "Lz",      "Lx",      "phi",           "theta",
-    "delta",        "nzr",         "nxr",      "H0",      "Omega_M", "Omega_L",       "Init_redshift",
-    "End_redshift", "tile_length", "n_proc_x", "n_proc_y", "n_proc_z"};
+const std::set<const char *> optionalParams = {"flag_delta",
+                                               "ddelta_dt",
+                                               "n_delta",
+                                               "Lz",
+                                               "Lx",
+                                               "phi",
+                                               "theta",
+                                               "delta",
+                                               "nzr",
+                                               "nxr",
+                                               "H0",
+                                               "Omega_M",
+                                               "Omega_L",
+                                               "Init_redshift",
+                                               "End_redshift",
+                                               "tile_length",
+                                               "n_proc_x",
+                                               "n_proc_y",
+                                               "n_proc_z",
+                                               "density_cloud_init",
+                                               "density_wind_init"};

 /*! \fn int Is_Param_Valid(char *name);
  * \brief Verifies that a param is valid (even if not needed). Avoids
@@ -495,10 +512,19 @@ void Parse_Param(char *name, char *value, struct Parameters *parms)
   } else if (strcmp(name, "supernova_rcl") == 0) {
     parms->supernova_rcl = atof(value);
 #endif
+#ifdef CLOUD_TRACKING
+  } else if (strcmp(name, "density_wind_init") == 0) {
+    parms->density_wind_init = atof(value);
+#endif
+#if defined(OUTFLOW_ANALYSIS) || defined(CLOUD_TRACKING)
+  } else if (strcmp(name, "density_cloud_init") == 0) {
+    parms->density_cloud_init = atof(value);
+#endif
 #ifdef SCALAR
   #ifdef DUST
   } else if (strcmp(name, "grain_radius") == 0) {
-    parms->grain_radius = atoi(value);
+    parms->grain_radius = atof(value);
+    chprintf("Grain radius: %e\n", parms->grain_radius);
   #endif
 #endif
   } else if (!Is_Param_Valid(name)) {
diff --git a/src/global/global.h b/src/global/global.h
index 74aa5c718..28e3a3672 100644
--- a/src/global/global.h
+++ b/src/global/global.h
@@ -309,6 +309,12 @@ struct Parameters {
   char scale_outputs_file[MAXLEN];  // File for the scale_factor output values
                                     // for cosmological simulations
 #endif  // COSMOLOGY
+#ifdef CLOUD_TRACKING
+  Real density_wind_init;
+#endif
+#if defined(CLOUD_TRACKING) || defined(OUTFLOW_ANALYSIS)
+  Real density_cloud_init;
+#endif
 #ifdef TILED_INITIAL_CONDITIONS
   Real tile_length;
 #endif  // TILED_INITIAL_CONDITIONS
diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp
index 744f55825..8ed5fb590 100644
--- a/src/gravity/gravity_functions.cpp
+++ b/src/gravity/gravity_functions.cpp
@@ -134,7 +134,7 @@ void Grid3D::set_dt_Gravity()
     dt_particles = Calc_Particles_dt();
     dt_particles = fmin(dt_particles, Particles.max_dt);
 #ifdef ONLY_PARTICLES
-    dt_min = dt_particles;
+    dt_min       = dt_particles;
     chprintf(" dt_particles: %f \n", dt_particles);
 #else
     chprintf(" dt_hydro: %f   dt_particles: %f \n", dt_hydro, dt_particles);
@@ -208,7 +208,7 @@ Real Grav3D::Get_Average_Density()
 #ifdef MPI_CHOLLA
   dens_avrg_all = ReduceRealAvg(dens_mean);
 #else
-  dens_avrg_all = dens_mean;
+  dens_avrg_all    = dens_mean;
 #endif

   dens_avrg = dens_avrg_all;
@@ -527,8 +527,8 @@ void Grid3D::Compute_Gravitational_Potential(struct Parameters *P)
   input_density    = Grav.F.density_d;
   output_potential = Grav.F.potential_d;
 #else
-  input_density = Grav.F.density_h;
-  output_potential = Grav.F.potential_h;
+  input_density    = Grav.F.density_h;
+  output_potential = Grav.F.potential_h;
 #endif

 #ifdef SOR
diff --git a/src/gravity/gravity_functions_gpu.cu b/src/gravity/gravity_functions_gpu.cu
index b92d19084..15de64a95 100644
--- a/src/gravity/gravity_functions_gpu.cu
+++ b/src/gravity/gravity_functions_gpu.cu
@@ -127,7 +127,7 @@ void Grid3D::Copy_Hydro_Density_to_Gravity_GPU()
 #ifdef COSMOLOGY
   cosmo_rho_0_gas = Cosmo.rho_0_gas;
 #else
-  cosmo_rho_0_gas = 1.0;
+  cosmo_rho_0_gas  = 1.0;
 #endif

   // Copy the density from the device array to the Poisson input density array
@@ -261,7 +261,7 @@ void Grid3D::Extrapolate_Grav_Potential_GPU()
 #ifdef COSMOLOGY
   cosmo_factor = Cosmo.current_a * Cosmo.current_a / Cosmo.phi_0_gas;
 #else
-  cosmo_factor = 1.0;
+  cosmo_factor  = 1.0;
 #endif

   // set values for GPU kernels
diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp
index d9201fe8b..d7a1be2bc 100644
--- a/src/grid/boundary_conditions.cpp
+++ b/src/grid/boundary_conditions.cpp
@@ -534,8 +534,13 @@ void Grid3D::Wind_Boundary()
   z_off = nz_local_start;
 #endif

+#ifndef CLOUD_TRACKING
   Wind_Boundary_CUDA(C.device, H.nx, H.ny, H.nz, H.n_cells, H.n_ghost, x_off, y_off, z_off, H.dx, H.dy, H.dz, H.xbound,
-                     H.ybound, H.zbound, gama, H.t);
+                     H.ybound, H.zbound, gama, H.t, 0.0, 0.0);
+#else  /*CLOUD_TRACKING*/
+  Wind_Boundary_CUDA(C.device, H.nx, H.ny, H.nz, H.n_cells, H.n_ghost, x_off, y_off, z_off, H.dx, H.dy, H.dz, H.xbound,
+                     H.ybound, H.zbound, gama, H.t, H.velocity_x_cloud_avg, H.density_wind_init);
+#endif /*CLOUD_TRACKING*/
 }

 /*! \fn void Noh_Boundary()
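// A stripped-down sketch of the path the two new runtime parameters take: a
// "name=value" line from the parameter file is matched with strcmp in
// Parse_Param and stored on struct Parameters with atof. Only the two names
// this patch registers are handled here; everything else is illustrative.
#include <cstdio>
#include <cstdlib>
#include <cstring>

struct Parameters {
  double density_cloud_init;  // cgs, per this patch's usage (divided by DENSITY_UNIT later)
  double density_wind_init;
};

void parse_param(const char *name, const char *value, Parameters *parms)
{
  if (strcmp(name, "density_cloud_init") == 0) {
    parms->density_cloud_init = atof(value);
  } else if (strcmp(name, "density_wind_init") == 0) {
    parms->density_wind_init = atof(value);
  }
}

int main()
{
  Parameters p{};
  parse_param("density_cloud_init", "1.0e-24", &p);
  parse_param("density_wind_init", "1.0e-26", &p);
  std::printf("cloud: %e, wind: %e\n", p.density_cloud_init, p.density_wind_init);
  return 0;
}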
diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu
index baf846d3c..4429b317f 100644
--- a/src/grid/cuda_boundaries.cu
+++ b/src/grid/cuda_boundaries.cu
@@ -304,23 +304,26 @@ __device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *
 __global__ void Wind_Boundary_kernel(Real *c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off,
                                      int y_off, int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound,
-                                     Real zbound, Real gamma, Real t)
+                                     Real zbound, Real gamma, Real t, Real velocity_x_cloud_avg,
+                                     Real density_wind_init)
 {
   int id, xid, yid, zid, gid;
-  Real n_0, T_0;
   Real mu = 0.6;
-  Real vx, vy, vz, d_0, P_0;

-  n_0 = 1e-2;  // same value as n_bg in cloud initial condition function (cm^-3)
-  T_0 = 3e6;   // same value as T_bg in cloud initial condition function (K)
+  Real density, velocity_x, velocity_y, velocity_z, pressure, number_density;
+
+  number_density   = 1e-2;  // same value as n_bg in cloud initial condition function (cm^-3)
+  Real temperature = 3e7;   // same value as T_bg in cloud initial condition function (K)

   // same values as rho_bg and p_bg in cloud initial condition function
-  d_0 = n_0 * mu * MP / DENSITY_UNIT;
-  P_0 = n_0 * KB * T_0 / PRESSURE_UNIT;
+  density  = number_density * mu * MP / DENSITY_UNIT;
+  pressure = number_density * KB * temperature / PRESSURE_UNIT;
+#ifdef CLOUD_TRACKING
+  density = density_wind_init / DENSITY_UNIT;
+#endif

-  vx = 100 * TIME_UNIT / KPC;  // km/s * (cholla unit conversion)
-  vy = 0.0;
-  vz = 0.0;
+  velocity_x = 1000 * TIME_UNIT / KPC;  // km/s * (cholla unit conversion)
+  velocity_y = 0.0;
+  velocity_z = 0.0;

   // calculate ghost cell ID and i,j,k in GPU grid
   id = threadIdx.x + blockIdx.x * blockDim.x;
@@ -332,13 +335,29 @@ __global__ void Wind_Boundary_kernel(Real *c_device, int nx, int ny, int nz, int
   xid += 0;  // -x boundary
   gid = xid + yid * nx + zid * nx * ny;

-  if (xid <= n_ghost && xid < nx && yid < ny && zid < nz) {
+  if (xid < n_ghost && xid < nx && yid < ny && zid < nz) {
     // set conserved variables
-    c_device[gid] = d_0;
-    c_device[gid + 1 * n_cells] = vx * d_0;
-    c_device[gid + 2 * n_cells] = vy * d_0;
-    c_device[gid + 3 * n_cells] = vz * d_0;
-    c_device[gid + 4 * n_cells] = P_0 / (gamma - 1.0) + 0.5 * d_0 * (vx * vx + vy * vy + vz * vz);
+#ifndef CLOUD_TRACKING
+    c_device[gid + n_cells * grid_enum::density]    = density;
+    c_device[gid + n_cells * grid_enum::momentum_x] = velocity_x * density;
+    c_device[gid + n_cells * grid_enum::momentum_y] = velocity_y * density;
+    c_device[gid + n_cells * grid_enum::momentum_z] = velocity_z * density;
+    c_device[gid + n_cells * grid_enum::Energy] =
+        pressure / (gamma - 1.0) +
+        0.5 * density * (velocity_x * velocity_x + velocity_y * velocity_y + velocity_z * velocity_z);
+#else   // CLOUD_TRACKING: apply the frame shift to the inflowing wind
+    c_device[gid + n_cells * grid_enum::density]    = density;
+    c_device[gid + n_cells * grid_enum::momentum_x] = (velocity_x - velocity_x_cloud_avg) * density;
+    c_device[gid + n_cells * grid_enum::momentum_y] = velocity_y * density;
+    c_device[gid + n_cells * grid_enum::momentum_z] = velocity_z * density;
+    c_device[gid + n_cells * grid_enum::Energy] =
+        pressure / (gamma - 1.0) +
+        0.5 * density * (pow(velocity_x - velocity_x_cloud_avg, 2) + pow(velocity_y, 2) + pow(velocity_z, 2));
+#endif  // CLOUD_TRACKING
+#ifdef SCALAR
+  #ifdef DUST
+    c_device[gid + n_cells * grid_enum::dust_density] = 0.0;
+  #endif
+#endif
   }
   __syncthreads();
 }
@@ -514,7 +533,8 @@ __global__ void Noh_Boundary_kernel(Real *c_device, int nx, int ny, int nz, int
 }

 void Wind_Boundary_CUDA(Real *c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, int y_off,
-                        int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma,
-                        Real t)
+                        int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t,
+                        Real velocity_x_cloud_avg, Real density_wind_init)
 {
   // determine the size of the grid to launch
   // need at least as many threads as the largest boundary face
@@ -529,7 +549,8 @@ void Wind_Boundary_CUDA(Real *c_device, int nx, int ny, int nz, int n_cells, int
   // launch the boundary kernel
   hipLaunchKernelGGL(Wind_Boundary_kernel, dim1dGrid, dim1dBlock, 0, 0, c_device, nx, ny, nz, n_cells, n_ghost, x_off,
-                     y_off, z_off, dx, dy, dz, xbound, ybound, zbound, gamma, t);
+                     y_off, z_off, dx, dy, dz, xbound, ybound, zbound, gamma, t, velocity_x_cloud_avg,
+                     density_wind_init);
 }

 void Noh_Boundary_CUDA(Real *c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, int y_off,
@@ -549,4 +570,4 @@ void Noh_Boundary_CUDA(Real *c_device, int nx, int ny, int nz, int n_cells, int
   // launch the boundary kernel
   hipLaunchKernelGGL(Noh_Boundary_kernel, dim1dGrid, dim1dBlock, 0, 0, c_device, nx, ny, nz, n_cells, n_ghost, x_off,
                      y_off, z_off, dx, dy, dz, xbound, ybound, zbound, gamma, t);
-}
\ No newline at end of file
+}
diff --git a/src/grid/cuda_boundaries.h b/src/grid/cuda_boundaries.h
index bbf0a5ab8..da38eee11 100644
--- a/src/grid/cuda_boundaries.h
+++ b/src/grid/cuda_boundaries.h
@@ -16,8 +16,8 @@ void SetGhostCells(Real* c_head, int nx, int ny, int nz, int n_fields, int n_cel
                    int jsize, int ksize, int imin, int jmin, int kmin, int dir);

 void Wind_Boundary_CUDA(Real* c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, int y_off,
-                        int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma,
-                        Real t);
+                        int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t,
+                        Real velocity_x_cloud_avg, Real density_wind_init);

 void Noh_Boundary_CUDA(Real* c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, int y_off,
                        int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t);
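// A sketch of the unit conversions Wind_Boundary_kernel performs: cgs number
// density and temperature become code-unit mass density and pressure, and the
// 1000 km/s wind speed becomes kpc/kyr. The unit constants follow Cholla's
// kpc / kyr / M_sun system as defined in global.h; confirm against the source
// before reusing the values.
#include <cstdio>

int main()
{
  double const MP = 1.672622e-24, KB = 1.380658e-16;  // proton mass [g], Boltzmann constant [erg/K]
  double const LENGTH_UNIT = 3.08567758e21;           // kpc in cm
  double const TIME_UNIT   = 3.15569e10;              // kyr in s
  double const MASS_UNIT   = 1.98855e33;              // M_sun in g

  double const DENSITY_UNIT  = MASS_UNIT / (LENGTH_UNIT * LENGTH_UNIT * LENGTH_UNIT);
  double const VELOCITY_UNIT = LENGTH_UNIT / TIME_UNIT;
  double const PRESSURE_UNIT = DENSITY_UNIT * VELOCITY_UNIT * VELOCITY_UNIT;

  double const mu = 0.6, n = 1e-2, T = 3e7;  // wind values used in the kernel
  double const density  = n * mu * MP / DENSITY_UNIT;
  double const pressure = n * KB * T / PRESSURE_UNIT;
  double const vx       = 1000.0 * 1.0e5 / VELOCITY_UNIT;  // 1000 km/s -> kpc/kyr

  std::printf("d = %e, P = %e, vx = %e (code units)\n", density, pressure, vx);
  return 0;
}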
diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp
index ffb446665..215a053ab 100644
--- a/src/grid/grid3D.cpp
+++ b/src/grid/grid3D.cpp
@@ -38,10 +38,18 @@
   #include "../cooling/cooling_cuda.h"  // provides Cooling_Update
 #endif

+#ifdef CLOUD_TRACKING
+  #include "../cloud_tracking/cloud_tracking.h"
+#endif
+
 #ifdef DUST
   #include "../dust/dust_cuda.h"  // provides Dust_Update
 #endif

+#ifdef OUTFLOW_ANALYSIS
+  #include "../analysis/outflow_analysis.h"  // provides Outflow_Analysis
+#endif
+
 /*! \fn Grid3D(void)
  *  \brief Constructor for the Grid. */
 Grid3D::Grid3D(void)
@@ -157,8 +165,16 @@ void Grid3D::Initialize(struct Parameters *P)
   C_cfl = 0.3;

 #ifdef AVERAGE_SLOW_CELLS
-  H.min_dt_slow = 1e-100;  // Initialize the minumum dt to a tiny number
-#endif  // AVERAGE_SLOW_CELLS
+  H.min_dt_slow = 0.005;  // Threshold dt below which cells are treated as slow and averaged
+#endif  // AVERAGE_SLOW_CELLS
+
+#ifdef CLOUD_TRACKING
+  H.density_wind_init = P->density_wind_init;
+#endif
+
+#if defined(CLOUD_TRACKING) || defined(OUTFLOW_ANALYSIS)
+  H.density_cloud_init = P->density_cloud_init;
+#endif

 #ifndef MPI_CHOLLA
@@ -523,8 +539,74 @@ Real Grid3D::Update_Hydro_Grid()

 #ifdef DUST
   // ==Apply dust from dust/dust_cuda.h==
-  Dust_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama, H.grain_radius);
-#endif  // DUST
+  Real mass_mixed, mass_hot;
+  Real mass_mixed_tot, mass_hot_tot;
+  Dust_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dx, H.dy, H.dz, H.dt, gama, H.grain_radius,
+              &mass_mixed, &mass_hot);
+  #ifdef MPI_CHOLLA
+  MPI_Barrier(world);
+  MPI_Allreduce(&mass_mixed, &mass_mixed_tot, 1, MPI_CHREAL, MPI_SUM, world);
+  MPI_Allreduce(&mass_hot, &mass_hot_tot, 1, MPI_CHREAL, MPI_SUM, world);
+  #else
+  mass_mixed_tot = mass_mixed;
+  mass_hot_tot   = mass_hot;
+  #endif  // MPI_CHOLLA
+  chprintf("** Mixed sputtered mass: %e  Hot sputtered mass: %e \n", mass_mixed_tot, mass_hot_tot);
+#endif  // DUST
+
+#ifdef CLOUD_TRACKING
+  Real mass_cloud_tracked, integrand_cloud, velocity_x_cloud_avg;
+  Real integrand_reduced, mass_reduced;
+  // Do the grid-wide reduction to get the sum of rho*vx*V and the total mass for the entire cloud
+  Cloud_Velocity_Reduction(C.device, H.nx, H.ny, H.nz, H.dx, H.dy, H.dz, H.n_ghost, H.n_fields, H.density_cloud_init,
+                           H.density_wind_init, &mass_cloud_tracked, &integrand_cloud);
+
+  #ifdef MPI_CHOLLA
+  MPI_Allreduce(&integrand_cloud, &integrand_reduced, 1, MPI_CHREAL, MPI_SUM, world);
+  MPI_Allreduce(&mass_cloud_tracked, &mass_reduced, 1, MPI_CHREAL, MPI_SUM, world);
+  #else
+  integrand_reduced = integrand_cloud;
+  mass_reduced      = mass_cloud_tracked;
+  #endif  // MPI_CHOLLA
+
+  // Calculate the mass-averaged x-velocity (Shin et al. (2008) eq. 9)
+  if ((integrand_reduced == 0) or (mass_reduced == 0)) {
+    velocity_x_cloud_avg = 0;
+  } else {
+    velocity_x_cloud_avg = integrand_reduced / mass_reduced;
+  }
+
+  // Update the cumulative reference frame shift
+  H.velocity_x_cloud_avg += velocity_x_cloud_avg;
+
+  // printf("cumulative velocity: %e\n", H.velocity_x_cloud_avg);
+
+  chprintf("Average cloud velocity = %e km/s\n", velocity_x_cloud_avg * KPC / TIME_UNIT);
+  chprintf("Mass = %e M_sun\n", mass_reduced);
+
+  #ifdef MPI_CHOLLA
+  MPI_Barrier(world);
+  #endif
+  // Subtract this timestep's reference frame shift off from the entire grid
+  Update_Grid_Frame(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, velocity_x_cloud_avg);
+
+#endif  // CLOUD_TRACKING
+
+#ifdef DUST
+  #ifdef OUTFLOW_ANALYSIS
+  Real mass_cloud = 0, mass_dust = 0;
+
+  Outflow_Analysis(C.device, H.nx, H.ny, H.nz, H.dx, H.dy, H.dz, H.n_ghost, H.n_fields, &mass_cloud, &mass_dust,
+                   H.density_cloud_init);
+
+  Real arr_unreduced[2] = {mass_cloud, mass_dust};
+  Real arr_reduced[2];
+    #ifdef MPI_CHOLLA
+  MPI_Barrier(world);
+  MPI_Allreduce(&arr_unreduced, &arr_reduced, 2, MPI_CHREAL, MPI_SUM, world);
+    #else
+  arr_reduced[0] = arr_unreduced[0];
+  arr_reduced[1] = arr_unreduced[1];
+    #endif  // MPI_CHOLLA
+
+  chprintf("@@ Cloud mass: %e  Dust mass: %e \n", arr_reduced[0], arr_reduced[1]);
+  #endif  // OUTFLOW_ANALYSIS
+#endif  // DUST

 #ifdef CHEMISTRY_GPU
   // Update the H and He ionization fractions and apply cooling and photoheating
@@ -533,6 +615,7 @@ Real Grid3D::Update_Hydro_Grid()
     Timer.Chemistry.RecordTime(Chem.H.runtime_chemistry_step);
     non_hydro_elapsed_time += Chem.H.runtime_chemistry_step;
 #endif
+
   C.HI_density  = &C.host[H.n_cells * grid_enum::HI_density];
   C.HII_density = &C.host[H.n_cells * grid_enum::HII_density];
   C.HeI_density = &C.host[H.n_cells * grid_enum::HeI_density];
diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h
index e248f6490..4ca127180 100644
--- a/src/grid/grid3D.h
+++ b/src/grid/grid3D.h
@@ -214,6 +214,14 @@ struct Header {
   Real min_dt_slow;
 #endif

+#ifdef CLOUD_TRACKING
+  Real density_wind_init;
+  Real velocity_x_cloud_avg = 0;
+#endif  // CLOUD_TRACKING
+
+#if defined(CLOUD_TRACKING) || defined(OUTFLOW_ANALYSIS)
+  Real density_cloud_init;
+#endif
   /*! \var t_wall
    *  \brief Wall time */
   Real t_wall;
@@ -518,6 +526,10 @@ class Grid3D
    *  \brief Write xy, xz, and yz slices of all data to a file. */
   void Write_Slices_HDF5(hid_t file_id);

+  /*! \fn void Write_Edges_HDF5(hid_t file_id)
+   *  \brief Write the simulation boundaries. */
+  void Write_Edges_HDF5(hid_t file_id);
+
 #endif

   /*! \fn void Read_Grid(struct Parameters P)
@@ -681,7 +693,7 @@ class Grid3D
    *  gravitational collapse */
   void Spherical_Overdensity_3D();

-  void Clouds();
+  void Clouds(struct Parameters P);

   void Uniform_Grid();
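// A serial sketch of the mass-weighted cloud velocity used above (Shin et al.
// 2008, eq. 9): <v_x> = sum(rho * v_x * dV) / sum(rho * dV) over cells above the
// tracking density threshold. The solver computes the two sums with a GPU
// reduction followed by MPI_Allreduce; the toy cells below are illustrative.
#include <cstdio>
#include <utility>
#include <vector>

int main()
{
  // {density, x-velocity} pairs; cell volume dV = 1 throughout
  std::vector<std::pair<double, double>> cells = {{10.0, 0.5}, {12.0, 0.3}, {0.01, 50.0}};
  double const threshold = 1.0;  // only dense (cloud) cells contribute

  double integrand = 0.0, mass = 0.0;
  for (auto const &[rho, vx] : cells) {
    if (rho >= threshold) {
      integrand += rho * vx;  // rho * v_x * dV
      mass += rho;            // rho * dV
    }
  }
  double const v_avg = (mass == 0.0) ? 0.0 : integrand / mass;  // guard empty reductions
  std::printf("<v_x> = %f\n", v_avg);  // (10*0.5 + 12*0.3) / 22 = 0.3909...
  return 0;
}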
diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp
index af558be8f..d6da8df19 100644
--- a/src/grid/initial_conditions.cpp
+++ b/src/grid/initial_conditions.cpp
@@ -68,7 +68,7 @@ void Grid3D::Set_Initial_Conditions(Parameters P)
   } else if (strcmp(P.init, "Spherical_Overdensity_3D") == 0) {
     Spherical_Overdensity_3D();
   } else if (strcmp(P.init, "Clouds") == 0) {
-    Clouds();
+    Clouds(P);
   } else if (strcmp(P.init, "Read_Grid") == 0) {
 #ifndef ONLY_PARTICLES
     Read_Grid(P);
@@ -1310,7 +1310,7 @@ void Grid3D::Spherical_Overdensity_3D()

 /*! \fn void Clouds()
  *  \brief Bunch of clouds. */
-void Grid3D::Clouds()
+void Grid3D::Clouds(struct Parameters P)
 {
   int i, j, k, id;
   int istart, jstart, kstart, iend, jend, kend;
@@ -1324,7 +1324,7 @@ void Grid3D::Clouds()
   Real p_bg, p_cl;     // background and cloud pressure
   Real mu = 0.6;       // mean atomic weight
   int N_cl = 1;        // number of clouds
-  Real R_cl = 2.5;     // cloud radius in code units (kpc)
+  Real R_cl = 0.1;     // cloud radius in code units (kpc)
   Real cl_pos[N_cl][3];  // array of cloud positions
   Real r;

@@ -1339,22 +1339,28 @@ void Grid3D::Clouds()
   // single centered cloud setup
   for (int nn = 0; nn < N_cl; nn++) {
-    cl_pos[nn][0] = 0.5 * H.xdglobal;
+    cl_pos[nn][0] = 0.0375 * H.xdglobal;
     cl_pos[nn][1] = 0.5 * H.ydglobal;
     cl_pos[nn][2] = 0.5 * H.zdglobal;
     printf("Cloud positions: %f %f %f\n", cl_pos[nn][0], cl_pos[nn][1], cl_pos[nn][2]);
   }

-  n_bg = 1.68e-4;
-  n_cl = 5.4e-2;
+  n_bg   = 1.0e-2;
+  n_cl   = 10;
   rho_bg = n_bg * mu * MP / DENSITY_UNIT;
   rho_cl = n_cl * mu * MP / DENSITY_UNIT;
-  vx_bg = 0.0;
+#ifdef CLOUD_TRACKING
+  rho_cl = P.density_cloud_init / DENSITY_UNIT;
+  rho_bg = P.density_wind_init / DENSITY_UNIT;
+  printf("Cloud initial density: %e\n", P.density_cloud_init);
+  printf("Wind initial density: %e\n", P.density_wind_init);
+#endif
+  vx_bg = 1000 * TIME_UNIT / KPC;
   // vx_c = -200*TIME_UNIT/KPC; // convert from km/s to kpc/kyr
-  vx_cl = 0.0;
+  vx_cl = 0 * TIME_UNIT / KPC;
   vy_bg = vy_cl = 0.0;
   vz_bg = vz_cl = 0.0;
-  T_bg = 3e6;
+  T_bg  = 3e7;
   T_cl  = 1e4;
   p_bg  = n_bg * KB * T_bg / PRESSURE_UNIT;
   p_cl  = p_bg;
@@ -1399,7 +1405,7 @@ void Grid3D::Clouds()
 #ifdef DUST
         C.host[id + H.n_cells * grid_enum::dust_density] = 0.0;
 #endif
-#endif
+#endif  // SCALAR
         // add clouds
         for (int nn = 0; nn < N_cl; nn++) {
           r = sqrt((x_pos - cl_pos[nn][0]) * (x_pos - cl_pos[nn][0]) +
@@ -1417,6 +1423,9 @@ void Grid3D::Clouds()
 #ifdef SCALAR
   #ifdef DUST
             C.host[id + H.n_cells * grid_enum::dust_density] = rho_cl * 1e-2;
+            if (isnan(C.host[id + H.n_cells * grid_enum::dust_density])) {
+              printf("there's a nan in IC %d %d %d\n", i, j, k);
+            }
   #endif  // DUST
 #endif  // SCALAR
           }
@@ -1482,18 +1491,18 @@ void Grid3D::Zeldovich_Pancake(struct Parameters P)
   Real H0, h, Omega_M, rho_0, G, z_zeldovich, z_init, x_center, T_init, k_x;

   chprintf("Setting Zeldovich Pancake initial conditions...\n");
-  H0 = P.H0;
-  h = H0 / 100;
+  H0      = P.H0;
+  h       = H0 / 100;
   Omega_M = P.Omega_M;

   chprintf(" h = %f \n", h);
   chprintf(" Omega_M = %f \n", Omega_M);

   H0 /= 1000;  //[km/s / kpc]
-  G = G_COSMO;
-  rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h;
+  G           = G_COSMO;
+  rho_0       = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h;
   z_zeldovich = 1;
-  z_init = P.Init_redshift;
+  z_init      = P.Init_redshift;
   chprintf(" rho_0 = %f \n", rho_0);
   chprintf(" z_init = %f \n", z_init);
   chprintf(" z_zeldovich = %f \n", z_zeldovich);
@@ -1553,17 +1562,17 @@ void Grid3D::Zeldovich_Pancake(struct Parameters P)
         index = (int(x_pos / H.dx) + 0) % 256;
         // index = ( index + 16 ) % 256;
         dens = ics_values[0 * nPoints + index];
-        vel = ics_values[1 * nPoints + index];
-        E = ics_values[2 * nPoints + index];
-        U = ics_values[3 * nPoints + index];
+        vel  = ics_values[1 * nPoints + index];
+        E    = ics_values[2 * nPoints + index];
+        U    = ics_values[3 * nPoints + index];
         //
         //
         // chprintf( "%f \n", vel );
-        C.density[id] = dens;
+        C.density[id]    = dens;
         C.momentum_x[id] = dens * vel;
         C.momentum_y[id] = 0;
         C.momentum_z[id] = 0;
-        C.Energy[id] = E;
+        C.Energy[id]     = E;

 #ifdef DE
         C.GasEnergy[id] = U;
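// A sketch of the pressure-equilibrium setup Clouds() uses: the cloud is
// assigned the background pressure (p_cl = p_bg), so the density contrast fixes
// the implied cloud temperature. Values mirror the initial conditions above
// (n_bg = 1e-2 cm^-3, T_bg = 3e7 K, n_cl = 10 cm^-3); note the implied
// equilibrium temperature is 3e4 K, while the file's nominal T_cl is 1e4 K.
#include <cstdio>

int main()
{
  double const KB = 1.380658e-16;            // Boltzmann constant [erg/K]
  double const n_bg = 1.0e-2, T_bg = 3.0e7;  // wind number density [cm^-3], temperature [K]
  double const n_cl = 10.0;                  // cloud number density [cm^-3]

  double const p_bg = n_bg * KB * T_bg;      // cgs pressure
  double const p_cl = p_bg;                  // pressure equilibrium
  double const T_cl = p_cl / (n_cl * KB);    // implied cloud temperature

  std::printf("p = %e erg cm^-3, T_cl = %e K\n", p_cl, T_cl);
  return 0;
}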
diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu
index 1d30cc0e2..96f2f6a5d 100644
--- a/src/integrators/VL_3D_cuda.cu
+++ b/src/integrators/VL_3D_cuda.cu
@@ -122,7 +122,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int
 #if defined(GRAVITY)
   dev_grav_potential = d_grav_potential;
 #else   // not GRAVITY
-  dev_grav_potential = NULL;
+  dev_grav_potential   = NULL;
 #endif  // GRAVITY

   // If memory is single allocated: memory_allocated becomes true and
diff --git a/src/io/io.cpp b/src/io/io.cpp
index 3408bbce7..e4d9a4769 100644
--- a/src/io/io.cpp
+++ b/src/io/io.cpp
@@ -155,6 +155,12 @@ void Write_Data(Grid3D &G, struct Parameters P, int nfile)
   }
 #endif /*SLICES*/

+#ifdef OUTFLOW_ANALYSIS
+  if (nfile % P.n_slice == 0) {
+    Output_Edges(G, P, nfile);
+  }
+#endif /*OUTFLOW_ANALYSIS*/
+
 #ifdef PARTICLES
   if (nfile % P.n_particle == 0) {
     G.WriteData_Particles(P, nfile);
@@ -552,6 +558,50 @@ void Output_Slices(Grid3D &G, struct Parameters P, int nfile)
 #endif  // HDF5
 }

+/* Output the grid data on the simulation boundary (edge) faces. */
+void Output_Edges(Grid3D &G, struct Parameters P, int nfile)
+{
+#ifdef HDF5
+  hid_t file_id;
+  herr_t status;
+
+  // create the filename
+  std::string filename(P.outdir);
+  filename += std::to_string(nfile);
+  filename += "_edges.h5";
+
+  #ifdef MPI_CHOLLA
+  filename += "." + std::to_string(procID);
+  #endif /*MPI_CHOLLA*/
+
+  // Create a new file
+  file_id = H5Fcreate(filename.data(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+
+  // Write header (file attributes)
+  G.Write_Header_HDF5(file_id);
+
+  // Write the edge data of all variables to the output file
+  G.Write_Edges_HDF5(file_id);
+
+  // Close the file
+  status = H5Fclose(file_id);
+
+  #ifdef MPI_CHOLLA
+  if (status < 0) {
+    printf("Output_Edges: File write failed. ProcID: %d\n", procID);
+    chexit(-1);
+  }
+  #else   // MPI_CHOLLA is not defined
+  if (status < 0) {
+    printf("Output_Edges: File write failed.\n");
+    exit(-1);
+  }
+  #endif  // MPI_CHOLLA
+#else   // HDF5 is not defined
+  printf("Output_Edges only defined for hdf5 writes.\n");
+#endif  // HDF5
+}
+
 /*! \fn void Write_Header_Text(FILE *fp)
  *  \brief Write some relevant header info to a text output file. */
 void Grid3D::Write_Header_Text(FILE *fp)
@@ -1375,12 +1425,12 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id)
 #ifdef OUTPUT_METALS
   output_metals = true;
 #else   // not OUTPUT_METALS
-  output_metals = false;
+  output_metals    = false;
 #endif  // OUTPUT_METALS
 #ifdef OUTPUT_ELECTRONS
   output_electrons = true;
 #else   // not OUTPUT_ELECTRONS
-  output_electrons = false;
+  output_electrons   = false;
 #endif  // OUTPUT_ELECTRONS
 #ifdef OUTPUT_FULL_IONIZATION
   output_full_ionization = true;
@@ -1570,7 +1620,7 @@ void Grid3D::Write_Projection_HDF5(hid_t file_id)
         Real const mx = C.momentum_x[id];
         Real const my = C.momentum_y[id];
         Real const mz = C.momentum_z[id];
-        Real const E = C.Energy[id];
+        Real const E  = C.Energy[id];

 #ifdef MHD
         auto const [magnetic_x, magnetic_y, magnetic_z] =
@@ -1794,7 +1844,7 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id)
         Real const mx = C.momentum_x[id];
         Real const my = C.momentum_y[id];
         Real const mz = C.momentum_z[id];
-        Real const E = C.Energy[id];
+        Real const E  = C.Energy[id];

 #ifdef MHD
         auto const [magnetic_x, magnetic_y, magnetic_z] =
@@ -1858,7 +1908,12 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
   Real *dataset_buffer_GE;
 #endif
 #ifdef SCALAR
-  Real *dataset_buffer_scalar;
+  #ifdef BASIC_SCALAR
+  Real *dataset_buffer_basic_scalar;
+  #endif
+  #ifdef DUST
+  Real *dataset_buffer_dust;
+  #endif
 #endif
   herr_t status;
   int xslice, yslice, zslice;
@@ -1898,8 +1953,13 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
     dataset_buffer_GE = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
 #endif
 #ifdef SCALAR
-    dataset_buffer_scalar = (Real *)malloc(NSCALARS * H.nx_real * H.ny_real * sizeof(Real));
-  #endif
+  #ifdef BASIC_SCALAR
+    dataset_buffer_basic_scalar = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+  #endif
+  #ifdef DUST
+    dataset_buffer_dust = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+  #endif
+  #endif

     // Copy the xy slices to the memory buffers
     for (j = 0; j < H.ny_real; j++) {
@@ -1940,9 +2000,12 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
           dataset_buffer_GE[buf_id] = C.GasEnergy[id];
 #endif
 #ifdef SCALAR
-          for (int ii = 0; ii < NSCALARS; ii++) {
-            dataset_buffer_scalar[buf_id + ii * H.nx * H.ny] = C.scalar[id + ii * H.n_cells];
-          }
+  #ifdef BASIC_SCALAR
+          dataset_buffer_basic_scalar[buf_id] = C.basic_scalar[id];
+  #endif
+  #ifdef DUST
+          dataset_buffer_dust[buf_id] = C.dust_density[id];
+  #endif
 #endif
 #ifdef MPI_CHOLLA
         }
@@ -1962,9 +2025,12 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
           dataset_buffer_GE[buf_id] = 0;
 #endif
 #ifdef SCALAR
-          for (int ii = 0; ii < NSCALARS; ii++) {
-            dataset_buffer_scalar[buf_id + ii * H.nx * H.ny] = 0;
-          }
+  #ifdef BASIC_SCALAR
+          dataset_buffer_basic_scalar[buf_id] = 0;
+  #endif
+  #ifdef DUST
+          dataset_buffer_dust[buf_id] = 0;
+  #endif
 #endif
         }
 #endif  // MPI_CHOLLA
@@ -1986,7 +2052,12 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
     status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_xy");
 #endif
 #ifdef SCALAR
-    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_xy");
+  #ifdef BASIC_SCALAR
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_basic_scalar, "/basic_scalar_xy");
+  #endif
+  #ifdef DUST
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_dust, "/d_dust_xy");
+  #endif
 #endif
     // Free the dataspace id
     status = H5Sclose(dataspace_id);
@@ -2001,7 +2072,12 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
     free(dataset_buffer_GE);
 #endif
 #ifdef SCALAR
-    free(dataset_buffer_scalar);
+  #ifdef BASIC_SCALAR
+    free(dataset_buffer_basic_scalar);
+  #endif
+  #ifdef DUST
+    free(dataset_buffer_dust);
+  #endif
 #endif

     // Create the xz data space for the datasets
@@ -2024,7 +2100,12 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
     dataset_buffer_GE = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real));
 #endif
 #ifdef SCALAR
-    dataset_buffer_scalar = (Real *)malloc(NSCALARS * H.nx_real * H.nz_real * sizeof(Real));
+  #ifdef BASIC_SCALAR
+    dataset_buffer_basic_scalar = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real));
+  #endif
+  #ifdef DUST
+    dataset_buffer_dust = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real));
+  #endif
 #endif

     // Copy the xz slices to the memory buffers
@@ -2066,9 +2147,12 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
           dataset_buffer_GE[buf_id] = C.GasEnergy[id];
 #endif
 #ifdef SCALAR
-          for (int ii = 0; ii < NSCALARS; ii++) {
-            dataset_buffer_scalar[buf_id + ii * H.nx * H.nz] = C.scalar[id + ii * H.n_cells];
-          }
+  #ifdef BASIC_SCALAR
+          dataset_buffer_basic_scalar[buf_id] = C.basic_scalar[id];
+  #endif
+  #ifdef DUST
+          dataset_buffer_dust[buf_id] = C.dust_density[id];
+  #endif
 #endif
 #ifdef MPI_CHOLLA
         }
@@ -2088,9 +2172,12 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
           dataset_buffer_GE[buf_id] = 0;
 #endif
 #ifdef SCALAR
-          for (int ii = 0; ii < NSCALARS; ii++) {
-            dataset_buffer_scalar[buf_id + ii * H.nx * H.nz] = 0;
-          }
+  #ifdef BASIC_SCALAR
+          dataset_buffer_basic_scalar[buf_id] = 0;
+  #endif
+  #ifdef DUST
+          dataset_buffer_dust[buf_id] = 0;
+  #endif
 #endif
         }
 #endif  // MPI_CHOLLA
@@ -2112,7 +2199,12 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
     status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_xz");
 #endif
 #ifdef SCALAR
-    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_xz");
+  #ifdef BASIC_SCALAR
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_basic_scalar, "/basic_scalar_xz");
+  #endif
+  #ifdef DUST
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_dust, "/d_dust_xz");
+  #endif
 #endif

     // Free the dataspace id
@@ -2128,7 +2220,12 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
     free(dataset_buffer_GE);
 #endif
 #ifdef SCALAR
-    free(dataset_buffer_scalar);
+  #ifdef BASIC_SCALAR
+    free(dataset_buffer_basic_scalar);
+  #endif
+  #ifdef DUST
+    free(dataset_buffer_dust);
+  #endif
 #endif

     // Create the yz data space for the datasets
@@ -2151,7 +2248,12 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
     dataset_buffer_GE = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
 #endif
 #ifdef SCALAR
-    dataset_buffer_scalar = (Real *)malloc(NSCALARS * H.ny_real * H.nz_real * sizeof(Real));
+  #ifdef BASIC_SCALAR
+    dataset_buffer_basic_scalar = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+  #endif
+  #ifdef DUST
+    dataset_buffer_dust = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+  #endif
 #endif

     // Copy the yz slices to the memory buffers
@@ -2192,9 +2294,12 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
          dataset_buffer_GE[buf_id] = C.GasEnergy[id];
 #endif
 #ifdef SCALAR
-          for (int ii = 0; ii < NSCALARS; ii++) {
-            dataset_buffer_scalar[buf_id + ii * H.ny * H.nz] = C.scalar[id + ii * H.n_cells];
-          }
+  #ifdef DUST
+          dataset_buffer_dust[buf_id] = C.dust_density[id];
+  #endif
+  #ifdef BASIC_SCALAR
+          dataset_buffer_basic_scalar[buf_id] = C.basic_scalar[id];
+  #endif
 #endif
 #ifdef MPI_CHOLLA
         }
@@ -2214,9 +2319,12 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
          dataset_buffer_GE[buf_id] = 0;
 #endif
 #ifdef SCALAR
-          for (int ii = 0; ii < NSCALARS; ii++) {
-            dataset_buffer_scalar[buf_id + ii * H.ny * H.nz] = 0;
-          }
+  #ifdef BASIC_SCALAR
+          dataset_buffer_basic_scalar[buf_id] = 0;
+  #endif
+  #ifdef DUST
+          dataset_buffer_dust[buf_id] = 0;
+  #endif
 #endif
         }
 #endif  // MPI_CHOLLA
@@ -2238,7 +2346,12 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
     status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_yz");
 #endif
 #ifdef SCALAR
-    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_yz");
+  #ifdef BASIC_SCALAR
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_basic_scalar, "/basic_scalar_yz");
+  #endif
+  #ifdef DUST
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_dust, "/d_dust_yz");
+  #endif
 #endif

     // Free the dataspace id
@@ -2254,74 +2367,668 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
     free(dataset_buffer_GE);
 #endif
 #ifdef SCALAR
-    free(dataset_buffer_scalar);
+  #ifdef BASIC_SCALAR
+    free(dataset_buffer_basic_scalar);
+  #endif
+  #ifdef DUST
+    free(dataset_buffer_dust);
+  #endif
 #endif
   } else {
     printf("Slice write only works for 3D data.\n");
   }
 }
-#endif  // HDF5

-/*! \fn void Read_Grid(struct Parameters P)
- *  \brief Read in grid data from an output file. */
-void Grid3D::Read_Grid(struct Parameters P)
+void Grid3D::Write_Edges_HDF5(hid_t file_id)
 {
-  ScopedTimer timer("Read_Grid");
-  int nfile = P.nfile;  // output step you want to read from
-
-  // create the filename to read from
-  // assumes your data is in the outdir specified in the input file
-  // strcpy(filename, P.outdir);
-  // Changed to read initial conditions from indir
-  std::string filename(P.indir);
-  filename += std::to_string(P.nfile);
-  char sbuffer[1024];
-
-#if defined BINARY
-  filename += ".bin";
-#elif defined HDF5
-  filename += ".h5";
-#endif  // BINARY or HDF5
-// for now assumes you will run on the same number of processors
-#ifdef MPI_CHOLLA
-  #ifdef TILED_INITIAL_CONDITIONS
-  sprintf(sbuffer, "%sics_%dMpc_%d.h5", P.indir, (int)P.tile_length / 1000,
-          H.nx_real);  // Everyone reads the same file
-  filename = sbuffer;
-  #else   // TILED_INITIAL_CONDITIONS is not defined
+ std::to_string(procID);
-  #endif  // TILED_INITIAL_CONDITIONS
-#endif  // MPI_CHOLLA
-
-#if defined BINARY
-  FILE *fp;
-  // open the file
-  fp = fopen(filename, "r");
-  if (!fp) {
-    printf("Unable to open input file.\n");
-    exit(0);
-  }
-
-  // read in grid data
-  Read_Grid_Binary(fp);
-
-  // close the file
-  fclose(fp);
-
-#elif defined HDF5
-  hid_t file_id;
+  int i, j, k, id, buf_id;
+  hid_t dataspace_id;
+  Real *dataset_buffer_d;
+  Real *dataset_buffer_mx;
+  Real *dataset_buffer_my;
+  Real *dataset_buffer_mz;
+  Real *dataset_buffer_E;
+  #ifdef DE
+  Real *dataset_buffer_GE;
+  #endif
+  #ifdef SCALAR
+  Real *dataset_buffer_scalar;
+  #endif
  herr_t status;
+  int minus_xslice, minus_yslice, minus_zslice, plus_xslice, plus_yslice, plus_zslice;
+  minus_xslice = 0;
+  minus_yslice = 0;
+  minus_zslice = 0;
+  // last valid plane of the local grid; H.nx (etc.) would index one past the end
+  plus_xslice = H.nx - 1;
+  plus_yslice = H.ny - 1;
+  plus_zslice = H.nz - 1;
+  #ifdef MPI_CHOLLA
+  minus_xslice = 0;
+  minus_yslice = 0;
+  minus_zslice = 0;
+  plus_xslice = nx_global - 1;
+  plus_yslice = ny_global - 1;
+  plus_zslice = nz_global - 1;
+  #endif
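+  // Face-plane bookkeeping: minus_* and plus_* select the first and last planes
+  // along each axis. With MPI they are global real-cell indices that the copy
+  // loops below remap into each rank's ghost-padded local frame; in a serial
+  // build they index the local array (ghost zones included) directly.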
+  // 3D
+  if (H.nx > 1 && H.ny > 1 && H.nz > 1) {
+    int nx_dset = H.nx_real;
+    int ny_dset = H.ny_real;
+    int nz_dset = H.nz_real;
+    hsize_t dims[2];
+    ////////////////////////////////////////////////////////////
+    // Create the -xy data space for the datasets
+    dims[0] = nx_dset;
+    dims[1] = ny_dset;
+    dataspace_id = H5Screate_simple(2, dims, NULL);
-
-  // open the file
-  file_id = H5Fopen(filename.data(), H5F_ACC_RDONLY, H5P_DEFAULT);
-  if (file_id < 0) {
-    std::cout << "Unable to open input file: " << filename << std::endl;
-    exit(0);
-  }
-
-  // read in grid data
-  Read_Grid_HDF5(file_id, P);
+    // Allocate memory for the -xy slices
+    dataset_buffer_d = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+    dataset_buffer_mx = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+    dataset_buffer_my = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+    dataset_buffer_mz = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+    dataset_buffer_E = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+  #ifdef DE
+    dataset_buffer_GE = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+  #endif
+  #ifdef SCALAR
+    dataset_buffer_scalar = (Real *)malloc(NSCALARS * H.nx_real * H.ny_real * sizeof(Real));
+  #endif
+    // Copy the -xy slices to the memory buffers
+    for (j = 0; j < H.ny_real; j++) {
+      for (i = 0; i < H.nx_real; i++) {
+        id = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost, minus_zslice, H.nx, H.ny);
+        buf_id = j + i * H.ny_real;
+  #ifdef MPI_CHOLLA
+        // When there are multiple processes, check whether this slice is in
+        // your domain
+        if (minus_zslice >= nz_local_start && minus_zslice < nz_local_start + nz_local) {
+          id = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost, minus_zslice - nz_local_start + H.n_ghost,
+                                              H.nx, H.ny);
+  #endif  // MPI_CHOLLA
+          dataset_buffer_d[buf_id] = C.density[id];
+          dataset_buffer_mx[buf_id] = C.momentum_x[id];
+          dataset_buffer_my[buf_id] = C.momentum_y[id];
+          dataset_buffer_mz[buf_id] = C.momentum_z[id];
+          dataset_buffer_E[buf_id] = C.Energy[id];
+  #ifdef DE
+          dataset_buffer_GE[buf_id] = C.GasEnergy[id];
+  #endif
+  #ifdef SCALAR
+          for (int ii = 0; ii < NSCALARS; ii++) {
+            dataset_buffer_scalar[buf_id + ii * H.nx * H.ny] = C.scalar[id + ii * H.n_cells];
+          }
+  #endif
+  #ifdef MPI_CHOLLA
+        }
+        // if the slice isn't in your domain, just write out zeros
+        else {
+          dataset_buffer_d[buf_id] = 0;
+          dataset_buffer_mx[buf_id] = 0;
+          dataset_buffer_my[buf_id] = 0;
+          dataset_buffer_mz[buf_id] = 0;
+          dataset_buffer_E[buf_id] = 0;
+  #ifdef DE
+          dataset_buffer_GE[buf_id] = 0;
+  #endif
+  #ifdef SCALAR
+          for (int ii = 0; ii < NSCALARS; ii++) {
+            dataset_buffer_scalar[buf_id + ii * H.nx * H.ny] = 0;
+          }
+  #endif
+        }
+  #endif  // MPI_CHOLLA
+      }
+    }
+    // Write out the xy datasets for each variable
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_minus_xy");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_minus_xy");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_minus_xy");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_minus_xy");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_minus_xy");
+  #ifdef DE
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_minus_xy");
+  #endif
+  #ifdef SCALAR
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_minus_xy");
+  #endif
+    // Free the dataspace id
+    status = H5Sclose(dataspace_id);
+
+    // free the dataset buffers
+    free(dataset_buffer_d);
+    free(dataset_buffer_mx);
+    free(dataset_buffer_my);
+    free(dataset_buffer_mz);
+    free(dataset_buffer_E);
+  #ifdef DE
+    free(dataset_buffer_GE);
+  #endif
+  #ifdef SCALAR
+    free(dataset_buffer_scalar);
+  #endif
+    ////////////////////////////////////////////////////////////
+    // Create the +xy data space for the datasets
+    dims[0] = nx_dset;
+    dims[1] = ny_dset;
+    dataspace_id = H5Screate_simple(2, dims, NULL);
+
+    // Allocate memory for the +xy slices
+    dataset_buffer_d = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+    dataset_buffer_mx = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+    dataset_buffer_my = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+    dataset_buffer_mz = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+    dataset_buffer_E = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+  #ifdef DE
+    dataset_buffer_GE = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+  #endif
+  #ifdef SCALAR
+    dataset_buffer_scalar = (Real *)malloc(NSCALARS * H.nx_real * H.ny_real * sizeof(Real));
+  #endif
+    // Copy the +xy slices to the memory buffers
+    for (j = 0; j < H.ny_real; j++) {
+      for (i = 0; i < H.nx_real; i++) {
+        id = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost, plus_zslice, H.nx, H.ny);
+        buf_id = j + i * H.ny_real;
+  #ifdef MPI_CHOLLA
+        // When there are multiple processes, check whether this slice is in
+        // your domain
+        if (plus_zslice >= nz_local_start && plus_zslice < nz_local_start + nz_local) {
+          id = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost, plus_zslice - nz_local_start + H.n_ghost,
+                                              H.nx, H.ny);
+  #endif  // MPI_CHOLLA
+          dataset_buffer_d[buf_id] = C.density[id];
+          dataset_buffer_mx[buf_id] = C.momentum_x[id];
+          dataset_buffer_my[buf_id] = C.momentum_y[id];
+          dataset_buffer_mz[buf_id] = C.momentum_z[id];
+          dataset_buffer_E[buf_id] = C.Energy[id];
+  #ifdef DE
+          dataset_buffer_GE[buf_id] = C.GasEnergy[id];
+  #endif
+  #ifdef SCALAR
+          for (int ii = 0; ii < NSCALARS; ii++) {
+            dataset_buffer_scalar[buf_id + ii * H.nx * H.ny] = C.scalar[id + ii * H.n_cells];
+          }
+  #endif
+  #ifdef MPI_CHOLLA
+        }
+        // if the slice isn't in your domain, just write out zeros
+        else {
+          dataset_buffer_d[buf_id] = 0;
+          dataset_buffer_mx[buf_id] = 0;
+          dataset_buffer_my[buf_id] = 0;
+          dataset_buffer_mz[buf_id] = 0;
+          dataset_buffer_E[buf_id] = 0;
+  #ifdef DE
+          dataset_buffer_GE[buf_id] = 0;
+  #endif
+  #ifdef SCALAR
+          for (int ii = 0; ii < NSCALARS; ii++) {
+            dataset_buffer_scalar[buf_id + ii * H.nx * H.ny] = 0;
+          }
+  #endif
+        }
+  #endif  // MPI_CHOLLA
+      }
+    }
+    // Write out the xy
datasets for each variable + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_plus_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_plus_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_plus_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_plus_xy"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_plus_xy"); + #ifdef DE + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_plus_xy"); + #endif + #ifdef SCALAR + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_plus_xy"); + #endif + // Free the dataspace id + status = H5Sclose(dataspace_id); + + // free the dataset buffers + free(dataset_buffer_d); + free(dataset_buffer_mx); + free(dataset_buffer_my); + free(dataset_buffer_mz); + free(dataset_buffer_E); + #ifdef DE + free(dataset_buffer_GE); + #endif + #ifdef SCALAR + free(dataset_buffer_scalar); + #endif + //////////////////////////////////////////////////////////// + + // Create the -xz data space for the datasets + dims[0] = nx_dset; + dims[1] = nz_dset; + dataspace_id = H5Screate_simple(2, dims, NULL); + + // allocate the memory for the -xz slices + dataset_buffer_d = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + dataset_buffer_mx = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + dataset_buffer_my = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + dataset_buffer_mz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + dataset_buffer_E = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + #ifdef DE + dataset_buffer_GE = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + #endif + #ifdef SCALAR + dataset_buffer_scalar = (Real *)malloc(NSCALARS * H.nx_real * H.nz_real * sizeof(Real)); + #endif + + // Copy the xz slices to the memory buffers + for (k = 0; k < H.nz_real; k++) { + for (i = 0; i < H.nx_real; i++) { + id = cuda_utilities::compute1DIndex(i + H.n_ghost, minus_yslice, k + H.n_ghost, H.nx, H.ny); + buf_id = k + i * H.nz_real; + #ifdef MPI_CHOLLA + // When there are multiple processes, check whether this slice is in + // your domain + if (minus_yslice >= ny_local_start && minus_yslice < ny_local_start + ny_local) { + id = cuda_utilities::compute1DIndex(i + H.n_ghost, minus_yslice - ny_local_start + H.n_ghost, k + H.n_ghost, + H.nx, H.ny); + #endif // MPI_CHOLLA + dataset_buffer_d[buf_id] = C.density[id]; + dataset_buffer_mx[buf_id] = C.momentum_x[id]; + dataset_buffer_my[buf_id] = C.momentum_y[id]; + dataset_buffer_mz[buf_id] = C.momentum_z[id]; + dataset_buffer_E[buf_id] = C.Energy[id]; + #ifdef DE + dataset_buffer_GE[buf_id] = C.GasEnergy[id]; + #endif + #ifdef SCALAR + for (int ii = 0; ii < NSCALARS; ii++) { + dataset_buffer_scalar[buf_id + ii * H.nx * H.nz] = C.scalar[id + ii * H.n_cells]; + } + #endif + #ifdef MPI_CHOLLA + } + // if the slice isn't in your domain, just write out zeros + else { + dataset_buffer_d[buf_id] = 0; + dataset_buffer_mx[buf_id] = 0; + dataset_buffer_my[buf_id] = 0; + dataset_buffer_mz[buf_id] = 0; + dataset_buffer_E[buf_id] = 0; + #ifdef DE + dataset_buffer_GE[buf_id] = 0; + #endif + #ifdef SCALAR + for (int ii = 0; ii < NSCALARS; ii++) { + dataset_buffer_scalar[buf_id + ii * H.nx * H.nz] = 0; + } + #endif + } + #endif // MPI_CHOLLA + } + } + // Write out the xz datasets for each variable + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_minus_xz"); + status = Write_HDF5_Dataset(file_id, 
dataspace_id, dataset_buffer_mx, "/mx_minus_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_minus_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_minus_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_minus_xz"); + #ifdef DE + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_minus_xz"); + #endif + #ifdef SCALAR + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_minus_xz"); + #endif + + // Free the dataspace id + status = H5Sclose(dataspace_id); + + // free the dataset buffers + free(dataset_buffer_d); + free(dataset_buffer_mx); + free(dataset_buffer_my); + free(dataset_buffer_mz); + free(dataset_buffer_E); + #ifdef DE + free(dataset_buffer_GE); + #endif + #ifdef SCALAR + free(dataset_buffer_scalar); + #endif + //////////////////////////////////////////////////////////// + // Create the +xz data space for the datasets + dims[0] = nx_dset; + dims[1] = nz_dset; + dataspace_id = H5Screate_simple(2, dims, NULL); + + // allocate the memory for the +xz slices + dataset_buffer_d = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + dataset_buffer_mx = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + dataset_buffer_my = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + dataset_buffer_mz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + dataset_buffer_E = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + #ifdef DE + dataset_buffer_GE = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + #endif + #ifdef SCALAR + dataset_buffer_scalar = (Real *)malloc(NSCALARS * H.nx_real * H.nz_real * sizeof(Real)); + #endif + + // Copy the xz slices to the memory buffers + for (k = 0; k < H.nz_real; k++) { + for (i = 0; i < H.nx_real; i++) { + id = cuda_utilities::compute1DIndex(i + H.n_ghost, plus_yslice, k + H.n_ghost, H.nx, H.ny); + buf_id = k + i * H.nz_real; + #ifdef MPI_CHOLLA + // When there are multiple processes, check whether this slice is in + // your domain + if (plus_yslice >= ny_local_start && plus_yslice < ny_local_start + ny_local) { + id = cuda_utilities::compute1DIndex(i + H.n_ghost, plus_yslice - ny_local_start + H.n_ghost, k + H.n_ghost, + H.nx, H.ny); + #endif // MPI_CHOLLA + dataset_buffer_d[buf_id] = C.density[id]; + dataset_buffer_mx[buf_id] = C.momentum_x[id]; + dataset_buffer_my[buf_id] = C.momentum_y[id]; + dataset_buffer_mz[buf_id] = C.momentum_z[id]; + dataset_buffer_E[buf_id] = C.Energy[id]; + #ifdef DE + dataset_buffer_GE[buf_id] = C.GasEnergy[id]; + #endif + #ifdef SCALAR + for (int ii = 0; ii < NSCALARS; ii++) { + dataset_buffer_scalar[buf_id + ii * H.nx * H.nz] = C.scalar[id + ii * H.n_cells]; + } + #endif + #ifdef MPI_CHOLLA + } + // if the slice isn't in your domain, just write out zeros + else { + dataset_buffer_d[buf_id] = 0; + dataset_buffer_mx[buf_id] = 0; + dataset_buffer_my[buf_id] = 0; + dataset_buffer_mz[buf_id] = 0; + dataset_buffer_E[buf_id] = 0; + #ifdef DE + dataset_buffer_GE[buf_id] = 0; + #endif + #ifdef SCALAR + for (int ii = 0; ii < NSCALARS; ii++) { + dataset_buffer_scalar[buf_id + ii * H.nx * H.nz] = 0; + } + #endif + } + #endif // MPI_CHOLLA + } + } + // Write out the xz datasets for each variable + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_plus_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_plus_xz"); + status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_plus_xz"); + status = 
Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_plus_xz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_plus_xz");
+  #ifdef DE
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_plus_xz");
+  #endif
+  #ifdef SCALAR
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_plus_xz");
+  #endif
+
+    // Free the dataspace id
+    status = H5Sclose(dataspace_id);
+
+    // free the dataset buffers
+    free(dataset_buffer_d);
+    free(dataset_buffer_mx);
+    free(dataset_buffer_my);
+    free(dataset_buffer_mz);
+    free(dataset_buffer_E);
+  #ifdef DE
+    free(dataset_buffer_GE);
+  #endif
+  #ifdef SCALAR
+    free(dataset_buffer_scalar);
+  #endif
+    ////////////////////////////////////////////////////////////
+    // Create the -yz data space for the datasets
+    dims[0] = ny_dset;
+    dims[1] = nz_dset;
+    dataspace_id = H5Screate_simple(2, dims, NULL);
+
+    // allocate the memory for the -yz slices
+    dataset_buffer_d = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+    dataset_buffer_mx = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+    dataset_buffer_my = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+    dataset_buffer_mz = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+    dataset_buffer_E = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+  #ifdef DE
+    dataset_buffer_GE = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+  #endif
+  #ifdef SCALAR
+    dataset_buffer_scalar = (Real *)malloc(NSCALARS * H.ny_real * H.nz_real * sizeof(Real));
+  #endif
+
+    // Copy the yz slices to the memory buffers
+    for (k = 0; k < H.nz_real; k++) {
+      for (j = 0; j < H.ny_real; j++) {
+        id = cuda_utilities::compute1DIndex(minus_xslice, j + H.n_ghost, k + H.n_ghost, H.nx, H.ny);
+        buf_id = k + j * H.nz_real;
+  #ifdef MPI_CHOLLA
+        // When there are multiple processes, check whether this slice is in
+        // your domain
+        if (minus_xslice >= nx_local_start && minus_xslice < nx_local_start + nx_local) {
+          id = cuda_utilities::compute1DIndex(minus_xslice - nx_local_start + H.n_ghost, j + H.n_ghost, k + H.n_ghost,
+                                              H.nx, H.ny);
+  #endif  // MPI_CHOLLA
+          dataset_buffer_d[buf_id] = C.density[id];
+          dataset_buffer_mx[buf_id] = C.momentum_x[id];
+          dataset_buffer_my[buf_id] = C.momentum_y[id];
+          dataset_buffer_mz[buf_id] = C.momentum_z[id];
+          dataset_buffer_E[buf_id] = C.Energy[id];
+  #ifdef DE
+          dataset_buffer_GE[buf_id] = C.GasEnergy[id];
+  #endif
+  #ifdef SCALAR
+          for (int ii = 0; ii < NSCALARS; ii++) {
+            dataset_buffer_scalar[buf_id + ii * H.ny * H.nz] = C.scalar[id + ii * H.n_cells];
+          }
+  #endif
+  #ifdef MPI_CHOLLA
+        }
+        // if the slice isn't in your domain, just write out zeros
+        else {
+          dataset_buffer_d[buf_id] = 0;
+          dataset_buffer_mx[buf_id] = 0;
+          dataset_buffer_my[buf_id] = 0;
+          dataset_buffer_mz[buf_id] = 0;
+          dataset_buffer_E[buf_id] = 0;
+  #ifdef DE
+          dataset_buffer_GE[buf_id] = 0;
+  #endif
+  #ifdef SCALAR
+          for (int ii = 0; ii < NSCALARS; ii++) {
+            dataset_buffer_scalar[buf_id + ii * H.ny * H.nz] = 0;
+          }
+  #endif
+        }
+  #endif  // MPI_CHOLLA
+      }
+    }
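+    // Note that each rank writes only its local patch of every edge plane to
+    // its own output file; ranks whose subdomain does not contain a given
+    // plane write zeros for their patch, so the full plane is reassembled in
+    // post-processing by stitching the per-rank outputs at their global offsets.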
+    // Write out the yz datasets for each variable
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_minus_yz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_minus_yz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_minus_yz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_minus_yz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_minus_yz");
+  #ifdef DE
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_minus_yz");
+  #endif
+  #ifdef SCALAR
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_minus_yz");
+  #endif
+
+    // Free the dataspace id
+    status = H5Sclose(dataspace_id);
+
+    // free the dataset buffers
+    free(dataset_buffer_d);
+    free(dataset_buffer_mx);
+    free(dataset_buffer_my);
+    free(dataset_buffer_mz);
+    free(dataset_buffer_E);
+  #ifdef DE
+    free(dataset_buffer_GE);
+  #endif
+  #ifdef SCALAR
+    free(dataset_buffer_scalar);
+  #endif
+    ////////////////////////////////////////////////////////////
+    // Create the +yz data space for the datasets
+    dims[0] = ny_dset;
+    dims[1] = nz_dset;
+    dataspace_id = H5Screate_simple(2, dims, NULL);
+
+    // allocate the memory for the +yz slices
+    dataset_buffer_d = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+    dataset_buffer_mx = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+    dataset_buffer_my = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+    dataset_buffer_mz = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+    dataset_buffer_E = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+  #ifdef DE
+    dataset_buffer_GE = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+  #endif
+  #ifdef SCALAR
+    dataset_buffer_scalar = (Real *)malloc(NSCALARS * H.ny_real * H.nz_real * sizeof(Real));
+  #endif
+
+    // Copy the yz slices to the memory buffers
+    for (k = 0; k < H.nz_real; k++) {
+      for (j = 0; j < H.ny_real; j++) {
+        id = cuda_utilities::compute1DIndex(plus_xslice, j + H.n_ghost, k + H.n_ghost, H.nx, H.ny);
+        buf_id = k + j * H.nz_real;
+  #ifdef MPI_CHOLLA
+        // When there are multiple processes, check whether this slice is in
+        // your domain
+        if (plus_xslice >= nx_local_start && plus_xslice < nx_local_start + nx_local) {
+          id = cuda_utilities::compute1DIndex(plus_xslice - nx_local_start + H.n_ghost, j + H.n_ghost, k + H.n_ghost,
+                                              H.nx, H.ny);
+  #endif  // MPI_CHOLLA
+          dataset_buffer_d[buf_id] = C.density[id];
+          dataset_buffer_mx[buf_id] = C.momentum_x[id];
+          dataset_buffer_my[buf_id] = C.momentum_y[id];
+          dataset_buffer_mz[buf_id] = C.momentum_z[id];
+          dataset_buffer_E[buf_id] = C.Energy[id];
+  #ifdef DE
+          dataset_buffer_GE[buf_id] = C.GasEnergy[id];
+  #endif
+  #ifdef SCALAR
+          for (int ii = 0; ii < NSCALARS; ii++) {
+            dataset_buffer_scalar[buf_id + ii * H.ny * H.nz] = C.scalar[id + ii * H.n_cells];
+          }
+  #endif
+  #ifdef MPI_CHOLLA
+        }
+        // if the slice isn't in your domain, just write out zeros
+        else {
+          dataset_buffer_d[buf_id] = 0;
+          dataset_buffer_mx[buf_id] = 0;
+          dataset_buffer_my[buf_id] = 0;
+          dataset_buffer_mz[buf_id] = 0;
+          dataset_buffer_E[buf_id] = 0;
+  #ifdef DE
+          dataset_buffer_GE[buf_id] = 0;
+  #endif
+  #ifdef SCALAR
+          for (int ii = 0; ii < NSCALARS; ii++) {
+            dataset_buffer_scalar[buf_id + ii * H.ny * H.nz] = 0;
+          }
+  #endif
+        }
+  #endif  // MPI_CHOLLA
+      }
+    }
+    // Write out the yz datasets for each variable
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_plus_yz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_plus_yz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_plus_yz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_plus_yz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_plus_yz");
+  #ifdef DE
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_plus_yz");
+  #endif
+  #ifdef SCALAR
+    status = Write_HDF5_Dataset(file_id,
dataspace_id, dataset_buffer_scalar, "/scalar_plus_yz"); + #endif + + // Free the dataspace id + status = H5Sclose(dataspace_id); + + // free the dataset buffers + free(dataset_buffer_d); + free(dataset_buffer_mx); + free(dataset_buffer_my); + free(dataset_buffer_mz); + free(dataset_buffer_E); + #ifdef DE + free(dataset_buffer_GE); + #endif + #ifdef SCALAR + free(dataset_buffer_scalar); + #endif + } else { + printf("Slice write only works for 3D data.\n"); + } +} +#endif // HDF5 + +/*! \fn void Read_Grid(struct Parameters P) + * \brief Read in grid data from an output file. */ +void Grid3D::Read_Grid(struct Parameters P) +{ + ScopedTimer timer("Read_Grid"); + int nfile = P.nfile; // output step you want to read from + + // create the filename to read from + // assumes your data is in the outdir specified in the input file + // strcpy(filename, P.outdir); + // Changed to read initial conditions from indir + std::string filename(P.indir); + filename += std::to_string(P.nfile); + char sbuffer[1024]; + +#if defined BINARY + filename += ".bin"; +#elif defined HDF5 + filename += ".h5"; +#endif // BINARY or HDF5 +// for now assumes you will run on the same number of processors +#ifdef MPI_CHOLLA + #ifdef TILED_INITIAL_CONDITIONS + sprintf(sbuffer, "%sics_%dMpc_%d.h5", P.indir, (int)P.tile_length / 1000, + H.nx_real); // Everyone reads the same file + filename = sbuffer; + #else // TILED_INITIAL_CONDITIONS is not defined + filename += "." + std::to_string(procID); + #endif // TILED_INITIAL_CONDITIONS +#endif // MPI_CHOLLA + +#if defined BINARY + FILE *fp; + // open the file + fp = fopen(filename, "r"); + if (!fp) { + printf("Unable to open input file.\n"); + exit(0); + } + + // read in grid data + Read_Grid_Binary(fp); + + // close the file + fclose(fp); + +#elif defined HDF5 + hid_t file_id; + herr_t status; + + // open the file + file_id = H5Fopen(filename.data(), H5F_ACC_RDONLY, H5P_DEFAULT); + if (file_id < 0) { + std::cout << "Unable to open input file: " << filename << std::endl; + exit(0); + } + + // read in grid data + Read_Grid_HDF5(file_id, P); // close the file status = H5Fclose(file_id); @@ -2635,6 +3342,9 @@ void Grid3D::Read_Grid_HDF5(hid_t file_id, struct Parameters P) // Free the dataset id status = H5Dclose(dataset_id); + #ifdef CLOUD_TRACKING + // H.velocity_x_cloud_avg = 1.606867e-01 / (KPC / TIME_UNIT); + #endif mean_l = 0; min_l = 1e65; max_l = -1; diff --git a/src/io/io.h b/src/io/io.h index d8f6ca8ca..952b32b39 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -25,6 +25,8 @@ void Output_Rotated_Projected_Data(Grid3D& G, struct Parameters P, int nfile); /* Output xy, xz, and yz slices of the grid data to file. 
*/ void Output_Slices(Grid3D& G, struct Parameters P, int nfile); +void Output_Edges(Grid3D& G, struct Parameters P, int nfile); + /* MPI-safe printf routine */ int chprintf(const char* __restrict sdata, ...); diff --git a/src/model/disk_ICs.cpp b/src/model/disk_ICs.cpp index d68f9e0c4..54f805751 100644 --- a/src/model/disk_ICs.cpp +++ b/src/model/disk_ICs.cpp @@ -839,6 +839,21 @@ void Grid3D::Disk_3D(Parameters p) Real *rho_halo = (Real *)calloc(nr, sizeof(Real)); Real *r_halo = (Real *)calloc(nr, sizeof(Real)); +#ifdef SCALAR + #ifdef DUST + // set entire grid to zero for dust density + for (k = H.n_ghost; k < H.nz - H.n_ghost; k++) { + for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) { + for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) { + // get cell index + id = i + j * H.nx + k * H.nx * H.ny; + C.dust_density[id] = 0.0; + } + } + } + #endif +#endif + ////////////////////////////////////////////// ////////////////////////////////////////////// // Produce a look up table for a hydrostatic hot halo @@ -846,7 +861,6 @@ void Grid3D::Disk_3D(Parameters p) ////////////////////////////////////////////// Hydrostatic_Ray_Analytical_D3D(rho_halo, r_halo, hdp, dr, nr); chprintf("Hot halo lookup table generated...\n"); - ////////////////////////////////////////////// ////////////////////////////////////////////// // Add a disk component @@ -862,7 +876,7 @@ void Grid3D::Disk_3D(Parameters p) // get the centered x, y, and z positions k = H.n_ghost + H.ny; Get_Position(i, j, k, &x_pos, &y_pos, &z_pos); - + // cylindrical radius r = sqrt(x_pos * x_pos + y_pos * y_pos); @@ -894,8 +908,13 @@ void Grid3D::Disk_3D(Parameters p) // store internal energy in Energy array C.Energy[id] = P / (gama - 1.0); - #ifdef BASIC_SCALAR - C.scalar[id] = 1.0 * C.density[id]; + #ifdef SCALAR + #ifdef BASIC_SCALAR + C.basic_scalar[id] = 1.0 * C.density[id]; + #endif + #ifdef DUST + C.dust_density[id] = C.density[id] * 1e-2; + #endif #endif } } @@ -1050,10 +1069,12 @@ void Grid3D::Disk_3D(Parameters p) C.Energy[id] += P / (gama - 1.0); // add a passive scalar +#ifdef SCALAR #ifdef BASIC_SCALAR - c = fmax(C.scalar[id] / C.density[id], 0.1); - C.scalar[id] = c * C.density[id]; + c = fmax(C.basic_scalar[id] / C.density[id], 0.1); + C.basic_scalar[id] = c * C.density[id]; #endif +#endif } } } diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index 3743dd21a..f4c8e1b7a 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -445,12 +445,12 @@ void Particles3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct Para Real vy_max_g = vy_max; Real vz_max_g = vz_max; - Real px_min_g = px_min; - Real py_min_g = py_min; - Real pz_min_g = pz_min; - Real vx_min_g = vx_min; - Real vy_min_g = vy_min; - Real vz_min_g = vz_min; + Real px_min_g = px_min; + Real py_min_g = py_min; + Real pz_min_g = pz_min; + Real vx_min_g = vx_min; + Real vy_min_g = vy_min; + Real vz_min_g = vz_min; #endif // MPI_CHOLLA // Print initial Statistics @@ -563,7 +563,7 @@ void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) #ifdef MPI_CHOLLA N_particles_total = ReducePartIntSum(Particles.n_local); #else - N_particles_total = Particles.n_local; + N_particles_total = Particles.n_local; #endif // Print the total particles when saving the particles data diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp index 6417e4136..87a2be8e5 100644 --- a/src/particles/particles_3D.cpp +++ b/src/particles/particles_3D.cpp @@ -157,12 +157,12 @@ void Particles3D::Initialize(struct Parameters *P, 
Grav3D &Grav, Real xbound, Re G.boundary_type_z0 = P->zlg_bcnd; G.boundary_type_z1 = P->zug_bcnd; #else - G.boundary_type_x0 = P->xl_bcnd; - G.boundary_type_x1 = P->xu_bcnd; - G.boundary_type_y0 = P->yl_bcnd; - G.boundary_type_y1 = P->yu_bcnd; - G.boundary_type_z0 = P->zl_bcnd; - G.boundary_type_z1 = P->zu_bcnd; + G.boundary_type_x0 = P->xl_bcnd; + G.boundary_type_x1 = P->xu_bcnd; + G.boundary_type_y0 = P->yl_bcnd; + G.boundary_type_y1 = P->yu_bcnd; + G.boundary_type_z0 = P->zl_bcnd; + G.boundary_type_z1 = P->zu_bcnd; #endif #ifdef PARTICLES_GPU @@ -211,7 +211,7 @@ void Particles3D::Initialize(struct Parameters *P, Grav3D &Grav, Real xbound, Re #ifdef MPI_CHOLLA n_total_initial = ReducePartIntSum(n_local); #else - n_total_initial = n_local; + n_total_initial = n_local; #endif chprintf("Particles Initialized: \n n_local: %lu \n", n_local); diff --git a/src/particles/particles_boundaries_cpu.cpp b/src/particles/particles_boundaries_cpu.cpp index 27470befe..772153534 100644 --- a/src/particles/particles_boundaries_cpu.cpp +++ b/src/particles/particles_boundaries_cpu.cpp @@ -433,13 +433,13 @@ void Particles3D::Unload_Particles_from_Buffer_CPU(int direction, int side, Real offset_extra += 1; pId = recv_buffer[offset_extra]; #else - pId = 0; + pId = 0; #endif #ifdef PARTICLE_AGE offset_extra += 1; pAge = recv_buffer[offset_extra]; #else - pAge = 0.0; + pAge = 0.0; #endif offset_buff += N_DATA_PER_PARTICLE_TRANSFER; diff --git a/src/reconstruction/plmp_cuda.cu b/src/reconstruction/plmp_cuda.cu index e8cfa0d09..4e7417268 100644 --- a/src/reconstruction/plmp_cuda.cu +++ b/src/reconstruction/plmp_cuda.cu @@ -119,7 +119,7 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef SCALAR diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index 9e9b11140..c1319ea58 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -139,7 +139,7 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) #ifdef MHD size_t const n_fields = 8; #else // not MHD - size_t const n_fields = 5; + size_t const n_fields = 5; #endif // MHD // Setup host grid. 
Fill host grid with random values and randomly assign maximum value diff --git a/src/reconstruction/ppmp_cuda.cu b/src/reconstruction/ppmp_cuda.cu index 2038f215a..be125cd0c 100644 --- a/src/reconstruction/ppmp_cuda.cu +++ b/src/reconstruction/ppmp_cuda.cu @@ -166,7 +166,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou dge = dev_conserved[(n_fields - 1) * n_cells + id]; p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); #else - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); #endif // PRESSURE_DE p_i = fmax(p_i, (Real)TINY_NUMBER); #ifdef DE diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu index 6c4574951..3c749b23a 100644 --- a/src/reconstruction/reconstruction_tests.cu +++ b/src/reconstruction/reconstruction_tests.cu @@ -575,9 +575,13 @@ TEST(tALLReconstructionWriteData, CorrectInputExpectCorrectOutput) { // Set up test and mock up grid #ifdef MHD - reconstruction::Primitive interface{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive interface { + 1, 2, 3, 4, 5, 6, 7, 8 + }; #else // MHD - reconstruction::Primitive interface{6, 7, 8, 9, 10}; + reconstruction::Primitive interface { + 6, 7, 8, 9, 10 + }; #endif // MHD size_t const nx = 3, ny = 3, nz = 3; size_t const n_cells = nx * ny * nz; diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index e40f1353d..7f21b0875 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -56,8 +56,8 @@ TEST_P(tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOu double const maxAllowedL1Error = 7.0E-3; double const maxAllowedError = 4.6E-2; #else - double const maxAllowedL1Error = 9.4E-5; - double const maxAllowedError = 6.4E-4; + double const maxAllowedL1Error = 9.4E-5; + double const maxAllowedError = 6.4E-4; #endif // MHD sodTest.numMpiRanks = GetParam(); diff --git a/src/utils/reduction_utilities.cu b/src/utils/reduction_utilities.cu index 6434f560b..349da4ee6 100644 --- a/src/utils/reduction_utilities.cu +++ b/src/utils/reduction_utilities.cu @@ -38,4 +38,18 @@ __global__ void kernelReduceMax(Real* in, Real* out, size_t N) gridReduceMax(maxVal, out); } // ===================================================================== + +__global__ void Kernel_Reduce_Add(Real* in, Real* out, size_t N) +{ + Real sum_stride = 0; + + // Each thread strides through the grid to perform a partial reduction + for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += blockDim.x * gridDim.x) { + sum_stride += in[i]; + } + __syncthreads(); + + // Perform grid-wide reduction + Grid_Reduce_Add(sum_stride, out); +} } // namespace reduction_utilities diff --git a/src/utils/reduction_utilities.h b/src/utils/reduction_utilities.h index 99191d8c5..982d8df9c 100644 --- a/src/utils/reduction_utilities.h +++ b/src/utils/reduction_utilities.h @@ -43,6 +43,14 @@ __inline__ __device__ Real warpReduceMax(Real val) } // ===================================================================== +__inline__ __device__ Real Warp_Reduce_Add(Real val) +{ + for (int offset = warpSize / 2; offset > 0; offset /= 2) { + val += __shfl_down(val, offset); + } + return val; +} + // ===================================================================== /*! 
* \brief Perform a reduction within the block to find the maximum value @@ -80,6 +88,33 @@ __inline__ __device__ Real blockReduceMax(Real val) } // ===================================================================== +__inline__ __device__ Real Block_Reduce_Add(Real val) +{ + // Shared memory for storing the results of each warp-wise partial + // reduction + __shared__ Real shared[::maxWarpsPerBlock]; + + int lane = threadIdx.x % warpSize; // thread ID within the warp, + int warpId = threadIdx.x / warpSize; // ID of the warp itself + + val = Warp_Reduce_Add(val); // Each warp performs partial reduction + + if (lane == 0) { + shared[warpId] = val; + } // Write reduced value to shared memory + + __syncthreads(); // Wait for all partial reductions + + // read from shared memory only if that warp existed + val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0; + + if (warpId == 0) { + val = Warp_Reduce_Add(val); + } // Final reduce within first warp + + return val; +} + #ifndef O_HIP // ===================================================================== // This section handles the atomics. It is complicated because CUDA @@ -235,6 +270,28 @@ inline __device__ double atomicMinBits(double* address, double val) } // ===================================================================== +// ===================================================================== +inline __device__ float Atomic_Add_Bits(float* address, float val) +{ +#ifdef O_HIP + return atomicAdd(address, val); +#else // O_HIP + return atomicAdd(address, val); +#endif +} +// ===================================================================== + +// ===================================================================== +inline __device__ double Atomic_Add_Bits(double* address, double val) +{ +#ifdef O_HIP + return atomicAdd(address, val); +#else // O_HIP + return atomicAdd(address, val); +#endif +} +// ===================================================================== + // ===================================================================== /*! * \brief Perform a reduction within the grid to find the maximum value @@ -283,6 +340,19 @@ __inline__ __device__ void gridReduceMax(Real val, Real* out) } // ===================================================================== +// ===================================================================== +__inline__ __device__ void Grid_Reduce_Add(Real val, Real* out) +{ + // Reduce the entire block in parallel + val = Block_Reduce_Add(val); + + // Write block level reduced value to the output scalar atomically + if (threadIdx.x == 0) { + Atomic_Add_Bits(out, val); + } +} +// ===================================================================== + // ===================================================================== /*! * \brief Find the maximum value in the array. 
Make sure to initialize
@@ -302,5 +372,7 @@ __inline__ __device__ void gridReduceMax(Real val, Real* out)
  * \param[in] N The size of the `in` array
  */
 __global__ void kernelReduceMax(Real* in, Real* out, size_t N);
+
+__global__ void Kernel_Reduce_Add(Real* in, Real* out, size_t N);
 // =====================================================================
 }  // namespace reduction_utilities
diff --git a/src/utils/reduction_utilities_tests.cu b/src/utils/reduction_utilities_tests.cu
index 528de22a7..cd1eb5379 100644
--- a/src/utils/reduction_utilities_tests.cu
+++ b/src/utils/reduction_utilities_tests.cu
@@ -35,8 +35,7 @@ TEST(tALLKernelReduceMax, CorrectInputExpectCorrectOutput)
   // ====================================
   size_t const gridSize = 64;
   size_t const size = std::pow(gridSize, 3);
-  ;
-  Real const maxValue = 4;
+  Real const maxValue = 4;
   std::vector<Real> host_grid(size);

   // Fill grid with random values and assign maximum value
@@ -68,3 +67,41 @@
 // =============================================================================
 // Tests for divergence max reduction
 // =============================================================================
+
+TEST(tALLKernelReduceSum, CorrectInputExpectCorrectOutput)
+{
+  // Launch parameters
+  // =================
+  cuda_utilities::AutomaticLaunchParams static const launch_params(reduction_utilities::Kernel_Reduce_Add);
+
+  // Grid Parameters & testing parameters
+  // ====================================
+  size_t const size = std::pow(64, 3);
+  std::vector<Real> host_grid(size);  // host copy of the array to be summed
+  std::vector<Real> host_sum(1);      // holds the result of the sum reduction
+
+  host_sum[0] = 0.0;
+  // Fill grid with ones
+  for (Real& host_data : host_grid) {
+    host_data = 1;
+  }
+  // Allocating and copying to device
+  // ================================
+  cuda_utilities::DeviceVector<Real> dev_grid(host_grid.size());
+  dev_grid.cpyHostToDevice(host_grid);
+
+  cuda_utilities::DeviceVector<Real> static dev_sum(1);
+  dev_sum.assign(0, 0);
+
+  // Do the reduction
+  // ================
+  // .data() passes a raw device pointer to the kernel
+  hipLaunchKernelGGL(reduction_utilities::Kernel_Reduce_Add, launch_params.numBlocks, launch_params.threadsPerBlock, 0,
+                     0, dev_grid.data(), dev_sum.data(), host_grid.size());
+  cudaDeviceSynchronize();
+
+  dev_sum.cpyDeviceToHost(host_sum);
+
+  // Perform comparison
+  testing_utilities::Check_Results(size, host_sum[0], "sum found");
+}
\ No newline at end of file
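A note on the two Atomic_Add_Bits overloads above: both preprocessor branches are identical because, unlike the min/max reductions, addition needs no bit-reinterpretation trick. atomicAdd is provided natively for float on all supported hardware, and for double by the HIP API and by NVIDIA devices of compute capability 6.0 and newer. If support for older (pre-Pascal) NVIDIA GPUs were ever required, the standard compare-and-swap fallback from the CUDA C Programming Guide could be substituted. The sketch below is illustrative only; the name atomicAdd_CAS is hypothetical and not part of this patch:

  // Sketch: software double-precision atomic add for devices without native
  // support (compute capability < 6.0), adapted from the CUDA C Programming
  // Guide. Not part of this patch.
  __device__ double atomicAdd_CAS(double *address, double val)
  {
    unsigned long long int *address_as_ull = (unsigned long long int *)address;
    unsigned long long int old = *address_as_ull, assumed;
    do {
      assumed = old;
      // Add in double precision, then attempt to swap the new bit pattern in;
      // loop again if another thread updated the value first.
      old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
    } while (assumed != old);
    return __longlong_as_double(old);
  }

One caveat worth keeping in mind for tALLKernelReduceSum: atomic floating-point addition makes the summation order nondeterministic across blocks, so general inputs are reproducible only to rounding error. The all-ones test grid sidesteps this, since every partial sum up to 64^3 = 262144 is exactly representable in double precision, which is why comparing against the exact count is safe here.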