diff --git a/builds/make.type.rot_proj b/builds/make.type.rot_proj new file mode 100644 index 000000000..e6faa7514 --- /dev/null +++ b/builds/make.type.rot_proj @@ -0,0 +1,31 @@ +#-- Default hydro only build with rotated projection + +DFLAGS += -DCUDA +DFLAGS += -DMPI_CHOLLA +DFLAGS += -DPRECISION=2 +DFLAGS += -DPPMC +DFLAGS += -DHLLC + +# Integrator +DFLAGS += -DSIMPLE +#DFLAGS += -DVL + +# Apply a density and temperature floor +DFLAGS += -DDENSITY_FLOOR +DFLAGS += -DTEMPERATURE_FLOOR + +# Solve the Gas Internal Energy usisng a Dual Energy Formalism +#DFLAGS += -DDE + +# Apply cooling on the GPU from precomputed tables +#DFLAGS += -DCOOLING_GPU + +# Measure the Timing of the different stages +#DFLAGS += -DCPU_TIME + +# Select output format +# Can also add -DSLICES and -DPROJECTIONS +OUTPUT ?= -DOUTPUT -DHDF5 +DFLAGS += $(OUTPUT) + +DFLAGS += -DROTATED_PROJECTION \ No newline at end of file diff --git a/builds/make.type.static_grav b/builds/make.type.static_grav new file mode 100644 index 000000000..ffa15c4ee --- /dev/null +++ b/builds/make.type.static_grav @@ -0,0 +1,32 @@ +#-- Default hydro only build with static_grav + +DFLAGS += -DCUDA +DFLAGS += -DMPI_CHOLLA +DFLAGS += -DPRECISION=2 +DFLAGS += -DPPMC +DFLAGS += -DHLLC + +# Integrator +DFLAGS += -DSIMPLE +#DFLAGS += -DVL + +# Apply a density and temperature floor +DFLAGS += -DDENSITY_FLOOR +DFLAGS += -DTEMPERATURE_FLOOR + +# Solve the Gas Internal Energy usisng a Dual Energy Formalism +#DFLAGS += -DDE + +DFLAGS += -DSTATIC_GRAV + +# Apply cooling on the GPU from precomputed tables +#DFLAGS += -DCOOLING_GPU + +# Measure the Timing of the different stages +#DFLAGS += -DCPU_TIME + +# Select output format +# Can also add -DSLICES and -DPROJECTIONS +OUTPUT ?= -DOUTPUT -DHDF5 +DFLAGS += $(OUTPUT) + diff --git a/examples/3D/float32_sound_wave.txt b/examples/3D/float32_sound_wave.txt new file mode 100644 index 000000000..68b3d4952 --- /dev/null +++ b/examples/3D/float32_sound_wave.txt @@ -0,0 +1,59 @@ +# +# Parameter File for sound wave test with float32 output +# + +################################################ +# number of grid cells in the x dimension +nx=256 +# number of grid cells in the y dimension +ny=256 +# number of grid cells in the z dimension +nz=256 +# final output time +tout=0.05 +# time interval for output +outstep=0.05 +# name of initial conditions +init=Sound_Wave +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=4.0 +ylen=4.0 +zlen=4.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +# Enable float32 output +# Enable float32 density field +n_out_float32=1 +out_float32_density=1 + +# Uncomment this to enable momentum_x +# out_float32_momentum_x=1 + +################################################# +# Parameters for linear wave problems +# initial density +rho=1.0 +# velocity in the x direction +vx=0 +# velocity in the y direction +vy=0 +# velocity in the z direction +vz=0 +# initial pressure +P=0.6 +# amplitude of perturbing oscillations +A=1e-4 +# value of gamma +gamma=1.666666666666667 + diff --git a/src/global/global.cpp b/src/global/global.cpp index a99c1360e..1f6a5cbfa 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -223,6 +223,30 @@ void parse_param(char *name,char *value, struct parameters *parms){ parms->n_rotated_projection = atoi(value); else if (strcmp(name, "n_slice")==0) parms->n_slice = atoi(value); + else if (strcmp(name, "n_out_float32")==0) + parms->n_out_float32 = atoi(value); + else if (strcmp(name, "out_float32_density")==0) + parms->out_float32_density = atoi(value); + else if (strcmp(name, "out_float32_momentum_x")==0) + parms->out_float32_momentum_x = atoi(value); + else if (strcmp(name, "out_float32_momentum_y")==0) + parms->out_float32_momentum_y = atoi(value); + else if (strcmp(name, "out_float32_momentum_z")==0) + parms->out_float32_momentum_z = atoi(value); + else if (strcmp(name, "out_float32_Energy")==0) + parms->out_float32_Energy = atoi(value); +#ifdef DE + else if (strcmp(name, "out_float32_GasEnergy")==0) + parms->out_float32_GasEnergy = atoi(value); +#endif // DE +#ifdef MHD + else if (strcmp(name, "out_float32_magnetic_x")==0) + parms->out_float32_magnetic_x = atoi(value); + else if (strcmp(name, "out_float32_magnetic_y")==0) + parms->out_float32_magnetic_y = atoi(value); + else if (strcmp(name, "out_float32_magnetic_z")==0) + parms->out_float32_magnetic_z = atoi(value); +#endif // MHD else if (strcmp(name, "xmin")==0) parms->xmin = atof(value); else if (strcmp(name, "ymin")==0) @@ -366,7 +390,7 @@ void parse_param(char *name,char *value, struct parameters *parms){ #ifdef CHEMISTRY_GPU else if (strcmp(name, "UVB_rates_file")==0) strncpy (parms->UVB_rates_file, value, MAXLEN); -#endif +#endif #ifdef COOLING_GRACKLE else if (strcmp(name, "UVB_rates_file")==0) strncpy (parms->UVB_rates_file, value, MAXLEN); diff --git a/src/global/global.h b/src/global/global.h index 560ddb5f4..4e6d8eeb9 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -204,6 +204,20 @@ struct parameters int n_projection; int n_rotated_projection; int n_slice; + int n_out_float32=0; + int out_float32_density=0; + int out_float32_momentum_x=0; + int out_float32_momentum_y=0; + int out_float32_momentum_z=0; + int out_float32_Energy=0; +#ifdef DE + int out_float32_GasEnergy=0; +#endif +#ifdef MHD + int out_float32_magnetic_x=0; + int out_float32_magnetic_y=0; + int out_float32_magnetic_z=0; +#endif Real xmin; Real ymin; Real zmin; diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h index 7a5beca55..35c0c355f 100644 --- a/src/global/global_cuda.h +++ b/src/global/global_cuda.h @@ -92,17 +92,6 @@ inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true) } } - - -/*! \fn Real minof3(Real a, Real b, Real c) - * \brief Returns the minimum of three floating point numbers. */ -__device__ inline Real minof3(Real a, Real b, Real c) -{ - return fmin(a, fmin(b,c)); -} - - - /*! \fn int sgn_CUDA * \brief Mathematical sign function. Returns sign of x. */ __device__ inline int sgn_CUDA(Real x) @@ -111,11 +100,6 @@ __device__ inline int sgn_CUDA(Real x) else return 1; } - -__global__ void test_function(); - - - #endif //GLOBAL_CUDA_H #endif //CUDA diff --git a/src/gravity/gravity_cuda.h b/src/gravity/gravity_cuda.h deleted file mode 100644 index b4d885262..000000000 --- a/src/gravity/gravity_cuda.h +++ /dev/null @@ -1,18 +0,0 @@ -/*! \file gravity_cuda.h - * \brief Declarations of functions used to calculate gravitational accelerations. */ - -#ifdef CUDA -#ifndef GRAVITY_CUDA_H -#define GRAVITY_CUDA_H - -#include "../global/global.h" - - -__device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, Real *gx); - -__device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real *gx, Real *gy); - -__device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz); - -#endif // GRAVITY_CUDA_H -#endif // CUDA diff --git a/src/gravity/gravity_cuda.cu b/src/gravity/static_grav.h similarity index 90% rename from src/gravity/gravity_cuda.cu rename to src/gravity/static_grav.h index 0137c44f1..3ddbb86be 100644 --- a/src/gravity/gravity_cuda.cu +++ b/src/gravity/static_grav.h @@ -4,17 +4,16 @@ functions in hydro_cuda.cu. */ #ifdef CUDA +#pragma once + #include -#include -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../gravity/gravity_cuda.h" +#include // provides sqrt log cos sin atan etc. +#include "../global/global.h" // provides GN etc. // Work around lack of pow(Real,int) in Hip Clang for Rocm 3.5 static inline __device__ Real pow2(const Real x) { return x*x; } -__device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, Real *gx) +inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, Real *gx) { Real x_pos, r_disk, r_halo; x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; @@ -52,7 +51,7 @@ __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, } -__device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real *gx, Real *gy) +inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real *gx, Real *gy) { Real x_pos, y_pos, r, phi; // use the subgrid offset and global boundaries to calculate absolute positions on the grid @@ -108,7 +107,7 @@ __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, R } -__device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz) +inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz) { Real x_pos, y_pos, z_pos, r_disk, r_halo; // use the subgrid offset and global boundaries to calculate absolute positions on the grid diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index bf385d25f..ee033e334 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -10,7 +10,7 @@ #include "../global/global.h" #include "../global/global_cuda.h" #include "../hydro/hydro_cuda.h" -#include "../gravity/gravity_cuda.h" +#include "../gravity/static_grav.h" #include "../utils/hydro_utilities.h" #include "../utils/cuda_utilities.h" #include "../utils/reduction_utilities.h" diff --git a/src/io/io.cpp b/src/io/io.cpp index 46ee71916..be0a1b9fa 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -16,6 +16,7 @@ #include "../mpi/mpi_routines.h" #endif //MPI_CHOLLA #include "../utils/error_handling.h" +#include "../utils/DeviceVector.h" #ifdef COSMOLOGY #include "../cosmology/cosmology.h" @@ -72,7 +73,7 @@ void Write_Message_To_Log_File( const char* message ){ out_file.close(); } -/* Write the initial conditions */ +/* Write Cholla Output Data */ void WriteData(Grid3D &G, struct parameters P, int nfile) { @@ -109,6 +110,11 @@ void WriteData(Grid3D &G, struct parameters P, int nfile) if (nfile % P.n_hydro == 0) OutputData(G,P,nfile); #endif + // This function does other checks to make sure it is valid (3D only) + #ifdef HDF5 + if (P.n_out_float32 && nfile % P.n_out_float32 == 0) OutputFloat32(G,P,nfile); + #endif + #ifdef PROJECTION if (nfile % P.n_projection == 0) OutputProjectedData(G,P,nfile); #endif /*PROJECTION*/ @@ -227,6 +233,92 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) #endif } +void OutputFloat32(Grid3D &G, struct parameters P, int nfile) +{ + + Header H = G.H; + // Do nothing in 1-D and 2-D case + if (H.ny_real == 1) { + return; + } + if (H.nz_real == 1) { + return; + } + // Do nothing if nfile is not multiple of n_out_float32 + if (nfile % P.n_out_float32 != 0) { + return; + } + + char filename[MAXLEN]; + char timestep[20]; + + // create the filename + sprintf(timestep, "%d", nfile); + strcpy(filename, P.outdir); + strcat(filename, timestep); + strcat(filename, ".float32.h5"); + #ifdef MPI_CHOLLA + sprintf(filename,"%s.%d",filename,procID); + #endif + + // create hdf5 file + hid_t file_id; /* file identifier */ + herr_t status; + + // Create a new file using default properties. + file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + + // Write the header (file attributes) + G.Write_Header_HDF5(file_id); + + // write the conserved variables to the output file + + // 3-D Case + if (H.nx>1 && H.ny>1 && H.nz>1) { + int nx_dset = H.nx_real; + int ny_dset = H.ny_real; + int nz_dset = H.nz_real; + size_t buffer_size; + // Need a larger device buffer for MHD. In the future, if other fields need a larger device buffer, choose the maximum of the sizes. + // If the buffer is too large, it does not cause bugs (Oct 6 2022) +#ifdef MHD + buffer_size = (nx_dset+1)*(ny_dset+1)*(nz_dset+1); +#else + buffer_size = nx_dset*ny_dset*nz_dset; +#endif + + // Using static DeviceVector here automatically allocates the buffer the first time it is needed + // It persists until program exit, and then calls Free upon destruction + cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; + float* device_dataset_buffer = device_dataset_vector.data(); + float* dataset_buffer = (float *) malloc(buffer_size*sizeof(float)); + + if (P.out_float32_density > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_density, "/density"); + if (P.out_float32_momentum_x > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_x, "/momentum_x"); + if (P.out_float32_momentum_y > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_y, "/momentum_y"); + if (P.out_float32_momentum_z > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_z, "/momentum_z"); + if (P.out_float32_Energy > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_Energy, "/Energy"); +#ifdef DE + if (P.out_float32_GasEnergy > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_GasEnergy, "/GasEnergy"); +#endif //DE +#ifdef MHD + if (P.out_float32_magnetic_x > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_x, "/magnetic_x"); + if (P.out_float32_magnetic_y > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_y, "/magnetic_y"); + if (P.out_float32_magnetic_z > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_z, "/magnetic_z"); +#endif + + + free(dataset_buffer); + + if (status < 0) {printf("File write failed.\n"); exit(-1); } + } // 3-D case + + // close the file + status = H5Fclose(file_id); + + +} + /* Output a projection of the grid data to file. */ void OutputProjectedData(Grid3D &G, struct parameters P, int nfile) @@ -1093,6 +1185,82 @@ void Grid3D::Write_Grid_Binary(FILE *fp) #ifdef HDF5 + +// Helper function which uses the correct HDF5 arguments based on the type of dataset_buffer to avoid writing garbage +herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double* dataset_buffer, const char* name) +{ + // Create a dataset id for density + hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + // Write the density array to file + herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + // Free the dataset id + status = H5Dclose(dataset_id); + return status; +} + +herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float* dataset_buffer, const char* name) +{ + // Create a dataset id for density + hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + // Write the density array to file + herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); + // Free the dataset id + status = H5Dclose(dataset_id); + return status; +} + + +void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real* dataset_buffer, Real* source, const char* name) +{ + // Copy non-ghost source to Buffer + int id = H.n_ghost; + memcpy(&dataset_buffer[0], &(source[id]), H.nx_real*sizeof(Real)); + // Buffer write to HDF5 Dataset + herr_t status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); +} + +void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, float* dataset_buffer, double* source, const char* name) +{ + // Copy non-ghost source to Buffer with conversion from double to float + int i; + for (i=0; i 1 this substitution can be attempted. + // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset); + id = H.n_ghost; memcpy(&dataset_buffer[0], &(C.scalar[id+s*H.n_cells]), H.nx_real*sizeof(Real)); - - // Create a dataset id for the scalar - dataset_id = H5Dcreate(file_id, dataset, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the scalar array to file // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); + // dataset here is just a name + status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); } + #endif //SCALAR #ifdef DE - // Copy the internal energy array to the memory buffer - id = H.n_ghost; - memcpy(&dataset_buffer[0], &(C.GasEnergy[id]), H.nx_real*sizeof(Real)); - - // Create a dataset id for internal energy - dataset_id = H5Dcreate(file_id, "/GasEnergy", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the internal energy array to file // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); + Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.GasEnergy, "/GasEnergy"); #endif //DE // Free the dataspace id @@ -1263,85 +1373,11 @@ void Grid3D::Write_Grid_HDF5(hid_t file_id) dims[1] = ny_dset; dataspace_id = H5Screate_simple(2, dims, NULL); - // Copy the density array to the memory buffer - for (j=0; j 1 this substitution can be attempted. + // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset); + // Copy the scalar array to the memory buffer for (j=0; j static device_dataset_vector{buffer_size}; + double* device_dataset_buffer = device_dataset_vector.data(); + dataset_buffer = (Real*) malloc(buffer_size*sizeof(Real)); + //CudaSafeCall(cudaMalloc(&device_dataset_buffer,nx_dset*ny_dset*nz_dset*sizeof(double))); - // Create the data space for the datasets + + // Create the data space for the datasets (note: WriteHDF5Field3D creates its own dataspace, does not use the shared one) dims[0] = nx_dset; dims[1] = ny_dset; dims[2] = nz_dset; dataspace_id = H5Screate_simple(3, dims, NULL); - - // Copy the density array to the memory buffer - for (k=0; k device_buffer, then copy device_buffer -> buffer, then write HDF5 field +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* source, const char* name); +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* source, const char* name); +#endif diff --git a/src/io/io_gpu.cu b/src/io/io_gpu.cu new file mode 100644 index 000000000..c6cab6e8a --- /dev/null +++ b/src/io/io_gpu.cu @@ -0,0 +1,110 @@ +// Require HDF5 +#ifdef HDF5 + +#include + +#include "../grid/grid3D.h" + +#include "../io/io.h" // To provide io.h with OutputViz3D + +// Note that the HDF5 file and buffer will have size nx_real * ny_real * nz_real whereas the conserved variables have size nx,ny,nz +// Note that magnetic fields add +1 to nx_real ny_real nz_real since an extra face needs to be output, but also has the same size nx ny nz +// For the magnetic field case, a different nx_real+1 ny_real+1 nz_real+1 n_ghost-1 are provided as inputs. + +// Copy Real (non-ghost) cells from source to a double destination (for writing HDF5 in double precision) +__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, double* destination, Real* source) +{ + + int dest_id,source_id,id,i,j,k; + id = threadIdx.x + blockIdx.x * blockDim.x; + + k = id/(nx_real*ny_real); + j = (id - k*nx_real*ny_real)/nx_real; + i = id - j*nx_real - k*nx_real*ny_real; + + if (k >= nz_real) { + return; + } + + // This converts into HDF5 indexing that plays well with Python + dest_id = k + j*nz_real + i*ny_real*nz_real; + source_id = (i+n_ghost) + (j+n_ghost)*nx + (k+n_ghost)*nx*ny; + + destination[dest_id] = (double) source[source_id]; +} + +// Copy Real (non-ghost) cells from source to a float destination (for writing HDF5 in float precision) +__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, float* destination, Real* source) +{ + + int dest_id,source_id,id,i,j,k; + id = threadIdx.x + blockIdx.x * blockDim.x; + + k = id/(nx_real*ny_real); + j = (id - k*nx_real*ny_real)/nx_real; + i = id - j*nx_real - k*nx_real*ny_real; + + if (k >= nz_real) { + return; + } + + // This converts into HDF5 indexing that plays well with Python + dest_id = k + j*nz_real + i*ny_real*nz_real; + source_id = (i+n_ghost) + (j+n_ghost)*nx + (k+n_ghost)*nx*ny; + + destination[dest_id] = (float) source[source_id]; +} + +// When buffer is double, automatically use the double version of everything using function overloading +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* device_source, const char* name) +{ + herr_t status; + hsize_t dims[3]; + dims[0] = nx_real; + dims[1] = ny_real; + dims[2] = nz_real; + hid_t dataspace_id = H5Screate_simple(3, dims, NULL); + + //Copy non-ghost parts of source to buffer + dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source); + CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(double), cudaMemcpyDeviceToHost)); + + // Write Buffer to HDF5 + status = HDF5_Dataset(file_id, dataspace_id, buffer, name); + + status = H5Sclose(dataspace_id); + if (status < 0) {printf("File write failed.\n");} + + +} + + +// When buffer is float, automatically use the float version of everything using function overloading +void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* device_source, const char* name) +{ + + herr_t status; + hsize_t dims[3]; + dims[0] = nx_real; + dims[1] = ny_real; + dims[2] = nz_real; + hid_t dataspace_id = H5Screate_simple(3, dims, NULL); + + //Copy non-ghost parts of source to buffer + dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source); + CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(float), cudaMemcpyDeviceToHost)); + + // Write Buffer to HDF5 + status = HDF5_Dataset(file_id, dataspace_id, buffer, name); + + status = H5Sclose(dataspace_id); + if (status < 0) {printf("File write failed.\n");} + +} + + +#endif //HDF5