Skip to content

Commit

Permalink
Merge branch 'dev' into dual-energy-fix
Browse files Browse the repository at this point in the history
  • Loading branch information
mabruzzo committed Dec 20, 2023
2 parents fc8f8d7 + b30468a commit 0c6efd8
Show file tree
Hide file tree
Showing 69 changed files with 679 additions and 637 deletions.
4 changes: 2 additions & 2 deletions builds/make.inc.template
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

#To use GPUs, CUDA must be turned on here
#Optional error checking can also be enabled
DFLAGS += -DCUDA #-DCUDA_ERROR_CHECK
DFLAGS += -DCUDA

#To use MPI, DFLAGS must include -DMPI_CHOLLA
DFLAGS += -DMPI_CHOLLA
Expand Down Expand Up @@ -65,7 +65,7 @@ DFLAGS += -DTEMPERATURE_FLOOR
#DFLAGS += -DDYNAMIC_GPU_ALLOC

# Set the cooling function
#DFLAGS += -DCOOLING_GPU
#DFLAGS += -DCOOLING_GPU
#DFLAGS += -DCLOUDY_COOL

# Use Tiled Iitial Conditions for Scaling Tets
Expand Down
6 changes: 2 additions & 4 deletions builds/make.type.disk
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ DFLAGS += -DGRAVITY_5_POINTS_GRADIENT

#DFLAGS += -DOUTPUT_ALWAYS
DFLAGS += -DCUDA
DFLAGS += -DMPI_CHOLLA
DFLAGS += -DMPI_CHOLLA
DFLAGS += -DPRECISION=2
DFLAGS += -DPPMC
DFLAGS += -DHLLC
Expand All @@ -43,9 +43,7 @@ DFLAGS += -DHYDRO_GPU
OUTPUT ?= -DOUTPUT -DHDF5 -DSLICES -DPROJECTION
DFLAGS += $(OUTPUT)

DFLAGS += $(MPI_GPU)
DFLAGS += $(MPI_GPU)

DFLAGS += -DPARALLEL_OMP
DFLAGS += -DN_OMP_THREADS=$(OMP_NUM_THREADS)

#DFLAGS += -DCUDA_ERROR_CHECK
6 changes: 3 additions & 3 deletions builds/make.type.mhd
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@ DFLAGS += $(OUTPUT)
#This is set in the system make.host file
DFLAGS += $(MPI_GPU)

# Disable CUDA error checking
# DFLAGS += -DDISABLE_GPU_ERROR_CHECKING

# NOTE: The following macros are to help facilitate debugging and should not be
# used on scientific runs

# Do CUDA error checking
# DFLAGS += -DCUDA_ERROR_CHECK

# Limit the number of steps to evolve.
# DFLAGS += -DN_STEPS_LIMIT=1000

Expand Down
23 changes: 0 additions & 23 deletions clang-tidy-runner.sh

This file was deleted.

12 changes: 6 additions & 6 deletions src/analysis/feedback_analysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ FeedbackAnalysis::FeedbackAnalysis(Grid3D& G)
h_circ_vel_y = (Real*)malloc(G.H.n_cells * sizeof(Real));

#ifdef PARTICLES_GPU
CHECK(cudaMalloc((void**)&d_circ_vel_x, G.H.n_cells * sizeof(Real)));
CHECK(cudaMalloc((void**)&d_circ_vel_y, G.H.n_cells * sizeof(Real)));
GPU_Error_Check(cudaMalloc((void**)&d_circ_vel_x, G.H.n_cells * sizeof(Real)));
GPU_Error_Check(cudaMalloc((void**)&d_circ_vel_y, G.H.n_cells * sizeof(Real)));
#endif

// setup the (constant) circular speed arrays
Expand All @@ -40,8 +40,8 @@ FeedbackAnalysis::FeedbackAnalysis(Grid3D& G)
}

#ifdef PARTICLES_GPU
CHECK(cudaMemcpy(d_circ_vel_x, h_circ_vel_x, G.H.n_cells * sizeof(Real), cudaMemcpyHostToDevice));
CHECK(cudaMemcpy(d_circ_vel_y, h_circ_vel_y, G.H.n_cells * sizeof(Real), cudaMemcpyHostToDevice));
GPU_Error_Check(cudaMemcpy(d_circ_vel_x, h_circ_vel_x, G.H.n_cells * sizeof(Real), cudaMemcpyHostToDevice));
GPU_Error_Check(cudaMemcpy(d_circ_vel_y, h_circ_vel_y, G.H.n_cells * sizeof(Real), cudaMemcpyHostToDevice));
#endif
}

Expand All @@ -50,8 +50,8 @@ FeedbackAnalysis::~FeedbackAnalysis()
free(h_circ_vel_x);
free(h_circ_vel_y);
#ifdef PARTICLES_GPU
CHECK(cudaFree(d_circ_vel_x));
CHECK(cudaFree(d_circ_vel_y));
GPU_Error_Check(cudaFree(d_circ_vel_x));
GPU_Error_Check(cudaFree(d_circ_vel_y));
#endif
}

Expand Down
12 changes: 6 additions & 6 deletions src/analysis/feedback_analysis_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,8 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion_GPU(Grid3D &G)
Real *d_partial_vel;
Real *h_partial_mass = (Real *)malloc(ngrid * sizeof(Real));
Real *h_partial_vel = (Real *)malloc(ngrid * sizeof(Real));
CHECK(cudaMalloc((void **)&d_partial_mass, ngrid * sizeof(Real)));
CHECK(cudaMalloc((void **)&d_partial_vel, ngrid * sizeof(Real)));
GPU_Error_Check(cudaMalloc((void **)&d_partial_mass, ngrid * sizeof(Real)));
GPU_Error_Check(cudaMalloc((void **)&d_partial_vel, ngrid * sizeof(Real)));

Real total_mass = 0;
Real total_vel = 0;
Expand Down Expand Up @@ -177,8 +177,8 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion_GPU(Grid3D &G)

// cudaDeviceSynchronize();

CHECK(cudaMemcpy(h_partial_mass, d_partial_mass, ngrid * sizeof(Real), cudaMemcpyDeviceToHost));
CHECK(cudaMemcpy(h_partial_vel, d_partial_vel, ngrid * sizeof(Real), cudaMemcpyDeviceToHost));
GPU_Error_Check(cudaMemcpy(h_partial_mass, d_partial_mass, ngrid * sizeof(Real), cudaMemcpyDeviceToHost));
GPU_Error_Check(cudaMemcpy(h_partial_vel, d_partial_vel, ngrid * sizeof(Real), cudaMemcpyDeviceToHost));

#ifdef MPI_CHOLLA
MPI_Allreduce(h_partial_mass, &total_mass, 1, MPI_CHREAL, MPI_SUM, world);
Expand All @@ -195,8 +195,8 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion_GPU(Grid3D &G)
chprintf("feedback: time %f, dt=%f, vrms = %f km/s\n", G.H.t, G.H.dt,
sqrt(total_vel / total_mass) * VELOCITY_UNIT / 1e5);

CHECK(cudaFree(d_partial_vel));
CHECK(cudaFree(d_partial_mass));
GPU_Error_Check(cudaFree(d_partial_vel));
GPU_Error_Check(cudaFree(d_partial_mass));

free(h_partial_mass);
free(h_partial_vel);
Expand Down
23 changes: 9 additions & 14 deletions src/chemistry_gpu/chemistry_functions_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,38 +17,33 @@

void Chem_GPU::Allocate_Array_GPU_float(float **array_dev, int size)
{
cudaMalloc((void **)array_dev, size * sizeof(float));
CudaCheckError();
GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(float)));
}

void Chem_GPU::Copy_Float_Array_to_Device(int size, float *array_h, float *array_d)
{
CudaSafeCall(cudaMemcpy(array_d, array_h, size * sizeof(float), cudaMemcpyHostToDevice));
GPU_Error_Check(cudaMemcpy(array_d, array_h, size * sizeof(float), cudaMemcpyHostToDevice));
cudaDeviceSynchronize();
}

void Chem_GPU::Free_Array_GPU_float(float *array_dev)
{
cudaFree(array_dev);
CudaCheckError();
}
void Chem_GPU::Free_Array_GPU_float(float *array_dev) { GPU_Error_Check(cudaFree(array_dev)); }

void Chem_GPU::Allocate_Array_GPU_Real(Real **array_dev, int size)
{
cudaMalloc((void **)array_dev, size * sizeof(Real));
CudaCheckError();
GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(Real)));
GPU_Error_Check();
}

void Chem_GPU::Copy_Real_Array_to_Device(int size, Real *array_h, Real *array_d)
{
CudaSafeCall(cudaMemcpy(array_d, array_h, size * sizeof(Real), cudaMemcpyHostToDevice));
GPU_Error_Check(cudaMemcpy(array_d, array_h, size * sizeof(Real), cudaMemcpyHostToDevice));
cudaDeviceSynchronize();
}

void Chem_GPU::Free_Array_GPU_Real(Real *array_dev)
{
cudaFree(array_dev);
CudaCheckError();
GPU_Error_Check(cudaFree(array_dev));
GPU_Error_Check();
}

class Thermal_State
Expand Down Expand Up @@ -622,7 +617,7 @@ void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghos
hipLaunchKernelGGL(Update_Chemistry_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields,
dt, Chem_H);

CudaCheckError();
GPU_Error_Check();
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
Expand Down
2 changes: 1 addition & 1 deletion src/cooling/cooling_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, in
dim3 dim1dBlock(TPB, 1, 1);
hipLaunchKernelGGL(cooling_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt,
gama, coolTexObj, heatTexObj);
CudaCheckError();
GPU_Error_Check();
}

/*! \fn void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int
Expand Down
18 changes: 9 additions & 9 deletions src/cooling/load_cloudy_texture.cu
Original file line number Diff line number Diff line change
Expand Up @@ -110,16 +110,16 @@ void Load_Cuda_Textures()

// allocate host arrays to be copied to textures
// these arrays are declared as external pointers in global.h
CudaSafeCall(cudaHostAlloc(&cooling_table, nx * ny * sizeof(float), cudaHostAllocDefault));
CudaSafeCall(cudaHostAlloc(&heating_table, nx * ny * sizeof(float), cudaHostAllocDefault));
GPU_Error_Check(cudaHostAlloc(&cooling_table, nx * ny * sizeof(float), cudaHostAllocDefault));
GPU_Error_Check(cudaHostAlloc(&heating_table, nx * ny * sizeof(float), cudaHostAllocDefault));

// Read cooling tables into the host arrays
Host_Read_Cooling_Tables(cooling_table, heating_table);

// Allocate CUDA arrays in device memory
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
cudaMallocArray(&cuCoolArray, &channelDesc, nx, ny);
cudaMallocArray(&cuHeatArray, &channelDesc, nx, ny);
GPU_Error_Check(cudaMallocArray(&cuCoolArray, &channelDesc, nx, ny));
GPU_Error_Check(cudaMallocArray(&cuHeatArray, &channelDesc, nx, ny));

// Copy the cooling and heating arrays from host to device

Expand Down Expand Up @@ -164,8 +164,8 @@ void Load_Cuda_Textures()
cudaCreateTextureObject(&heatTexObj, &heatResDesc, &texDesc, NULL);

// Free the memory associated with the cooling tables on the host
CudaSafeCall(cudaFreeHost(cooling_table));
CudaSafeCall(cudaFreeHost(heating_table));
GPU_Error_Check(cudaFreeHost(cooling_table));
GPU_Error_Check(cudaFreeHost(heating_table));

// Run Test
// Test_Cloudy_Textures();
Expand Down Expand Up @@ -261,7 +261,7 @@ void Test_Cloudy_Textures()
dim3 dim1dGrid((num_n * num_T + TPB - 1) / TPB, 1, 1);
dim3 dim1dBlock(TPB, 1, 1);
hipLaunchKernelGGL(Test_Cloudy_Textures_Kernel, dim1dGrid, dim1dBlock, 0, 0, num_n, num_T, coolTexObj, heatTexObj);
CHECK(cudaDeviceSynchronize());
GPU_Error_Check(cudaDeviceSynchronize());
printf("Exiting due to Test_Cloudy_Textures() being called \n");
exit(0);
}
Expand All @@ -272,12 +272,12 @@ void Test_Cloudy_Speed()
int num_T = 1 + 80 * 81;
dim3 dim1dGrid((num_n * num_T + TPB - 1) / TPB, 1, 1);
dim3 dim1dBlock(TPB, 1, 1);
CHECK(cudaDeviceSynchronize());
GPU_Error_Check(cudaDeviceSynchronize());
Real time_start = Get_Time();
for (int i = 0; i < 100; i++) {
hipLaunchKernelGGL(Test_Cloudy_Speed_Kernel, dim1dGrid, dim1dBlock, 0, 0, num_n, num_T, coolTexObj, heatTexObj);
}
CHECK(cudaDeviceSynchronize());
GPU_Error_Check(cudaDeviceSynchronize());
Real time_end = Get_Time();
printf(" Cloudy Test Time %9.4f micro-s \n", (time_end - time_start));
printf("Exiting due to Test_Cloudy_Speed() being called \n");
Expand Down
2 changes: 1 addition & 1 deletion src/dust/dust_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n
dim3 dim1dGrid(ngrid, 1, 1);
dim3 dim1dBlock(TPB, 1, 1);
hipLaunchKernelGGL(Dust_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gamma);
CudaCheckError();
GPU_Error_Check();
}

__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma)
Expand Down
20 changes: 20 additions & 0 deletions src/global/global.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,26 @@ int Sgn(Real x)
}
}

// global mpi-related variables (they are declared here because they are initialized even when
// the MPI_CHOLLA variable is not defined)

int procID; /*process rank*/
int nproc; /*number of processes in global comm*/
int root; /*rank of root process*/

/* Used when MPI_CHOLLA is not defined to initialize a subset of the global mpi-related variables
* that still meaningful in non-mpi simulations.
*/
void Init_Global_Parallel_Vars_No_MPI()
{
#ifdef MPI_CHOLLA
CHOLLA_ERROR("This function should not be executed when compiled with MPI");
#endif
procID = 0;
nproc = 1;
root = 0;
}

/*! \fn char Trim(char *s)
* \brief Gets rid of trailing and leading whitespace. */
char *Trim(char *s)
Expand Down
14 changes: 14 additions & 0 deletions src/global/global.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,20 @@ extern double Get_Time(void);
* \brief Mathematical sign function. Returns sign of x. */
extern int Sgn(Real x);

/* Global variables for mpi (but they are also initialized to sensible defaults when not using mpi)
*
* It may make sense to move these back into mpi_routines (but reorganizing the ifdef statements
* would take some work). It may make sense to also put these into their own namespace.
*/
extern int procID; /*process rank*/
extern int nproc; /*number of processes executing simulation*/
extern int root; /*rank of root process*/

/* Used when MPI_CHOLLA is not defined to initialize a subset of the global mpi-related variables
* that still meaningful in non-mpi simulations.
*/
void Init_Global_Parallel_Vars_No_MPI();

struct Parameters {
int nx;
int ny;
Expand Down
Loading

0 comments on commit 0c6efd8

Please sign in to comment.