diff --git a/src/cooling/cooling_cuda.cu b/src/cooling/cooling_cuda.cu index 42e5fe810..b4bb60782 100644 --- a/src/cooling/cooling_cuda.cu +++ b/src/cooling/cooling_cuda.cu @@ -88,16 +88,15 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int d = dev_conserved[id]; E = dev_conserved[4 * n_cells + id]; - // don't apply cooling if this thread crashed if (E < 0.0 || E != E) { - /* Alwin: this is a leftover debug printf from when we were checking cooling for small temperature outputs. Delete at leisure. - kernel_printf("WARNING: bad energy in cooling_cuda E=%e [xid, yid, zid] = [%d, %d, %d] \n", E, xid, yid, zid); + /* Alwin: this is a leftover debug printf from when we were checking cooling for small temperature outputs. Delete + at leisure. kernel_printf("WARNING: bad energy in cooling_cuda E=%e [xid, yid, zid] = [%d, %d, %d] \n", E, xid, + yid, zid); */ return; } - // #ifndef DE vx = dev_conserved[1 * n_cells + id] / d; vy = dev_conserved[2 * n_cells + id] / d; @@ -113,7 +112,7 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int // calculate the number density of the gas (in cgs) n = d * DENSITY_UNIT / (mu * MP); - // calculate the temperature of the gas + // calculate the temperature of the gas #ifdef DE const Real T_init = ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); @@ -138,21 +137,23 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int #endif del_T = cool * dt * TIME_UNIT * (gamma - 1.0) / (n * KB); if (fabs(del_T) > 0.01 * T) { - // Evolve T and dt so T changes by 1% - dt -= dt * 0.01 * T / fabs(del_T); - T -= del_T * 0.01 * T / fabs(del_T); + // Evolve T and dt so T changes by 1% + dt -= dt * 0.01 * T / fabs(del_T); + T -= del_T * 0.01 * T / fabs(del_T); } else { - T -= del_T; - break; + T -= del_T; + break; } } const Real T_final = T; - /* Alwin: this is a leftover debug function from when we were checking cooling for small temperature outputs. Delete at leisure. + /* Alwin: this is a leftover debug function from when we were checking cooling for small temperature outputs. Delete + at leisure. // If the temperature has changed by a lot, whine vehemently if ((T_init / T_final > 1e3) || (T_final / T_init > 1e3)) { - //kernel_printf("Cooling Cuda Large Temperature Change whine: | T_init (K): %e | T_final (K): %e | dt (cholla): %e | n (cgs): %e \n", T_init, T_final, dt_init, n); + //kernel_printf("Cooling Cuda Large Temperature Change whine: | T_init (K): %e | T_final (K): %e | dt (cholla): %e + | n (cgs): %e \n", T_init, T_final, dt_init, n); } */ @@ -168,14 +169,12 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int #ifdef DE dev_conserved[(n_fields - 1) * n_cells + id] = ge; #endif - - - /* Alwin: this is a leftover debug function from when we were checking cooling for small temperature outputs. Delete at leisure. - if (T_final < 1e2) { - kernel_printf("Cooling Cuda Small Temperature Whine: | T_init (K): %e | T_final (K): %e | dt (cholla): %e | n (cgs): %e \n", T_init, T_final, dt_init, n); + + /* Alwin: this is a leftover debug function from when we were checking cooling for small temperature outputs. Delete + at leisure. if (T_final < 1e2) { kernel_printf("Cooling Cuda Small Temperature Whine: | T_init (K): %e | T_final + (K): %e | dt (cholla): %e | n (cgs): %e \n", T_init, T_final, dt_init, n); } */ - } } @@ -338,11 +337,12 @@ __device__ Real CIE_cool(Real n, Real T) tables at z = 0 with solar metallicity and an HM05 UV background. */ __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj) { - Real lambda = 0.0; // log cooling rate, erg s^-1 cm^3 - Real cooling = 0.0; // cooling per unit volume, erg /s / cm^3 - Real heating = 0.0; // cooling per unit volume, erg /s / cm^3 + Real lambda = 0.0; // log cooling rate, erg s^-1 cm^3 + Real cooling = 0.0; // cooling per unit volume, erg /s / cm^3 + Real heating = 0.0; // cooling per unit volume, erg /s / cm^3 - // To keep texture code simple, we use floats (which have built-in support) as opposed to doubles (which would require casting) + // To keep texture code simple, we use floats (which have built-in support) as opposed to doubles (which would require + // casting) float log_n, log_T; log_n = log10(n); log_T = log10(T); @@ -362,9 +362,9 @@ __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cuda if (log10(T) > 9.0) { lambda = 0.45 * log10(T) - 26.065; } else if (log10(T) >= 1.0) { - lambda = Bilinear_Texture(coolTexObj, remap_log_T, remap_log_n); + lambda = Bilinear_Texture(coolTexObj, remap_log_T, remap_log_n); const Real H = Bilinear_Texture(heatTexObj, remap_log_T, remap_log_n); - heating = pow(10, H); + heating = pow(10, H); } else { // Do nothing below 10 K return 0.0; @@ -377,9 +377,9 @@ __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cuda __device__ Real Photoelectric_Heating(Real n, Real T, Real n_av) { -// Photoelectric heating based on description given in Kim et al. 2015 -// n_av is mean density in the sim volume, cm^-3 -// Returns a positive value, expect sign conversion elsewhere for cooling + // Photoelectric heating based on description given in Kim et al. 2015 + // n_av is mean density in the sim volume, cm^-3 + // Returns a positive value, expect sign conversion elsewhere for cooling if (T < 1e4) { return n * n_av * 1.0e-26; } else { @@ -392,39 +392,37 @@ __device__ Real Photoelectric_Heating(Real n, Real T, Real n_av) based on description given in Kim et al. 2015. */ __device__ Real TI_cool(Real n, Real T) { - Real lambda = 0.0; //cooling rate, erg s^-1 cm^3 - Real H = 0.0; //heating rate, erg s^-1 - Real cool = 0.0; //cooling per unit volume, erg /s / cm^3 - Real n_av = 100.0; //mean density in the sim volume + Real lambda = 0.0; // cooling rate, erg s^-1 cm^3 + Real H = 0.0; // heating rate, erg s^-1 + Real cool = 0.0; // cooling per unit volume, erg /s / cm^3 + Real n_av = 100.0; // mean density in the sim volume // Below 10K only include photoelectric heating if (log10(T) < 1.0) { - H = n_av*1.0e-26; + H = n_av * 1.0e-26; } // Koyama & Inutsaka 2002 analytic fit if (log10(T) >= 1.0 && log10(T) < 4.0) { - lambda = 2e-26 * (1e7 * exp(-1.148e5 / (T+1000.0)) + 1.4e-2 * sqrt(T) * exp(-92.0/T)); - H = n_av*1.0e-26; + lambda = 2e-26 * (1e7 * exp(-1.148e5 / (T + 1000.0)) + 1.4e-2 * sqrt(T) * exp(-92.0 / T)); + H = n_av * 1.0e-26; } - // fit to cloudy CIE cooling function + // fit to cloudy CIE cooling function if (log10(T) >= 4.0 && log10(T) < 5.9) { lambda = powf(10.0, (-1.3 * (log10(T) - 5.25) * (log10(T) - 5.25) - 21.25)); } if (log10(T) >= 5.9 && log10(T) < 7.4) { lambda = powf(10.0, (0.7 * (log10(T) - 7.1) * (log10(T) - 7.1) - 22.8)); } - if (log10(T) >= 7.4) { - lambda = powf(10.0, (0.45*log10(T) - 26.065)); + if (log10(T) >= 7.4) { + lambda = powf(10.0, (0.45 * log10(T) - 26.065)); } - + // cooling rate per unit volume - cool = n*(n*lambda - H); - //if (cool > 1.0e-20) printf("n: %e T: %e lambda: %e H: %e\n", n, T, lambda, H); + cool = n * (n * lambda - H); + // if (cool > 1.0e-20) printf("n: %e T: %e lambda: %e H: %e\n", n, T, lambda, H); return cool; } - - #endif // COOLING_GPU #endif // CUDA diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 5fd4ac7d8..7a1fad7a8 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -500,19 +500,17 @@ Real Grid3D::Update_Grid(void) #endif #ifdef VELOCITY_CEILING - const Real V_ceiling_cholla = 0.005; // roughly 10000 km/s + const Real V_ceiling_cholla = 0.005; // roughly 10000 km/s Velocity_Ceiling(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, gama, V_ceiling_cholla); -#endif //VELOCITY_CEILING + #endif // VELOCITY_CEILING // Temperature Ceiling #ifdef TEMPERATURE_CEILING // 1e51 ergs / (m_p * (pc/cm)^3) = 45000 km/s // sqrt(1e10 K * kB/ m_mp) = 9000 km/s - const Real T_ceiling_kelvin = 5e9;// 1e10; + const Real T_ceiling_kelvin = 5e9; // 1e10; Temperature_Ceiling(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, gama, T_ceiling_kelvin); -#endif //TEMPERATURE_CEILING - - + #endif // TEMPERATURE_CEILING #ifdef AVERAGE_SLOW_CELLS // Set the min_delta_t for averaging a slow cell diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index da4c09552..4e8e49328 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -594,19 +594,20 @@ Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n return dev_dti[0]; } + #ifdef VELOCITY_CEILING -#ifdef VELOCITY_CEILING - -__global__ void Velocity_Ceiling_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real gamma, Real V_ceiling, int* counter) +__global__ void Velocity_Ceiling_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, + Real gamma, Real V_ceiling, int *counter) { - const int id = threadIdx.x + blockIdx.x * blockDim.x; - const int n_cells = nx * ny * nz; + const int id = threadIdx.x + blockIdx.x * blockDim.x; + const int n_cells = nx * ny * nz; int xid, yid, zid; cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid); - const bool real_cell = (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost); + const bool real_cell = (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && + zid > n_ghost - 1 && zid < nz - n_ghost); if (!real_cell) return; - const Real d = dev_conserved[id]; + const Real d = dev_conserved[id]; const Real max_momentum = d * V_ceiling; const Real d_inv = 1.0 / d; @@ -621,54 +622,53 @@ __global__ void Velocity_Ceiling_Kernel(Real *dev_conserved, int nx, int ny, int const Real momentum = dev_conserved[momentum_index * n_cells + id]; if (abs(momentum) > max_momentum) { - const Real new_momentum = max_momentum*momentum/abs(momentum); - const Real diff_energy = 0.5 * d_inv * ((momentum * momentum) - (max_momentum*max_momentum)); + const Real new_momentum = max_momentum * momentum / abs(momentum); + const Real diff_energy = 0.5 * d_inv * ((momentum * momentum) - (max_momentum * max_momentum)); // Write in the new momentum dev_conserved[momentum_index * n_cells + id] = new_momentum; - // Thermalize the energy - #ifdef DE + // Thermalize the energy + #ifdef DE dev_conserved[(n_fields - 1) * n_cells + id] += diff_energy; - #endif //DE + #endif // DE atomicAdd(counter, 1); } } - - } -void Velocity_Ceiling(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real gamma, Real V_ceiling) +void Velocity_Ceiling(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real gamma, + Real V_ceiling) { - int n_cells = nx * ny * nz; - int ngrid = (n_cells + TPB - 1) / TPB; + int ngrid = (n_cells + TPB - 1) / TPB; dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB, 1, 1); cuda_utilities::DeviceVector counter(1, true); - int* dev_counter = counter.data(); + int *dev_counter = counter.data(); if (nx > 1 && ny > 1 && nz > 1) { // 3D - hipLaunchKernelGGL(Velocity_Ceiling_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, gamma, V_ceiling, dev_counter); + hipLaunchKernelGGL(Velocity_Ceiling_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, + n_fields, gamma, V_ceiling, dev_counter); } int host_counter = counter[0]; if (host_counter > 0) { - printf("HYDRO WARNING: Velocity Ceiling applied to num_cells: %d \n", host_counter); + printf("HYDRO WARNING: Velocity Ceiling applied to num_cells: %d \n", host_counter); } } + #endif -#endif - - -#ifdef TEMPERATURE_CEILING + #ifdef TEMPERATURE_CEILING -__global__ void Temperature_Ceiling_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real gamma, Real T_ceiling, int* counter) +__global__ void Temperature_Ceiling_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, + Real gamma, Real T_ceiling, int *counter) { - const int id = threadIdx.x + blockIdx.x * blockDim.x; - const int n_cells = nx * ny * nz; + const int id = threadIdx.x + blockIdx.x * blockDim.x; + const int n_cells = nx * ny * nz; int xid, yid, zid; cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid); - const bool real_cell = (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && zid < nz - n_ghost); + const bool real_cell = (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && + zid > n_ghost - 1 && zid < nz - n_ghost); if (!real_cell) return; const Real d = dev_conserved[id]; @@ -678,41 +678,42 @@ __global__ void Temperature_Ceiling_Kernel(Real *dev_conserved, int nx, int ny, const Real vz = dev_conserved[3 * n_cells + id] * d_inv; const Real E = dev_conserved[4 * n_cells + id]; - const Real temperature_Kelvin = (gamma - 1) * (E - 0.5 * (vx*vx + vy*vy + vz*vz) * d) * ENERGY_UNIT / (d * DENSITY_UNIT / 0.6 / MP) / KB; + const Real temperature_Kelvin = + (gamma - 1) * (E - 0.5 * (vx * vx + vy * vy + vz * vz) * d) * ENERGY_UNIT / (d * DENSITY_UNIT / 0.6 / MP) / KB; if (temperature_Kelvin <= T_ceiling) return; const Real factor = T_ceiling / temperature_Kelvin; dev_conserved[4 * n_cells + id] *= factor; - #ifdef DE + #ifdef DE dev_conserved[(n_fields - 1) * n_cells + id] *= factor; - #endif //DE + #endif // DE atomicAdd(counter, 1); } -void Temperature_Ceiling(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real gamma, Real T_ceiling) +void Temperature_Ceiling(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real gamma, + Real T_ceiling) { - int n_cells = nx * ny * nz; - int ngrid = (n_cells + TPB - 1) / TPB; + int ngrid = (n_cells + TPB - 1) / TPB; dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB, 1, 1); cuda_utilities::DeviceVector counter(1, true); - int* dev_counter = counter.data(); + int *dev_counter = counter.data(); if (nx > 1 && ny > 1 && nz > 1) { // 3D - hipLaunchKernelGGL(Temperature_Ceiling_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, gamma, T_ceiling, dev_counter); + hipLaunchKernelGGL(Temperature_Ceiling_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, + n_fields, gamma, T_ceiling, dev_counter); } int host_counter = counter[0]; if (host_counter > 0) { - printf("HYDRO WARNING: Temperature Ceiling applied to num_cells: %d \n", host_counter); + printf("HYDRO WARNING: Temperature Ceiling applied to num_cells: %d \n", host_counter); } } - -#endif //TEMPERATURE_CEILING + #endif // TEMPERATURE_CEILING #ifdef AVERAGE_SLOW_CELLS diff --git a/src/hydro/hydro_cuda.h b/src/hydro/hydro_cuda.h index c071aa33f..d6ac09bee 100644 --- a/src/hydro/hydro_cuda.h +++ b/src/hydro/hydro_cuda.h @@ -75,23 +75,25 @@ __global__ void Sync_Energies_2D(Real *dev_conserved, int nx, int ny, int n_ghos __global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real gamma, int n_fields); -#ifdef VELOCITY_CEILING -void Velocity_Ceiling(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real gamma, Real V_ceiling); -#endif // VELOCITY CEILING + #ifdef VELOCITY_CEILING +void Velocity_Ceiling(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real gamma, + Real V_ceiling); + #endif // VELOCITY CEILING -#ifdef TEMPERATURE_CEILING -void Temperature_Ceiling(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real gamma, Real T_ceiling); -#endif // TEMPERATURE CEILING + #ifdef TEMPERATURE_CEILING +void Temperature_Ceiling(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real gamma, + Real T_ceiling); + #endif // TEMPERATURE CEILING #ifdef AVERAGE_SLOW_CELLS void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma, Real max_dti_slow, Real xbound, Real ybound, Real zbound, int nx_offset, - int ny_offset, int nz_offset); + int ny_offset, int nz_offset); __global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, - Real dy, Real dz, Real gamma, Real max_dti_slow, Real xbound, Real ybound, Real zbound, - int nx_offset, int ny_offset, int nz_offset); + Real dy, Real dz, Real gamma, Real max_dti_slow, Real xbound, Real ybound, + Real zbound, int nx_offset, int ny_offset, int nz_offset); #endif #ifdef TEMPERATURE_FLOOR diff --git a/src/utils/debug_utilities.cu b/src/utils/debug_utilities.cu index 5c90fe19f..604ad7c8b 100644 --- a/src/utils/debug_utilities.cu +++ b/src/utils/debug_utilities.cu @@ -5,15 +5,16 @@ #include "../io/io.h" // provides chprintf #include "../utils/error_handling.h" // provides chexit -__global__ void Check_For_Extreme_Temperature_Kernel(Real* dev_conserved, int n_cells, Real gamma, Real lower_limit, Real upper_limit, int marker, bool* out_bool) +__global__ void Check_For_Extreme_Temperature_Kernel(Real* dev_conserved, int n_cells, Real gamma, Real lower_limit, + Real upper_limit, int marker, bool* out_bool) { // Calculate temperature int id = threadIdx.x + blockIdx.x * blockDim.x; if (id >= n_cells) { return; } - const Real d = dev_conserved[id]; - const Real E = dev_conserved[id + n_cells * grid_enum::Energy]; + const Real d = dev_conserved[id]; + const Real E = dev_conserved[id + n_cells * grid_enum::Energy]; const Real px = dev_conserved[id + n_cells * grid_enum::momentum_x]; const Real py = dev_conserved[id + n_cells * grid_enum::momentum_y]; const Real pz = dev_conserved[id + n_cells * grid_enum::momentum_z]; @@ -25,24 +26,25 @@ __global__ void Check_For_Extreme_Temperature_Kernel(Real* dev_conserved, int n_ const Real vz = dev_conserved[3 * n_cells + id] / d; const Real E_kinetic = 0.5 * d * (vx * vx + vy * vy + vz * vz); */ - const Real E_thermal = E - E_kinetic; - const Real p = (E_thermal) * (gamma - 1.0); + const Real p = (E_thermal) * (gamma - 1.0); - const Real mu = 0.6; - const Real n = d * DENSITY_UNIT / (mu * MP); + const Real mu = 0.6; + const Real n = d * DENSITY_UNIT / (mu * MP); const Real T_kelvin = p * PRESSURE_UNIT / (n * KB); - if (T_kelvin <= lower_limit || T_kelvin >= upper_limit) { out_bool[0] = true; - kernel_printf("Check_For_Extreme_Temperature_Kernel found Value: %g E: %g E_thermal: %g E_kinetic: %g Marker: %d Thread: %d\n", T_kelvin, E, E_thermal, E_kinetic, marker, id); + kernel_printf( + "Check_For_Extreme_Temperature_Kernel found Value: %g E: %g E_thermal: %g E_kinetic: %g Marker: %d Thread: " + "%d\n", + T_kelvin, E, E_thermal, E_kinetic, marker, id); } } - -void Check_For_Extreme_Temperature(Real* dev_conserved, int n_cells, Real gamma, Real lower_limit, Real upper_limit, int marker) +void Check_For_Extreme_Temperature(Real* dev_conserved, int n_cells, Real gamma, Real lower_limit, Real upper_limit, + int marker) { bool host_out_bool[1] = {false}; bool* out_bool; @@ -52,7 +54,8 @@ void Check_For_Extreme_Temperature(Real* dev_conserved, int n_cells, Real gamma, dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(Check_For_Extreme_Temperature_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, n_cells, gamma, lower_limit, upper_limit, marker, out_bool); + hipLaunchKernelGGL(Check_For_Extreme_Temperature_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, n_cells, gamma, + lower_limit, upper_limit, marker, out_bool); cudaMemcpy(host_out_bool, out_bool, sizeof(bool), cudaMemcpyDeviceToHost); cudaFree(out_bool); @@ -62,10 +65,6 @@ void Check_For_Extreme_Temperature(Real* dev_conserved, int n_cells, Real gamma, } } - - - - __global__ void Dump_Values_Kernel(Real* device_array, int array_size, int marker) { int tid = threadIdx.x + blockIdx.x * blockDim.x;