Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

S99 feedback #316

Closed
wants to merge 12 commits into from
12 changes: 3 additions & 9 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,10 @@ include builds/make.type.$(TYPE)
# CUDA_ARCH defaults to sm_70 if not set in make.host
CUDA_ARCH ?= sm_70

DIRS := src src/analysis src/chemistry_gpu src/cooling src/cooling_grackle src/cosmology \
src/cpu src/global src/gravity src/gravity/paris src/grid src/hydro \
src/integrators src/io src/main.cpp src/main_tests.cpp src/mhd\
src/model src/mpi src/old_cholla src/particles src/reconstruction \
src/riemann_solvers src/system_tests src/utils src/dust

SUFFIX ?= .$(TYPE).$(MACHINE)

CPPFILES := $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.cpp))
GPUFILES := $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.cu))
CPPFILES := $(shell find src/ -type f -name '*.cpp')
GPUFILES := $(shell find src/ -type f -name '*.cu')

# Build a list of all potential object files so cleaning works properly
CLEAN_OBJS := $(subst .cpp,.o,$(CPPFILES)) \
Expand Down Expand Up @@ -101,7 +95,7 @@ ifeq ($(findstring -DPARIS,$(DFLAGS)),-DPARIS)
endif
endif

ifeq ($(findstring -DSUPERNOVA,$(DFLAGS)),-DSUPERNOVA)
ifeq ($(findstring -DFEEDBACK,$(DFLAGS)),-DFEEDBACK)
ifdef HIPCONFIG
CXXFLAGS += -I$(ROCM_PATH)/include/hiprand -I$(ROCM_PATH)/hiprand/include
GPUFLAGS += -I$(ROCM_PATH)/include/hiprand -I$(ROCM_PATH)/hiprand/include
Expand Down
15 changes: 6 additions & 9 deletions builds/make.type.disk
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
MPI_GPU = -DMPI_GPU
DFLAGS += -DPARTICLES
#DFLAGS += -DPARTICLES_CPU
DFLAGS += -DPARTICLES_GPU
#DFLAGS += -DONLY_PARTICLES
DFLAGS += -DPARTICLE_IDS
#DFLAGS += -DSINGLE_PARTICLE_MASS
DFLAGS += -DPARTICLE_AGE
DFLAGS += -DSUPERNOVA #this flag requires PARTICLE_AGE, PARTICLE_IDS
DFLAGS += -DFEEDBACK #this flag requires PARTICLE_AGE, PARTICLE_IDS
#DFLAGS += -DONLY_RESOLVED
#DFLAGS += -DNO_WIND_FEEDBACK
#DFLAGS += -DNO_SN_FEEDBACK
DFLAGS += -DANALYSIS
#DFLAGS += -DPARTICLES_KDK

Expand All @@ -29,16 +29,13 @@ DFLAGS += -DPPMC
DFLAGS += -DHLLC
DFLAGS += -DVL

DFLAGS += -DDISK_ICS

DFLAGS += -DDENSITY_FLOOR
DFLAGS += -DTEMPERATURE_FLOOR
DFLAGS += -DCOOLING_GPU
#DFLAGS += -DCLOUDY_COOL
DFLAGS += -DDE
DFLAGS += -DCPU_TIME
#DFLAGS += -DCPU_TIME
DFLAGS += -DAVERAGE_SLOW_CELLS
DFLAGS += -DHYDRO_GPU

OUTPUT ?= -DOUTPUT -DHDF5 -DSLICES -DPROJECTION
DFLAGS += $(OUTPUT)
Expand All @@ -48,4 +45,4 @@ DFLAGS += $(MPI_GPU)
DFLAGS += -DPARALLEL_OMP
DFLAGS += -DN_OMP_THREADS=$(OMP_NUM_THREADS)

#DFLAGS += -DCUDA_ERROR_CHECK
DFLAGS += -DCUDA_ERROR_CHECK
7 changes: 6 additions & 1 deletion src/analysis/feedback_analysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,13 @@

#define VRMS_CUTOFF_DENSITY (0.01 * 0.6 * MP / DENSITY_UNIT)

FeedbackAnalysis::FeedbackAnalysis(Grid3D& G)
FeedbackAnalysis::FeedbackAnalysis(Grid3D& G, struct parameters* P)
{
// set distance limits beyond which contributions to the V_rms don't
// make sense, such as too close to the simulation volume edge or
// too high above the disk.
r_max = P->xlen / 2 * 0.975;
z_max = 0.15; // 150 pc above/below the disk plane
// allocate arrays
h_circ_vel_x = (Real*)malloc(G.H.n_cells * sizeof(Real));
h_circ_vel_y = (Real*)malloc(G.H.n_cells * sizeof(Real));
Expand Down
5 changes: 4 additions & 1 deletion src/analysis/feedback_analysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
class FeedbackAnalysis
{
Real *h_circ_vel_x, *h_circ_vel_y;
Real r_max, z_max;

#ifdef PARTICLES_GPU
Real *d_circ_vel_x, *d_circ_vel_y;
Expand All @@ -21,8 +22,10 @@ class FeedbackAnalysis
Real totalEnergy{0};
Real totalMomentum{0};
Real totalUnresEnergy{0};
Real totalWindMomentum{0};
Real totalWindEnergy{0};

FeedbackAnalysis(Grid3D& G);
FeedbackAnalysis(Grid3D& G, struct parameters* P);
~FeedbackAnalysis();

void Compute_Gas_Velocity_Dispersion(Grid3D& G);
Expand Down
40 changes: 29 additions & 11 deletions src/analysis/feedback_analysis_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include "feedback_analysis.h"
#ifdef PARTICLES_GPU

#define MU 0.6
// in cgs, this is 0.01 cm^{-3}
#define MIN_DENSITY (0.01 * MP * MU * LENGTH_UNIT * LENGTH_UNIT * LENGTH_UNIT / MASS_UNIT) // 148279.7
#define TPB_ANALYSIS 1024
Expand All @@ -33,7 +32,9 @@ __device__ void warpReduce(volatile Real *buff, size_t tid)
}
}

void __global__ Reduce_Tubulence_kernel(int nx, int ny, int nz, int n_ghost, Real *density, Real *momentum_x,
void __global__ Reduce_Tubulence_kernel(int nx, int ny, int nz, int n_ghost, Real xbound, Real ybound, Real zbound,
Real dx, Real dy, Real dz, Real nx_local_start, Real ny_local_start,
Real nz_local_start, Real r_max, Real z_max, Real *density, Real *momentum_x,
Real *momentum_y, Real *momentum_z, Real *circ_vel_x, Real *circ_vel_y,
Real *partial_mass, Real *partial_vel)
{
Expand All @@ -50,15 +51,24 @@ void __global__ Reduce_Tubulence_kernel(int nx, int ny, int nz, int n_ghost, Rea
s_mass[tid] = 0;
s_vel[tid] = 0;
Real vx, vy, vz;
Real x, y, z, r;

if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 &&
zid < nz - n_ghost && density[id] > MIN_DENSITY) {
s_mass[tid] = density[id];
vx = momentum_x[id] / density[id];
vy = momentum_y[id] / density[id];
vz = momentum_z[id] / density[id];
s_vel[tid] =
((vx - circ_vel_x[id]) * (vx - circ_vel_x[id]) + (vy - circ_vel_y[id]) * (vy - circ_vel_y[id]) + (vz * vz)) *
density[id];
x = xbound + (nx_local_start + xid - n_ghost + 0.5) * dx;
y = ybound + (ny_local_start + yid - n_ghost + 0.5) * dy;
z = zbound + (nz_local_start + zid - n_ghost + 0.5) * dz;
r = sqrt(x * x + y * y + z * z);

if (r <= r_max && abs(z) <= z_max) {
s_mass[tid] = density[id];
vx = momentum_x[id] / density[id];
vy = momentum_y[id] / density[id];
vz = momentum_z[id] / density[id];
s_vel[tid] =
((vx - circ_vel_x[id]) * (vx - circ_vel_x[id]) + (vy - circ_vel_y[id]) * (vy - circ_vel_y[id]) + (vz * vz)) *
density[id];
}
}
__syncthreads();

Expand Down Expand Up @@ -153,9 +163,17 @@ void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion_GPU(Grid3D &G)
Real total_mass = 0;
Real total_vel = 0;

Real nx_loc_strt = 0, ny_loc_strt = 0, nz_loc_strt = 0;
#ifdef MPI_CHOLLA
nx_loc_strt = nx_local_start;
ny_loc_strt = ny_local_start;
nz_loc_strt = nz_local_start;
#endif

hipLaunchKernelGGL(Reduce_Tubulence_kernel, ngrid, TPB_ANALYSIS, 0, 0, G.H.nx, G.H.ny, G.H.nz, G.H.n_ghost,
G.C.d_density, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, d_circ_vel_x, d_circ_vel_y,
d_partial_mass, d_partial_vel);
G.H.xbound, G.H.ybound, G.H.zbound, G.H.dx, G.H.dy, G.H.dz, nx_loc_strt, ny_loc_strt, nz_loc_strt,
r_max, z_max, G.C.d_density, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, d_circ_vel_x,
d_circ_vel_y, d_partial_mass, d_partial_vel);

size_t n = ngrid;
Real *mass_input = d_partial_mass;
Expand Down
41 changes: 25 additions & 16 deletions src/cooling/cooling_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,9 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int
del_T = cool * dt * TIME_UNIT * (gamma - 1.0) / (n * KB);

// limit change in temperature to 1%
while (del_T / T > 0.01) {
while (fabs(del_T) / T > 0.01) {
// what dt gives del_T = 0.01*T?
dt_sub = 0.01 * T * n * KB / (cool * TIME_UNIT * (gamma - 1.0));
dt_sub = 0.01 * T * n * KB / (fabs(cool) * TIME_UNIT * (gamma - 1.0));
// apply that dt
T -= cool * dt_sub * TIME_UNIT * (gamma - 1.0) / (n * KB);
// how much time is left from the original timestep?
Expand All @@ -149,14 +149,6 @@ __global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int
ge -= KB * del_T / (mu * MP * (gamma - 1.0) * SP_ENERGY_UNIT);
#endif

// calculate cooling rate for new T
#ifdef CLOUDY_COOL
cool = Cloudy_cool(n, T, coolTexObj, heatTexObj);
#else
cool = CIE_cool(n, T);
// printf("%d %d %d %e %e %e\n", xid, yid, zid, n, T, cool);
#endif

// and send back from kernel
dev_conserved[4 * n_cells + id] = E;
#ifdef DE
Expand Down Expand Up @@ -327,6 +319,10 @@ __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cuda
Real lambda = 0.0; // cooling rate, erg s^-1 cm^3
Real H = 0.0; // heating rate, erg s^-1 cm^3
Real cool = 0.0; // cooling per unit volume, erg /s / cm^3
// the following two values are for photoelectric heating
// based on description given in Kim et al. 2015.
Real n_av = 100.0; // TODO mean density in the sim volume, cm^3 (make this an argument?)
Real H_pe = 0.0;
float log_n, log_T;
log_n = log10(n);
log_T = log10(T);
Expand All @@ -341,15 +337,28 @@ __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cuda
// variable so it is treated as "x" This is why the Texture calls are T first,
// then n: Bilinear_Texture(tex, log_T, log_n)

// don't cool below 10 K
if (log10(T) > 1.0) {
// cloudy cooling tables cut off at 10^9 K, use the CIE analytic fit above
// this temp.
if (log10(T) > 9.0) {
lambda = 0.45 * log10(T) - 26.065;
} else if (log10(T) >= 1.0) { // don't cool below 10 K
lambda = Bilinear_Texture(coolTexObj, log_T, log_n);
} else
lambda = 0.0;
H = Bilinear_Texture(heatTexObj, log_T, log_n);
H = Bilinear_Texture(heatTexObj, log_T, log_n);
}

// apply photoelectric heating under 10,000 K
if (log10(T) < 4.0) {
H_pe = n_av * 1.0e-26;
}

// cooling rate per unit volume
cool = n * n * (powf(10, lambda) - powf(10, H));
if (log10(T) > 9.0) {
cool = n * n * pow(10, lambda);
} else if (log10(T) >= 4.0) {
cool = n * n * (pow(10, lambda) - pow(10, H));
} else {
cool = n * (n * (pow(10, lambda) - pow(10, H)) - H_pe);
}
// printf("DEBUG Cloudy L350: %.17e\n",cool);
return cool;
}
Expand Down
Loading
Loading