Skip to content

Commit

Permalink
Merge pull request #37 from magpowell/liq_or_ice_only_cloud_optics
Browse files Browse the repository at this point in the history
add liq or ice cloud optics flags
  • Loading branch information
MennoVeerman authored Dec 19, 2024
2 parents eb01e87 + dd1e611 commit c6859e6
Show file tree
Hide file tree
Showing 4 changed files with 325 additions and 131 deletions.
1 change: 1 addition & 0 deletions include_test/Radiation_solver_rt.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class Radiation_solver_shortwave
#ifdef __CUDACC__
void solve_gpu(
const bool switch_fluxes,
const bool switch_disable_2s,
const bool switch_raytracing,
const bool switch_independent_column,
const bool switch_cloud_optics,
Expand Down
249 changes: 183 additions & 66 deletions src_cuda_rt/Cloud_optics_rt.cu
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,23 @@ namespace
}
}

__global__
void combine_and_store_kernel_single_phase(const int ncol, const int nlay, const Float tmin,
Float* __restrict__ tau,
const Float* __restrict__ l_or_i_tau, const Float* __restrict__ l_or_i_taussa)
{
const int icol = blockIdx.x*blockDim.x + threadIdx.x;
const int ilay = blockIdx.y*blockDim.y + threadIdx.y;

if ( (icol < ncol) && (ilay < nlay) )
{
const int idx = icol + ilay*ncol;
const Float tau_t = (l_or_i_tau[idx] - l_or_i_taussa[idx]);

tau[idx] = tau_t;
}
}

__global__
void combine_and_store_kernel(const int ncol, const int nlay, const Float tmin,
Float* __restrict__ tau, Float* __restrict__ ssa, Float* __restrict__ g,
Expand All @@ -108,6 +125,28 @@ namespace
}
}

__global__
void combine_and_store_kernel_single_phase(const int ncol, const int nlay, const Float tmin,
Float* __restrict__ tau, Float* __restrict__ ssa, Float* __restrict__ g,
const Float* __restrict__ l_or_i_tau, const Float* __restrict__ l_or_i_taussa, const Float* __restrict__ l_or_i_taussag
)
{
const int icol = blockIdx.x*blockDim.x + threadIdx.x;
const int ilay = blockIdx.y*blockDim.y + threadIdx.y;

if ( (icol < ncol) && (ilay < nlay) )
{
const int idx = icol + ilay*ncol;
const Float tau_t = l_or_i_tau[idx];
const Float taussa = l_or_i_taussa[idx];
const Float taussag = l_or_i_taussag[idx];

tau[idx] = tau_t;
ssa[idx] = taussa / max(tau_t, tmin);
g[idx] = taussag/ max(taussa, tmin);
}
}

__global__
void set_mask(const int ncol, const int nlay, const Float min_value,
Bool* __restrict__ mask, const Float* __restrict__ values)
Expand Down Expand Up @@ -182,11 +221,19 @@ void Cloud_optics_rt::cloud_optics(
const Array_gpu<Float,2>& reliq, const Array_gpu<Float,2>& reice,
Optical_props_2str_rt& optical_props)
{
const int ncol = clwp.dim(1);
const int nlay = clwp.dim(2);

Optical_props_2str_rt clouds_liq(ncol, nlay, optical_props);
Optical_props_2str_rt clouds_ice(ncol, nlay, optical_props);
int ncol = -1;
int nlay = -1;
if (clwp.ptr() != nullptr)
{
ncol = clwp.dim(1);
nlay = clwp.dim(2);
Optical_props_2str_rt clouds_liq(ncol, nlay, optical_props);
} else if (ciwp.ptr() != nullptr)
{
ncol = ciwp.dim(1);
nlay = ciwp.dim(2);
Optical_props_2str_rt clouds_ice(ncol, nlay, optical_props);
}

// Set the mask.
constexpr Float mask_min_value = Float(0.);
Expand All @@ -199,53 +246,81 @@ void Cloud_optics_rt::cloud_optics(
dim3 grid_m_gpu(grid_col_m, grid_lay_m);
dim3 block_m_gpu(block_col_m, block_lay_m);

Array_gpu<Bool,2> liqmsk({ncol, nlay});
set_mask<<<grid_m_gpu, block_m_gpu>>>(
ncol, nlay, mask_min_value, liqmsk.ptr(), clwp.ptr());

Array_gpu<Bool,2> icemsk({ncol, nlay});
set_mask<<<grid_m_gpu, block_m_gpu>>>(
ncol, nlay, mask_min_value, icemsk.ptr(), ciwp.ptr());

// Temporary arrays for storage.
Array_gpu<Float,2> ltau ({ncol, nlay});
Array_gpu<Float,2> ltaussa ({ncol, nlay});
Array_gpu<Float,2> ltaussag({ncol, nlay});

Array_gpu<Float,2> itau ({ncol, nlay});
Array_gpu<Float,2> itaussa ({ncol, nlay});
Array_gpu<Float,2> itaussag({ncol, nlay});

Array_gpu<Bool,2> liqmsk({0, 0});
Array_gpu<Float,2> ltau ({0, 0});
Array_gpu<Float,2> ltaussa ({0, 0});
Array_gpu<Float,2> ltaussag({0, 0});
Array_gpu<Bool,2> icemsk({0, 0});
Array_gpu<Float,2> itau ({0, 0});
Array_gpu<Float,2> itaussa ({0, 0});
Array_gpu<Float,2> itaussag({0, 0});
if (clwp.ptr() != nullptr){
liqmsk.set_dims({ncol, nlay});
ltau.set_dims({ncol, nlay});
ltaussa.set_dims({ncol, nlay});
ltaussag.set_dims({ncol, nlay});

set_mask<<<grid_m_gpu, block_m_gpu>>>(
ncol, nlay, mask_min_value, liqmsk.ptr(), clwp.ptr());
}
if (ciwp.ptr() != nullptr){
icemsk.set_dims({ncol, nlay});
itau.set_dims({ncol, nlay});
itaussa.set_dims({ncol, nlay});
itaussag.set_dims({ncol, nlay});

set_mask<<<grid_m_gpu, block_m_gpu>>>(
ncol, nlay, mask_min_value, icemsk.ptr(), ciwp.ptr());
}
const int block_col = 64;
const int block_lay = 1;

const int grid_col = ncol/block_col + (ncol%block_col > 0);
const int grid_lay = nlay/block_lay + (nlay%block_lay > 0);

dim3 grid_gpu(grid_col, grid_lay);
dim3 block_gpu(block_col, block_lay);

// Liquid water
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, ibnd-1, liqmsk.ptr(), clwp.ptr(), reliq.ptr(),
this->liq_nsteps, this->liq_step_size, this->radliq_lwr,
this->lut_extliq_gpu.ptr(), this->lut_ssaliq_gpu.ptr(),
this->lut_asyliq_gpu.ptr(), ltau.ptr(), ltaussa.ptr(), ltaussag.ptr());
if (clwp.ptr() != nullptr){
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, ibnd-1, liqmsk.ptr(), clwp.ptr(), reliq.ptr(),
this->liq_nsteps, this->liq_step_size, this->radliq_lwr,
this->lut_extliq_gpu.ptr(), this->lut_ssaliq_gpu.ptr(),
this->lut_asyliq_gpu.ptr(), ltau.ptr(), ltaussa.ptr(), ltaussag.ptr());
}

// Ice.
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, ibnd-1, icemsk.ptr(), ciwp.ptr(), reice.ptr(),
this->ice_nsteps, this->ice_step_size, this->radice_lwr,
this->lut_extice_gpu.ptr(), this->lut_ssaice_gpu.ptr(),
this->lut_asyice_gpu.ptr(), itau.ptr(), itaussa.ptr(), itaussag.ptr());

if (ciwp.ptr() != nullptr){
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, ibnd-1, icemsk.ptr(), ciwp.ptr(), reice.ptr(),
this->ice_nsteps, this->ice_step_size, this->radice_lwr,
this->lut_extice_gpu.ptr(), this->lut_ssaice_gpu.ptr(),
this->lut_asyice_gpu.ptr(), itau.ptr(), itaussa.ptr(), itaussag.ptr());
}
constexpr Float eps = std::numeric_limits<Float>::epsilon();

combine_and_store_kernel<<<grid_gpu, block_gpu>>>(
if ((ciwp.ptr() != nullptr) && (clwp.ptr() != nullptr))
{
combine_and_store_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, eps,
optical_props.get_tau().ptr(), optical_props.get_ssa().ptr(), optical_props.get_g().ptr(),
ltau.ptr(), ltaussa.ptr(), ltaussag.ptr(),
itau.ptr(), itaussa.ptr(), itaussag.ptr());
} else if(ciwp.ptr() == nullptr)
{
combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>>(
ncol, nlay, eps,
optical_props.get_tau().ptr(), optical_props.get_ssa().ptr(), optical_props.get_g().ptr(),
ltau.ptr(), ltaussa.ptr(), ltaussag.ptr());
} else if (clwp.ptr() == nullptr)
{
combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>>(
ncol, nlay, eps,
optical_props.get_tau().ptr(), optical_props.get_ssa().ptr(), optical_props.get_g().ptr(),
itau.ptr(), itaussa.ptr(), itaussag.ptr());
}

}

// 1scl variant of cloud optics.
Expand All @@ -255,12 +330,20 @@ void Cloud_optics_rt::cloud_optics(
const Array_gpu<Float,2>& reliq, const Array_gpu<Float,2>& reice,
Optical_props_1scl_rt& optical_props)
{
const int ncol = clwp.dim(1);
const int nlay = clwp.dim(2);

Optical_props_1scl_rt clouds_liq(ncol, nlay, optical_props);
Optical_props_1scl_rt clouds_ice(ncol, nlay, optical_props);

int ncol = -1;
int nlay = -1;
if (clwp.ptr() != nullptr)
{
ncol = clwp.dim(1);
nlay = clwp.dim(2);
Optical_props_1scl_rt clouds_liq(ncol, nlay, optical_props);
} else if (ciwp.ptr() != nullptr)
{
ncol = ciwp.dim(1);
nlay = ciwp.dim(2);
Optical_props_1scl_rt clouds_ice(ncol, nlay, optical_props);
}

// Set the mask.
constexpr Float mask_min_value = Float(0.);
const int block_col_m = 16;
Expand All @@ -272,22 +355,36 @@ void Cloud_optics_rt::cloud_optics(
dim3 grid_m_gpu(grid_col_m, grid_lay_m);
dim3 block_m_gpu(block_col_m, block_lay_m);

Array_gpu<Bool,2> liqmsk({ncol, nlay});
set_mask<<<grid_m_gpu, block_m_gpu>>>(
ncol, nlay, mask_min_value, liqmsk.ptr(), clwp.ptr());

Array_gpu<Bool,2> icemsk({ncol, nlay});
set_mask<<<grid_m_gpu, block_m_gpu>>>(
ncol, nlay, mask_min_value, icemsk.ptr(), ciwp.ptr());

// Temporary arrays for storage.
Array_gpu<Float,2> ltau ({ncol, nlay});
Array_gpu<Float,2> ltaussa ({ncol, nlay});
Array_gpu<Float,2> ltaussag({ncol, nlay});
Array_gpu<Bool,2> liqmsk({0, 0});
Array_gpu<Float,2> ltau ({0, 0});
Array_gpu<Float,2> ltaussa ({0, 0});
Array_gpu<Float,2> ltaussag({0, 0});
Array_gpu<Bool,2> icemsk({0, 0});
Array_gpu<Float,2> itau ({0, 0});
Array_gpu<Float,2> itaussa ({0, 0});
Array_gpu<Float,2> itaussag({0, 0});

if (clwp.ptr() != nullptr)
{
liqmsk.set_dims({ncol, nlay});
ltau.set_dims({ncol, nlay});
ltaussa.set_dims({ncol, nlay});
ltaussag.set_dims({ncol, nlay});

set_mask<<<grid_m_gpu, block_m_gpu>>>(
ncol, nlay, mask_min_value, liqmsk.ptr(), clwp.ptr());
}
if (ciwp.ptr() != nullptr)
{
icemsk.set_dims({ncol, nlay});
itau.set_dims({ncol, nlay});
itaussa.set_dims({ncol, nlay});
itaussag.set_dims({ncol, nlay});

Array_gpu<Float,2> itau ({ncol, nlay});
Array_gpu<Float,2> itaussa ({ncol, nlay});
Array_gpu<Float,2> itaussag({ncol, nlay});
set_mask<<<grid_m_gpu, block_m_gpu>>>(
ncol, nlay, mask_min_value, icemsk.ptr(), ciwp.ptr());
}

const int block_col = 64;
const int block_lay = 1;
Expand All @@ -299,25 +396,45 @@ void Cloud_optics_rt::cloud_optics(
dim3 block_gpu(block_col, block_lay);

// Liquid water
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, ibnd-1, liqmsk.ptr(), clwp.ptr(), reliq.ptr(),
this->liq_nsteps, this->liq_step_size, this->radliq_lwr,
this->lut_extliq_gpu.ptr(), this->lut_ssaliq_gpu.ptr(),
this->lut_asyliq_gpu.ptr(), ltau.ptr(), ltaussa.ptr(), ltaussag.ptr());
if (clwp.ptr() != nullptr){
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, ibnd-1, liqmsk.ptr(), clwp.ptr(), reliq.ptr(),
this->liq_nsteps, this->liq_step_size, this->radliq_lwr,
this->lut_extliq_gpu.ptr(), this->lut_ssaliq_gpu.ptr(),
this->lut_asyliq_gpu.ptr(), ltau.ptr(), ltaussa.ptr(), ltaussag.ptr());
}

// Ice.
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, ibnd-1, icemsk.ptr(), ciwp.ptr(), reice.ptr(),
this->ice_nsteps, this->ice_step_size, this->radice_lwr,
this->lut_extice_gpu.ptr(), this->lut_ssaice_gpu.ptr(),
this->lut_asyice_gpu.ptr(), itau.ptr(), itaussa.ptr(), itaussag.ptr());
if (ciwp.ptr() != nullptr){
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, ibnd-1, icemsk.ptr(), ciwp.ptr(), reice.ptr(),
this->ice_nsteps, this->ice_step_size, this->radice_lwr,
this->lut_extice_gpu.ptr(), this->lut_ssaice_gpu.ptr(),
this->lut_asyice_gpu.ptr(), itau.ptr(), itaussa.ptr(), itaussag.ptr());
}

constexpr Float eps = std::numeric_limits<Float>::epsilon();

combine_and_store_kernel<<<grid_gpu, block_gpu>>>(
if ((ciwp.ptr() != nullptr) && (clwp.ptr() != nullptr))
{
combine_and_store_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, eps,
optical_props.get_tau().ptr(),
ltau.ptr(), ltaussa.ptr(),
itau.ptr(), itaussa.ptr());
} else if(ciwp.ptr() == nullptr)
{
combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>>(
ncol, nlay, eps,
optical_props.get_tau().ptr(),
ltau.ptr(), ltaussa.ptr());
} else if(clwp.ptr() == nullptr)
{
combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>>(
ncol, nlay, eps,
optical_props.get_tau().ptr(),
itau.ptr(), itaussa.ptr());

}

}

22 changes: 14 additions & 8 deletions src_test/Radiation_solver_rt.cu
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,7 @@ void Radiation_solver_shortwave::load_mie_tables(

void Radiation_solver_shortwave::solve_gpu(
const bool switch_fluxes,
const bool switch_disable_2s,
const bool switch_raytracing,
const bool switch_independent_column,
const bool switch_cloud_optics,
Expand Down Expand Up @@ -636,10 +637,13 @@ void Radiation_solver_shortwave::solve_gpu(

if (switch_fluxes)
{
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_up.ptr());
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_dn.ptr());
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_dn_dir.ptr());
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_net.ptr());
if (!switch_disable_2s)
{
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_up.ptr());
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_dn.ptr());
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_dn_dir.ptr());
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_net.ptr());
}
if (switch_raytracing)
{
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.y, grid_cells.x, rt_flux_tod_up.ptr());
Expand Down Expand Up @@ -836,10 +840,12 @@ void Radiation_solver_shortwave::solve_gpu(
}

(*fluxes).net_flux();

Gpt_combine_kernels_cuda_rt::add_from_gpoint(
n_col, n_lev, sw_flux_up.ptr(), sw_flux_dn.ptr(), sw_flux_dn_dir.ptr(), sw_flux_net.ptr(),
(*fluxes).get_flux_up().ptr(), (*fluxes).get_flux_dn().ptr(), (*fluxes).get_flux_dn_dir().ptr(), (*fluxes).get_flux_net().ptr());
if (!switch_disable_2s)
{
Gpt_combine_kernels_cuda_rt::add_from_gpoint(
n_col, n_lev, sw_flux_up.ptr(), sw_flux_dn.ptr(), sw_flux_dn_dir.ptr(), sw_flux_net.ptr(),
(*fluxes).get_flux_up().ptr(), (*fluxes).get_flux_dn().ptr(), (*fluxes).get_flux_dn_dir().ptr(), (*fluxes).get_flux_net().ptr());
}

if (switch_raytracing)
{
Expand Down
Loading

0 comments on commit c6859e6

Please sign in to comment.