Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add liq or ice cloud optics flags #37

Merged
merged 11 commits into from
Dec 19, 2024
Merged
1 change: 1 addition & 0 deletions include_test/Radiation_solver_rt.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class Radiation_solver_shortwave
#ifdef __CUDACC__
void solve_gpu(
const bool switch_fluxes,
const bool switch_disable_2s,
const bool switch_raytracing,
const bool switch_independent_column,
const bool switch_cloud_optics,
Expand Down
249 changes: 183 additions & 66 deletions src_cuda_rt/Cloud_optics_rt.cu
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,23 @@ namespace
}
}

__global__
void combine_and_store_kernel_single_phase(const int ncol, const int nlay, const Float tmin,
Float* __restrict__ tau,
const Float* __restrict__ l_or_i_tau, const Float* __restrict__ l_or_i_taussa)
{
const int icol = blockIdx.x*blockDim.x + threadIdx.x;
const int ilay = blockIdx.y*blockDim.y + threadIdx.y;

if ( (icol < ncol) && (ilay < nlay) )
{
const int idx = icol + ilay*ncol;
const Float tau_t = (l_or_i_tau[idx] - l_or_i_taussa[idx]);

tau[idx] = tau_t;
}
}

__global__
void combine_and_store_kernel(const int ncol, const int nlay, const Float tmin,
Float* __restrict__ tau, Float* __restrict__ ssa, Float* __restrict__ g,
Expand All @@ -109,6 +126,28 @@ namespace
}
}

__global__
void combine_and_store_kernel_single_phase(const int ncol, const int nlay, const Float tmin,
Float* __restrict__ tau, Float* __restrict__ ssa, Float* __restrict__ g,
const Float* __restrict__ l_or_i_tau, const Float* __restrict__ l_or_i_taussa, const Float* __restrict__ l_or_i_taussag
)
{
const int icol = blockIdx.x*blockDim.x + threadIdx.x;
const int ilay = blockIdx.y*blockDim.y + threadIdx.y;

if ( (icol < ncol) && (ilay < nlay) )
{
const int idx = icol + ilay*ncol;
const Float tau_t = l_or_i_tau[idx];
const Float taussa = l_or_i_taussa[idx];
const Float taussag = l_or_i_taussag[idx];

tau[idx] = tau_t;
ssa[idx] = taussa / max(tau_t, tmin);
g[idx] = taussag/ max(taussa, tmin);
}
}

__global__
void set_mask(const int ncol, const int nlay, const Float min_value,
Bool* __restrict__ mask, const Float* __restrict__ values)
Expand Down Expand Up @@ -183,11 +222,19 @@ void Cloud_optics_rt::cloud_optics(
const Array_gpu<Float,2>& reliq, const Array_gpu<Float,2>& reice,
Optical_props_2str_rt& optical_props)
{
const int ncol = clwp.dim(1);
const int nlay = clwp.dim(2);

Optical_props_2str_rt clouds_liq(ncol, nlay, optical_props);
Optical_props_2str_rt clouds_ice(ncol, nlay, optical_props);
int ncol = -1;
int nlay = -1;
if (clwp.ptr() != nullptr)
{
ncol = clwp.dim(1);
nlay = clwp.dim(2);
Optical_props_2str_rt clouds_liq(ncol, nlay, optical_props);
} else if (ciwp.ptr() != nullptr)
{
ncol = ciwp.dim(1);
nlay = ciwp.dim(2);
Optical_props_2str_rt clouds_ice(ncol, nlay, optical_props);
}

// Set the mask.
constexpr Float mask_min_value = Float(0.);
Expand All @@ -200,53 +247,81 @@ void Cloud_optics_rt::cloud_optics(
dim3 grid_m_gpu(grid_col_m, grid_lay_m);
dim3 block_m_gpu(block_col_m, block_lay_m);

Array_gpu<Bool,2> liqmsk({ncol, nlay});
set_mask<<<grid_m_gpu, block_m_gpu>>>(
ncol, nlay, mask_min_value, liqmsk.ptr(), clwp.ptr());

Array_gpu<Bool,2> icemsk({ncol, nlay});
set_mask<<<grid_m_gpu, block_m_gpu>>>(
ncol, nlay, mask_min_value, icemsk.ptr(), ciwp.ptr());

// Temporary arrays for storage.
Array_gpu<Float,2> ltau ({ncol, nlay});
Array_gpu<Float,2> ltaussa ({ncol, nlay});
Array_gpu<Float,2> ltaussag({ncol, nlay});

Array_gpu<Float,2> itau ({ncol, nlay});
Array_gpu<Float,2> itaussa ({ncol, nlay});
Array_gpu<Float,2> itaussag({ncol, nlay});

Array_gpu<Bool,2> liqmsk({0, 0});
Array_gpu<Float,2> ltau ({0, 0});
Array_gpu<Float,2> ltaussa ({0, 0});
Array_gpu<Float,2> ltaussag({0, 0});
Array_gpu<Bool,2> icemsk({0, 0});
Array_gpu<Float,2> itau ({0, 0});
Array_gpu<Float,2> itaussa ({0, 0});
Array_gpu<Float,2> itaussag({0, 0});
if (clwp.ptr() != nullptr){
liqmsk.set_dims({ncol, nlay});
ltau.set_dims({ncol, nlay});
ltaussa.set_dims({ncol, nlay});
ltaussag.set_dims({ncol, nlay});

set_mask<<<grid_m_gpu, block_m_gpu>>>(
ncol, nlay, mask_min_value, liqmsk.ptr(), clwp.ptr());
}
if (ciwp.ptr() != nullptr){
icemsk.set_dims({ncol, nlay});
itau.set_dims({ncol, nlay});
itaussa.set_dims({ncol, nlay});
itaussag.set_dims({ncol, nlay});

set_mask<<<grid_m_gpu, block_m_gpu>>>(
ncol, nlay, mask_min_value, icemsk.ptr(), ciwp.ptr());
}
const int block_col = 64;
const int block_lay = 1;

const int grid_col = ncol/block_col + (ncol%block_col > 0);
const int grid_lay = nlay/block_lay + (nlay%block_lay > 0);

dim3 grid_gpu(grid_col, grid_lay);
dim3 block_gpu(block_col, block_lay);

// Liquid water
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, ibnd-1, liqmsk.ptr(), clwp.ptr(), reliq.ptr(),
this->liq_nsteps, this->liq_step_size, this->radliq_lwr,
this->lut_extliq_gpu.ptr(), this->lut_ssaliq_gpu.ptr(),
this->lut_asyliq_gpu.ptr(), ltau.ptr(), ltaussa.ptr(), ltaussag.ptr());
if (clwp.ptr() != nullptr){
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, ibnd-1, liqmsk.ptr(), clwp.ptr(), reliq.ptr(),
this->liq_nsteps, this->liq_step_size, this->radliq_lwr,
this->lut_extliq_gpu.ptr(), this->lut_ssaliq_gpu.ptr(),
this->lut_asyliq_gpu.ptr(), ltau.ptr(), ltaussa.ptr(), ltaussag.ptr());
}

// Ice.
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, ibnd-1, icemsk.ptr(), ciwp.ptr(), reice.ptr(),
this->ice_nsteps, this->ice_step_size, this->radice_lwr,
this->lut_extice_gpu.ptr(), this->lut_ssaice_gpu.ptr(),
this->lut_asyice_gpu.ptr(), itau.ptr(), itaussa.ptr(), itaussag.ptr());

if (ciwp.ptr() != nullptr){
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, ibnd-1, icemsk.ptr(), ciwp.ptr(), reice.ptr(),
this->ice_nsteps, this->ice_step_size, this->radice_lwr,
this->lut_extice_gpu.ptr(), this->lut_ssaice_gpu.ptr(),
this->lut_asyice_gpu.ptr(), itau.ptr(), itaussa.ptr(), itaussag.ptr());
}
constexpr Float eps = std::numeric_limits<Float>::epsilon();

combine_and_store_kernel<<<grid_gpu, block_gpu>>>(
if ((ciwp.ptr() != nullptr) && (clwp.ptr() != nullptr))
{
combine_and_store_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, eps,
optical_props.get_tau().ptr(), optical_props.get_ssa().ptr(), optical_props.get_g().ptr(),
ltau.ptr(), ltaussa.ptr(), ltaussag.ptr(),
itau.ptr(), itaussa.ptr(), itaussag.ptr());
} else if(ciwp.ptr() == nullptr)
{
combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>>(
ncol, nlay, eps,
optical_props.get_tau().ptr(), optical_props.get_ssa().ptr(), optical_props.get_g().ptr(),
ltau.ptr(), ltaussa.ptr(), ltaussag.ptr());
} else if (clwp.ptr() == nullptr)
{
combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>>(
ncol, nlay, eps,
optical_props.get_tau().ptr(), optical_props.get_ssa().ptr(), optical_props.get_g().ptr(),
itau.ptr(), itaussa.ptr(), itaussag.ptr());
}

}

// 1scl variant of cloud optics.
Expand All @@ -256,12 +331,20 @@ void Cloud_optics_rt::cloud_optics(
const Array_gpu<Float,2>& reliq, const Array_gpu<Float,2>& reice,
Optical_props_1scl_rt& optical_props)
{
const int ncol = clwp.dim(1);
const int nlay = clwp.dim(2);

Optical_props_1scl_rt clouds_liq(ncol, nlay, optical_props);
Optical_props_1scl_rt clouds_ice(ncol, nlay, optical_props);

int ncol = -1;
int nlay = -1;
if (clwp.ptr() != nullptr)
{
ncol = clwp.dim(1);
nlay = clwp.dim(2);
Optical_props_1scl_rt clouds_liq(ncol, nlay, optical_props);
} else if (ciwp.ptr() != nullptr)
{
ncol = ciwp.dim(1);
nlay = ciwp.dim(2);
Optical_props_1scl_rt clouds_ice(ncol, nlay, optical_props);
}

// Set the mask.
constexpr Float mask_min_value = Float(0.);
const int block_col_m = 16;
Expand All @@ -273,22 +356,36 @@ void Cloud_optics_rt::cloud_optics(
dim3 grid_m_gpu(grid_col_m, grid_lay_m);
dim3 block_m_gpu(block_col_m, block_lay_m);

Array_gpu<Bool,2> liqmsk({ncol, nlay});
set_mask<<<grid_m_gpu, block_m_gpu>>>(
ncol, nlay, mask_min_value, liqmsk.ptr(), clwp.ptr());

Array_gpu<Bool,2> icemsk({ncol, nlay});
set_mask<<<grid_m_gpu, block_m_gpu>>>(
ncol, nlay, mask_min_value, icemsk.ptr(), ciwp.ptr());

// Temporary arrays for storage.
Array_gpu<Float,2> ltau ({ncol, nlay});
Array_gpu<Float,2> ltaussa ({ncol, nlay});
Array_gpu<Float,2> ltaussag({ncol, nlay});
Array_gpu<Bool,2> liqmsk({0, 0});
Array_gpu<Float,2> ltau ({0, 0});
Array_gpu<Float,2> ltaussa ({0, 0});
Array_gpu<Float,2> ltaussag({0, 0});
Array_gpu<Bool,2> icemsk({0, 0});
Array_gpu<Float,2> itau ({0, 0});
Array_gpu<Float,2> itaussa ({0, 0});
Array_gpu<Float,2> itaussag({0, 0});

if (clwp.ptr() != nullptr)
{
liqmsk.set_dims({ncol, nlay});
ltau.set_dims({ncol, nlay});
ltaussa.set_dims({ncol, nlay});
ltaussag.set_dims({ncol, nlay});

set_mask<<<grid_m_gpu, block_m_gpu>>>(
ncol, nlay, mask_min_value, liqmsk.ptr(), clwp.ptr());
}
if (ciwp.ptr() != nullptr)
{
icemsk.set_dims({ncol, nlay});
itau.set_dims({ncol, nlay});
itaussa.set_dims({ncol, nlay});
itaussag.set_dims({ncol, nlay});

Array_gpu<Float,2> itau ({ncol, nlay});
Array_gpu<Float,2> itaussa ({ncol, nlay});
Array_gpu<Float,2> itaussag({ncol, nlay});
set_mask<<<grid_m_gpu, block_m_gpu>>>(
ncol, nlay, mask_min_value, icemsk.ptr(), ciwp.ptr());
}

const int block_col = 64;
const int block_lay = 1;
Expand All @@ -300,25 +397,45 @@ void Cloud_optics_rt::cloud_optics(
dim3 block_gpu(block_col, block_lay);

// Liquid water
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, ibnd-1, liqmsk.ptr(), clwp.ptr(), reliq.ptr(),
this->liq_nsteps, this->liq_step_size, this->radliq_lwr,
this->lut_extliq_gpu.ptr(), this->lut_ssaliq_gpu.ptr(),
this->lut_asyliq_gpu.ptr(), ltau.ptr(), ltaussa.ptr(), ltaussag.ptr());
if (clwp.ptr() != nullptr){
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, ibnd-1, liqmsk.ptr(), clwp.ptr(), reliq.ptr(),
this->liq_nsteps, this->liq_step_size, this->radliq_lwr,
this->lut_extliq_gpu.ptr(), this->lut_ssaliq_gpu.ptr(),
this->lut_asyliq_gpu.ptr(), ltau.ptr(), ltaussa.ptr(), ltaussag.ptr());
}

// Ice.
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, ibnd-1, icemsk.ptr(), ciwp.ptr(), reice.ptr(),
this->ice_nsteps, this->ice_step_size, this->radice_lwr,
this->lut_extice_gpu.ptr(), this->lut_ssaice_gpu.ptr(),
this->lut_asyice_gpu.ptr(), itau.ptr(), itaussa.ptr(), itaussag.ptr());
if (ciwp.ptr() != nullptr){
compute_from_table_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, ibnd-1, icemsk.ptr(), ciwp.ptr(), reice.ptr(),
this->ice_nsteps, this->ice_step_size, this->radice_lwr,
this->lut_extice_gpu.ptr(), this->lut_ssaice_gpu.ptr(),
this->lut_asyice_gpu.ptr(), itau.ptr(), itaussa.ptr(), itaussag.ptr());
}

constexpr Float eps = std::numeric_limits<Float>::epsilon();

combine_and_store_kernel<<<grid_gpu, block_gpu>>>(
if ((ciwp.ptr() != nullptr) && (clwp.ptr() != nullptr))
{
combine_and_store_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, eps,
optical_props.get_tau().ptr(),
ltau.ptr(), ltaussa.ptr(),
itau.ptr(), itaussa.ptr());
} else if(ciwp.ptr() == nullptr)
{
combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>>(
ncol, nlay, eps,
optical_props.get_tau().ptr(),
ltau.ptr(), ltaussa.ptr());
} else if(clwp.ptr() == nullptr)
{
combine_and_store_kernel_single_phase<<<grid_gpu, block_gpu>>>(
ncol, nlay, eps,
optical_props.get_tau().ptr(),
itau.ptr(), itaussa.ptr());

}

}

22 changes: 14 additions & 8 deletions src_test/Radiation_solver_rt.cu
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,7 @@ void Radiation_solver_shortwave::load_mie_tables(

void Radiation_solver_shortwave::solve_gpu(
const bool switch_fluxes,
const bool switch_disable_2s,
const bool switch_raytracing,
const bool switch_independent_column,
const bool switch_cloud_optics,
Expand Down Expand Up @@ -636,10 +637,13 @@ void Radiation_solver_shortwave::solve_gpu(

if (switch_fluxes)
{
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_up.ptr());
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_dn.ptr());
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_dn_dir.ptr());
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_net.ptr());
if (!switch_disable_2s)
{
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_up.ptr());
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_dn.ptr());
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_dn_dir.ptr());
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(n_lev, grid_cells.y, grid_cells.x, sw_flux_net.ptr());
}
if (switch_raytracing)
{
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.y, grid_cells.x, rt_flux_tod_up.ptr());
Expand Down Expand Up @@ -836,10 +840,12 @@ void Radiation_solver_shortwave::solve_gpu(
}

(*fluxes).net_flux();

Gpt_combine_kernels_cuda_rt::add_from_gpoint(
n_col, n_lev, sw_flux_up.ptr(), sw_flux_dn.ptr(), sw_flux_dn_dir.ptr(), sw_flux_net.ptr(),
(*fluxes).get_flux_up().ptr(), (*fluxes).get_flux_dn().ptr(), (*fluxes).get_flux_dn_dir().ptr(), (*fluxes).get_flux_net().ptr());
if (!switch_disable_2s)
{
Gpt_combine_kernels_cuda_rt::add_from_gpoint(
n_col, n_lev, sw_flux_up.ptr(), sw_flux_dn.ptr(), sw_flux_dn_dir.ptr(), sw_flux_net.ptr(),
(*fluxes).get_flux_up().ptr(), (*fluxes).get_flux_dn().ptr(), (*fluxes).get_flux_dn_dir().ptr(), (*fluxes).get_flux_net().ptr());
}

if (switch_raytracing)
{
Expand Down
Loading