Skip to content

Commit

Permalink
Updated Planck source function.
Browse files Browse the repository at this point in the history
  • Loading branch information
Chiil committed Jul 2, 2024
1 parent 2f439f3 commit 8d54bdf
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 87 deletions.
2 changes: 1 addition & 1 deletion include_test/Netcdf_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ inline Netcdf_file::Netcdf_file(const std::string& name, Netcdf_mode mode) :
int nc_check_code = 0;

if (mode == Netcdf_mode::Create)
nc_check_code = nc_create(name.c_str(), NC_NOCLOBBER | NC_NETCDF4, &ncid);
nc_check_code = nc_create(name.c_str(), NC_CLOBBER | NC_NETCDF4, &ncid);
else if (mode == Netcdf_mode::Write)
nc_check_code = nc_open(name.c_str(), NC_WRITE | NC_NETCDF4, &ncid);
else if (mode == Netcdf_mode::Read)
Expand Down
23 changes: 1 addition & 22 deletions src_cuda/Rte_lw.cu
Original file line number Diff line number Diff line change
Expand Up @@ -56,27 +56,6 @@ namespace
}
}

// template<typename Float>
// void apply_BC(
// int ncol, int nlay, int ngpt,
// Bool top_at_1, Array<Float,3>& gpt_flux_dn)
// {
// rrtmgp_kernels::apply_BC_0(
// &ncol, &nlay, &ngpt,
// &top_at_1, gpt_flux_dn.ptr());
// }
//
// template<typename Float>
// void apply_BC(
// int ncol, int nlay, int ngpt,
// Bool top_at_1, const Array<Float,2>& inc_flux,
// Array<Float,3>& gpt_flux_dn)
// {
// rrtmgp_kernels::apply_BC_gpt(
// &ncol, &nlay, &ngpt,
// &top_at_1, const_cast<Float*>(inc_flux.ptr()), gpt_flux_dn.ptr());
// }


void Rte_lw_gpu::rte_lw(
const std::unique_ptr<Optical_props_arry_gpu>& optical_props,
Expand Down Expand Up @@ -134,7 +113,7 @@ void Rte_lw_gpu::rte_lw(

// pass null ptr if size of inc_flux is zero
const Float* inc_flux_ptr = (inc_flux.size() == 0) ? nullptr : inc_flux.ptr();

Rte_solver_kernels_cuda::lw_solver_noscat(
ncol, nlay, ngpt, top_at_1, n_quad_angs,
secants.ptr(), gauss_wts_subset.ptr(),
Expand Down
102 changes: 50 additions & 52 deletions src_kernels_cuda/gas_optics_rrtmgp_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ void Planck_source_kernel(
// THIS KERNEL USES FORTRAN INDEXING TO AVOID MISTAKES.
const int icol = blockIdx.x*blockDim.x + threadIdx.x + 1;
const int ilay = blockIdx.y*blockDim.y + threadIdx.y + 1;
const int igpt = blockIdx.z*blockDim.z + threadIdx.z + 1;

// Input arrays, use Index functor to simplify index porting from Fortran to CUDA
const Index_2d<const Float> tlay (tlay_ptr, ncol, nlay);
Expand All @@ -251,66 +252,63 @@ void Planck_source_kernel(
Index_3d<Float> lev_src(lev_src_ptr, ncol, nlay+1, ngpt);
Index_2d<Float> sfc_src_jac(sfc_src_jac_ptr, ncol, ngpt);

if ( (icol <= ncol) && (ilay <= nlay) )
if ( (icol <= ncol) && (ilay <= nlay) && (igpt <= ngpt) )
{
for (int igpt=1; igpt<=ngpt; ++igpt)
const int ibnd = gpoint_bands(igpt);
const int itropo = (tropo(icol, ilay) == Bool(true)) ? 1 : 2;
const int iflav = gpoint_flavor(itropo, igpt);

// 3D interp.
const Float pfrac =
( fmajor(1, 1, 1, icol, ilay, iflav) * pfracin(jtemp(icol, ilay), jeta(1, icol, ilay, iflav) , jpress(icol, ilay)-1 + itropo, igpt)
+ fmajor(2, 1, 1, icol, ilay, iflav) * pfracin(jtemp(icol, ilay), jeta(1, icol, ilay, iflav)+1, jpress(icol, ilay)-1 + itropo, igpt)
+ fmajor(1, 2, 1, icol, ilay, iflav) * pfracin(jtemp(icol, ilay), jeta(1, icol, ilay, iflav) , jpress(icol, ilay) + itropo, igpt)
+ fmajor(2, 2, 1, icol, ilay, iflav) * pfracin(jtemp(icol, ilay), jeta(1, icol, ilay, iflav)+1, jpress(icol, ilay) + itropo, igpt) )

+ ( fmajor(1, 1, 2, icol, ilay, iflav) * pfracin(jtemp(icol, ilay)+1, jeta(2, icol, ilay, iflav) , jpress(icol, ilay)-1 + itropo, igpt)
+ fmajor(2, 1, 2, icol, ilay, iflav) * pfracin(jtemp(icol, ilay)+1, jeta(2, icol, ilay, iflav)+1, jpress(icol, ilay)-1 + itropo, igpt)
+ fmajor(1, 2, 2, icol, ilay, iflav) * pfracin(jtemp(icol, ilay)+1, jeta(2, icol, ilay, iflav) , jpress(icol, ilay) + itropo, igpt)
+ fmajor(2, 2, 2, icol, ilay, iflav) * pfracin(jtemp(icol, ilay)+1, jeta(2, icol, ilay, iflav)+1, jpress(icol, ilay) + itropo, igpt) );

Float planck_function_1 = interpolate1D(tlay(icol, ilay), temp_ref_min, totplnk_delta, nPlanckTemp, &totplnk(1, ibnd));
lay_src(icol, ilay, igpt) = pfrac * planck_function_1;

planck_function_1 = interpolate1D(tlev(icol, ilay), temp_ref_min, totplnk_delta, nPlanckTemp, &totplnk(1, ibnd));
if (ilay == 1)
{
const int ibnd = gpoint_bands(igpt);
const int itropo = (tropo(icol, ilay) == Bool(true)) ? 1 : 2;
lev_src(icol, ilay, igpt) = pfrac * planck_function_1;
}
else
{
const int itropo = (tropo(icol, ilay-1) == Bool(true)) ? 1 : 2;
const int iflav = gpoint_flavor(itropo, igpt);

// 3D interp.
const Float pfrac =
( fmajor(1, 1, 1, icol, ilay, iflav) * pfracin(jtemp(icol, ilay), jeta(1, icol, ilay, iflav) , jpress(icol, ilay)-1 + itropo, igpt)
+ fmajor(2, 1, 1, icol, ilay, iflav) * pfracin(jtemp(icol, ilay), jeta(1, icol, ilay, iflav)+1, jpress(icol, ilay)-1 + itropo, igpt)
+ fmajor(1, 2, 1, icol, ilay, iflav) * pfracin(jtemp(icol, ilay), jeta(1, icol, ilay, iflav) , jpress(icol, ilay) + itropo, igpt)
+ fmajor(2, 2, 1, icol, ilay, iflav) * pfracin(jtemp(icol, ilay), jeta(1, icol, ilay, iflav)+1, jpress(icol, ilay) + itropo, igpt) )

+ ( fmajor(1, 1, 2, icol, ilay, iflav) * pfracin(jtemp(icol, ilay)+1, jeta(2, icol, ilay, iflav) , jpress(icol, ilay)-1 + itropo, igpt)
+ fmajor(2, 1, 2, icol, ilay, iflav) * pfracin(jtemp(icol, ilay)+1, jeta(2, icol, ilay, iflav)+1, jpress(icol, ilay)-1 + itropo, igpt)
+ fmajor(1, 2, 2, icol, ilay, iflav) * pfracin(jtemp(icol, ilay)+1, jeta(2, icol, ilay, iflav) , jpress(icol, ilay) + itropo, igpt)
+ fmajor(2, 2, 2, icol, ilay, iflav) * pfracin(jtemp(icol, ilay)+1, jeta(2, icol, ilay, iflav)+1, jpress(icol, ilay) + itropo, igpt) );

Float planck_function_1 = interpolate1D(tlay(icol, ilay), temp_ref_min, totplnk_delta, nPlanckTemp, &totplnk(1, ibnd));
lay_src(icol, ilay, igpt) = pfrac * planck_function_1;

planck_function_1 = interpolate1D(tlev(icol, ilay), temp_ref_min, totplnk_delta, nPlanckTemp, &totplnk(1, ibnd));
if (ilay == 1)
{
lev_src(icol, ilay, igpt) = pfrac * planck_function_1;
}
else
{
const int itropo = (tropo(icol, ilay-1) == Bool(true)) ? 1 : 2;
const int iflav = gpoint_flavor(itropo, igpt);

const Float pfrac_m1 =
( fmajor(1, 1, 1, icol, ilay-1, iflav) * pfracin(jtemp(icol, ilay-1), jeta(1, icol, ilay-1, iflav) , jpress(icol, ilay-1)-1 + itropo, igpt)
+ fmajor(2, 1, 1, icol, ilay-1, iflav) * pfracin(jtemp(icol, ilay-1), jeta(1, icol, ilay-1, iflav)+1, jpress(icol, ilay-1)-1 + itropo, igpt)
+ fmajor(1, 2, 1, icol, ilay-1, iflav) * pfracin(jtemp(icol, ilay-1), jeta(1, icol, ilay-1, iflav) , jpress(icol, ilay-1) + itropo, igpt)
+ fmajor(2, 2, 1, icol, ilay-1, iflav) * pfracin(jtemp(icol, ilay-1), jeta(1, icol, ilay-1, iflav)+1, jpress(icol, ilay-1) + itropo, igpt) )
const Float pfrac_m1 =
( fmajor(1, 1, 1, icol, ilay-1, iflav) * pfracin(jtemp(icol, ilay-1), jeta(1, icol, ilay-1, iflav) , jpress(icol, ilay-1)-1 + itropo, igpt)
+ fmajor(2, 1, 1, icol, ilay-1, iflav) * pfracin(jtemp(icol, ilay-1), jeta(1, icol, ilay-1, iflav)+1, jpress(icol, ilay-1)-1 + itropo, igpt)
+ fmajor(1, 2, 1, icol, ilay-1, iflav) * pfracin(jtemp(icol, ilay-1), jeta(1, icol, ilay-1, iflav) , jpress(icol, ilay-1) + itropo, igpt)
+ fmajor(2, 2, 1, icol, ilay-1, iflav) * pfracin(jtemp(icol, ilay-1), jeta(1, icol, ilay-1, iflav)+1, jpress(icol, ilay-1) + itropo, igpt) )

+ ( fmajor(1, 1, 2, icol, ilay-1, iflav) * pfracin(jtemp(icol, ilay-1)+1, jeta(2, icol, ilay-1, iflav) , jpress(icol, ilay-1)-1 + itropo, igpt)
+ fmajor(2, 1, 2, icol, ilay-1, iflav) * pfracin(jtemp(icol, ilay-1)+1, jeta(2, icol, ilay-1, iflav)+1, jpress(icol, ilay-1)-1 + itropo, igpt)
+ fmajor(1, 2, 2, icol, ilay-1, iflav) * pfracin(jtemp(icol, ilay-1)+1, jeta(2, icol, ilay-1, iflav) , jpress(icol, ilay-1) + itropo, igpt)
+ fmajor(2, 2, 2, icol, ilay-1, iflav) * pfracin(jtemp(icol, ilay-1)+1, jeta(2, icol, ilay-1, iflav)+1, jpress(icol, ilay-1) + itropo, igpt) );
+ ( fmajor(1, 1, 2, icol, ilay-1, iflav) * pfracin(jtemp(icol, ilay-1)+1, jeta(2, icol, ilay-1, iflav) , jpress(icol, ilay-1)-1 + itropo, igpt)
+ fmajor(2, 1, 2, icol, ilay-1, iflav) * pfracin(jtemp(icol, ilay-1)+1, jeta(2, icol, ilay-1, iflav)+1, jpress(icol, ilay-1)-1 + itropo, igpt)
+ fmajor(1, 2, 2, icol, ilay-1, iflav) * pfracin(jtemp(icol, ilay-1)+1, jeta(2, icol, ilay-1, iflav) , jpress(icol, ilay-1) + itropo, igpt)
+ fmajor(2, 2, 2, icol, ilay-1, iflav) * pfracin(jtemp(icol, ilay-1)+1, jeta(2, icol, ilay-1, iflav)+1, jpress(icol, ilay-1) + itropo, igpt) );

lev_src(icol, ilay, igpt) = sqrt(pfrac * pfrac_m1) * planck_function_1;
}
lev_src(icol, ilay, igpt) = sqrt(pfrac * pfrac_m1) * planck_function_1;
}

if (ilay == nlay)
{
planck_function_1 = interpolate1D(tlev(icol, ilay+1), temp_ref_min, totplnk_delta, nPlanckTemp, &totplnk(1, ibnd));
lev_src(icol, ilay+1, igpt) = pfrac * planck_function_1;
}
if (ilay == nlay)
{
planck_function_1 = interpolate1D(tlev(icol, ilay+1), temp_ref_min, totplnk_delta, nPlanckTemp, &totplnk(1, ibnd));
lev_src(icol, ilay+1, igpt) = pfrac * planck_function_1;
}

if (ilay == sfc_lay)
{
planck_function_1 = interpolate1D(tsfc(icol) , temp_ref_min, totplnk_delta, nPlanckTemp, &totplnk(1, ibnd));
const Float planck_function_2 = interpolate1D(tsfc(icol) + delta_Tsurf, temp_ref_min, totplnk_delta, nPlanckTemp, &totplnk(1, ibnd));
sfc_src (icol, igpt) = pfrac * planck_function_1;
sfc_src_jac(icol, igpt) = pfrac * (planck_function_2 - planck_function_1);
}
if (ilay == sfc_lay)
{
planck_function_1 = interpolate1D(tsfc(icol) , temp_ref_min, totplnk_delta, nPlanckTemp, &totplnk(1, ibnd));
const Float planck_function_2 = interpolate1D(tsfc(icol) + delta_Tsurf, temp_ref_min, totplnk_delta, nPlanckTemp, &totplnk(1, ibnd));
sfc_src (icol, igpt) = pfrac * planck_function_1;
sfc_src_jac(icol, igpt) = pfrac * (planck_function_2 - planck_function_1);
}
}
}
Expand Down
18 changes: 6 additions & 12 deletions src_kernels_cuda/gas_optics_rrtmgp_kernels_launchers.cu
Original file line number Diff line number Diff line change
Expand Up @@ -473,23 +473,17 @@ namespace Gas_optics_rrtmgp_kernels_cuda

const Float delta_Tsurf = Float(1.);

const int block_col = 32;
const int block_lay = 4;

const int grid_col = ncol/block_col + (ncol%block_col > 0);
const int grid_lay = nlay/block_lay + (nlay%block_lay > 0);

dim3 grid_gpu(grid_col, grid_lay);
dim3 block_gpu(block_col, block_lay);
dim3 grid_gpu;
dim3 block_gpu;

if (tunings.count("Planck_source_kernel") == 0)
{
std::tie(grid_gpu, block_gpu) = tune_kernel(
"Planck_source_kernel",
dim3(ncol, nlay),
{1, 2, 4, 8, 16, 32, 48, 64, 96, 128, 256, 512},
{1, 2, 4, 8, 16, 32, 48, 64, 96, 128, 256, 512},
dim3(ncol, nlay, ngpt),
{4, 8, 16, 32, 48, 64, 96, 128},
{1},
{4, 8, 16, 32, 48, 64, 96, 128},
Planck_source_kernel,
ncol, nlay, nbnd, ngpt,
nflav, neta, npres, ntemp, nPlanckTemp,
Expand All @@ -510,7 +504,7 @@ namespace Gas_optics_rrtmgp_kernels_cuda
block_gpu = tunings["Planck_source_kernel"].second;
}

grid_gpu = calc_grid_size(block_gpu, dim3(ncol, nlay));
grid_gpu = calc_grid_size(block_gpu, dim3(ncol, nlay, ngpt));

Planck_source_kernel<<<grid_gpu, block_gpu>>>(
ncol, nlay, nbnd, ngpt,
Expand Down

0 comments on commit 8d54bdf

Please sign in to comment.