diff --git a/include_rt_kernels/rte_solver_kernels_cuda_rt.h b/include_rt_kernels/rte_solver_kernels_cuda_rt.h index c0b0ed8..14793b9 100644 --- a/include_rt_kernels/rte_solver_kernels_cuda_rt.h +++ b/include_rt_kernels/rte_solver_kernels_cuda_rt.h @@ -38,6 +38,8 @@ namespace Rte_solver_kernels_cuda_rt void apply_BC(const int ncol, const int nlay, const int ngpt, const Bool top_at_1, Float* gpt_flux_dn); void apply_BC(const int ncol, const int nlay, const int ngpt, const Bool top_at_1, const Float* inc_flux_dif, Float* gpt_flux_dn); + + void apply_BC(const int ncol, const int nlay, const int ngpt, const Bool top_at_1, const Float inc_flux, Float* gpt_flux_dn); void sw_solver_2stream( const int ncol, const int nlay, const int ngpt, const Bool top_at_1, diff --git a/python/set_virtual_camera.py b/python/set_virtual_camera.py index 79cf166..d0f4086 100644 --- a/python/set_virtual_camera.py +++ b/python/set_virtual_camera.py @@ -91,7 +91,13 @@ cam[var][:] = args[var] if not args['sza'] is None: + try: + ncf.createVariable('sza','f4',ncf['mu0'].dimensions) + except: + pass + ncf['sza'][:] = np.deg2rad(args['sza']) ncf['mu0'][:] = np.cos(np.deg2rad(args['sza'])) + if not args['azi'] is None: ncf['azi'][:] = np.deg2rad(args['azi']) diff --git a/src_cuda_rt/Raytracer.cu b/src_cuda_rt/Raytracer.cu index 2ce5388..a43506c 100644 --- a/src_cuda_rt/Raytracer.cu +++ b/src_cuda_rt/Raytracer.cu @@ -53,18 +53,18 @@ namespace const Float fz = Float(grid_cells.z) / Float(kn_grid.z); const int x0 = grid_x*fx; - const int x1 = floor((grid_x+1)*fx); + const int x1 = min(grid_cells.x-1, int(floor((grid_x+1)*fx))); const int y0 = grid_y*fy; - const int y1 = floor((grid_y+1)*fy); + const int y1 = min(grid_cells.y-1, int(floor((grid_y+1)*fy))); const int z0 = grid_z*fz; - const int z1 = floor((grid_z+1)*fz); + const int z1 = min(grid_cells.z-1, int(floor((grid_z+1)*fz))); const int ijk_grid = grid_x + grid_y*kn_grid.x + grid_z*kn_grid.y*kn_grid.x; Float k_null = k_ext_null_min; - for (int k=z0; k 0); + + dim3 grid_gpu(grid_col); + dim3 block_gpu(block_col); + apply_BC_kernel<<>>(ncol, nlay, ngpt, top_at_1, inc_flux, gpt_flux_dn); + } + void apply_BC(const int ncol, const int nlay, const int ngpt, const Bool top_at_1, const Float* inc_flux_dif, Float* gpt_flux_dn) { const int block_col = 32; diff --git a/src_kernels_cuda_rt/rte_solver_kernels_rt.cu b/src_kernels_cuda_rt/rte_solver_kernels_rt.cu index 03261c7..9ee6a66 100644 --- a/src_kernels_cuda_rt/rte_solver_kernels_rt.cu +++ b/src_kernels_cuda_rt/rte_solver_kernels_rt.cu @@ -305,6 +305,18 @@ void apply_BC_kernel_lw(const int isfc, int ncol, const int nlay, const int ngpt } } +__global__ +void apply_BC_kernel(const int ncol, const int nlay, const int ngpt, const Bool top_at_1, const Float inc_flux, Float* __restrict__ flux_dn) +{ + const int icol = blockIdx.x*blockDim.x + threadIdx.x; + if ( (icol < ncol) ) + { + const int idx_out = icol + ((top_at_1 ? 0 : (nlay * ncol))); + const int idx_in = icol; + flux_dn[idx_out] = inc_flux; + } +} + __global__ void apply_BC_kernel(const int ncol, const int nlay, const int ngpt, const Bool top_at_1, const Float* __restrict__ inc_flux, Float* __restrict__ flux_dn) { diff --git a/src_test/test_rt_lite.cu b/src_test/test_rt_lite.cu index aa49a86..617425c 100644 --- a/src_test/test_rt_lite.cu +++ b/src_test/test_rt_lite.cu @@ -30,6 +30,8 @@ #include "Raytracer_bw.h" #include "raytracer_kernels_bw.h" #include "types.h" +#include "rte_solver_kernels_cuda_rt.h" +#include "Rte_sw_rt.h" #include "tools_gpu.h" @@ -129,20 +131,22 @@ void solve_radiation(int argc, char** argv) // Parse the command line options. std::map> command_line_switches { {"raytracing" , { true, "Use forward raytracer for irradiances. '--raytracing 256': use 256 rays per pixel" }}, - {"bw_raytracing" , { true, "Use backward raytracer radiances. '--raytracing 256': use 256 rays per pixel" }}, + {"bw-raytracing" , { true, "Use backward raytracer radiances. '--raytracing 256': use 256 rays per pixel" }}, + {"two-stream" , { true, "Perform two-stream computations"}}, {"cloud-mie" , { false, "Use Mie tables for cloud scattering in ray tracer" }}, {"independent-column", { false, "run raytracer in independent column mode"}}, {"profiling" , { false, "Perform additional profiling run." }} }; std::map> command_line_ints { {"raytracing", {32, "Number of rays initialised at TOD per pixel."}}, - {"bw_raytracing", {32, "Number of rays initialised at per camera pixel."}}} ; + {"bw-raytracing", {32, "Number of rays initialised at per camera pixel."}}} ; if (parse_command_line_options(command_line_switches, command_line_ints, argc, argv)) return; const bool switch_raytracing = command_line_switches.at("raytracing" ).first; - const bool switch_bw_raytracing = command_line_switches.at("bw_raytracing" ).first; + const bool switch_bw_raytracing = command_line_switches.at("bw-raytracing" ).first; + const bool switch_two_stream = command_line_switches.at("two-stream" ).first; const bool switch_cloud_mie = command_line_switches.at("cloud-mie" ).first; const bool switch_independent_column = command_line_switches.at("independent-column").first; const bool switch_profiling = command_line_switches.at("profiling" ).first; @@ -207,6 +211,7 @@ void solve_radiation(int argc, char** argv) // Read the atmospheric fields. const Array tot_tau(input_nc.get_variable("tot_tau", {n_lay, ny, nx}), {ncol, n_lay}); const Array tot_ssa(input_nc.get_variable("tot_ssa", {n_lay, ny, nx}), {ncol, n_lay}); + const Array tot_asy(input_nc.get_variable("tot_asy", {n_lay, ny, nx}), {ncol, n_lay}); Array cld_tau({ncol, n_lay}); Array cld_ssa({ncol, n_lay}); @@ -225,13 +230,16 @@ void solve_radiation(int argc, char** argv) aer_asy = std::move(input_nc.get_variable("aer_asy", {n_lay, ny, nx})); // read albedo, solar angles, and top-of-domain fluxes - Array sfc_albedo({1,ncol}); - sfc_albedo.fill(input_nc.get_variable("albedo")); + Array sfc_albedo({ncol, 1}); + sfc_albedo = std::move(input_nc.get_variable("albedo", {ny, nx})); + const Float zenith_angle = input_nc.get_variable("sza"); const Float azimuth_angle = input_nc.get_variable("azi"); const Float tod_dir = input_nc.get_variable("tod_direct"); - + Array mu0({ncol}); + mu0.fill(cos(zenith_angle)); + Camera camera; if (switch_bw_raytracing) { @@ -267,6 +275,10 @@ void solve_radiation(int argc, char** argv) Array_gpu flux_abs_dif({nx, ny, nz}); Array_gpu radiance({camera.nx, camera.ny}); + Array_gpu flux_dn_2stream; + Array_gpu flux_up_2stream; + Array_gpu flux_dn_dir_2stream; + // empty arrays (mie scattering not (yet) supported in lite version) Array mie_cdfs_c; Array mie_angs_c; @@ -317,17 +329,30 @@ void solve_radiation(int argc, char** argv) lum_c.fill(Float(0.)); Array_gpu land_use_map(lum_c); - + //// GPU arrays + Array_gpu tot_tau_g(tot_tau); + Array_gpu tot_ssa_g(tot_ssa); + Array_gpu tot_asy_g(tot_asy); + Array_gpu cld_tau_g(cld_tau); + Array_gpu cld_ssa_g(cld_ssa); + Array_gpu cld_asy_g(cld_asy); + Array_gpu aer_tau_g(aer_tau); + Array_gpu aer_ssa_g(aer_ssa); + Array_gpu aer_asy_g(aer_asy); + Array_gpu sfc_albedo_g(sfc_albedo); + Array_gpu mu0_g(mu0); + ////// CREATE THE OUTPUT FILE ////// // Create the general dimensions and arrays. Status::print_message("Preparing NetCDF output file."); Netcdf_file output_nc("rt_lite_output.nc", Netcdf_mode::Create); - if (switch_raytracing) + if (switch_raytracing || switch_two_stream) { output_nc.add_dimension("x", nx); output_nc.add_dimension("y", ny); - output_nc.add_dimension("z", n_z_in); + output_nc.add_dimension("z", nz); + output_nc.add_dimension("lev", n_lay+1); } if (switch_bw_raytracing) { @@ -335,16 +360,38 @@ void solve_radiation(int argc, char** argv) output_nc.add_dimension("ny", camera.ny); } - //// GPU arrays - Array_gpu tot_tau_g(tot_tau); - Array_gpu tot_ssa_g(tot_ssa); - Array_gpu cld_tau_g(cld_tau); - Array_gpu cld_ssa_g(cld_ssa); - Array_gpu cld_asy_g(cld_asy); - Array_gpu aer_tau_g(aer_tau); - Array_gpu aer_ssa_g(aer_ssa); - Array_gpu aer_asy_g(aer_asy); - Array_gpu sfc_albedo_g(sfc_albedo); + + if (switch_two_stream) + { + flux_up_2stream.set_dims({ncol, n_lay+1}); + flux_dn_2stream.set_dims({ncol, n_lay+1}); + flux_dn_dir_2stream.set_dims({ncol, n_lay+1}); + + Rte_sw_rt rte_sw; + Rte_solver_kernels_cuda_rt::apply_BC(ncol, n_lay, 1, 0, tod_dir * cos(zenith_angle), flux_dn_dir_2stream.ptr()); + Rte_solver_kernels_cuda_rt::apply_BC(ncol, n_lay, 1, 0, flux_dn_2stream.ptr()); + + Rte_solver_kernels_cuda_rt::sw_solver_2stream( + ncol, n_lay, 1, 0, + tot_tau_g.ptr(), + tot_ssa_g.ptr(), + tot_asy_g.ptr(), + mu0_g.ptr(), + sfc_albedo_g.ptr(), sfc_albedo_g.ptr(), + flux_up_2stream.ptr(), flux_dn_2stream.ptr(), flux_dn_dir_2stream.ptr()); + + Array flux_up_2stream_c(flux_up_2stream); + Array flux_dn_2stream_c(flux_dn_2stream); + Array flux_dn_dir_2stream_c(flux_dn_dir_2stream); + + auto nc_up_2stream = output_nc.add_variable("flux_up_2stream" , {"lev", "y", "x"}); + auto nc_dn_2stream = output_nc.add_variable("flux_dn_2stream" , {"lev", "y", "x"}); + auto nc_dn_dir_2stream = output_nc.add_variable("flux_dn_dir_2stream" , {"lev", "y", "x"}); + + nc_up_2stream.insert(flux_up_2stream_c .v(), {0, 0, 0}); + nc_dn_2stream.insert(flux_dn_2stream_c .v(), {0, 0, 0}); + nc_dn_dir_2stream.insert(flux_dn_dir_2stream_c .v(), {0, 0, 0}); + } if (switch_raytracing) {