Skip to content

Commit

Permalink
Merge pull request #32 from MennoVeerman/main
Browse files Browse the repository at this point in the history
fix #232
  • Loading branch information
Chiil authored Feb 20, 2024
2 parents 129363e + aaf3337 commit 416a670
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 30 deletions.
54 changes: 27 additions & 27 deletions src_cuda_rt/Raytracer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ Raytracer::Raytracer()


void Raytracer::trace_rays(
const int qrng_gpt_offset,
const int igpt,
const Int photons_per_pixel,
const Vector<int> grid_cells,
const Vector<Float> grid_d,
Expand Down Expand Up @@ -279,41 +279,36 @@ void Raytracer::trace_rays(
Int photons_per_thread = pow(Float(2.), std::floor(std::log2(photons_per_thread_tmp)));

dim3 grid,block;

// With a very low number of columns and a low photons_per_pixel, we may have too many threads each firing a single photon, actually exceeding photons_per_pixel.
// In that case, reduce grid and block size, but issue a warning that we do so
// In that case, reduce grid and block size
Int actual_photons_per_pixel = photons_per_thread * rt_kernel_grid * rt_kernel_block / (qrng_grid_x * qrng_grid_y);
if (actual_photons_per_pixel > photons_per_pixel)
{
int rt_kernel_grid_new = rt_kernel_grid;
int rt_kernel_block_new = rt_kernel_block;
Int n_too_many = actual_photons_per_pixel / photons_per_pixel;
int flip = true;
while (n_too_many > 1)
{
if (flip)
rt_kernel_grid_new /= 2;
else
rt_kernel_block_new /= 2;
n_too_many /= 2;
flip = not flip;
grid = {rt_kernel_grid_new};
block = {rt_kernel_block_new};
printf("Using only block size %d and grid size %d due to low column count and low number of requested photons\n",rt_kernel_block_new,rt_kernel_grid_new);
}
}
else

int rt_kernel_grid_size = rt_kernel_grid;
int rt_kernel_block_size = rt_kernel_block;
while ( (actual_photons_per_pixel > photons_per_pixel) )
{
grid = {rt_kernel_grid};
block = {rt_kernel_block};
if (rt_kernel_grid_size > 1)
rt_kernel_grid_size /= 2;
else
rt_kernel_block_size /= 2;

photons_per_thread_tmp = std::max(Float(1), static_cast<Float>(photons_total) / (rt_kernel_grid_size * rt_kernel_block_size));
photons_per_thread = pow(Float(2.), std::floor(std::log2(photons_per_thread_tmp)));
actual_photons_per_pixel = photons_per_thread * rt_kernel_grid_size * rt_kernel_block_size / (qrng_grid_x * qrng_grid_y);
}
// size of mie table, will be zero if HG is used for cloud scattering

grid = {rt_kernel_grid_size};
block = {rt_kernel_block_size};

const int mie_table_size = mie_cdf.size();


const int qrng_gpt_offset = (igpt-1) * rt_kernel_grid_size * rt_kernel_block_size * photons_per_thread;
ray_tracer_kernel<<<grid, block,sizeof(Float)*mie_table_size>>>(
photons_per_thread,
qrng_grid_x,
qrng_grid_y,
Int(qrng_gpt_offset-1),
qrng_gpt_offset,
k_null_grid.ptr(),
tod_dn_count.ptr(),
tod_up_count.ptr(),
Expand Down Expand Up @@ -357,4 +352,9 @@ void Raytracer::trace_rays(
atmos_diffuse_count.ptr(),
flux_abs_dir.ptr(),
flux_abs_dif.ptr());





}
6 changes: 3 additions & 3 deletions src_kernels_cuda_rt/raytracer_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ void ray_tracer_kernel(
const Vector<int> grid_cells,
const Vector<int> kn_grid,
const Vector<Float> sun_direction,
curandDirectionVectors32_t* qrng_vectors, unsigned int* qrng_constants, // const Float* __restrict__ cloud_dims)
curandDirectionVectors32_t* qrng_vectors, unsigned int* qrng_constants,
const Float* __restrict__ mie_cdf,
const Float* __restrict__ mie_ang,
const int mie_table_size)
Expand All @@ -163,8 +163,8 @@ void ray_tracer_kernel(
const int n = blockDim.x * blockIdx.x + threadIdx.x;

Photon photon;
Random_number_generator<Float> rng(n);
Quasi_random_number_generator_2d qrng(qrng_vectors, qrng_constants, n*photons_to_shoot + qrng_gpt_offset*photons_to_shoot*rt_kernel_block*rt_kernel_grid);
Random_number_generator<Float> rng(n+qrng_gpt_offset);
Quasi_random_number_generator_2d qrng(qrng_vectors, qrng_constants, n*photons_to_shoot + qrng_gpt_offset);

const Float s_min = max(grid_size.z, max(grid_size.y, grid_size.x)) * Float_epsilon;

Expand Down

0 comments on commit 416a670

Please sign in to comment.