Skip to content

Commit

Permalink
Add launch bounds to PLMC and PPMC_VL kernels
Browse files Browse the repository at this point in the history
This improved performance on AMD systems considerably, because declaring the
maximum block size lets the compiler allocate additional registers per thread.
  • Loading branch information
bcaddy committed Jul 14, 2023
1 parent fb804d8 commit c6cb442
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 8 deletions.
4 changes: 2 additions & 2 deletions src/reconstruction/plmc_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
gamma, int dir)
* \brief When passed a stencil of conserved variables, returns the left and
right boundary values for the interface calculated using plm. */
__global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx,
Real dt, Real gamma, int dir, int n_fields)
__global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx,
int ny, int nz, Real dx, Real dt, Real gamma, int dir, int n_fields)
{
// get a thread ID
int const thread_id = threadIdx.x + blockIdx.x * blockDim.x;
Expand Down
4 changes: 2 additions & 2 deletions src/reconstruction/plmc_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
gamma, int dir)
* \brief When passed a stencil of conserved variables, returns the left and
right boundary values for the interface calculated using plm. */
__global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx,
Real dt, Real gamma, int dir, int n_fields);
__global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx,
int ny, int nz, Real dx, Real dt, Real gamma, int dir, int n_fields);

#endif // PLMC_CUDA_H
4 changes: 2 additions & 2 deletions src/reconstruction/ppmc_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -539,8 +539,8 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun
// =====================================================================================================================

// =====================================================================================================================
__global__ void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real gamma,
int dir)
__global__ __launch_bounds__(TPB) void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx,
int ny, int nz, Real gamma, int dir)
{
// get a thread ID
int const thread_id = threadIdx.x + blockIdx.x * blockDim.x;
Expand Down
4 changes: 2 additions & 2 deletions src/reconstruction/ppmc_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun
* \param[in] gamma The adiabatic index
* \param[in] dir The direction to reconstruct. 0=X, 1=Y, 2=Z
*/
__global__ void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real gamma,
int dir);
__global__ __launch_bounds__(TPB) void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx,
int ny, int nz, Real gamma, int dir);

#endif // PPMC_CUDA_H

0 comments on commit c6cb442

Please sign in to comment.