Skip to content

Commit

Permalink
Add CUDA and NVRTC testing
Browse files Browse the repository at this point in the history
  • Loading branch information
mborland committed Aug 16, 2024
1 parent b356bd3 commit 7844bd0
Show file tree
Hide file tree
Showing 6 changed files with 625 additions and 3 deletions.
31 changes: 28 additions & 3 deletions test/cuda_jamfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ run test_arcsine_pdf_float.cu ;
run test_arcsine_quan_double.cu ;
run test_arcsine_quan_float.cu ;
run test_arcsine_range_support_double.cu ;

run test_bernoulli_cdf_double.cu ;
run test_bernoulli_cdf_float.cu ;
run test_bernoulli_pdf_double.cu ;
run test_bernoulli_pdf_float.cu ;
run test_bernoulli_range_support_double.cu ;
run test_bernoulli_range_support_float.cu ;

run test_cauchy_cdf_double.cu ;
run test_cauchy_cdf_float.cu ;
run test_cauchy_pdf_double.cu ;
Expand All @@ -32,6 +34,7 @@ run test_cauchy_quan_double.cu ;
run test_cauchy_quan_float.cu ;
run test_cauchy_range_support_double.cu ;
run test_cauchy_range_support_float.cu ;

run test_exponential_cdf_double.cu ;
run test_exponential_cdf_float.cu ;
run test_exponential_pdf_double.cu ;
Expand All @@ -40,40 +43,47 @@ run test_exponential_quan_double.cu ;
run test_exponential_quan_float.cu ;
run test_exponential_range_support_double.cu ;
run test_exponential_range_support_float.cu ;

run test_extreme_value_cdf_double.cu ;
run test_extreme_value_cdf_float.cu ;
run test_extreme_value_pdf_double.cu ;
run test_extreme_value_pdf_float.cu ;
run test_extreme_value_quan_double.cu ;
run test_extreme_value_quan_float.cu ;

run test_holtsmark_cdf_double.cu ;
run test_holtsmark_cdf_float.cu ;
run test_holtsmark_pdf_double.cu ;
run test_holtsmark_pdf_float.cu ;

run test_landau_cdf_double.cu ;
run test_landau_cdf_float.cu ;
run test_landau_pdf_double.cu ;
run test_landau_pdf_float.cu ;
# Jam tokenizes on whitespace: the terminating ";" must be a separate token,
# so a space before it is required.
run test_landau_quan_double.cu ;
run test_landau_quan_float.cu ;

run test_laplace_cdf_double.cu ;
run test_laplace_cdf_float.cu ;
run test_laplace_pdf_double.cu ;
run test_laplace_pdf_float.cu ;
run test_laplace_quan_double.cu ;
run test_laplace_quan_float.cu ;

run test_logistic_cdf_double.cu ;
run test_logistic_cdf_float.cu ;
run test_logistic_pdf_double.cu ;
run test_logistic_pdf_float.cu ;
run test_logistic_quan_double.cu ;
run test_logistic_quan_float.cu ;

run test_mapairy_cdf_double.cu ;
run test_mapairy_cdf_float.cu ;
run test_mapairy_pdf_double.cu ;
run test_mapairy_pdf_float.cu ;
run test_mapairy_quan_double.cu ;
run test_mapairy_quan_float.cu ;

run test_saspoint5_cdf_double.cu ;
run test_saspoint5_cdf_float.cu ;
run test_saspoint5_pdf_double.cu ;
Expand All @@ -82,17 +92,24 @@ run test_saspoint5_quan_double.cu ;
run test_saspoint5_quan_float.cu ;

# Special Functions
# run test_beta_simple.cpp ;
run test_beta_double.cu ;
run test_beta_float.cu ;

run test_bessel_i0_double.cu ;
run test_bessel_i0_float.cu ;

run test_cbrt_double.cu ;
run test_cbrt_float.cu ;

run test_changesign_double.cu ;
run test_changesign_float.cu ;

run test_cos_pi_double.cu ;
run test_cos_pi_float.cu ;

run test_digamma_double.cu ;
run test_digamma_float.cu ;

run test_erf_double.cu ;
run test_erf_float.cu ;
run test_erf_inv_double.cu ;
Expand All @@ -101,21 +118,29 @@ run test_erfc_double.cu ;
run test_erfc_float.cu ;
run test_erfc_inv_double.cu ;
run test_erfc_inv_float.cu ;

run test_expm1_double.cu ;
run test_expm1_float.cu ;

run test_lgamma_double.cu ;
run test_lgamma_float.cu ;
run test_tgamma_double.cu ;
run test_tgamma_float.cu ;

run test_log1p_double.cu ;
run test_log1p_float.cu ;

run test_modf_double.cu ;
run test_modf_float.cu ;

run test_round_double.cu ;
run test_round_float.cu ;

run test_sin_pi_double.cu ;
run test_sin_pi_float.cu ;
# NOTE(review): duplicate registrations removed — test_tgamma_double.cu and
# test_tgamma_float.cu are already run above alongside the lgamma tests.

run test_trigamma_double.cu ;
run test_trigamma_float.cu ;

run test_trunc_double.cu ;
run test_trunc_float.cu ;
17 changes: 17 additions & 0 deletions test/nvrtc_jamfile
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,19 @@ run test_saspoint5_quan_nvrtc_float.cpp ;
# Special Functions
run test_beta_nvrtc_double.cpp ;
run test_beta_nvrtc_float.cpp ;

run test_bessel_i0_nvrtc_double.cpp ;
run test_bessel_i0_nvrtc_float.cpp ;

run test_cbrt_nvrtc_double.cpp ;
run test_cbrt_nvrtc_float.cpp ;

run test_cos_pi_nvrtc_double.cpp ;
run test_cos_pi_nvrtc_float.cpp ;

run test_digamma_nvrtc_double.cpp ;
run test_digamma_nvrtc_float.cpp ;

run test_erf_nvrtc_double.cpp ;
run test_erf_nvrtc_float.cpp ;
run test_erfc_nvrtc_double.cpp ;
Expand All @@ -104,22 +111,32 @@ run test_erf_inv_nvrtc_double.cpp ;
run test_erf_inv_nvrtc_float.cpp ;
run test_erfc_inv_nvrtc_double.cpp ;
run test_erfc_inv_nvrtc_float.cpp ;

run test_expm1_nvrtc_double.cpp ;
run test_expm1_nvrtc_float.cpp ;

run test_fpclassify_nvrtc_double.cpp ;
run test_fpclassify_nvrtc_float.cpp ;

run test_gamma_nvrtc_double.cpp ;
run test_gamma_nvrtc_float.cpp ;

run test_log1p_nvrtc_double.cpp ;
run test_log1p_nvrtc_float.cpp ;

run test_modf_nvrtc_double.cpp ;
run test_modf_nvrtc_float.cpp ;

run test_round_nvrtc_double.cpp ;
run test_round_nvrtc_float.cpp ;

run test_sign_nvrtc_double.cpp ;
run test_sign_nvrtc_float.cpp ;

run test_sin_pi_nvrtc_double.cpp ;
run test_sin_pi_nvrtc_float.cpp ;

run test_trigamma_nvrtc_double.cpp ;
run test_trigamma_nvrtc_float.cpp ;

run test_trunc_nvrtc_double.cpp ;
100 changes: 100 additions & 0 deletions test/test_bessel_i0_double.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@

// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/special_functions.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
* CUDA Kernel Device code
*
*/
/**
 * CUDA kernel: evaluates bessel_i0 element-wise, out[i] = bessel_i0(in[i]).
 *
 * Expects a 1D grid of 1D blocks covering at least numElements threads;
 * each thread handles one element, with a bounds check for the grid tail.
 * (The unused `using std::cos;` left over from the template this file was
 * copied from has been removed.)
 */
__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = boost::math::detail::bessel_i0(in[i]);
    }
}

/**
* Host main routine
*/
/**
 * Host main routine.
 *
 * Fills a managed input vector with pseudo-random values in [0, 1],
 * evaluates bessel_i0 for every element on the GPU, re-computes the same
 * values on the host, and requires device and host results to agree to
 * within 10 epsilon-differences.
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used, and compute its size
    int numElements = 50000;
    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

    // Allocate the managed input and output vectors (unified memory, so the
    // same pointers are usable from both host and device code).
    cuda_managed_ptr<float_type> input_vector(numElements);
    cuda_managed_ptr<float_type> output_vector(numElements);

    // Initialize the input vector with values in [0, 1]. rand() is left
    // unseeded on purpose so runs are reproducible.
    for (int i = 0; i < numElements; ++i)
    {
        input_vector[i] = rand()/(float_type)RAND_MAX;
    }

    // Launch the bessel_i0 CUDA kernel (one thread per element, ceil-div grid).
    int threadsPerBlock = 1024;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;

    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);

    // Catch launch-configuration errors immediately...
    err = cudaGetLastError();
    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch bessel_i0 kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // ...then wait for completion and catch asynchronous execution errors too.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess)
    {
        std::cerr << "bessel_i0 kernel execution failed (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;

    // Compute the same values on the host as the reference result.
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
        results.push_back(boost::math::detail::bessel_i0(input_vector[i]));
    double t = w.elapsed();

    // Check the device results against the host reference.
    for(int i = 0; i < numElements; ++i)
    {
        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
100 changes: 100 additions & 0 deletions test/test_bessel_i0_float.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@

// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/special_functions.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef float float_type;

/**
* CUDA Kernel Device code
*
*/
/**
 * CUDA kernel: evaluates bessel_i0 element-wise, out[i] = bessel_i0(in[i]).
 *
 * Expects a 1D grid of 1D blocks covering at least numElements threads;
 * each thread handles one element, with a bounds check for the grid tail.
 * (The unused `using std::cos;` left over from the template this file was
 * copied from has been removed.)
 */
__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = boost::math::detail::bessel_i0(in[i]);
    }
}

/**
* Host main routine
*/
/**
 * Host main routine.
 *
 * Fills a managed input vector with pseudo-random values in [0, 1],
 * evaluates bessel_i0 for every element on the GPU, re-computes the same
 * values on the host, and requires device and host results to agree to
 * within 10 epsilon-differences.
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used, and compute its size
    int numElements = 50000;
    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

    // Allocate the managed input and output vectors (unified memory, so the
    // same pointers are usable from both host and device code).
    cuda_managed_ptr<float_type> input_vector(numElements);
    cuda_managed_ptr<float_type> output_vector(numElements);

    // Initialize the input vector with values in [0, 1]. rand() is left
    // unseeded on purpose so runs are reproducible.
    for (int i = 0; i < numElements; ++i)
    {
        input_vector[i] = rand()/(float_type)RAND_MAX;
    }

    // Launch the bessel_i0 CUDA kernel (one thread per element, ceil-div grid).
    int threadsPerBlock = 1024;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;

    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);

    // Catch launch-configuration errors immediately...
    err = cudaGetLastError();
    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch bessel_i0 kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // ...then wait for completion and catch asynchronous execution errors too.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess)
    {
        std::cerr << "bessel_i0 kernel execution failed (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;

    // Compute the same values on the host as the reference result.
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
        results.push_back(boost::math::detail::bessel_i0(input_vector[i]));
    double t = w.elapsed();

    // Check the device results against the host reference.
    for(int i = 0; i < numElements; ++i)
    {
        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
Loading

0 comments on commit 7844bd0

Please sign in to comment.