Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reformatted device_api and host_api benchmarks to include engine, distribution, mode, throughput (gigabytes per second), and lambda columns #536

Merged
merged 10 commits into from
Aug 21, 2024
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ Documentation for rocRAND is available at

* Added host generator for MT19937
* Support for `rocrand_generate_poisson` in hipGraphs
* Added engine, distribution, mode, throughput_gigabytes_per_second, and lambda columns to the CSV output of
benchmark_rocrand_host_api and benchmark_rocrand_device_api. To see these new columns, set `--benchmark_format=csv`
or `--benchmark_out_format=csv --benchmark_out="outName.csv"`.

### Changes

Expand Down
104 changes: 77 additions & 27 deletions benchmark/benchmark_rocrand_device_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@
#include <rocrand/rocrand_kernel.h>
#include <rocrand/rocrand_mtgp32_11213.h>

#include "custom_csv_formater.hpp"
#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <numeric>
Expand All @@ -40,8 +42,10 @@
#endif

template<typename EngineState>
__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void init_kernel(
EngineState* states, const unsigned long long seed, const unsigned long long offset)
__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE)
void init_kernel(EngineState* states,
const unsigned long long seed,
const unsigned long long offset)
{
const unsigned int state_id = blockIdx.x * blockDim.x + threadIdx.x;
EngineState state;
Expand All @@ -50,8 +54,9 @@ __global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void init_kernel(
}

template<typename EngineState, typename T, typename Generator>
__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void generate_kernel(
EngineState* states, T* data, const size_t size, Generator generator)
__global__
__launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE)
void generate_kernel(EngineState* states, T* data, const size_t size, Generator generator)
{
const unsigned int state_id = blockIdx.x * blockDim.x + threadIdx.x;
const unsigned int stride = gridDim.x * blockDim.x;
Expand Down Expand Up @@ -119,12 +124,13 @@ struct runner
};

template<typename T, typename Generator>
__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void generate_kernel(
rocrand_state_mtgp32* states, T* data, const size_t size, Generator generator)
__global__
__launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE)
void generate_kernel(rocrand_state_mtgp32* states, T* data, const size_t size, Generator generator)
{
const unsigned int state_id = blockIdx.x;
unsigned int index = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int stride = gridDim.x * blockDim.x;
unsigned int stride = gridDim.x * blockDim.x;

__shared__ rocrand_state_mtgp32 state;
rocrand_mtgp32_block_copy(&states[state_id], &state);
Expand Down Expand Up @@ -191,8 +197,8 @@ struct runner<rocrand_state_mtgp32>
}
};

__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void init_kernel(
rocrand_state_lfsr113* states, const uint4 seed)
__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE)
void init_kernel(rocrand_state_lfsr113* states, const uint4 seed)
{
const unsigned int state_id = blockIdx.x * blockDim.x + threadIdx.x;
rocrand_state_lfsr113 state;
Expand Down Expand Up @@ -255,8 +261,9 @@ struct runner<rocrand_state_lfsr113>
};

template<typename EngineState, typename SobolType>
__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void init_sobol_kernel(
EngineState* states, SobolType* directions, SobolType offset)
__global__
__launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE)
void init_sobol_kernel(EngineState* states, SobolType* directions, SobolType offset)
{
const unsigned int dimension = blockIdx.y;
const unsigned int state_id = blockIdx.x * blockDim.x + threadIdx.x;
Expand All @@ -266,8 +273,12 @@ __global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void init_sobol_ker
}

template<typename EngineState, typename SobolType>
__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void init_scrambled_sobol_kernel(
EngineState* states, SobolType* directions, SobolType* scramble_constants, SobolType offset)
__global__
__launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE)
void init_scrambled_sobol_kernel(EngineState* states,
SobolType* directions,
SobolType* scramble_constants,
SobolType offset)
{
const unsigned int dimension = blockIdx.y;
const unsigned int state_id = blockIdx.x * blockDim.x + threadIdx.x;
Expand All @@ -281,8 +292,9 @@ __global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void init_scrambled

// generate_kernel for the normal and scrambled sobol generators
template<typename EngineState, typename T, typename Generator>
__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void generate_sobol_kernel(
EngineState* states, T* data, const size_t size, Generator generator)
__global__
__launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE)
void generate_sobol_kernel(EngineState* states, T* data, const size_t size, Generator generator)
{
const unsigned int dimension = blockIdx.y;
const unsigned int state_id = blockIdx.x * blockDim.x + threadIdx.x;
Expand Down Expand Up @@ -614,7 +626,9 @@ struct generator_uint : public generator_type
return "uniform-uint";
}

__device__ data_type operator()(Engine* state) const
__device__
data_type
operator()(Engine* state) const
{
return rocrand(state);
}
Expand All @@ -630,7 +644,9 @@ struct generator_ullong : public generator_type
return "uniform-ullong";
}

__device__ data_type operator()(Engine* state) const
__device__
data_type
operator()(Engine* state) const
{
return rocrand(state);
}
Expand All @@ -646,7 +662,9 @@ struct generator_uniform : public generator_type
return "uniform-float";
}

__device__ data_type operator()(Engine* state) const
__device__
data_type
operator()(Engine* state) const
{
return rocrand_uniform(state);
}
Expand All @@ -662,7 +680,9 @@ struct generator_uniform_double : public generator_type
return "uniform-double";
}

__device__ data_type operator()(Engine* state) const
__device__
data_type
operator()(Engine* state) const
{
return rocrand_uniform_double(state);
}
Expand All @@ -678,7 +698,9 @@ struct generator_normal : public generator_type
return "normal-float";
}

__device__ data_type operator()(Engine* state) const
__device__
data_type
operator()(Engine* state) const
{
return rocrand_normal(state);
}
Expand All @@ -694,7 +716,9 @@ struct generator_normal_double : public generator_type
return "normal-double";
}

__device__ data_type operator()(Engine* state) const
__device__
data_type
operator()(Engine* state) const
{
return rocrand_normal_double(state);
}
Expand All @@ -710,7 +734,9 @@ struct generator_log_normal : public generator_type
return "log-normal-float";
}

__device__ data_type operator()(Engine* state) const
__device__
data_type
operator()(Engine* state) const
{
return rocrand_log_normal(state, 0.f, 1.f);
}
Expand All @@ -726,7 +752,9 @@ struct generator_log_normal_double : public generator_type
return "log-normal-double";
}

__device__ data_type operator()(Engine* state) const
__device__
data_type
operator()(Engine* state) const
{
return rocrand_log_normal_double(state, 0., 1.);
}
Expand All @@ -744,7 +772,9 @@ struct generator_poisson : public generator_type
return "poisson(lambda=" + stream.str() + ")";
}

__device__ data_type operator()(Engine* state)
__device__
data_type
operator()(Engine* state)
{
return rocrand_poisson(state, lambda);
}
Expand Down Expand Up @@ -774,7 +804,9 @@ struct generator_discrete_poisson : public generator_type
ROCRAND_CHECK(rocrand_destroy_discrete_distribution(discrete_distribution));
}

__device__ data_type operator()(Engine* state)
__device__
data_type
operator()(Engine* state)
{
return rocrand_discrete(state, discrete_distribution);
}
Expand Down Expand Up @@ -814,7 +846,9 @@ struct generator_discrete_custom : public generator_type
ROCRAND_CHECK(rocrand_destroy_discrete_distribution(discrete_distribution));
}

__device__ data_type operator()(Engine* state)
__device__
data_type
operator()(Engine* state)
{
return rocrand_discrete(state, discrete_distribution);
}
Expand Down Expand Up @@ -961,6 +995,14 @@ void add_benchmarks(const benchmark_context& ctx,

int main(int argc, char* argv[])
{
// Get the format and filter parameters before they are passed into
// benchmark::Initialize()
std::string outFormat = "";
std::string filter = "";
std::string consoleFormat = "";

getFormats(argc, argv, outFormat, filter, consoleFormat);

benchmark::Initialize(&argc, argv);

cli::Parser parser(argc, argv);
Expand Down Expand Up @@ -1045,8 +1087,16 @@ int main(int argc, char* argv[])
b->Unit(benchmark::kMillisecond);
}

benchmark::BenchmarkReporter* console_reporter = getConsoleReporter(consoleFormat);
benchmark::BenchmarkReporter* out_file_reporter = getOutFileReporter(outFormat);

std::string spec = (filter == "" || filter == "all") ? "." : filter;

// Run benchmarks
benchmark::RunSpecifiedBenchmarks();
if(outFormat == "") // default case
benchmark::RunSpecifiedBenchmarks(console_reporter, spec);
else
benchmark::RunSpecifiedBenchmarks(console_reporter, out_file_reporter, spec);
HIP_CHECK(hipStreamDestroy(stream));

return 0;
Expand Down
Loading
Loading