Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[auto tuning] [rocRAND] Fixed auto tuning kernel launch error and updated gfx942 auto tuning parameters #595

Merged
merged 5 commits into from
Jan 2, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@
Documentation for rocRAND is available at
[https://rocm.docs.amd.com/projects/rocRAND/en/latest/](https://rocm.docs.amd.com/projects/rocRAND/en/latest/)

## (unreleased) rocRAND 3.3.0 for ROCm 6.5

### Changed
* Updated several `gfx942` auto tuning parameters.

### Resolved issues
* Fixed an issue where `mt19937.hpp` would cause kernel errors during auto tuning.

## rocRAND 3.3.0 for ROCm 6.4

### Added
Expand Down
2 changes: 1 addition & 1 deletion library/src/rng/config/lfsr113_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_LFSR113, T>
case target_arch::gfx1101: return 128;
case target_arch::gfx1100: return 64;
case target_arch::gfx1030: return 64;
case target_arch::gfx942: return 512;
case target_arch::gfx942: return 256;
case target_arch::gfx90a: return 64;
case target_arch::gfx908: return 256;
case target_arch::gfx906: return 256;
Expand Down
2 changes: 1 addition & 1 deletion library/src/rng/config/mrg32k3a_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MRG32K3A, T>
case target_arch::gfx1102: return 128;
case target_arch::gfx1101: return 128;
case target_arch::gfx1100: return 128;
case target_arch::gfx942: return 256;
case target_arch::gfx942: return 1024;
case target_arch::gfx90a: return 256;
case target_arch::gfx1030: return 256;
case target_arch::gfx908: return 1024;
Expand Down
6 changes: 3 additions & 3 deletions library/src/rng/config/mt19937_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
* This file is automatically generated by `/scripts/config-tuning/select_best_config.py`.
*/

namespace rocrand_host::detail
namespace rocrand_impl::host
{

template<class T>
Expand All @@ -41,7 +41,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MT19937, T>
case target_arch::gfx1102: return 128;
case target_arch::gfx1101: return 128;
case target_arch::gfx1100: return 64;
case target_arch::gfx942: return 128;
case target_arch::gfx942: return 256;
case target_arch::gfx90a: return 1024;
case target_arch::gfx908: return 512;
default:
Expand All @@ -67,4 +67,4 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MT19937, T>

} // end namespace rocrand_impl::host

#endif // ROCRAND_RNG_CONFIG_MT19937_HPP_
#endif // ROCRAND_RNG_CONFIG_MT19937_HPP_
2 changes: 1 addition & 1 deletion library/src/rng/config/philox4_32_10_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_PHILOX4_32_10, T>
case target_arch::gfx1101: return 1024;
case target_arch::gfx1100: return 512;
case target_arch::gfx1030: return 1024;
case target_arch::gfx942: return 1024;
case target_arch::gfx942: return 512;
case target_arch::gfx90a: return 512;
case target_arch::gfx908: return 512;
case target_arch::gfx906: return 64;
Expand Down
2 changes: 1 addition & 1 deletion library/src/rng/config/threefry2_32_20_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_THREEFRY2_32_20, T>
case target_arch::gfx1101: return 256;
case target_arch::gfx1100: return 1024;
case target_arch::gfx1030: return 256;
case target_arch::gfx942: return 256;
case target_arch::gfx942: return 512;
case target_arch::gfx90a: return 512;
case target_arch::gfx908: return 512;
case target_arch::gfx906: return 256;
Expand Down
2 changes: 1 addition & 1 deletion library/src/rng/config/threefry4_32_20_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_THREEFRY4_32_20, T>
case target_arch::gfx1101: return 512;
case target_arch::gfx1100: return 1024;
case target_arch::gfx1030: return 1024;
case target_arch::gfx942: return 1024;
case target_arch::gfx942: return 512;
case target_arch::gfx90a: return 256;
case target_arch::gfx908: return 256;
case target_arch::gfx906: return 256;
Expand Down
88 changes: 47 additions & 41 deletions library/src/rng/mt19937.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@

#ifndef ROCRAND_RNG_MT19937_H_
#define ROCRAND_RNG_MT19937_H_
#include "config/mt19937_config.hpp"

#include "common.hpp"
#include "config_types.hpp"
Expand Down Expand Up @@ -874,18 +875,19 @@ class mt19937_generator_template : public generator_impl_base
system_type::free(d_mt19937_jump);

// This kernel is not actually tuned for ordering, but the config is needed for the device-side compile-time check of the generator count
dynamic_dispatch(m_order,
[&, this](auto is_dynamic)
{
status = system_type::template launch<
init_engines_mt19937<ConfigProvider, is_dynamic>>(
dim3(config.blocks),
dim3(config.threads),
0,
m_stream,
m_engines,
d_engines);
});
dynamic_dispatch(
m_order,
[&, this](auto is_dynamic)
{
status
= system_type::template launch<init_engines_mt19937<ConfigProvider, is_dynamic>,
ConfigProvider>(dim3(config.blocks),
dim3(config.threads),
0,
m_stream,
m_engines,
d_engines);
});
if(status != ROCRAND_STATUS_SUCCESS)
{
system_type::free(d_engines);
Expand Down Expand Up @@ -983,20 +985,22 @@ class mt19937_generator_template : public generator_impl_base
is_dynamic,
T,
vec_type,
Distribution>>(
dim3(config.blocks),
dim3(config.threads),
0,
m_stream,
m_engines,
m_start_input,
data,
size,
vec_data,
vec_size,
head_size,
tail_size,
distribution);
Distribution>,
ConfigProvider,
T,
is_dynamic>(dim3(config.blocks),
dim3(config.threads),
0,
m_stream,
m_engines,
m_start_input,
data,
size,
vec_data,
vec_size,
head_size,
tail_size,
distribution);
});
if(status != ROCRAND_STATUS_SUCCESS)
{
Expand All @@ -1014,20 +1018,22 @@ class mt19937_generator_template : public generator_impl_base
is_dynamic,
T,
vec_type,
Distribution>>(
dim3(config.blocks),
dim3(config.threads),
0,
m_stream,
m_engines,
m_start_input,
data,
size,
vec_data,
vec_size,
head_size,
tail_size,
distribution);
Distribution>,
ConfigProvider,
T,
is_dynamic>(dim3(config.blocks),
dim3(config.threads),
0,
m_stream,
m_engines,
m_start_input,
data,
size,
vec_data,
vec_size,
head_size,
tail_size,
distribution);
});
if(status != ROCRAND_STATUS_SUCCESS)
{
Expand Down Expand Up @@ -1122,4 +1128,4 @@ using mt19937_generator_host

} // namespace rocrand_impl::host

#endif // ROCRAND_RNG_MT19937_H_
#endif // ROCRAND_RNG_MT19937_H_
Loading