Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[auto tuning] [rocRAND] Fixed auto tuning kernel launch error and updated gfx942 auto tuning parameters #595

Merged
merged 5 commits into from
Jan 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@
Documentation for rocRAND is available at
[https://rocm.docs.amd.com/projects/rocRAND/en/latest/](https://rocm.docs.amd.com/projects/rocRAND/en/latest/)

## (unreleased) rocRAND 3.3.0 for ROCm 6.5

### Changed
* Updated several `gfx942` auto tuning parameters.

### Fixed
NguyenNhuDi marked this conversation as resolved.
Show resolved Hide resolved
* Fixed an issue where `mt19937.hpp` would cause kernel errors during auto tuning.

## rocRAND 3.3.0 for ROCm 6.4

### Added
Expand Down
4 changes: 3 additions & 1 deletion library/src/rng/config/lfsr113_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_LFSR113, T>
case target_arch::gfx1101: return 128;
case target_arch::gfx1100: return 64;
case target_arch::gfx1030: return 64;
case target_arch::gfx942: return 512;
case target_arch::gfx942: return 256;
case target_arch::gfx90a: return 64;
case target_arch::gfx908: return 256;
case target_arch::gfx906: return 256;
case target_arch::gfx1201: return 128;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_LFSR113, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_LFSR113, T>
case target_arch::gfx90a: return 2048;
case target_arch::gfx908: return 1024;
case target_arch::gfx906: return 2048;
case target_arch::gfx1201: return 512;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_LFSR113, T>::blocks;
}
Expand Down
2 changes: 2 additions & 0 deletions library/src/rng/config/mrg31k3p_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MRG31K3P, T>
case target_arch::gfx90a: return 512;
case target_arch::gfx908: return 1024;
case target_arch::gfx906: return 256;
case target_arch::gfx1201: return 1024;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_MRG31K3P, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MRG31K3P, T>
case target_arch::gfx90a: return 2048;
case target_arch::gfx908: return 600;
case target_arch::gfx906: return 1792;
case target_arch::gfx1201: return 512;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_MRG31K3P, T>::blocks;
}
Expand Down
4 changes: 3 additions & 1 deletion library/src/rng/config/mrg32k3a_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,12 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MRG32K3A, T>
case target_arch::gfx1102: return 128;
case target_arch::gfx1101: return 128;
case target_arch::gfx1100: return 128;
case target_arch::gfx942: return 256;
case target_arch::gfx942: return 1024;
case target_arch::gfx90a: return 256;
case target_arch::gfx1030: return 256;
case target_arch::gfx908: return 1024;
case target_arch::gfx906: return 256;
case target_arch::gfx1201: return 1024;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_MRG32K3A, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MRG32K3A, T>
case target_arch::gfx90a: return 2048;
case target_arch::gfx908: return 600;
case target_arch::gfx906: return 2048;
case target_arch::gfx1201: return 256;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_MRG32K3A, T>::blocks;
}
Expand Down
8 changes: 5 additions & 3 deletions library/src/rng/config/mt19937_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
* This file is automatically generated by `/scripts/config-tuning/select_best_config.py`.
*/

namespace rocrand_host::detail
namespace rocrand_impl::host
{

template<class T>
Expand All @@ -41,9 +41,10 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MT19937, T>
case target_arch::gfx1102: return 128;
case target_arch::gfx1101: return 128;
case target_arch::gfx1100: return 64;
case target_arch::gfx942: return 128;
case target_arch::gfx942: return 256;
case target_arch::gfx90a: return 1024;
case target_arch::gfx908: return 512;
case target_arch::gfx1201: return 64;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_MT19937, T>::threads;
}
Expand All @@ -59,6 +60,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MT19937, T>
case target_arch::gfx942: return 1024;
case target_arch::gfx90a: return 64;
case target_arch::gfx908: return 64;
case target_arch::gfx1201: return 512;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_MT19937, T>::blocks;
}
Expand All @@ -67,4 +69,4 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MT19937, T>

} // end namespace rocrand_host::detail

#endif // ROCRAND_RNG_CONFIG_MT19937_HPP_
#endif // ROCRAND_RNG_CONFIG_MT19937_HPP_
2 changes: 2 additions & 0 deletions library/src/rng/config/mtgp32_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MTGP32, T>
case target_arch::gfx90a: return 256;
case target_arch::gfx908: return 256;
case target_arch::gfx906: return 256;
case target_arch::gfx1201: return 256;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_MTGP32, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MTGP32, T>
case target_arch::gfx90a: return 512;
case target_arch::gfx908: return 480;
case target_arch::gfx906: return 448;
case target_arch::gfx1201: return 512;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_MTGP32, T>::blocks;
}
Expand Down
4 changes: 3 additions & 1 deletion library/src/rng/config/philox4_32_10_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_PHILOX4_32_10, T>
case target_arch::gfx1101: return 1024;
case target_arch::gfx1100: return 512;
case target_arch::gfx1030: return 1024;
case target_arch::gfx942: return 1024;
case target_arch::gfx942: return 512;
case target_arch::gfx90a: return 512;
case target_arch::gfx908: return 512;
case target_arch::gfx906: return 64;
case target_arch::gfx1201: return 1024;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_PHILOX4_32_10, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_PHILOX4_32_10, T>
case target_arch::gfx90a: return 2048;
case target_arch::gfx908: return 3840;
case target_arch::gfx906: return 896;
case target_arch::gfx1201: return 1024;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_PHILOX4_32_10, T>::blocks;
}
Expand Down
4 changes: 3 additions & 1 deletion library/src/rng/config/threefry2_32_20_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_THREEFRY2_32_20, T>
case target_arch::gfx1101: return 256;
case target_arch::gfx1100: return 1024;
case target_arch::gfx1030: return 256;
case target_arch::gfx942: return 256;
case target_arch::gfx942: return 512;
case target_arch::gfx90a: return 512;
case target_arch::gfx908: return 512;
case target_arch::gfx906: return 256;
case target_arch::gfx1201: return 1024;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_THREEFRY2_32_20, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_THREEFRY2_32_20, T>
case target_arch::gfx90a: return 2048;
case target_arch::gfx908: return 1200;
case target_arch::gfx906: return 896;
case target_arch::gfx1201: return 2048;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_THREEFRY2_32_20, T>::blocks;
}
Expand Down
2 changes: 2 additions & 0 deletions library/src/rng/config/threefry2_64_20_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_THREEFRY2_64_20, T>
case target_arch::gfx90a: return 256;
case target_arch::gfx908: return 256;
case target_arch::gfx906: return 256;
case target_arch::gfx1201: return 512;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_THREEFRY2_64_20, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_THREEFRY2_64_20, T>
case target_arch::gfx90a: return 2048;
case target_arch::gfx908: return 960;
case target_arch::gfx906: return 560;
case target_arch::gfx1201: return 1024;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_THREEFRY2_64_20, T>::blocks;
}
Expand Down
4 changes: 3 additions & 1 deletion library/src/rng/config/threefry4_32_20_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_THREEFRY4_32_20, T>
case target_arch::gfx1101: return 512;
case target_arch::gfx1100: return 1024;
case target_arch::gfx1030: return 1024;
case target_arch::gfx942: return 1024;
case target_arch::gfx942: return 512;
case target_arch::gfx90a: return 256;
case target_arch::gfx908: return 256;
case target_arch::gfx906: return 256;
case target_arch::gfx1201: return 1024;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_THREEFRY4_32_20, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_THREEFRY4_32_20, T>
case target_arch::gfx90a: return 2048;
case target_arch::gfx908: return 1200;
case target_arch::gfx906: return 896;
case target_arch::gfx1201: return 1024;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_THREEFRY4_32_20, T>::blocks;
}
Expand Down
2 changes: 2 additions & 0 deletions library/src/rng/config/threefry4_64_20_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_THREEFRY4_64_20, T>
case target_arch::gfx90a: return 256;
case target_arch::gfx908: return 128;
case target_arch::gfx906: return 128;
case target_arch::gfx1201: return 128;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_THREEFRY4_64_20, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_THREEFRY4_64_20, T>
case target_arch::gfx90a: return 2048;
case target_arch::gfx908: return 1920;
case target_arch::gfx906: return 1792;
case target_arch::gfx1201: return 2048;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_THREEFRY4_64_20, T>::blocks;
}
Expand Down
2 changes: 2 additions & 0 deletions library/src/rng/config/xorwow_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_XORWOW, T>
case target_arch::gfx90a: return 128;
case target_arch::gfx908: return 256;
case target_arch::gfx906: return 256;
case target_arch::gfx1201: return 1024;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_XORWOW, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_XORWOW, T>
case target_arch::gfx90a: return 2048;
case target_arch::gfx908: return 600;
case target_arch::gfx906: return 560;
case target_arch::gfx1201: return 256;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_XORWOW, T>::blocks;
}
Expand Down
17 changes: 16 additions & 1 deletion library/src/rng/config_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ enum class target_arch : unsigned int
gfx1100 = 1100,
gfx1101 = 1101,
gfx1102 = 1102,
gfx1201 = 1201,
unknown = std::numeric_limits<unsigned int>::max(),
};

Expand Down Expand Up @@ -85,6 +86,8 @@ __host__ __device__ constexpr target_arch get_device_arch()
return target_arch::gfx1101;
#elif defined(__gfx1102__)
return target_arch::gfx1102;
#elif defined(__gfx1201__)
return target_arch::gfx1201;
#else
return target_arch::unknown;
#endif
Expand All @@ -106,7 +109,8 @@ inline target_arch parse_gcn_arch(const std::string& arch_name)
"gfx1030",
"gfx1100",
"gfx1101",
"gfx1102"};
"gfx1102",
"gfx1201"};
const target_arch target_architectures[] = {
target_arch::gfx900,
target_arch::gfx902,
Expand All @@ -120,6 +124,7 @@ inline target_arch parse_gcn_arch(const std::string& arch_name)
target_arch::gfx1100,
target_arch::gfx1101,
target_arch::gfx1102,
target_arch::gfx1201,
};
static_assert(sizeof(target_names) / sizeof(target_names[0])
== sizeof(target_architectures) / sizeof(target_architectures[0]),
Expand Down Expand Up @@ -320,6 +325,10 @@ hipError_t get_generator_config(const hipStream_t stream,
{
target_arch current_arch;
const hipError_t error = get_device_arch(stream, current_arch);
printf("%d\n", current_arch);
printf("%d\n", current_arch);
printf("%d\n", current_arch);
printf("%d\n", current_arch);
if(error != hipSuccess)
{
return error;
Expand All @@ -329,6 +338,12 @@ hipError_t get_generator_config(const hipStream_t stream,
}
else
{
printf("IN ELSE OF IS_ORDERING_DYNAMIC\n");
printf("IN ELSE OF IS_ORDERING_DYNAMIC\n");
printf("IN ELSE OF IS_ORDERING_DYNAMIC\n");
printf("IN ELSE OF IS_ORDERING_DYNAMIC\n");
printf("IN ELSE OF IS_ORDERING_DYNAMIC\n");

config.threads = generator_config_defaults<GeneratorType, T>::threads;
config.blocks = generator_config_defaults<GeneratorType, T>::blocks;
}
Expand Down
1 change: 1 addition & 0 deletions library/src/rng/lfsr113.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,7 @@ class lfsr113_generator_template : public generator_impl_base
return ROCRAND_STATUS_SUCCESS;
}

printf("Blocks: %d ---- Threads: %d\n", config.blocks, config.threads);
status = dynamic_dispatch(
m_order,
[&, this](auto is_dynamic)
Expand Down
Loading
Loading