Skip to content

Commit

Permalink
[auto tuning] [rocRAND] Fixed auto tuning kernel launch error and upd…
Browse files Browse the repository at this point in the history
…ated gfx942 auto tuning parameters (#595)

* fixed auto tuning mt19937 kernel error and updated auto tuning paramater

* updated changelog

* Update CHANGELOG.md

Co-authored-by: Jeffrey Novotny <[email protected]>

This reverts commit fb16e25.

---------

Co-authored-by: Jeffrey Novotny <[email protected]>
  • Loading branch information
NguyenNhuDi and amd-jnovotny authored Jan 2, 2025
1 parent ad8cdd8 commit 55f8819
Show file tree
Hide file tree
Showing 15 changed files with 102 additions and 50 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@
Documentation for rocRAND is available at
[https://rocm.docs.amd.com/projects/rocRAND/en/latest/](https://rocm.docs.amd.com/projects/rocRAND/en/latest/)

## (unreleased) rocRAND 3.3.0 for ROCm 6.5

### Changed
* Updated several `gfx942` auto tuning parameters.

### Fixed
* Fixed an issue where `mt19937.hpp` would cause kernel errors during auto tuning.

## rocRAND 3.3.0 for ROCm 6.4

### Added
Expand Down
4 changes: 3 additions & 1 deletion library/src/rng/config/lfsr113_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_LFSR113, T>
case target_arch::gfx1101: return 128;
case target_arch::gfx1100: return 64;
case target_arch::gfx1030: return 64;
case target_arch::gfx942: return 512;
case target_arch::gfx942: return 256;
case target_arch::gfx90a: return 64;
case target_arch::gfx908: return 256;
case target_arch::gfx906: return 256;
case target_arch::gfx1201: return 128;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_LFSR113, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_LFSR113, T>
case target_arch::gfx90a: return 2048;
case target_arch::gfx908: return 1024;
case target_arch::gfx906: return 2048;
case target_arch::gfx1201: return 512;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_LFSR113, T>::blocks;
}
Expand Down
2 changes: 2 additions & 0 deletions library/src/rng/config/mrg31k3p_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MRG31K3P, T>
case target_arch::gfx90a: return 512;
case target_arch::gfx908: return 1024;
case target_arch::gfx906: return 256;
case target_arch::gfx1201: return 1024;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_MRG31K3P, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MRG31K3P, T>
case target_arch::gfx90a: return 2048;
case target_arch::gfx908: return 600;
case target_arch::gfx906: return 1792;
case target_arch::gfx1201: return 512;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_MRG31K3P, T>::blocks;
}
Expand Down
4 changes: 3 additions & 1 deletion library/src/rng/config/mrg32k3a_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,12 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MRG32K3A, T>
case target_arch::gfx1102: return 128;
case target_arch::gfx1101: return 128;
case target_arch::gfx1100: return 128;
case target_arch::gfx942: return 256;
case target_arch::gfx942: return 1024;
case target_arch::gfx90a: return 256;
case target_arch::gfx1030: return 256;
case target_arch::gfx908: return 1024;
case target_arch::gfx906: return 256;
case target_arch::gfx1201: return 1024;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_MRG32K3A, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MRG32K3A, T>
case target_arch::gfx90a: return 2048;
case target_arch::gfx908: return 600;
case target_arch::gfx906: return 2048;
case target_arch::gfx1201: return 256;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_MRG32K3A, T>::blocks;
}
Expand Down
8 changes: 5 additions & 3 deletions library/src/rng/config/mt19937_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
* This file is automatically generated by `/scripts/config-tuning/select_best_config.py`.
*/

namespace rocrand_host::detail
namespace rocrand_impl::host
{

template<class T>
Expand All @@ -41,9 +41,10 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MT19937, T>
case target_arch::gfx1102: return 128;
case target_arch::gfx1101: return 128;
case target_arch::gfx1100: return 64;
case target_arch::gfx942: return 128;
case target_arch::gfx942: return 256;
case target_arch::gfx90a: return 1024;
case target_arch::gfx908: return 512;
case target_arch::gfx1201: return 64;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_MT19937, T>::threads;
}
Expand All @@ -59,6 +60,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MT19937, T>
case target_arch::gfx942: return 1024;
case target_arch::gfx90a: return 64;
case target_arch::gfx908: return 64;
case target_arch::gfx1201: return 512;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_MT19937, T>::blocks;
}
Expand All @@ -67,4 +69,4 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MT19937, T>

} // end namespace rocrand_host::detail

#endif // ROCRAND_RNG_CONFIG_MT19937_HPP_
#endif // ROCRAND_RNG_CONFIG_MT19937_HPP_
2 changes: 2 additions & 0 deletions library/src/rng/config/mtgp32_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MTGP32, T>
case target_arch::gfx90a: return 256;
case target_arch::gfx908: return 256;
case target_arch::gfx906: return 256;
case target_arch::gfx1201: return 256;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_MTGP32, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_MTGP32, T>
case target_arch::gfx90a: return 512;
case target_arch::gfx908: return 480;
case target_arch::gfx906: return 448;
case target_arch::gfx1201: return 512;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_MTGP32, T>::blocks;
}
Expand Down
4 changes: 3 additions & 1 deletion library/src/rng/config/philox4_32_10_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_PHILOX4_32_10, T>
case target_arch::gfx1101: return 1024;
case target_arch::gfx1100: return 512;
case target_arch::gfx1030: return 1024;
case target_arch::gfx942: return 1024;
case target_arch::gfx942: return 512;
case target_arch::gfx90a: return 512;
case target_arch::gfx908: return 512;
case target_arch::gfx906: return 64;
case target_arch::gfx1201: return 1024;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_PHILOX4_32_10, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_PHILOX4_32_10, T>
case target_arch::gfx90a: return 2048;
case target_arch::gfx908: return 3840;
case target_arch::gfx906: return 896;
case target_arch::gfx1201: return 1024;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_PHILOX4_32_10, T>::blocks;
}
Expand Down
4 changes: 3 additions & 1 deletion library/src/rng/config/threefry2_32_20_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_THREEFRY2_32_20, T>
case target_arch::gfx1101: return 256;
case target_arch::gfx1100: return 1024;
case target_arch::gfx1030: return 256;
case target_arch::gfx942: return 256;
case target_arch::gfx942: return 512;
case target_arch::gfx90a: return 512;
case target_arch::gfx908: return 512;
case target_arch::gfx906: return 256;
case target_arch::gfx1201: return 1024;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_THREEFRY2_32_20, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_THREEFRY2_32_20, T>
case target_arch::gfx90a: return 2048;
case target_arch::gfx908: return 1200;
case target_arch::gfx906: return 896;
case target_arch::gfx1201: return 2048;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_THREEFRY2_32_20, T>::blocks;
}
Expand Down
2 changes: 2 additions & 0 deletions library/src/rng/config/threefry2_64_20_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_THREEFRY2_64_20, T>
case target_arch::gfx90a: return 256;
case target_arch::gfx908: return 256;
case target_arch::gfx906: return 256;
case target_arch::gfx1201: return 512;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_THREEFRY2_64_20, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_THREEFRY2_64_20, T>
case target_arch::gfx90a: return 2048;
case target_arch::gfx908: return 960;
case target_arch::gfx906: return 560;
case target_arch::gfx1201: return 1024;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_THREEFRY2_64_20, T>::blocks;
}
Expand Down
4 changes: 3 additions & 1 deletion library/src/rng/config/threefry4_32_20_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_THREEFRY4_32_20, T>
case target_arch::gfx1101: return 512;
case target_arch::gfx1100: return 1024;
case target_arch::gfx1030: return 1024;
case target_arch::gfx942: return 1024;
case target_arch::gfx942: return 512;
case target_arch::gfx90a: return 256;
case target_arch::gfx908: return 256;
case target_arch::gfx906: return 256;
case target_arch::gfx1201: return 1024;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_THREEFRY4_32_20, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_THREEFRY4_32_20, T>
case target_arch::gfx90a: return 2048;
case target_arch::gfx908: return 1200;
case target_arch::gfx906: return 896;
case target_arch::gfx1201: return 1024;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_THREEFRY4_32_20, T>::blocks;
}
Expand Down
2 changes: 2 additions & 0 deletions library/src/rng/config/threefry4_64_20_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_THREEFRY4_64_20, T>
case target_arch::gfx90a: return 256;
case target_arch::gfx908: return 128;
case target_arch::gfx906: return 128;
case target_arch::gfx1201: return 128;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_THREEFRY4_64_20, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_THREEFRY4_64_20, T>
case target_arch::gfx90a: return 2048;
case target_arch::gfx908: return 1920;
case target_arch::gfx906: return 1792;
case target_arch::gfx1201: return 2048;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_THREEFRY4_64_20, T>::blocks;
}
Expand Down
2 changes: 2 additions & 0 deletions library/src/rng/config/xorwow_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_XORWOW, T>
case target_arch::gfx90a: return 128;
case target_arch::gfx908: return 256;
case target_arch::gfx906: return 256;
case target_arch::gfx1201: return 1024;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_XORWOW, T>::threads;
}
Expand All @@ -63,6 +64,7 @@ struct generator_config_selector<ROCRAND_RNG_PSEUDO_XORWOW, T>
case target_arch::gfx90a: return 2048;
case target_arch::gfx908: return 600;
case target_arch::gfx906: return 560;
case target_arch::gfx1201: return 256;
default:
return generator_config_defaults<ROCRAND_RNG_PSEUDO_XORWOW, T>::blocks;
}
Expand Down
17 changes: 16 additions & 1 deletion library/src/rng/config_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ enum class target_arch : unsigned int
gfx1100 = 1100,
gfx1101 = 1101,
gfx1102 = 1102,
gfx1201 = 1201,
unknown = std::numeric_limits<unsigned int>::max(),
};

Expand Down Expand Up @@ -85,6 +86,8 @@ __host__ __device__ constexpr target_arch get_device_arch()
return target_arch::gfx1101;
#elif defined(__gfx1102__)
return target_arch::gfx1102;
#elif defined(__gfx1201__)
return target_arch::gfx1201;
#else
return target_arch::unknown;
#endif
Expand All @@ -106,7 +109,8 @@ inline target_arch parse_gcn_arch(const std::string& arch_name)
"gfx1030",
"gfx1100",
"gfx1101",
"gfx1102"};
"gfx1102",
"gfx1201"};
const target_arch target_architectures[] = {
target_arch::gfx900,
target_arch::gfx902,
Expand All @@ -120,6 +124,7 @@ inline target_arch parse_gcn_arch(const std::string& arch_name)
target_arch::gfx1100,
target_arch::gfx1101,
target_arch::gfx1102,
target_arch::gfx1201,
};
static_assert(sizeof(target_names) / sizeof(target_names[0])
== sizeof(target_architectures) / sizeof(target_architectures[0]),
Expand Down Expand Up @@ -320,6 +325,10 @@ hipError_t get_generator_config(const hipStream_t stream,
{
target_arch current_arch;
const hipError_t error = get_device_arch(stream, current_arch);
printf("%d\n", current_arch);
printf("%d\n", current_arch);
printf("%d\n", current_arch);
printf("%d\n", current_arch);
if(error != hipSuccess)
{
return error;
Expand All @@ -329,6 +338,12 @@ hipError_t get_generator_config(const hipStream_t stream,
}
else
{
printf("IN ELSE OF IS_ORDERING_DYNAMIC\n");
printf("IN ELSE OF IS_ORDERING_DYNAMIC\n");
printf("IN ELSE OF IS_ORDERING_DYNAMIC\n");
printf("IN ELSE OF IS_ORDERING_DYNAMIC\n");
printf("IN ELSE OF IS_ORDERING_DYNAMIC\n");

config.threads = generator_config_defaults<GeneratorType, T>::threads;
config.blocks = generator_config_defaults<GeneratorType, T>::blocks;
}
Expand Down
1 change: 1 addition & 0 deletions library/src/rng/lfsr113.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,7 @@ class lfsr113_generator_template : public generator_impl_base
return ROCRAND_STATUS_SUCCESS;
}

printf("Blocks: %d ---- Threads: %d\n", config.blocks, config.threads);
status = dynamic_dispatch(
m_order,
[&, this](auto is_dynamic)
Expand Down
Loading

0 comments on commit 55f8819

Please sign in to comment.