Skip to content

Commit

Permalink
Rename parameters from "nworkers" to "num-threads" (#1270)
Browse files Browse the repository at this point in the history
  • Loading branch information
albestro authored Feb 18, 2025
1 parent c966a24 commit a9abb81
Show file tree
Hide file tree
Showing 7 changed files with 24 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ inline size_t get_red2band_panel_worker_minwork() noexcept {
return 1;
}

inline size_t get_red2band_panel_nworkers() noexcept {
inline size_t get_red2band_panel_num_workers() noexcept {
// Note: precautionarily we leave at least 1 thread "free" to do other stuff (if possible)
const std::size_t available_workers = pika::resource::get_thread_pool("default").get_os_thread_count();
const std::size_t min_workers = 1;
const auto max_workers = std::max(min_workers, available_workers - 1);

const std::size_t nworkers = getTuneParameters().red2band_panel_nworkers;
const std::size_t nworkers = getTuneParameters().red2band_panel_num_threads;
return std::clamp(nworkers, min_workers, max_workers);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@

namespace dlaf::eigensolver::internal {

inline std::size_t getTridiagRank1NWorkers() noexcept {
inline std::size_t get_tridiag_rank1_num_workers() noexcept {
// Note: no thread is left "free" here; up to all the OS threads of the "default" pool may be used
const std::size_t available_workers = pika::resource::get_thread_pool("default").get_os_thread_count();
const std::size_t min_workers = 1;
const auto max_workers = std::max(min_workers, available_workers);

const std::size_t nworkers = getTuneParameters().tridiag_rank1_nworkers;
const std::size_t nworkers = getTuneParameters().tridiag_rank1_num_threads;
return std::clamp(nworkers, min_workers, max_workers);
}

Expand Down
4 changes: 2 additions & 2 deletions include/dlaf/eigensolver/reduction_to_band/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ void computePanelReflectors(MatrixLikeA& mat_a, MatrixLikeTaus& mat_taus, const

const std::size_t nworkers = [nrtiles = panel_tiles.size()]() {
const std::size_t min_workers = 1;
const std::size_t available_workers = get_red2band_panel_nworkers();
const std::size_t available_workers = get_red2band_panel_num_workers();
const std::size_t ideal_workers =
util::ceilDiv(to_sizet(nrtiles), get_red2band_panel_worker_minwork());
return std::clamp(ideal_workers, min_workers, available_workers);
Expand Down Expand Up @@ -639,7 +639,7 @@ void computePanelReflectors(TriggerSender&& trigger, comm::IndexT_MPI rank_v0,

const std::size_t nworkers = [nrtiles = panel_tiles.size()]() {
const std::size_t min_workers = 1;
const std::size_t available_workers = get_red2band_panel_nworkers();
const std::size_t available_workers = get_red2band_panel_num_workers();
const std::size_t ideal_workers =
util::ceilDiv(to_sizet(nrtiles), get_red2band_panel_worker_minwork());
return std::clamp(ideal_workers, min_workers, available_workers);
Expand Down
4 changes: 2 additions & 2 deletions include/dlaf/eigensolver/tridiag_solver/merge.h
Original file line number Diff line number Diff line change
Expand Up @@ -812,7 +812,7 @@ void solveRank1Problem(const SizeType i_begin, const SizeType i_end, KSender&& k
// Note: at least two columns of tiles per worker, in the range [1, get_tridiag_rank1_num_workers()]
const std::size_t nthreads = [nrtiles = (i_end - i_begin)]() {
const std::size_t min_workers = 1;
const std::size_t available_workers = getTridiagRank1NWorkers();
const std::size_t available_workers = get_tridiag_rank1_num_workers();
const std::size_t ideal_workers = util::ceilDiv(to_sizet(nrtiles), to_sizet(2));
return std::clamp(ideal_workers, min_workers, available_workers);
}();
Expand Down Expand Up @@ -1346,7 +1346,7 @@ void solveRank1ProblemDist(CommSender&& row_comm, CommSender&& col_comm, const S
const std::size_t workload_unit = 2 * to_sizet(dist_sub.tile_size().linear_size());

const std::size_t min_workers = 1;
const std::size_t available_workers = getTridiagRank1NWorkers();
const std::size_t available_workers = get_tridiag_rank1_num_workers();

const std::size_t ideal_workers = util::ceilDiv(to_sizet(workload), workload_unit);
return std::clamp(ideal_workers, min_workers, available_workers);
Expand Down
20 changes: 10 additions & 10 deletions include/dlaf/tune.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,23 +58,23 @@ namespace dlaf {
/// - tfactor_num_threads:
/// The maximum number of threads to use for computing tfactor (which is used, for instance,
/// in red2band and its backtransformation). Set with --dlaf:tfactor-num-threads or env
/// variable DLAF_TFACTOR_NTHREADS.
/// variable DLAF_TFACTOR_NUM_THREADS.
/// - tfactor_num_streams:
/// The maximum number of streams to use for computing tfactor (which is used, for instance,
/// in red2band and its backtransformation). Set with --dlaf:tfactor-num-streams or env
/// variable DLAF_TFACTOR_NSTREAMS.
/// variable DLAF_TFACTOR_NUM_STREAMS.
/// - tfactor_barrier_busy_wait_us:
/// The duration in microseconds to busy-wait in barriers in the tfactor algorithm.
/// Set with --dlaf:tfactor-barrier-busy-wait-us or env variable DLAF_TFACTOR_BARRIER_BUSY_WAIT_US.
/// - red2band_panel_nworkers:
/// - red2band_panel_num_threads:
/// The maximum number of threads to use for computing the panel in the reduction to band algorithm.
/// Set with --dlaf:red2band-panel-nworkers or env variable DLAF_RED2BAND_PANEL_NWORKERS.
/// Set with --dlaf:red2band-panel-num-threads or env variable DLAF_RED2BAND_PANEL_NUM_THREADS.
/// - red2band_barrier_busy_wait_us:
/// The duration in microseconds to busy-wait in barriers in the reduction to band algorithm.
/// Set with --dlaf:red2band-barrier-busy-wait-us or env variable DLAF_RED2BAND_BARRIER_BUSY_WAIT_US.
/// - tridiag_rank1_nworkers:
/// - tridiag_rank1_num_threads:
/// The maximum number of threads to use for computing rank1 problem solution in tridiagonal solver
/// algorithm. Set with --dlaf:tridiag-rank1-nworkers or env variable DLAF_TRIDIAG_RANK1_NWORKERS.
/// algorithm. Set with --dlaf:tridiag-rank1-num-threads or env variable DLAF_TRIDIAG_RANK1_NUM_THREADS.
/// - tridiag_rank1_barrier_busy_wait_us:
/// The duration in microseconds to busy-wait in barriers when computing rank1 problem solution in
/// the tridiagonal solver algorithm. Set with --dlaf:tridiag-rank1-barrier-busy-wait-us or env
Expand Down Expand Up @@ -120,8 +120,8 @@ struct TuneParameters {
const auto default_pool_thread_count =
pika::resource::get_thread_pool("default").get_os_thread_count();
tfactor_num_threads = std::max<std::size_t>(1, default_pool_thread_count / 2);
red2band_panel_nworkers = std::max<std::size_t>(1, default_pool_thread_count / 2);
tridiag_rank1_nworkers = default_pool_thread_count;
red2band_panel_num_threads = std::max<std::size_t>(1, default_pool_thread_count / 2);
tridiag_rank1_num_threads = default_pool_thread_count;
}
bool debug_dump_cholesky_factorization_data = false;
bool debug_dump_generalized_to_standard_data = false;
Expand All @@ -134,9 +134,9 @@ struct TuneParameters {
std::size_t tfactor_num_threads = 1;
std::size_t tfactor_num_streams = 4;
std::size_t tfactor_barrier_busy_wait_us = 0;
std::size_t red2band_panel_nworkers = 1;
std::size_t red2band_panel_num_threads = 1;
std::size_t red2band_barrier_busy_wait_us = 1000;
std::size_t tridiag_rank1_nworkers = 1;
std::size_t tridiag_rank1_num_threads = 1;
std::size_t tridiag_rank1_barrier_busy_wait_us = 0;

SizeType eigensolver_min_band = 100;
Expand Down
8 changes: 4 additions & 4 deletions src/init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ void updateConfiguration(const pika::program_options::variables_map& vm, configu
updateConfigurationValue(vm, param.tfactor_num_threads, "TFACTOR_NUM_THREADS", "tfactor-num-threads");
updateConfigurationValue(vm, param.tfactor_num_streams, "TFACTOR_NUM_STREAMS", "tfactor-num-streams");
updateConfigurationValue(vm, param.tfactor_barrier_busy_wait_us, "TFACTOR_BARRIER_BUSY_WAIT_US", "tfactor-barrier-busy-wait-us");
updateConfigurationValue(vm, param.red2band_panel_nworkers, "RED2BAND_PANEL_NWORKERS", "red2band-panel-nworkers");
updateConfigurationValue(vm, param.red2band_panel_num_threads, "RED2BAND_PANEL_NUM_THREADS", "red2band-panel-num-threads");
updateConfigurationValue(vm, param.red2band_barrier_busy_wait_us, "RED2BAND_BARRIER_BUSY_WAIT_US", "red2band-barrier-busy-wait-us");
updateConfigurationValue(vm, param.eigensolver_min_band, "EIGENSOLVER_MIN_BAND", "eigensolver-min-band");
updateConfigurationValue(vm, param.band_to_tridiag_1d_block_size_base, "BAND_TO_TRIDIAG_1D_BLOCK_SIZE_BASE", "band-to-tridiag-1d-block-size-base");
Expand All @@ -297,7 +297,7 @@ void updateConfiguration(const pika::program_options::variables_map& vm, configu
updateConfigurationValue(vm, param.debug_dump_band_to_tridiagonal_data, "DEBUG_DUMP_BAND_TO_TRIDIAGONAL_DATA", "");
updateConfigurationValue(vm, param.debug_dump_tridiag_solver_data, "DEBUG_DUMP_TRIDIAG_SOLVER_DATA", "");

updateConfigurationValue(vm, param.tridiag_rank1_nworkers, "TRIDIAG_RANK1_NWORKERS", "tridiag-rank1-nworkers");
updateConfigurationValue(vm, param.tridiag_rank1_num_threads, "TRIDIAG_RANK1_NUM_THREADS", "tridiag-rank1-num-threads");

updateConfigurationValue(vm, param.tridiag_rank1_barrier_busy_wait_us, "TRIDIAG_RANK1_BARRIER_BUSY_WAIT_US", "tridiag-rank1-barrier-busy-wait-us");

Expand Down Expand Up @@ -341,11 +341,11 @@ pika::program_options::options_description getOptionsDescription() {
desc.add_options()("dlaf:tfactor-num-threads", pika::program_options::value<std::size_t>(), "The maximum number of threads to use for computing the tfactor.");
desc.add_options()("dlaf:tfactor-num-streams", pika::program_options::value<std::size_t>(), "The maximum number of GPU streams to use for computing the tfactor.");
desc.add_options()("dlaf:tfactor-barrier-busy-wait-us", pika::program_options::value<std::size_t>(), "The duration in microseconds to busy-wait in barriers in the tfactor algorithm.");
desc.add_options()("dlaf:red2band-panel-nworkers", pika::program_options::value<std::size_t>(), "The maximum number of threads to use for computing the panel in the reduction to band algorithm.");
desc.add_options()("dlaf:red2band-panel-num-threads", pika::program_options::value<std::size_t>(), "The maximum number of threads to use for computing the panel in the reduction to band algorithm.");
desc.add_options()("dlaf:red2band-barrier-busy-wait-us", pika::program_options::value<std::size_t>(), "The duration in microseconds to busy-wait in barriers in the reduction to band algorithm.");
desc.add_options()("dlaf:eigensolver-min-band", pika::program_options::value<SizeType>(), "The minimum value to start looking for a divisor of the block size. When larger than the block size, the block size will be used instead.");
desc.add_options()("dlaf:band-to-tridiag-1d-block-size-base", pika::program_options::value<SizeType>(), "The 1D block size for band_to_tridiagonal is computed as 1d_block_size_base / nb * nb. (The input matrix is distributed with a {nb x nb} block size.)");
desc.add_options()("dlaf:tridiag-rank1-nworkers", pika::program_options::value<std::size_t>(), "The maximum number of threads to use for computing rank1 problem solution in tridiagonal solver algorithm.");
desc.add_options()("dlaf:tridiag-rank1-num-threads", pika::program_options::value<std::size_t>(), "The maximum number of threads to use for computing rank1 problem solution in tridiagonal solver algorithm.");
desc.add_options()("dlaf:tridiag-rank1-barrier-busy-wait-us", pika::program_options::value<std::size_t>(), "The duration in microseconds to busy-wait in barriers when computing rank1 problem solution in the tridiagonal solver algorithm.");
desc.add_options()("dlaf:bt-band-to-tridiag-hh-apply-group-size", pika::program_options::value<SizeType>(), "The application of the HH reflector is splitted in smaller applications of group size reflectors.");
desc.add_options()("dlaf:communicator-grid-num-pipelines", pika::program_options::value<std::size_t>(), "The default number of row, column, and full communicator pipelines to initialize in CommunicatorGrid.");
Expand Down
4 changes: 2 additions & 2 deletions src/tune.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ std::ostream& operator<<(std::ostream& os, const TuneParameters& params) {
os << " tfactor_num_threads = " << params.tfactor_num_threads << std::endl;
os << " tfactor_num_streams = " << params.tfactor_num_streams << std::endl;
os << " tfactor_barrier_busy_wait_us = " << params.tfactor_barrier_busy_wait_us << std::endl;
os << " red2band_panel_nworkers = " << params.red2band_panel_nworkers << std::endl;
os << " red2band_panel_num_threads = " << params.red2band_panel_num_threads << std::endl;
os << " red2band_barrier_busy_wait_us = " << params.red2band_barrier_busy_wait_us << std::endl;
os << " tridiag_rank1_nworkers = " << params.tridiag_rank1_nworkers << std::endl;
os << " tridiag_rank1_num_threads = " << params.tridiag_rank1_num_threads << std::endl;
os << " tridiag_rank1_barrier_busy_wait_us = " << params.tridiag_rank1_barrier_busy_wait_us
<< std::endl;
os << " eigensolver_min_band = " << params.eigensolver_min_band << std::endl;
Expand Down

0 comments on commit a9abb81

Please sign in to comment.