From ddc8569f798c13c4ca9526120a05ddd64bd2553a Mon Sep 17 00:00:00 2001 From: LTLA Date: Tue, 21 May 2024 13:41:19 -0700 Subject: [PATCH] Adopt some aspects of Google's C++ style guide. - Only use structs for passive data carriers like Options. - Prefix all private members with 'my_' to avoid constructor ambiguity. --- include/tatami_stats/grouped_sums.hpp | 2 +- include/tatami_stats/ranges.hpp | 88 +++++++++---------- include/tatami_stats/sums.hpp | 48 +++++------ include/tatami_stats/variances.hpp | 116 +++++++++++++------------- 4 files changed, 129 insertions(+), 125 deletions(-) diff --git a/include/tatami_stats/grouped_sums.hpp b/include/tatami_stats/grouped_sums.hpp index acf2f7c..b213233 100644 --- a/include/tatami_stats/grouped_sums.hpp +++ b/include/tatami_stats/grouped_sums.hpp @@ -109,7 +109,7 @@ void apply(bool row, const tatami::Matrix* p, const Group_* grou for (size_t g = 0; g < num_groups; ++g) { local_output.emplace_back(thread, start, len, output[g]); - runners.emplace_back(len, local_output.back().data(), sopt.skip_nan, start); + runners.emplace_back(local_output.back().data(), sopt.skip_nan, start); } auto ext = tatami::consecutive_extractor(p, !row, 0, otherdim, start, len, opt); diff --git a/include/tatami_stats/ranges.hpp b/include/tatami_stats/ranges.hpp index b962a38..261f94f 100644 --- a/include/tatami_stats/ranges.hpp +++ b/include/tatami_stats/ranges.hpp @@ -164,40 +164,41 @@ Value_ direct(const Value_* value, Index_ num_nonzero, Index_ num_all, bool skip * @tparam Index_ Type of the row/column indices. */ template -struct RunningDense { +class RunningDense { +public: /** * @param num Number of objective vectors, i.e., n. * @param[out] store Pointer to an output array of length `num`. * After `finish()` is called, this will contain the minimum/maximum for each objective vector. * @param skip_nan See `Options::skip_nan` for details. 
*/ - RunningDense(Index_ num, Output_* store, bool skip_nan) : num(num), store(store), skip_nan(skip_nan) {} + RunningDense(Index_ num, Output_* store, bool skip_nan) : my_num(num), my_store(store), my_skip_nan(skip_nan) {} /** * Add the next observed vector to the running min/max calculation. * @param[in] ptr Pointer to an array of values of length `num`, corresponding to an observed vector. */ void add(const Value_* ptr) { - if (init) { - init = false; - if (skip_nan) { - for (Index_ i = 0; i < num; ++i, ++ptr) { + if (my_init) { + my_init = false; + if (my_skip_nan) { + for (Index_ i = 0; i < my_num; ++i, ++ptr) { auto val = *ptr; if (std::isnan(val)) { - store[i] = internal::choose_placeholder(); + my_store[i] = internal::choose_placeholder(); } else { - store[i] = val; + my_store[i] = val; } } } else { - std::copy_n(ptr, num, store); + std::copy_n(ptr, my_num, my_store); } } else { - for (Index_ i = 0; i < num; ++i, ++ptr) { + for (Index_ i = 0; i < my_num; ++i, ++ptr) { auto val = *ptr; - if (internal::is_better(store[i], val)) { // this should implicitly skip NaNs, any NaN comparison will be false. - store[i] = val; + if (internal::is_better(my_store[i], val)) { // this should implicitly skip NaNs, any NaN comparison will be false. + my_store[i] = val; } } } @@ -207,16 +208,16 @@ struct RunningDense { * Finish the running calculation once all observed vectors have been passed to `add()`. */ void finish() { - if (init) { - std::fill_n(store, num, internal::choose_placeholder()); + if (my_init) { + std::fill_n(my_store, my_num, internal::choose_placeholder()); } } private: - bool init = true; - Index_ num; - Output_* store; - bool skip_nan; + bool my_init = true; + Index_ my_num; + Output_* my_store; + bool my_skip_nan; }; /** @@ -231,7 +232,8 @@ struct RunningDense { * @tparam Index_ Type of the row/column indices. */ template -struct RunningSparse { +class RunningSparse { +public: /** * @param num Number of objective vectors. 
* @param[out] store Pointer to an output array of length `num`. @@ -242,7 +244,7 @@ struct RunningSparse { * e.g., during task allocation for parallelization. */ RunningSparse(Index_ num, Output_* store, bool skip_nan, Index_ subtract = 0) : - num(num), store(store), skip_nan(skip_nan), subtract(subtract) {} + my_num(num), my_store(store), my_skip_nan(skip_nan), my_subtract(subtract) {} /** * Add the next observed vector to the min/max calculation. @@ -251,60 +253,60 @@ struct RunningSparse { * @param number Number of non-zero elements in `value` and `index`. */ void add(const Value_* value, const Index_* index, Index_ number) { - if (count == 0) { - nonzero.resize(num); - std::fill_n(store, num, internal::choose_placeholder()); + if (my_count == 0) { + my_nonzero.resize(my_num); + std::fill_n(my_store, my_num, internal::choose_placeholder()); - if (!skip_nan) { + if (!my_skip_nan) { for (Index_ i = 0; i < number; ++i, ++value, ++index) { auto val = *value; - auto idx = *index - subtract; - store[idx] = val; - ++nonzero[idx]; + auto idx = *index - my_subtract; + my_store[idx] = val; + ++my_nonzero[idx]; } - count = 1; + my_count = 1; return; } } for (Index_ i = 0; i < number; ++i, ++value, ++index) { auto val = *value; - auto idx = *index - subtract; - auto& current = store[idx]; + auto idx = *index - my_subtract; + auto& current = my_store[idx]; if (internal::is_better(current, val)) { // this should implicitly skip NaNs, any NaN comparison will be false. current = val; } - ++nonzero[idx]; + ++my_nonzero[idx]; } - ++count; + ++my_count; } /** * Finish the min/max calculation once all observed vectors have been passed to `add()`. 
*/ void finish() { - if (count) { - for (Index_ i = 0; i < num; ++i) { - if (count > nonzero[i]) { - auto& current = store[i]; + if (my_count) { + for (Index_ i = 0; i < my_num; ++i) { + if (my_count > my_nonzero[i]) { + auto& current = my_store[i]; if (internal::is_better(current, 0)) { current = 0; } } } } else { - std::fill_n(store, num, internal::choose_placeholder()); + std::fill_n(my_store, my_num, internal::choose_placeholder()); } } private: - Index_ num; - Output_* store; - bool skip_nan; - Index_ subtract; - Index_ count = 0; - std::vector nonzero; + Index_ my_num; + Output_* my_store; + bool my_skip_nan; + Index_ my_subtract; + Index_ my_count = 0; + std::vector my_nonzero; }; /** diff --git a/include/tatami_stats/sums.hpp b/include/tatami_stats/sums.hpp index d3696a4..6549530 100644 --- a/include/tatami_stats/sums.hpp +++ b/include/tatami_stats/sums.hpp @@ -84,38 +84,39 @@ Output_ direct(const Value_* ptr, Index_ num, bool skip_nan) { * @tparam Index_ Type of the row/column indices. */ template -struct RunningDense { +class RunningDense { +public: /** * @param num Number of objective vectors, i.e., n. * @param[out] sum Pointer to an output array of length `num`. * This should be zeroed on input, and will store the running sums after each `add()`. * @param skip_nan See `Options::skip_nan` for details. */ - RunningDense(Index_ num, Output_* sum, bool skip_nan) : num(num), sum(sum), skip_nan(skip_nan) {} + RunningDense(Index_ num, Output_* sum, bool skip_nan) : my_num(num), my_sum(sum), my_skip_nan(skip_nan) {} /** * Add the next observed vector to the running sums. - * @param[in] ptr Pointer to an array of values of length `num`, corresponding to an observed vector. + * @param[in] ptr Pointer to an array of values of length `num`, corresponding to an observed vector. 
*/ void add(const Value_* ptr) { - if (skip_nan) { - for (Index_ i = 0; i < num; ++i) { + if (my_skip_nan) { + for (Index_ i = 0; i < my_num; ++i) { auto val = ptr[i]; if (!std::isnan(val)) { - sum[i] += val; + my_sum[i] += val; } } } else { - for (Index_ i = 0; i < num; ++i) { - sum[i] += ptr[i]; + for (Index_ i = 0; i < my_num; ++i) { + my_sum[i] += ptr[i]; } } } private: - Index_ num; - Output_* sum; - bool skip_nan; + Index_ my_num; + Output_* my_sum; + bool my_skip_nan; }; /** @@ -129,18 +130,18 @@ struct RunningDense { * @tparam Index_ Type of the row/column indices. */ template -struct RunningSparse { +class RunningSparse { +public: /** - * @param num Number of objective vectors. - * @param[out] sum Pointer to an output array of length `num`. + * @param[out] sum Pointer to an output array of length equal to the number of objective vectors. * This should be zeroed on input, and will store the running sums after each `add()`. * @param skip_nan See `Options::skip_nan` for details. * @param subtract Offset to subtract from each element of `index` before using it to index into `mean` and friends. * Only relevant if `mean` and friends hold statistics for a contiguous subset of objective vectors, * e.g., during task allocation for parallelization. */ - RunningSparse(Index_ num, Output_* sum, bool skip_nan, Index_ subtract = 0) : - num(num), sum(sum), skip_nan(skip_nan), subtract(subtract) {} + RunningSparse(Output_* sum, bool skip_nan, Index_ subtract = 0) : + my_sum(sum), my_skip_nan(skip_nan), my_subtract(subtract) {} /** * Add the next observed vector to the running sums. @@ -150,25 +151,24 @@ struct RunningSparse { * @param number Number of non-zero elements in `value` and `index`. 
*/ void add(const Value_* value, const Index_* index, Index_ number) { - if (skip_nan) { + if (my_skip_nan) { for (Index_ i = 0; i < number; ++i) { auto val = value[i]; if (!std::isnan(val)) { - sum[index[i] - subtract] += val; + my_sum[index[i] - my_subtract] += val; } } } else { for (Index_ i = 0; i < number; ++i) { - sum[index[i] - subtract] += value[i]; + my_sum[index[i] - my_subtract] += value[i]; } } } private: - Index_ num; - Output_* sum; - bool skip_nan; - Index_ subtract; + Output_* my_sum; + bool my_skip_nan; + Index_ my_subtract; }; /** @@ -218,7 +218,7 @@ void apply(bool row, const tatami::Matrix* p, Output_* output, c std::vector ibuffer(l); LocalOutputBuffer local_output(thread, s, l, output); - sums::RunningSparse runner(l, local_output.data(), sopt.skip_nan, s); + sums::RunningSparse runner(local_output.data(), sopt.skip_nan, s); for (Index_ x = 0; x < otherdim; ++x) { auto out = ext->fetch(vbuffer.data(), ibuffer.data()); diff --git a/include/tatami_stats/variances.hpp b/include/tatami_stats/variances.hpp index bafbff7..4e0b3c6 100644 --- a/include/tatami_stats/variances.hpp +++ b/include/tatami_stats/variances.hpp @@ -171,7 +171,8 @@ std::pair direct(const Value_* ptr, Index_ num, bool skip_nan) * @tparam Index_ Type of the row/column indices. */ template -struct RunningDense { +class RunningDense { +public: /** * @param num Number of objective vectors, i.e., n. * @param[out] mean Pointer to an output array of length `num`. @@ -181,24 +182,24 @@ struct RunningDense { * @param skip_nan See `Options::skip_nan` for details. */ RunningDense(Index_ num, Output_* mean, Output_* variance, bool skip_nan) : - num(num), mean(mean), variance(variance), skip_nan(skip_nan), ok_count(skip_nan ? num : 0) {} + my_num(num), my_mean(mean), my_variance(variance), my_skip_nan(skip_nan), my_ok_count(skip_nan ? num : 0) {} /** * Add the next observed vector to the variance calculation. 
* @param[in] ptr Pointer to an array of values of length `num`, corresponding to an observed vector. */ void add(const Value_* ptr) { - if (skip_nan) { - for (Index_ i = 0; i < num; ++i, ++ptr) { + if (my_skip_nan) { + for (Index_ i = 0; i < my_num; ++i, ++ptr) { auto val = *ptr; if (!std::isnan(val)) { - internal::add_welford(mean[i], variance[i], val, ++(ok_count[i])); + internal::add_welford(my_mean[i], my_variance[i], val, ++(my_ok_count[i])); } } } else { - ++count; - for (Index_ i = 0; i < num; ++i, ++ptr) { - internal::add_welford(mean[i], variance[i], *ptr, count); + ++my_count; + for (Index_ i = 0; i < my_num; ++i, ++ptr) { + internal::add_welford(my_mean[i], my_variance[i], *ptr, my_count); } } } @@ -207,39 +208,39 @@ struct RunningDense { * Finish the variance calculation once all observed vectors have been passed to `add()`. */ void finish() { - if (skip_nan) { - for (Index_ i = 0; i < num; ++i) { - auto ct = ok_count[i]; + if (my_skip_nan) { + for (Index_ i = 0; i < my_num; ++i) { + auto ct = my_ok_count[i]; if (ct < 2) { - variance[i] = std::numeric_limits::quiet_NaN(); + my_variance[i] = std::numeric_limits::quiet_NaN(); if (ct == 0) { - mean[i] = std::numeric_limits::quiet_NaN(); + my_mean[i] = std::numeric_limits::quiet_NaN(); } } else { - variance[i] /= ct - 1; + my_variance[i] /= ct - 1; } } } else { - if (count < 2) { - std::fill_n(variance, num, std::numeric_limits::quiet_NaN()); - if (count == 0) { - std::fill_n(mean, num, std::numeric_limits::quiet_NaN()); + if (my_count < 2) { + std::fill_n(my_variance, my_num, std::numeric_limits::quiet_NaN()); + if (my_count == 0) { + std::fill_n(my_mean, my_num, std::numeric_limits::quiet_NaN()); } } else { - for (Index_ i = 0; i < num; ++i) { - variance[i] /= count - 1; + for (Index_ i = 0; i < my_num; ++i) { + my_variance[i] /= my_count - 1; } } } } private: - Index_ num; - Output_* mean; - Output_* variance; - bool skip_nan; - Index_ count = 0; - std::vector ok_count; + Index_ my_num; + Output_* 
my_mean; + Output_* my_variance; + bool my_skip_nan; + Index_ my_count = 0; + std::vector my_ok_count; }; /** @@ -253,7 +254,8 @@ struct RunningDense { * @tparam Index_ Type of the row/column indices. */ template -struct RunningSparse { +class RunningSparse { +public: /** * @param num Number of objective vectors. * @param[out] mean Pointer to an output array of length `num`, containing the means for each objective vector. @@ -266,7 +268,7 @@ struct RunningSparse { * e.g., during task allocation for parallelization. */ RunningSparse(Index_ num, Output_* mean, Output_* variance, bool skip_nan, Index_ subtract = 0) : - num(num), mean(mean), variance(variance), nonzero(num), skip_nan(skip_nan), subtract(subtract), nan(skip_nan ? num : 0) {} + my_num(num), my_mean(mean), my_variance(variance), my_nonzero(num), my_skip_nan(skip_nan), my_subtract(subtract), my_nan(skip_nan ? num : 0) {} /** * Add the next observed vector to the variance calculation. @@ -275,22 +277,22 @@ struct RunningSparse { * @param number Number of non-zero elements in `value` and `index`. */ void add(const Value_* value, const Index_* index, Index_ number) { - ++count; - if (skip_nan) { + ++my_count; + if (my_skip_nan) { for (Index_ i = 0; i < number; ++i) { auto val = value[i]; - auto ri = index[i] - subtract; + auto ri = index[i] - my_subtract; if (std::isnan(val)) { - ++nan[ri]; + ++my_nan[ri]; } else { - internal::add_welford(mean[ri], variance[ri], val, ++(nonzero[ri])); + internal::add_welford(my_mean[ri], my_variance[ri], val, ++(my_nonzero[ri])); } } } else { for (Index_ i = 0; i < number; ++i) { - auto ri = index[i] - subtract; - internal::add_welford(mean[ri], variance[ri], value[i], ++(nonzero[ri])); + auto ri = index[i] - my_subtract; + internal::add_welford(my_mean[ri], my_variance[ri], value[i], ++(my_nonzero[ri])); } } } @@ -299,11 +301,11 @@ struct RunningSparse { * Finish the variance calculation once all observed vectors have been passed to `add()`. 
*/ void finish() { - if (skip_nan) { - for (Index_ i = 0; i < num; ++i) { - auto& curM = mean[i]; - auto& curV = variance[i]; - auto ct = count - nan[i]; + if (my_skip_nan) { + for (Index_ i = 0; i < my_num; ++i) { + auto& curM = my_mean[i]; + auto& curV = my_variance[i]; + auto ct = my_count - my_nan[i]; if (ct < 2) { curV = std::numeric_limits::quiet_NaN(); @@ -311,36 +313,36 @@ struct RunningSparse { curM = std::numeric_limits::quiet_NaN(); } } else { - internal::add_welford_zeros(curM, curV, nonzero[i], ct); + internal::add_welford_zeros(curM, curV, my_nonzero[i], ct); curV /= ct - 1; } } } else { - if (count < 2) { - std::fill_n(variance, num, std::numeric_limits::quiet_NaN()); - if (count == 0) { - std::fill_n(mean, num, std::numeric_limits::quiet_NaN()); + if (my_count < 2) { + std::fill_n(my_variance, my_num, std::numeric_limits::quiet_NaN()); + if (my_count == 0) { + std::fill_n(my_mean, my_num, std::numeric_limits::quiet_NaN()); } } else { - for (Index_ i = 0; i < num; ++i) { - auto& var = variance[i]; - internal::add_welford_zeros(mean[i], var, nonzero[i], count); - var /= count - 1; + for (Index_ i = 0; i < my_num; ++i) { + auto& var = my_variance[i]; + internal::add_welford_zeros(my_mean[i], var, my_nonzero[i], my_count); + var /= my_count - 1; } } } } private: - Index_ num; - Output_* mean; - Output_* variance; - std::vector nonzero; - bool skip_nan; - Index_ subtract; - Index_ count = 0; - std::vector nan; + Index_ my_num; + Output_* my_mean; + Output_* my_variance; + std::vector my_nonzero; + bool my_skip_nan; + Index_ my_subtract; + Index_ my_count = 0; + std::vector my_nan; }; /**