Overhauled organization of functions within each category.

Each statistic now gets its own namespace with a common apply(), direct() and Running* classes (if applicable). The row/column functions are now moved inside this namespace and are called by_row and by_column. NaN skipping and the number of threads are now passed via a separate statistic-specific Options class, which avoids the need to decide on NaN handling at compile time.
tatami-inc · Mar 27, 2024 · f7c491c · f7c491c
1 parent 111e912
commit f7c491c
Show file tree

Hide file tree

Showing 13 changed files with 977 additions and 1,042 deletions.
diff --git a/include/tatami_stats/counts.hpp b/include/tatami_stats/counts.hpp
@@ -16,6 +16,12 @@
 
 namespace tatami_stats {
 
+/**
+ * @brief Functions for computing dimension-wise counts.
+ * @namespace tatami_stats::counts
+ */
+namespace counts {
+
 /**
  * Count the number of values in each dimension element that satisfy the `condition`.
  *
@@ -28,12 +34,12 @@ namespace tatami_stats {
  * @param p Pointer to a `tatami::Matrix`.
  * @param[out] output Pointer to an array of length equal to the number of rows (if `row = true`) or columns (otherwise).
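- * On output, this will contain the row/column variances.
+ * On output, this will contain the number of values in each row/column that satisfy `condition`.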
- * @param threads Number of threads to use.
+ * @param num_threads Number of threads to use.
  * @param condition Function that accepts a `Value_` and returns a boolean.
- * If NaNs might be present in `p`, this should be handled by `condition`.
+ * This function is also responsible for handling any NaNs that might be present in `p`.
  */
 template<typename Value_, typename Index_, typename Output_, class Condition_>
-void counts(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output, int threads, Condition_ condition) {
+void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output, int num_threads, Condition_ condition) {
     auto dim = (row ? p->nrow() : p->ncol());
     auto otherdim = (row ? p->ncol() : p->nrow());
     std::fill(output, output + dim, 0);
@@ -60,7 +66,7 @@ void counts(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output,
                     }
                     output[x + start] = target;
                 }
-            }, dim, threads);
+            }, dim, num_threads);
 
         } else {
             tatami::parallelize([&](int, Index_ start, Index_ len) -> void {
@@ -75,13 +81,13 @@ void counts(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output,
                     }
                     output[x + start] = target;
                 }
-            }, dim, threads);
+            }, dim, num_threads);
         }
 
     } else {
-        std::vector<Output_*> threaded_output_ptrs(threads, output);
-        std::vector<std::vector<Output_> > threaded_output(threads - 1);
-        for (int t = 1; t < threads; ++t) {
+        std::vector<Output_*> threaded_output_ptrs(num_threads, output);
+        std::vector<std::vector<Output_> > threaded_output(num_threads - 1);
+        for (int t = 1; t < num_threads; ++t) {
             auto& curout = threaded_output[t - 1];
             curout.resize(dim);
             threaded_output_ptrs[t] = curout.data();
@@ -114,7 +120,7 @@ void counts(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output,
                         curoutput[d] += len - nonzeros[d];
                     }
                 }
-            }, otherdim, threads);
+            }, otherdim, num_threads);
 
         } else {
             tatami::parallelize([&](int t, Index_ start, Index_ len) -> void {
@@ -128,10 +134,10 @@ void counts(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output,
                         curoutput[j] += condition(ptr[j]);
                     }
                 }
-            }, otherdim, threads);
+            }, otherdim, num_threads);
         }
 
-        for (int t = 1; t < threads; ++t) {
+        for (int t = 1; t < num_threads; ++t) {
             auto curoutput = threaded_output_ptrs[t];
             for (Index_ d = 0; d < dim; ++d) {
                 output[d] += curoutput[d];
@@ -140,141 +146,219 @@ void counts(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output,
     }
 }
 
+/**
+ * @brief Functions for counting NaNs on each dimension.
+ * @namespace tatami_stats::counts::nan
+ */
+namespace nan {
+
+/**
+ * @brief NaN-counting options.
+ */
+struct Options {
+    /**
+     * Number of threads to use when obtaining counts across a `tatami::Matrix`.
+     */
+    int num_threads = 1;
+};
+
 /**
  * @tparam Value_ Type of the matrix value, should be summable.
  * @tparam Index_ Type of the row/column indices.
  * @tparam Output_ Type of the output value.
  * This should be at least large enough to hold the dimensions of `p`.
  *
+ * @param row Whether to obtain a count for each row.
  * @param p Pointer to a `tatami::Matrix`.
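- * @param[out] output Pointer to an array of length equal to the number of rows.
- * On output, this will store the number of NaNs in each row.
+ * @param[out] output Pointer to an array of length equal to the number of rows (if `row = true`) or columns (otherwise).
+ * On output, this will store the number of NaNs in each row/column.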
- * @param threads Number of threads to use.
+ * @param nopt Counting options.
  */
 template<typename Value_, typename Index_, typename Output_>
-void row_nan_counts(const tatami::Matrix<Value_, Index_>* p, Output_* output, int threads = 1) {
-    counts(true, p, output, threads, [](Value_ x) -> bool { return std::isnan(x); });
+void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output, const Options& nopt) {
+    counts::apply(row, p, output, nopt.num_threads, [](Value_ x) -> bool { return std::isnan(x); });
 }
 
 /**
+ * Wrapper around `apply()` for row NaN counts.
+ *
  * @tparam Output_ Type of the output value.
  * @tparam Value_ Type of the matrix value, should be summable.
  * @tparam Index_ Type of the row/column indices.
  *
  * @param p Pointer to a `tatami::Matrix`.
- * @param threads Number of threads to use.
+ * @param nopt Counting options.
  *
  * @return A vector of length equal to the number of rows, containing the number of NaNs in each row.
  */
 template<typename Output_ = int, typename Value_, typename Index_>
-std::vector<Output_> row_nan_counts(const tatami::Matrix<Value_, Index_>* p, int threads = 1) {
+std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p, const Options& nopt) {
     std::vector<Output_> output(p->nrow());
-    row_nan_counts(p, output.data(), threads);
+    apply(true, p, output.data(), nopt);
     return output;
 }
 
 /**
+ * Overload with default options.
+ *
+ * @tparam Output_ Type of the output value.
  * @tparam Value_ Type of the matrix value, should be summable.
  * @tparam Index_ Type of the row/column indices.
- * @tparam Output_ Type of the output value.
- * This should be at least large enough to hold the dimensions of `p`.
  *
  * @param p Pointer to a `tatami::Matrix`.
- * @param[out] output Pointer to an array of length equal to the number of columns.
- * On output, this will store the number of NaNs in each column.
- * @param threads Number of threads to use.
+ * @return A vector of length equal to the number of rows, containing the number of NaNs in each row.
  */
-template<typename Value_, typename Index_, typename Output_>
-void column_nan_counts(const tatami::Matrix<Value_, Index_>* p, Output_* output, int threads = 1) {
-    counts(false, p, output, threads, [](Value_ x) -> bool { return std::isnan(x); });
+template<typename Output_ = int, typename Value_, typename Index_>
+std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p) {
+    return by_row(p, Options());
 }
 
 /**
+ * Wrapper around `apply()` for column NaN counts.
+ *
  * @tparam Output_ Type of the output value.
  * This should be at least large enough to hold the dimensions of `p`.
  * @tparam Value_ Type of the matrix value, should be summable.
  * @tparam Index_ Type of the row/column indices.
  *
  * @param p Pointer to a `tatami::Matrix`.
- * @param threads Number of threads to use.
+ * @param nopt Counting options.
  *
  * @return A vector of length equal to the number of columns, containing the number of NaNs in each column.
  */
 template<typename Output_ = int, typename Value_, typename Index_>
-std::vector<Output_> column_nan_counts(const tatami::Matrix<Value_, Index_>* p, int threads = 1) {
+std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p, const Options& nopt) {
     std::vector<Output_> output(p->ncol());
-    column_nan_counts(p, output.data(), threads);
+    apply(false, p, output.data(), nopt);
     return output;
 }
 
+/**
+ * Overload with default options.
+ *
+ * @tparam Output_ Type of the output value.
+ * This should be at least large enough to hold the dimensions of `p`.
+ * @tparam Value_ Type of the matrix value, should be summable.
+ * @tparam Index_ Type of the row/column indices.
+ *
+ * @param p Pointer to a `tatami::Matrix`.
+ *
+ * @return A vector of length equal to the number of columns, containing the number of NaNs in each column.
+ */
+template<typename Output_ = int, typename Value_, typename Index_>
+std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p) {
+    return by_column(p, Options());
+}
+
+}
+
+/**
+ * @brief Functions for counting zeros on each dimension.
+ * @namespace tatami_stats::counts::zero
+ */
+namespace zero {
+
+/**
+ * @brief Zero-counting options.
+ */
+struct Options {
+    /**
+     * Number of threads to use when obtaining counts across a `tatami::Matrix`.
+     */
+    int num_threads = 1;
+};
+
 /**
  * @tparam Value_ Type of the matrix value, should be summable.
  * @tparam Index_ Type of the row/column indices.
  * @tparam Output_ Type of the output value.
  * This should be at least large enough to hold the dimensions of `p`.
  *
+ * @param row Whether to obtain a count for each row.
  * @param p Pointer to a `tatami::Matrix`.
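- * @param[out] output Pointer to an array of length equal to the number of rows.
- * On output, this will store the number of zeros in each row.
+ * @param[out] output Pointer to an array of length equal to the number of rows (if `row = true`) or columns (otherwise).
+ * On output, this will store the number of zeros in each row/column.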
- * @param threads Number of threads to use.
+ * @param zopt Counting options.
  */
 template<typename Value_, typename Index_, typename Output_>
-void row_zero_counts(const tatami::Matrix<Value_, Index_>* p, Output_* output, int threads = 1) {
-    counts(true, p, output, threads, [](Value_ x) -> bool { return x == 0; });
+void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output, const Options& zopt) {
+    counts::apply(row, p, output, zopt.num_threads, [](Value_ x) -> bool { return x == 0; });
 }
 
 /**
- * @tparam Output_ Type of the output value.
- * This should be at least large enough to hold the dimensions of `p`.
+ * Wrapper around `apply()` for row-wise zero counts.
+ *
  * @tparam Value_ Type of the matrix value, should be summable.
  * @tparam Index_ Type of the row/column indices.
+ * @tparam Output_ Type of the output value.
+ * This should be at least large enough to hold the dimensions of `p`.
  *
  * @param p Pointer to a `tatami::Matrix`.
- * @param threads Number of threads to use.
- *
- * @return A vector of length equal to the number of rows, containing the number of zeros in each row.
+ * @param zopt Counting options.
  */
 template<typename Output_ = int, typename Value_, typename Index_>
-std::vector<Output_> row_zero_counts(const tatami::Matrix<Value_, Index_>* p, int threads = 1) {
+std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p, const Options& zopt) {
     std::vector<Output_> output(p->nrow());
-    row_zero_counts(p, output.data(), threads);
+    apply(true, p, output.data(), zopt);
     return output;
 }
 
 /**
- * @tparam Value_ Type of the matrix value, should be summable.
- * @tparam Index_ Type of the row/column indices.
+ * Overload with default options. 
+ *
  * @tparam Output_ Type of the output value.
  * This should be at least large enough to hold the dimensions of `p`.
+ * @tparam Value_ Type of the matrix value, should be summable.
+ * @tparam Index_ Type of the row/column indices.
  *
  * @param p Pointer to a `tatami::Matrix`.
- * @param[out] output Pointer to an array of length equal to the number of columns.
- * On output, this will store the number of zeros in each column.
- * @param threads Number of threads to use.
+ *
+ * @return A vector of length equal to the number of rows, containing the number of zeros in each row.
  */
-template<typename Value_, typename Index_, typename Output_>
-void column_zero_counts(const tatami::Matrix<Value_, Index_>* p, Output_* output, int threads = 1) {
-    counts(false, p, output, threads, [](Value_ x) -> bool { return x == 0; });
+template<typename Output_ = int, typename Value_, typename Index_>
+std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p) {
+    return by_row(p, Options());
 }
 
 /**
+ * Wrapper around `apply()` for column-wise zero counts.
+ *
  * @tparam Output_ Type of the output value.
  * This should be at least large enough to hold the dimensions of `p`.
  * @tparam Value_ Type of the matrix value, should be summable.
  * @tparam Index_ Type of the row/column indices.
  *
  * @param p Pointer to a `tatami::Matrix`.
- * @param threads Number of threads to use.
+ * @param zopt Counting options.
  *
  * @return A vector of length equal to the number of columns, containing the number of zeros in each column.
  */
 template<typename Output_ = int, typename Value_, typename Index_>
-std::vector<Output_> column_zero_counts(const tatami::Matrix<Value_, Index_>* p, int threads = 1) {
+std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p, const Options& zopt) {
     std::vector<Output_> output(p->ncol());
-    column_zero_counts(p, output.data(), threads);
+    apply(false, p, output.data(), zopt);
     return output;
 }
 
+/**
+ * @tparam Output_ Type of the output value.
+ * This should be at least large enough to hold the dimensions of `p`.
+ * @tparam Value_ Type of the matrix value, should be summable.
+ * @tparam Index_ Type of the row/column indices.
+ *
+ * @param p Pointer to a `tatami::Matrix`.
+ *
+ * @return A vector of length equal to the number of columns, containing the number of zeros in each column.
+ */
+template<typename Output_ = int, typename Value_, typename Index_>
+std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p) {
+    return by_column(p, Options());
+}
+
+}
+
+}
+
 }
 
 #endif
diff --git a/include/tatami_stats/grouped_medians.hpp b/include/tatami_stats/grouped_medians.hpp
@@ -51,7 +51,7 @@ void grouped_medians(const tatami::Matrix<Value_, Index_>* p, const Group_* grou
 
                 for (size_t g = 0; g < ngroups; ++g, ++curoutput) {
                     auto& w = workspace[g];
-                    *curoutput = median::compute(w.data(), w.size(), static_cast<size_t>(group_sizes[g]));
+                    *curoutput = medians::direct(w.data(), w.size(), static_cast<size_t>(group_sizes[g]), false);
                     w.clear();
                 }
             }
@@ -65,7 +65,7 @@ void grouped_medians(const tatami::Matrix<Value_, Index_>* p, const Group_* grou
                 }
 
                 for (auto& w : workspace) {
-                    *curoutput = median::compute(w.data(), w.size());
+                    *curoutput = medians::direct(w.data(), w.size(), false);
                     ++curoutput;
                     w.clear();
                 }