Cleaned up the docstrings.

tatami-inc · Jun 25, 2024 · c444f2b · c444f2b
1 parent 91878d6
commit c444f2b
Show file tree

Hide file tree

Showing 7 changed files with 63 additions and 53 deletions.
diff --git a/include/tatami_stats/counts.hpp b/include/tatami_stats/counts.hpp
@@ -23,14 +23,15 @@ namespace tatami_stats {
 namespace counts {
 
 /**
- * Count the number of values in each dimension element that satisfy the `condition`.
+ * Count the number of values that satisfy the `condition` in each element of a chosen dimension.
  *
  * @tparam Value_ Type of the matrix value, should be numeric.
  * @tparam Index_ Type of the row/column indices.
  * @tparam Output_ Type of the output value.
  * This should be at least large enough to hold the dimensions of `p`.
  *
- * @param row Whether to count in each row.
+ * @param row Whether to perform the count within each row.
+ * If false, the count is performed within each column instead.
  * @param p Pointer to a `tatami::Matrix`.
  * @param[out] output Pointer to an array of length equal to the number of rows (if `row = true`) or columns (otherwise).
  * On output, this will contain the row/column counts.

diff --git a/include/tatami_stats/grouped_medians.hpp b/include/tatami_stats/grouped_medians.hpp
@@ -47,7 +47,8 @@ struct Options {
  * @tparam Output_ Type of the output value.
  * This should be floating-point to store potential averages.
  *
- * @param row Whether to compute medians for the rows.
+ * @param row Whether to compute group-wise medians within each row.
+ * If false, medians are computed in each column instead.
  * @param p Pointer to a `tatami::Matrix`.
  * @param[in] group Pointer to an array of length equal to the number of columns (if `row = true`) or rows (otherwise).
  * Each value should be an integer that specifies the group assignment.

diff --git a/include/tatami_stats/grouped_sums.hpp b/include/tatami_stats/grouped_sums.hpp
@@ -46,12 +46,13 @@ struct Options {
  * @tparam Output_ Type of the output value.
  * This should be floating-point to store potential averages.
  *
- * @param row Whether to compute sums for the rows.
+ * @param row Whether to compute group-wise sums within each row.
+ * If false, sums are computed within each column instead.
  * @param p Pointer to a `tatami::Matrix`.
  * @param[in] group Pointer to an array of length equal to the number of columns (if `row = true`) or rows (otherwise).
  * Each value should be an integer that specifies the group assignment.
- * Values should lie in `[0, N)` where `N` is the number of unique groups.
- * @param num_groups Number of groups, i.e., `N`.
+ * Values should lie in \f$[0, N)\f$ where \f$N\f$ is the number of unique groups.
+ * @param num_groups Number of groups, i.e., \f$N\f$.
  * This can be determined by calling `tatami_stats::total_groups()` on `group`.
  * @param[out] output Pointer to an array of pointers of length equal to the number of groups.
  * Each inner pointer should reference an array of length equal to the number of rows (if `row = true`) or columns (otherwise).
@@ -196,7 +197,7 @@ void apply(bool row, const tatami::Matrix<Value_, Index_>* p, const Group_* grou
  * @param p Pointer to a `tatami::Matrix`.
  * @param[in] group Pointer to an array of length equal to the number of columns.
  * Each value should be an integer that specifies the group assignment.
- * Values should lie in `[0, N)` where `N` is the number of unique groups.
+ * Values should lie in \f$[0, N)\f$ where \f$N\f$ is the number of unique groups.
  * @param sopt Summation options.
  *
  * @return Vector of length equal to the number of groups.
@@ -230,7 +231,7 @@ std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>*
  * @param p Pointer to a `tatami::Matrix`.
  * @param[in] group Pointer to an array of length equal to the number of columns.
  * Each value should be an integer that specifies the group assignment.
- * Values should lie in `[0, N)` where `N` is the number of unique groups.
+ * Values should lie in \f$[0, N)\f$ where \f$N\f$ is the number of unique groups.
  *
  * @return Vector of length equal to the number of groups.
  * Each entry is a vector of length equal to the number of rows, containing the row-wise sums for the corresponding group.
@@ -251,7 +252,7 @@ std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>*
  * @param p Pointer to a `tatami::Matrix`.
  * @param[in] group Pointer to an array of length equal to the number of rows.
  * Each value should be an integer that specifies the group assignment.
- * Values should lie in `[0, N)` where `N` is the number of unique groups.
+ * Values should lie in \f$[0, N)\f$ where \f$N\f$ is the number of unique groups.
  * @param sopt Summation options.
  *
  * @return Vector of length equal to the number of groups.
@@ -285,7 +286,7 @@ std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_
  * @param p Pointer to a `tatami::Matrix`.
  * @param[in] group Pointer to an array of length equal to the number of rows.
  * Each value should be an integer that specifies the group assignment.
- * Values should lie in `[0, N)` where `N` is the number of unique groups.
+ * Values should lie in \f$[0, N)\f$ where \f$N\f$ is the number of unique groups.
  *
  * @return Vector of length equal to the number of groups.
  * Each entry is a vector of length equal to the number of columns, containing the column-wise sums for the corresponding group.

diff --git a/include/tatami_stats/medians.hpp b/include/tatami_stats/medians.hpp
@@ -62,11 +62,11 @@ Index_ translocate_nans(Value_* ptr, Index_& num) {
  */
 
 /**
- * Directly compute the median from a dense vector.
+ * Directly compute the median from a dense objective vector.
  *
- * @param[in] ptr Pointer to an array of values.
+ * @param[in] ptr Pointer to an array of length `num`, containing the values of the objective vector.
  * This may be modified on output.
- * @param num Length of the array.
+ * @param num Length of the objective vector, i.e., length of the array at `ptr`.
  * @param skip_nan See `Options::skip_nan` for details.
  *
  * @tparam Output_ Type of the output value.
@@ -103,12 +103,12 @@ Output_ direct(Value_* ptr, Index_ num, bool skip_nan) {
 }
 
 /**
- * Directly compute the median from a sparse vector.
+ * Directly compute the median from a sparse objective vector.
  *
- * @param[in] value Pointer to an array of structural non-zero values.
+ * @param[in] value Pointer to an array of length `num_nonzero`, containing the values of the structural non-zeros.
  * This may be modified on output.
- * @param num_nonzero Number of non-zero elements, i.e., the length of the array referenced by `ptr`.
- * @param num_all Total number of elements in the set,
+ * @param num_nonzero Number of structural non-zeros in the objective vector.
+ * @param num_all Length of the objective vector, including the structural zeros,
  * i.e., `num_all - num_nonzero` is the number of zeros.
  * @param skip_nan See `Options::skip_nan` for details.
  *
@@ -182,7 +182,8 @@ Output_ direct(Value_* value, Index_ num_nonzero, Index_ num_all, bool skip_nan)
  * @tparam Output_ Type of the output value.
  * This should be floating-point to store potential averages.
  *
- * @param row Whether to compute medians for the rows.
+ * @param row Whether to compute the median for each row.
+ * If false, the median is computed for each column instead.
  * @param p Pointer to a `tatami::Matrix`.
  * @param[out] output Pointer to an array of length equal to the number of rows (if `row = true`) or columns (otherwise).
  * On output, this will contain the row/column medians.

diff --git a/include/tatami_stats/ranges.hpp b/include/tatami_stats/ranges.hpp
@@ -77,15 +77,15 @@ bool is_better(Output_ best, Value_ alt) {
  */
 
 /**
- * Directly compute the minimum or maximum of a dense array.
+ * Directly compute the minimum or maximum of a dense objective vector.
  *
  * @tparam minimum_ Whether to compute the minimum.
  * If false, the maximum is computed instead.
  * @tparam Value_ Type of the input data.
  * @tparam Index_ Type of the row/column indices.
  *
- * @param[in] ptr Pointer to an array of values of length `num`.
- * @param num Size of the array.
+ * @param[in] ptr Pointer to an array of length `num`, containing the values of the objective vector.
+ * @param num Length of the objective vector, i.e., length of the array at `ptr`.
  * @param skip_nan See `Options::skip_nan` for details.
  *
  * @return The minimum or maximum value, depending on `minimum_`.
@@ -117,16 +117,16 @@ Value_ direct(const Value_* ptr, Index_ num, bool skip_nan) {
 }
 
 /**
- * Compute the extremes of a sparse array.
+ * Compute the extremes of a sparse objective vector.
  *
  * @tparam minimum_ Whether to compute the minimum.
  * If false, the maximum is computed instead.
  * @tparam Value_ Type of the input data.
  * @tparam Index_ Type of the row/column indices.
  *
- * @param[in] value Pointer to an array of values of length `num`.
- * @param num_nonzero Length of the array pointed to by `value`.
- * @param num_all Total number of values in the dataset, including the zeros not in `value`.
+ * @param[in] value Pointer to an array of length `num_nonzero`, containing the values of the structural non-zeros.
+ * @param num_nonzero Number of structural non-zeros in the objective vector.
+ * @param num_all Length of the objective vector, including the structural zeros not in `value`.
  * This should be greater than or equal to `num_nonzero`.
  * @param skip_nan See `Options::skip_nan` for details.
  *
@@ -152,13 +152,14 @@ Value_ direct(const Value_* value, Index_ num_nonzero, Index_ num_all, bool skip
 /**
  * @brief Running minima/maxima from dense data.
  *
- * This considers a scenario with a set of equilength "objective" vectors [V1, V2, V3, ..., Vn],
- * but data are only available for "observed" vectors [P1, P2, P3, ..., Pm],
- * where Pi[j] contains the i-th element of objective vector Vj.
- * The idea is to repeatedly call `add()` for `ptr` corresponding to observed vectors from 0 to m - 1,
+ * This considers a scenario with a set of equilength "objective" vectors \f$[v_1, v_2, v_3, ..., v_n]\f$,
+ * but data are only available for "observed" vectors \f$[p_1, p_2, p_3, ..., p_m]\f$,
+ * where the \f$j\f$-th element of \f$p_i\f$ is the \f$i\f$-th element of \f$v_j\f$.
+ * The idea is to repeatedly call `add()` for `ptr` corresponding to observed vectors from 0 to \f$m - 1\f$,
  * which computes the running minimum/maximum for each objective vector at each invocation.
  *
  * @tparam minimum_ Whether to compute the minimum.
+ * If false, the maximum is computed instead.
  * @tparam Output_ Type of the output data.
  * @tparam Value_ Type of the input data.
  * @tparam Index_ Type of the row/column indices.
@@ -167,7 +168,7 @@ template<bool minimum_, typename Output_, typename Value_, typename Index_>
 class RunningDense {
 public:
     /**
-     * @param num Number of objective vectors, i.e., n.
+     * @param num Number of objective vectors, i.e., \f$n\f$.
      * @param[out] store Pointer to an output array of length `num`.
      * After `finish()` is called, this will contain the minimum/maximum for each objective vector.
      * @param skip_nan See `Options::skip_nan` for details.
@@ -227,6 +228,7 @@ class RunningDense {
  * This does the same as `RunningDense` but for sparse observed vectors.
  *
  * @tparam minimum_ Whether to compute the minimum.
+ * If false, the maximum is computed instead.
  * @tparam Output_ Type of the output data.
  * @tparam Value_ Type of the input value.
  * @tparam Index_ Type of the row/column indices.
@@ -316,7 +318,8 @@ class RunningSparse {
  * @tparam Index_ Type of the row/column indices.
  * @tparam Output_ Type of the output value.
  *
- * @param row Whether to compute variances for the rows.
+ * @param row Whether to compute the range for each row.
+ * If false, the range is computed for each column instead.
  * @param p Pointer to a `tatami::Matrix`.
  * @param[out] min_out Pointer to an array of length equal to the number of rows (if `row = true`) or columns (otherwise).
  * On output, this will contain the minimum of each row/column.

diff --git a/include/tatami_stats/sums.hpp b/include/tatami_stats/sums.hpp
@@ -39,15 +39,16 @@ struct Options {
 };
 
 /**
- * Directly sum an array of values using naive accumulation.
+ * Compute the sum across an objective vector using naive accumulation.
  * This is best used with a sufficiently high-precision `Output_`, hence the default of `double`.
  *
  * @tparam Output_ Type of the output data.
  * @tparam Value_ Type of the input data.
  * @tparam Index_ Type of the row/column index.
  *
- * @param[in] ptr Pointer to an array of values of length `num`.
- * @param num Size of the array.
+ * @param[in] ptr Pointer to an array of length `num`, containing the values of the objective vector.
+ * @param num Size of the array at `ptr`.
+ * This may be less than the length of the objective vector for sparse data.
  * @param skip_nan See `Options::skip_nan`.
  * @return The sum.
  */
@@ -70,10 +71,10 @@ Output_ direct(const Value_* ptr, Index_ num, bool skip_nan) {
 /**
  * @brief Running sums from dense data.
  *
- * This considers a scenario with a set of equilength "objective" vectors [V1, V2, V3, ..., Vn],
- * but data are only available for "observed" vectors [P1, P2, P3, ..., Pm],
- * where Pi[j] contains the i-th element of objective vector Vj.
- * The idea is to repeatedly call `add()` for `ptr` corresponding to observed vectors from 0 to m - 1,
+ * This considers a scenario with a set of equilength "objective" vectors \f$[v_1, v_2, v_3, ..., v_n]\f$,
+ * but data are only available for "observed" vectors \f$[p_1, p_2, p_3, ..., p_m]\f$,
+ * where the \f$j\f$-th element of \f$p_i\f$ is the \f$i\f$-th element of \f$v_j\f$.
+ * The idea is to repeatedly call `add()` for `ptr` corresponding to observed vectors from 0 to \f$m - 1\f$,
  * and then finally call `finish()` to obtain the sum for each objective vector.
  *
  * This class uses naive accumulation to obtain the sum for each objective vector.
@@ -87,7 +88,7 @@ template<typename Output_, typename Value_, typename Index_>
 class RunningDense {
 public:
     /**
-     * @param num Number of objective vectors, i.e., n.
+     * @param num Number of objective vectors, i.e., \f$n\f$.
      * @param[out] sum Pointer to an output array of length `num`.
      * This should be zeroed on input, and will store the running sums after each `add()`.
      * @param skip_nan See `Options::skip_nan` for details.
@@ -182,7 +183,8 @@ class RunningSparse {
  * @tparam Index_ Type of the row/column indices.
  * @tparam Output_ Type of the output value.
  *
- * @param row Whether to compute variances for the rows.
+ * @param row Whether to compute the sum for each row.
+ * If false, the sum is computed for each column instead.
  * @param p Pointer to a `tatami::Matrix`.
  * @param[out] output Pointer to an array of length equal to the number of rows (if `row = true`) or columns (otherwise).
  * On output, this will contain the row/column sums.

diff --git a/include/tatami_stats/variances.hpp b/include/tatami_stats/variances.hpp
@@ -65,17 +65,17 @@ void add_welford_zeros(Output_& mean, Output_& sumsq, Index_ num_nonzero, Index_
  */
 
 /**
- * Compute the mean and variance from a sparse array of values.
+ * Compute the mean and variance from a sparse objective vector.
  * This uses the standard two-pass algorithm with naive accumulation of the sum of squared differences;
  * thus, it is best used with a sufficiently high-precision `Output_` like `double`.
  *
  * @tparam Output_ Type of the output data.
  * @tparam Value_ Type of the input data.
  * @tparam Index_ Type of the row/column indices.
  *
- * @param[in] value Pointer to an array of values of length `num`.
+ * @param[in] value Pointer to an array of length `num_nonzero`, containing the values of the structural non-zeros.
  * @param num_nonzero Length of the array pointed to by `value`.
- * @param num_all Total number of values in the dataset, including the zeros not in `value`.
+ * @param num_all Length of the objective vector, including the structural zeros not in `value`.
  * This should be greater than or equal to `num_nonzero`.
  * @param skip_nan See `Options::skip_nan`.
  *
@@ -136,16 +136,16 @@ std::pair<Output_, Output_> direct(const Value_* value, Index_ num_nonzero, Inde
 }
 
 /**
- * Compute the mean and variance from an array of values.
+ * Compute the mean and variance from a dense objective vector.
  * This uses the standard two-pass algorithm with naive accumulation of the sum of squared differences;
  * thus, it is best used with a sufficiently high-precision `Output_` like `double`.
  *
  * @tparam Output_ Type of the output data.
  * @tparam Value_ Type of the input data.
  * @tparam Index_ Type of the row/column indices.
  *
- * @param[in] ptr Pointer to an array of values of length `num`.
- * @param num Size of the array.
+ * @param[in] ptr Pointer to an array of length `num`, containing the values of the objective vector.
+ * @param num Length of the objective vector, i.e., length of the array at `ptr`.
  * @param skip_nan See `Options::skip_nan`.
  *
  * @return The sample mean and variance of values in `[ptr, ptr + num)`.
@@ -160,10 +160,10 @@ std::pair<Output_, Output_> direct(const Value_* ptr, Index_ num, bool skip_nan)
  * @brief Running variances from dense data.
  *
  * Compute running means and variances from dense data using Welford's method.
- * This considers a scenario with a set of equilength "objective" vectors [V1, V2, V3, ..., Vn],
- * but data are only available for "observed" vectors [P1, P2, P3, ..., Pm],
- * where Pi[j] contains the i-th element of objective vector Vj.
- * The idea is to repeatedly call `add()` for `ptr` corresponding to observed vectors from 0 to m - 1,
+ * This considers a scenario with a set of equilength "objective" vectors \f$[v_1, v_2, v_3, ..., v_n]\f$,
+ * but data are only available for "observed" vectors \f$[p_1, p_2, p_3, ..., p_m]\f$,
+ * where the \f$j\f$-th element of \f$p_i\f$ is the \f$i\f$-th element of \f$v_j\f$.
+ * The idea is to repeatedly call `add()` for `ptr` corresponding to observed vectors from 0 to \f$m - 1\f$,
  * and then finally call `finish()` to obtain the mean and variance for each objective vector.
  *
  * @tparam Output_ Type of the output data.
@@ -174,7 +174,7 @@ template<typename Output_, typename Value_, typename Index_>
 class RunningDense {
 public:
     /**
-     * @param num Number of objective vectors, i.e., n.
+     * @param num Number of objective vectors, i.e., \f$n\f$.
      * @param[out] mean Pointer to an output array of length `num`.
      * This should be zeroed on input; after `finish()` is called, this will contain the means for each objective vector.
      * @param[out] variance Pointer to an output array of length `num`, containing the variances for each objective vector.
@@ -247,7 +247,7 @@ class RunningDense {
  * @brief Running variances from sparse data.
  *
  * Compute running means and variances from sparse data using Welford's method.
- * This does the same as its dense overload for sparse observed vectors.
+ * This does the same as `RunningDense` but for sparse observed vectors.
  *
  * @tparam Output_ Type of the output data.
  * @tparam Value_ Type of the input data.
@@ -354,7 +354,8 @@ class RunningSparse {
  * @tparam Index_ Type of the row/column indices.
  * @tparam Output_ Type of the output value.
  *
- * @param row Whether to compute variances for the rows.
+ * @param row Whether to compute the variance for each row.
+ * If false, the variance is computed for each column instead.
  * @param p Pointer to a `tatami::Matrix`.
  * @param[out] output Pointer to an array of length equal to the number of rows (if `row = true`) or columns (otherwise).
  * On output, this will contain the row/column variances.