diff --git a/docs/doxygen-awesome.css b/docs/doxygen-awesome.css index 6244a1e..c2f4114 100644 --- a/docs/doxygen-awesome.css +++ b/docs/doxygen-awesome.css @@ -1753,7 +1753,7 @@ table.fieldtable th { color: var(--tablehead-foreground); } -table.fieldtable td.fieldtype, .fieldtable td.fieldname, .fieldtable td.fielddoc, .fieldtable th { +table.fieldtable td.fieldtype, .fieldtable td.fieldname, .fieldtable td.fieldinit, .fieldtable td.fielddoc, .fieldtable th { border-bottom: 1px solid var(--separator-color); border-right: 1px solid var(--separator-color); } diff --git a/docs/index.html b/docs/index.html index 945d362..03bbb83 100644 --- a/docs/index.html +++ b/docs/index.html @@ -101,7 +101,7 @@

Quick start

vopt.num_threads = 5;
auto col_mean_and_var = tatami_stats::variances::by_column(mat.get(), vopt);
tatami::DenseRowMatrix
-
tatami_stats::medians::by_row
std::vector< Output_ > by_row(const tatami::Matrix< Value_, Index_ > *p, const Options &mopt)
Definition medians.hpp:310
+
tatami_stats::medians::by_row
std::vector< Output_ > by_row(const tatami::Matrix< Value_, Index_ > *p, const Options &mopt)
Definition medians.hpp:315
tatami_stats::variances::by_column
std::vector< Output_ > by_column(const tatami::Matrix< Value_, Index_ > *p, const Options &vopt)
Definition variances.hpp:480
tatami_stats::variances::Options
Variance calculation options.
Definition variances.hpp:30
tatami_stats::variances::Options::num_threads
int num_threads
Definition variances.hpp:41
diff --git a/docs/medians_8hpp_source.html b/docs/medians_8hpp_source.html index 3bf27a1..e92619b 100644 --- a/docs/medians_8hpp_source.html +++ b/docs/medians_8hpp_source.html @@ -161,171 +161,176 @@
109 // immediately before 'halfway' in the sort order, we just need to find the
110 // maximum from '[0, halfway)'.
111 Output_ other = *std::max_element(ptr, ptr + halfway);
-
112 return (medtmp + other)/2;
-
113}
+
112
+
113 // Avoid FP overflow, preserve exactness of the median if medtmp == other.
+
114 return medtmp + (other - medtmp)/2;
+
115}
-
114
-
132template<typename Output_ = double, typename Value_, typename Index_>
-
-
133Output_ direct(Value_* value, Index_ num_nonzero, Index_ num_all, bool skip_nan) {
-
134 // Fallback to the dense code if there are no structural zeros. This is not
-
135 // just for efficiency as the downstream averaging code assumes that there
-
136 // is at least one structural zero when considering its scenarios.
-
137 if (num_nonzero == num_all) {
-
138 return direct<Output_>(value, num_all, skip_nan);
-
139 }
-
140
-
141 ::tatami_stats::internal::nanable_ifelse<Value_>(
-
142 skip_nan,
-
143 [&]() {
-
144 auto lost = internal::translocate_nans(value, num_nonzero);
-
145 value += lost;
-
146 num_nonzero -= lost;
-
147 num_all -= lost;
-
148 },
-
149 [&]() {}
-
150 );
-
151
-
152 // Is the number of non-zeros less than the number of zeros?
-
153 // If so, the median must be zero. Note that we calculate it
-
154 // in this way to avoid overflow from 'num_nonzero * 2'.
-
155 if (num_nonzero < num_all - num_nonzero) {
-
156 return 0;
-
157 }
-
158
-
159 size_t halfway = num_all / 2;
-
160 bool is_even = (num_all % 2 == 0);
-
161
-
162 size_t num_zero = num_all - num_nonzero;
-
163 size_t num_negative = 0;
-
164 for (Index_ i = 0; i < num_nonzero; ++i) {
-
165 num_negative += (value[i] < 0);
-
166 }
-
167
-
168 if (!is_even) {
-
169 if (num_negative > halfway) {
-
170 std::nth_element(value, value + halfway, value + num_nonzero);
-
171 return value[halfway];
-
172
-
173 } else if (halfway >= num_negative + num_zero) {
-
174 size_t skip_zeros = halfway - num_zero;
-
175 std::nth_element(value, value + skip_zeros, value + num_nonzero);
-
176 return value[skip_zeros];
-
177
-
178 } else {
-
179 return 0;
-
180 }
-
181 }
-
182
-
183 Output_ tmp = 0;
-
184 if (num_negative > halfway) { // both halves of the median are negative.
-
185 std::nth_element(value, value + halfway, value + num_nonzero);
-
186 tmp = value[halfway] + *(std::max_element(value, value + halfway)); // max_element gets the sorted value at halfway - 1, see explanation for the dense case.
-
187
-
188 } else if (num_negative == halfway) { // the upper half is guaranteed to be zero.
-
189 size_t below_halfway = halfway - 1;
-
190 std::nth_element(value, value + below_halfway, value + num_nonzero);
-
191 tmp = value[below_halfway];
-
192
-
193 } else if (num_negative < halfway && num_negative + num_zero > halfway) { // both halves are zero, so zero is the median.
-
194 ;
+
116
+
134template<typename Output_ = double, typename Value_, typename Index_>
+
+
135Output_ direct(Value_* value, Index_ num_nonzero, Index_ num_all, bool skip_nan) {
+
136 // Fallback to the dense code if there are no structural zeros. This is not
+
137 // just for efficiency as the downstream averaging code assumes that there
+
138 // is at least one structural zero when considering its scenarios.
+
139 if (num_nonzero == num_all) {
+
140 return direct<Output_>(value, num_all, skip_nan);
+
141 }
+
142
+
143 ::tatami_stats::internal::nanable_ifelse<Value_>(
+
144 skip_nan,
+
145 [&]() {
+
146 auto lost = internal::translocate_nans(value, num_nonzero);
+
147 value += lost;
+
148 num_nonzero -= lost;
+
149 num_all -= lost;
+
150 },
+
151 [&]() {}
+
152 );
+
153
+
154 // Is the number of non-zeros less than the number of zeros?
+
155 // If so, the median must be zero. Note that we calculate it
+
156 // in this way to avoid overflow from 'num_nonzero * 2'.
+
157 if (num_nonzero < num_all - num_nonzero) {
+
158 return 0;
+
159 }
+
160
+
161 size_t halfway = num_all / 2;
+
162 bool is_even = (num_all % 2 == 0);
+
163
+
164 size_t num_zero = num_all - num_nonzero;
+
165 size_t num_negative = 0;
+
166 for (Index_ i = 0; i < num_nonzero; ++i) {
+
167 num_negative += (value[i] < 0);
+
168 }
+
169
+
170 if (!is_even) {
+
171 if (num_negative > halfway) {
+
172 std::nth_element(value, value + halfway, value + num_nonzero);
+
173 return value[halfway];
+
174
+
175 } else if (halfway >= num_negative + num_zero) {
+
176 size_t skip_zeros = halfway - num_zero;
+
177 std::nth_element(value, value + skip_zeros, value + num_nonzero);
+
178 return value[skip_zeros];
+
179
+
180 } else {
+
181 return 0;
+
182 }
+
183 }
+
184
+
185 Output_ baseline = 0, other = 0;
+
186 if (num_negative > halfway) { // both halves of the median are negative.
+
187 std::nth_element(value, value + halfway, value + num_nonzero);
+
188 baseline = value[halfway];
+
189 other = *(std::max_element(value, value + halfway)); // max_element gets the sorted value at halfway - 1, see explanation for the dense case.
+
190
+
191 } else if (num_negative == halfway) { // the upper half is guaranteed to be zero.
+
192 size_t below_halfway = halfway - 1;
+
193 std::nth_element(value, value + below_halfway, value + num_nonzero);
+
194 other = value[below_halfway]; // set to other so that addition/subtraction of a zero baseline has no effect on precision.
195
-
196 } else if (num_negative + num_zero == halfway) { // the lower half is guaranteed to be zero.
-
197 size_t skip_zeros = halfway - num_zero;
-
198 std::nth_element(value, value + skip_zeros, value + num_nonzero);
-
199 tmp = value[skip_zeros];
-
200
-
201 } else { // both halves of the median are non-negative.
-
202 size_t skip_zeros = halfway - num_zero;
-
203 std::nth_element(value, value + skip_zeros, value + num_nonzero);
-
204 tmp = value[skip_zeros] + *(std::max_element(value, value + skip_zeros)); // max_element gets the sorted value at skip_zeros - 1, see explanation for the dense case.
-
205 }
-
206
-
207 return tmp / 2;
-
208}
+
196 } else if (num_negative < halfway && num_negative + num_zero > halfway) { // both halves are zero, so zero is the median.
+
197 ;
+
198
+
199 } else if (num_negative + num_zero == halfway) { // the lower half is guaranteed to be zero.
+
200 size_t skip_zeros = halfway - num_zero;
+
201 std::nth_element(value, value + skip_zeros, value + num_nonzero);
+
202 other = value[skip_zeros]; // set to other so that addition/subtraction of a zero baseline has no effect on precision.
+
203
+
204 } else { // both halves of the median are non-negative.
+
205 size_t skip_zeros = halfway - num_zero;
+
206 std::nth_element(value, value + skip_zeros, value + num_nonzero);
+
207 baseline = value[skip_zeros];
+
208 other = *(std::max_element(value, value + skip_zeros)); // max_element gets the sorted value at skip_zeros - 1, see explanation for the dense case.
+
209 }
+
210
+
211 // Avoid FP overflow, preserve exactness of the median if baseline == other.
+
212 return baseline + (other - baseline) / 2;
+
213}
-
209
-
225template<typename Value_, typename Index_, typename Output_>
-
-
226void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output, const medians::Options& mopt) {
-
227 auto dim = (row ? p->nrow() : p->ncol());
-
228 auto otherdim = (row ? p->ncol() : p->nrow());
-
229
-
230 if (p->sparse()) {
-
231 tatami::Options opt;
-
232 opt.sparse_extract_index = false;
-
233 opt.sparse_ordered_index = false; // we'll be sorting by value anyway.
+
214
+
230template<typename Value_, typename Index_, typename Output_>
+
+
231void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output, const medians::Options& mopt) {
+
232 auto dim = (row ? p->nrow() : p->ncol());
+
233 auto otherdim = (row ? p->ncol() : p->nrow());
234
-
235 tatami::parallelize([&](int, Index_ s, Index_ l) -> void {
-
236 auto ext = tatami::consecutive_extractor<true>(p, row, s, l, opt);
-
237 std::vector<Value_> buffer(otherdim);
-
238 auto vbuffer = buffer.data();
-
239 for (Index_ x = 0; x < l; ++x) {
-
240 auto range = ext->fetch(vbuffer, NULL);
-
241 tatami::copy_n(range.value, range.number, vbuffer);
-
242 output[x + s] = medians::direct<Output_>(vbuffer, range.number, otherdim, mopt.skip_nan);
-
243 }
-
244 }, dim, mopt.num_threads);
-
245
-
246 } else {
-
247 tatami::parallelize([&](int, Index_ s, Index_ l) -> void {
-
248 std::vector<Value_> buffer(otherdim);
-
249 auto ext = tatami::consecutive_extractor<false>(p, row, s, l);
-
250 for (Index_ x = 0; x < l; ++x) {
-
251 auto ptr = ext->fetch(buffer.data());
-
252 tatami::copy_n(ptr, otherdim, buffer.data());
-
253 output[x + s] = medians::direct<Output_>(buffer.data(), otherdim, mopt.skip_nan);
-
254 }
-
255 }, dim, mopt.num_threads);
-
256 }
-
257}
+
235 if (p->sparse()) {
+
236 tatami::Options opt;
+
237 opt.sparse_extract_index = false;
+
238 opt.sparse_ordered_index = false; // we'll be sorting by value anyway.
+
239
+
240 tatami::parallelize([&](int, Index_ s, Index_ l) -> void {
+
241 auto ext = tatami::consecutive_extractor<true>(p, row, s, l, opt);
+
242 std::vector<Value_> buffer(otherdim);
+
243 auto vbuffer = buffer.data();
+
244 for (Index_ x = 0; x < l; ++x) {
+
245 auto range = ext->fetch(vbuffer, NULL);
+
246 tatami::copy_n(range.value, range.number, vbuffer);
+
247 output[x + s] = medians::direct<Output_>(vbuffer, range.number, otherdim, mopt.skip_nan);
+
248 }
+
249 }, dim, mopt.num_threads);
+
250
+
251 } else {
+
252 tatami::parallelize([&](int, Index_ s, Index_ l) -> void {
+
253 std::vector<Value_> buffer(otherdim);
+
254 auto ext = tatami::consecutive_extractor<false>(p, row, s, l);
+
255 for (Index_ x = 0; x < l; ++x) {
+
256 auto ptr = ext->fetch(buffer.data());
+
257 tatami::copy_n(ptr, otherdim, buffer.data());
+
258 output[x + s] = medians::direct<Output_>(buffer.data(), otherdim, mopt.skip_nan);
+
259 }
+
260 }, dim, mopt.num_threads);
+
261 }
+
262}
-
258
-
272template<typename Output_ = double, typename Value_, typename Index_>
-
-
273std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p, const Options& mopt) {
-
274 std::vector<Output_> output(p->ncol());
-
275 apply(false, p, output.data(), mopt);
-
276 return output;
-
277}
+
263
+
277template<typename Output_ = double, typename Value_, typename Index_>
+
+
278std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p, const Options& mopt) {
+
279 std::vector<Output_> output(p->ncol());
+
280 apply(false, p, output.data(), mopt);
+
281 return output;
+
282}
-
278
-
291template<typename Output_ = double, typename Value_, typename Index_>
-
-
292std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p) {
-
293 return by_column(p, Options());
-
294}
+
283
+
296template<typename Output_ = double, typename Value_, typename Index_>
+
+
297std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p) {
+
298 return by_column(p, Options());
+
299}
-
295
-
309template<typename Output_ = double, typename Value_, typename Index_>
-
-
310std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p, const Options& mopt) {
-
311 std::vector<Output_> output(p->nrow());
-
312 apply(true, p, output.data(), mopt);
-
313 return output;
-
314}
+
300
+
314template<typename Output_ = double, typename Value_, typename Index_>
+
+
315std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p, const Options& mopt) {
+
316 std::vector<Output_> output(p->nrow());
+
317 apply(true, p, output.data(), mopt);
+
318 return output;
+
319}
-
315
-
328template<typename Output_ = double, typename Value_, typename Index_>
-
-
329std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p) {
-
330 return by_row(p, Options());
-
331}
+
320
+
333template<typename Output_ = double, typename Value_, typename Index_>
+
+
334std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p) {
+
335 return by_row(p, Options());
+
336}
-
332
-
333}
-
334
-
335}
-
336
-
337#endif
+
337
+
338}
+
339
+
340}
+
341
+
342#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Output_ direct(Value_ *ptr, Index_ num, bool skip_nan)
Definition medians.hpp:82
-
void apply(bool row, const tatami::Matrix< Value_, Index_ > *p, Output_ *output, const medians::Options &mopt)
Definition medians.hpp:226
-
std::vector< Output_ > by_row(const tatami::Matrix< Value_, Index_ > *p, const Options &mopt)
Definition medians.hpp:310
-
std::vector< Output_ > by_column(const tatami::Matrix< Value_, Index_ > *p, const Options &mopt)
Definition medians.hpp:273
+
void apply(bool row, const tatami::Matrix< Value_, Index_ > *p, Output_ *output, const medians::Options &mopt)
Definition medians.hpp:231
+
std::vector< Output_ > by_row(const tatami::Matrix< Value_, Index_ > *p, const Options &mopt)
Definition medians.hpp:315
+
std::vector< Output_ > by_column(const tatami::Matrix< Value_, Index_ > *p, const Options &mopt)
Definition medians.hpp:278
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
void parallelize(Function_ fun, Index_ tasks, int threads)
Value_ * copy_n(const Value_ *input, Size_ n, Value_ *output)