Skip to content

Commit

Permalink
Improvements to cache locality and sparse vector handling (#115)
Browse files Browse the repository at this point in the history
* Re-arrange loops for cleaner code.. no-op

* Add const guarantees to a few biom methods, to make it cleaner

* Add const guarantees to a few tree methods, to make it cleaner

* Add const guarantees in unifrac.cpp, to make it cleaner

* Add biom::get_obs_data_range and su::set_proportions_range

* Implemented chunked embedding. Added related _range methods, and helper classes

* Add OpenMP directives

* Add support for float intermediate results.

* Make DEF_VEC_SIZE a function of TFloat

* Add restricts  to improve optimization

* Fix CPU alignment parameters

* Add checking for zero overlap in UnifracUnweightedTask, also switched to 64-bit packed

* Move normalization inside biom

* Use explicit data type for bool to int conversion

* Default to 0, not 1

* Fix typo

* Rename variable to mirror the meaning

* Add sparse logic to UnnormalizedWeighted

* Pre-compute sums for partial-sparse use case in UnifracUnnormalizedWeightedTask

* Fix ACC typo

* Pre-compute sums for partial-sparse use case in UnifracNormalizedWeightedTask
  • Loading branch information
sfiligoi authored Sep 28, 2020
1 parent dc8f010 commit d065698
Show file tree
Hide file tree
Showing 9 changed files with 757 additions and 264 deletions.
57 changes: 52 additions & 5 deletions sucpp/biom.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ void biom::create_id_index(std::vector<std::string> &ids,
}
}

unsigned int biom::get_obs_data_direct(std::string id, uint32_t *& current_indices_out, double *& current_data_out) {
unsigned int biom::get_obs_data_direct(const std::string &id, uint32_t *& current_indices_out, double *& current_data_out) {
uint32_t idx = obs_id_index.at(id);
uint32_t start = obs_indptr[idx];
uint32_t end = obs_indptr[idx + 1];
Expand Down Expand Up @@ -198,11 +198,12 @@ unsigned int biom::get_obs_data_direct(std::string id, uint32_t *& current_indic
return count[0];
}

void biom::get_obs_data(std::string id, double* out) {
template<class TFloat>
void biom::get_obs_data_TT(const std::string &id, TFloat* out) const {
uint32_t idx = obs_id_index.at(id);
unsigned int count = obs_counts_resident[idx];
uint32_t *indices = obs_indices_resident[idx];
double *data = obs_data_resident[idx];
const uint32_t * const indices = obs_indices_resident[idx];
const double * const data = obs_data_resident[idx];

// reset our output buffer
for(unsigned int i = 0; i < n_samples; i++)
Expand All @@ -213,7 +214,53 @@ void biom::get_obs_data(std::string id, double* out) {
}
}

unsigned int biom::get_sample_data_direct(std::string id, uint32_t *& current_indices_out, double *& current_data_out) {
void biom::get_obs_data(const std::string &id, double* out) const {
biom::get_obs_data_TT(id,out);
}

void biom::get_obs_data(const std::string &id, float* out) const {
biom::get_obs_data_TT(id,out);
}


/* Fill a dense buffer with one observation's values for samples [start, end).
 *
 * @param id The observation ID, resolved through obs_id_index.at(id)
 * @param start First sample index to extract
 * @param end First sample index past the end of the range
 * @param normalize If set, each value is divided by sample_counts[j] of its sample j
 * @param out Buffer of at least (end-start) elements; out[j-start] receives the
 *      value of sample j. The buffer is fully filled: every position in
 *      [0, end-start) is written, and positions without data are zero'd.
 */
template<class TFloat>
void biom::get_obs_data_range_TT(const std::string &id, unsigned int start, unsigned int end, bool normalize, TFloat* out) const {
    uint32_t idx = obs_id_index.at(id);
    unsigned int count = obs_counts_resident[idx];
    const uint32_t * const indices = obs_indices_resident[idx];
    const double * const data = obs_data_resident[idx];

    // reset our output buffer
    for(unsigned int i = start; i < end; i++)
        out[i-start] = 0.0;

    // normalize is tested once, outside the loops, rather than once per element
    if (normalize) {
        for(unsigned int i = 0; i < count; i++) {
            // keep the sample index unsigned, like the stored indices and the
            // range bounds: a signed index would be compared against
            // start/end through an implicit conversion and could be used as
            // a negative subscript into sample_counts
            const uint32_t j = indices[i];
            if ((j>=start)&&(j<end)) {
                out[j-start] = data[i]/sample_counts[j];
            }
        }
    } else {
        for(unsigned int i = 0; i < count; i++) {
            const uint32_t j = indices[i];
            if ((j>=start)&&(j<end)) {
                out[j-start] = data[i];
            }
        }
    }
}

void biom::get_obs_data_range(const std::string &id, unsigned int start, unsigned int end, bool normalize, double* out) const {
biom::get_obs_data_range_TT(id,start,end,normalize,out);
}

void biom::get_obs_data_range(const std::string &id, unsigned int start, unsigned int end, bool normalize, float* out) const {
biom::get_obs_data_range_TT(id,start,end,normalize,out);
}

unsigned int biom::get_sample_data_direct(const std::string &id, uint32_t *& current_indices_out, double *& current_data_out) {
uint32_t idx = sample_id_index.at(id);
uint32_t start = sample_indptr[idx];
uint32_t end = sample_indptr[idx + 1];
Expand Down
26 changes: 23 additions & 3 deletions sucpp/biom.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,22 @@ namespace su {
* Values of an index position [0, n_samples) which do not
* have data will be zero'd.
*/
void get_obs_data(std::string id, double* out);
void get_obs_data(const std::string &id, double* out) const;
void get_obs_data(const std::string &id, float* out) const;

/* get a dense vector of a range of observation data
*
* @param id The observation ID to fetch
* @param start Initial index
* @param end First index past the end
* @param normalize If set, divide by sample_counts
* @param out An allocated array of at least size (end-start). First element will correspond to index start.
* Values of an index position [0, (end-start)) which do not
* have data will be zero'd.
*/
void get_obs_data_range(const std::string &id, unsigned int start, unsigned int end, bool normalize, double* out) const;
void get_obs_data_range(const std::string &id, unsigned int start, unsigned int end, bool normalize, float* out) const;

private:
/* retain DataSet handles within the HDF5 file */
H5::DataSet obs_indices;
Expand All @@ -50,8 +65,8 @@ namespace su {
double **obs_data_resident;
unsigned int *obs_counts_resident;

unsigned int get_obs_data_direct(std::string id, uint32_t *& current_indices_out, double *& current_data_out);
unsigned int get_sample_data_direct(std::string id, uint32_t *& current_indices_out, double *& current_data_out);
unsigned int get_obs_data_direct(const std::string &id, uint32_t *& current_indices_out, double *& current_data_out);
unsigned int get_sample_data_direct(const std::string &id, uint32_t *& current_indices_out, double *& current_data_out);
double* get_sample_counts();

/* At construction, lookups mapping IDs -> index position within an
Expand Down Expand Up @@ -85,5 +100,10 @@ namespace su {
*/
void create_id_index(std::vector<std::string> &ids,
std::unordered_map<std::string, uint32_t> &map);


// templatized versions: shared implementations that the double and float
// overloads of get_obs_data and get_obs_data_range forward to
template<class TFloat> void get_obs_data_TT(const std::string &id, TFloat* out) const;
template<class TFloat> void get_obs_data_range_TT(const std::string &id, unsigned int start, unsigned int end, bool normalize, TFloat* out) const;
};
}
147 changes: 143 additions & 4 deletions sucpp/test_su.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -555,14 +555,14 @@ void test_bptree_rightsibling() {

// Smoke test: only constructs a PropStack (constructor argument 10) between
// SUITE_START and SUITE_END; no assertion is made.
void test_propstack_constructor() {
SUITE_START("test propstack constructor");
su::PropStack ps = su::PropStack(10);
su::PropStack<double> ps(10);
// nothing to test directly...
SUITE_END();
}

void test_propstack_push_and_pop() {
SUITE_START("test propstack push and pop");
su::PropStack ps = su::PropStack(10);
su::PropStack<double> ps(10);

double *vec1 = ps.pop(1);
double *vec2 = ps.pop(2);
Expand All @@ -587,7 +587,7 @@ void test_propstack_push_and_pop() {

void test_propstack_get() {
SUITE_START("test propstack get");
su::PropStack ps = su::PropStack(10);
su::PropStack<double> ps(10);

double *vec1 = ps.pop(1);
double *vec2 = ps.pop(2);
Expand All @@ -609,7 +609,7 @@ void test_unifrac_set_proportions() {
// ( ( ) ( ( ) ( ) ) ( ( ) ( ) ) )
su::BPTree tree = su::BPTree("(GG_OTU_1,(GG_OTU_2,GG_OTU_3),(GG_OTU_5,GG_OTU_4));");
su::biom table = su::biom("test.biom");
su::PropStack ps = su::PropStack(table.n_samples);
su::PropStack<double> ps(table.n_samples);

double *obs = ps.pop(4); // GG_OTU_2
double exp4[] = {0.714285714286, 0.333333333333, 0.0, 0.333333333333, 1.0, 0.25};
Expand All @@ -631,6 +631,143 @@ void test_unifrac_set_proportions() {
SUITE_END();
}

// Checks set_proportions_range in double precision over four sample windows
// (whole table, beginning, end, middle). For each window the same three tree
// positions are popped and filled in the same order, and the result is
// compared against the matching slice of the expected full-table values.
void test_unifrac_set_proportions_range() {
    SUITE_START("test unifrac set proportions range");
    // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
    // ( ( ) ( ( ) ( ) ) ( ( ) ( ) ) )
    su::BPTree tree = su::BPTree("(GG_OTU_1,(GG_OTU_2,GG_OTU_3),(GG_OTU_5,GG_OTU_4));");
    su::biom table = su::biom("test.biom");

    const double exp4[] = {0.714285714286, 0.333333333333, 0.0, 0.333333333333, 1.0, 0.25};
    const double exp6[] = {0.0, 0.0, 0.25, 0.666666666667, 0.0, 0.5};
    const double exp3[] = {0.71428571, 0.33333333, 0.25, 1.0, 1.0, 0.75};

    // tree positions to check, in the order they must be processed
    struct NodeCase {
        unsigned int node;
        const double *expected;
    };
    const NodeCase node_cases[] = {
        {4, exp4},  // GG_OTU_2
        {6, exp6},  // GG_OTU_3
        {3, exp3},  // node containing GG_OTU_2 and GG_OTU_3
    };

    // sample windows to check, each with the PropStack size used for it
    struct Window {
        unsigned int stack_size;
        unsigned int first;
        unsigned int past_last;
    };
    const unsigned int n_samples = table.n_samples;
    const Window windows[] = {
        {n_samples, 0, n_samples},  // first the whole table
        {3, 0, 3},                  // beginning
        {4, 2, n_samples},          // end
        {3, 1, 4},                  // middle
    };

    for(const Window &w : windows) {
        // a fresh stack per window, destroyed before the next window starts
        su::PropStack<double> ps(w.stack_size);

        for(const NodeCase &nc : node_cases) {
            double *obs = ps.pop(nc.node);
            set_proportions_range(obs, tree, nc.node, table, w.first, w.past_last, ps);
            // obs is window-relative, the expected values are table-relative
            for(unsigned int i = w.first; i < w.past_last; i++)
                ASSERT(fabs(obs[i - w.first] - nc.expected[i]) < 0.000001);
        }
    }

    SUITE_END();
}

// Single-precision counterpart of the set_proportions_range test: only the
// middle sample window [1, 4) is exercised, with the same three tree
// positions processed in the same order.
void test_unifrac_set_proportions_range_float() {
    SUITE_START("test unifrac set proportions range float");
    // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
    // ( ( ) ( ( ) ( ) ) ( ( ) ( ) ) )
    su::BPTree tree = su::BPTree("(GG_OTU_1,(GG_OTU_2,GG_OTU_3),(GG_OTU_5,GG_OTU_4));");
    su::biom table = su::biom("test.biom");

    const float exp4[] = {0.714285714286, 0.333333333333, 0.0, 0.333333333333, 1.0, 0.25};
    const float exp6[] = {0.0, 0.0, 0.25, 0.666666666667, 0.0, 0.5};
    const float exp3[] = {0.71428571, 0.33333333, 0.25, 1.0, 1.0, 0.75};

    // tree positions to check, in the order they must be processed
    struct NodeCase {
        unsigned int node;
        const float *expected;
    };
    const NodeCase node_cases[] = {
        {4, exp4},  // GG_OTU_2
        {6, exp6},  // GG_OTU_3
        {3, exp3},  // node containing GG_OTU_2 and GG_OTU_3
    };

    // just the middle
    {
        const unsigned int start = 1;
        const unsigned int end = 4;
        su::PropStack<float> ps(end-start);

        for(const NodeCase &nc : node_cases) {
            float *obs = ps.pop(nc.node);
            set_proportions_range(obs, tree, nc.node, table, start, end, ps);
            // obs is window-relative, the expected values are table-relative
            for(unsigned int i = start; i < end; i++)
                ASSERT(fabs(obs[i-start] - nc.expected[i]) < 0.000001);
        }
    }

    SUITE_END();
}



void test_unifrac_deconvolute_stripes() {
SUITE_START("test deconvolute stripes");
std::vector<double*> stripes;
Expand Down Expand Up @@ -1703,6 +1840,8 @@ int main(int argc, char** argv) {
test_propstack_get();

test_unifrac_set_proportions();
test_unifrac_set_proportions_range();
test_unifrac_set_proportions_range_float();
test_unifrac_deconvolute_stripes();
test_unifrac_stripes_to_condensed_form_even();
test_unifrac_stripes_to_condensed_form_odd();
Expand Down
24 changes: 12 additions & 12 deletions sucpp/tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,43 +197,43 @@ void BPTree::index_and_cache() {
}
}

// Returns open() applied to select_0_index[k].
// NOTE(review): presumably select_0_index[k] is the position of the k-th
// closing parenthesis, so the result is the opening position of the k-th
// node in postorder -- confirm against index_and_cache.
uint32_t BPTree::postorderselect(uint32_t k) {
uint32_t BPTree::postorderselect(uint32_t k) const {
return open(select_0_index[k]);
}

// Returns select_1_index[k].
// NOTE(review): presumably the position of the k-th opening parenthesis,
// i.e. the k-th node in preorder -- confirm against index_and_cache.
uint32_t BPTree::preorderselect(uint32_t k) {
uint32_t BPTree::preorderselect(uint32_t k) const {
return select_1_index[k];
}

// Returns i itself when structure[i] is set, otherwise openclose[i].
// NOTE(review): presumably structure[i] is true for an opening parenthesis
// and openclose[i] is the position of the matching parenthesis -- confirm.
inline uint32_t BPTree::open(uint32_t i) {
inline uint32_t BPTree::open(uint32_t i) const {
return structure[i] ? i : openclose[i];
}

// Returns openclose[i] when structure[i] is set, otherwise i itself
// (the mirror image of open()).
// NOTE(review): presumably structure[i] is true for an opening parenthesis
// and openclose[i] is the position of the matching parenthesis -- confirm.
inline uint32_t BPTree::close(uint32_t i) {
inline uint32_t BPTree::close(uint32_t i) const {
return structure[i] ? openclose[i] : i;
}

// True when structure[idx] is set and structure[idx + 1] is not.
// When structure[idx] is set this reads structure[idx + 1], so idx + 1 must
// be a valid position.
bool BPTree::isleaf(unsigned int idx) {
bool BPTree::isleaf(unsigned int idx) const {
return (structure[idx] && !structure[idx + 1]);
}

// Returns i + 1 when isleaf(i) is false; returns 0 when i is a leaf.
// Callers must treat 0 as "no child" (see the inline note below).
uint32_t BPTree::leftchild(uint32_t i) {
uint32_t BPTree::leftchild(uint32_t i) const {
// aka fchild
if(isleaf(i))
return 0; // this is awkward, using 0 which is root, but a root cannot be a child. edge case
else
return i + 1;
}

// Returns open(close(i) - 1) when isleaf(i) is false; returns 0 when i is a
// leaf. Callers must treat 0 as "no child" (see the inline note below).
uint32_t BPTree::rightchild(uint32_t i) {
uint32_t BPTree::rightchild(uint32_t i) const {
// aka lchild
if(isleaf(i))
return 0; // this is awkward, using 0 which is root, but a root cannot be a child. edge case
else
return open(close(i) - 1);
}

uint32_t BPTree::rightsibling(uint32_t i) {
uint32_t BPTree::rightsibling(uint32_t i) const {
// aka nsibling
uint32_t position = close(i) + 1;
if(position >= nparens)
Expand All @@ -244,18 +244,18 @@ uint32_t BPTree::rightsibling(uint32_t i) {
return 0;
}

// Alias for enclose(i); the result is returned as a signed int32_t.
int32_t BPTree::parent(uint32_t i) {
int32_t BPTree::parent(uint32_t i) const {
return enclose(i);
}

// Returns bwd(i, -2) + 1 when structure[i] is set, otherwise
// bwd(i - 1, -2) + 1.
// NOTE(review): presumably this yields the opening position of the node
// that encloses i -- confirm against bwd's return convention.
int32_t BPTree::enclose(uint32_t i) {
int32_t BPTree::enclose(uint32_t i) const {
if(structure[i])
return bwd(i, -2) + 1;
else
return bwd(i - 1, -2) + 1;
}

int32_t BPTree::bwd(uint32_t i, int d) {
int32_t BPTree::bwd(uint32_t i, int d) const {
uint32_t target_excess = excess[i] + d;
for(int current_idx = i - 1; current_idx >= 0; current_idx--) {
if(excess[current_idx] == target_excess)
Expand Down Expand Up @@ -410,7 +410,7 @@ void BPTree::set_node_metadata(unsigned int open_idx, std::string &token) {
lengths[open_idx] = length;
}

// True when c is one of the four characters '(', ')', ',' or ';'.
inline bool BPTree::is_structure_character(char c) {
inline bool BPTree::is_structure_character(char c) const {
return (c == '(' || c == ')' || c == ',' || c == ';');
}

Expand Down
Loading

0 comments on commit d065698

Please sign in to comment.