diff --git a/cpp/roaring.hh b/cpp/roaring.hh
index f14150baf..44830940c 100644
--- a/cpp/roaring.hh
+++ b/cpp/roaring.hh
@@ -545,6 +545,14 @@ public:
         return api::roaring_bitmap_rank(&roaring, x);
     }
 
+    /**
+     * Get `rank()` values in bulk. The values in `[begin .. end)` must be in Ascending order.
+     * possible implementation: for(auto* iter = begin; iter != end; ++iter) *(ans++) = rank(*iter);
+     */
+    void rank_many(const uint32_t* begin, const uint32_t* end, uint64_t* ans) const noexcept {
+        return api::roaring_bitmap_rank_many(&roaring, begin, end, ans);
+    }
+
     /**
      * Returns the index of x in the set, index start from 0.
      * If the set doesn't contain x , this function will return -1.
diff --git a/include/roaring/containers/array.h b/include/roaring/containers/array.h
index 3070d6e33..bf4e4e412 100644
--- a/include/roaring/containers/array.h
+++ b/include/roaring/containers/array.h
@@ -412,6 +412,40 @@ inline int array_container_rank(const array_container_t *arr, uint16_t x) {
     }
 }
 
+/**
+ * Bulk version of array_container_rank().
+ *
+ * Answers the leading queries of `[begin .. end)` that share the high 16 bits
+ * of `*begin`, writing `start_rank` plus the in-container rank of each one to
+ * `ans`. The range must be non-empty (`*begin` is read unconditionally) and
+ * sorted in ascending order.
+ *
+ * Returns the number of consumed elements.
+ */
+inline uint32_t array_container_rank_many(const array_container_t *arr, uint64_t start_rank, const uint32_t* begin, const uint32_t* end, uint64_t* ans){
+    const uint16_t high = (uint16_t)((*begin) >> 16);
+    uint32_t pos = 0;  // absolute index in arr->array where the next search starts
+    const uint32_t* iter = begin;
+    for(; iter != end; iter++) {
+        uint32_t x = *iter;
+        uint16_t xhigh = (uint16_t)(x >> 16);
+        if(xhigh != high) return iter - begin;// stop at next container
+
+        // idx is relative to arr->array + pos
+        const int32_t idx = binarySearch(arr->array+pos, arr->cardinality-pos, (uint16_t)x);
+        const bool is_present = idx >= 0;
+        if (is_present) {
+            *(ans++) = start_rank + pos + (idx + 1);
+            // advance by the relative hit so the cursor never moves backwards
+            pos += idx + 1;
+        } else {
+            *(ans++) = start_rank + pos + (-idx - 1);
+        }
+    }
+    return iter - begin;
+}
+
+
 /* Returns the index of x , if not exsist return -1 */
 inline int array_container_get_index(const array_container_t *arr, uint16_t x) {
     const int32_t idx = binarySearch(arr->array, arr->cardinality, x);
diff --git 
a/include/roaring/containers/bitset.h b/include/roaring/containers/bitset.h
index a27e715ae..b895e1a61 100644
--- a/include/roaring/containers/bitset.h
+++ b/include/roaring/containers/bitset.h
@@ -495,6 +495,9 @@ uint16_t bitset_container_maximum(const bitset_container_t *container);
 /* Returns the number of values equal or smaller than x */
 int bitset_container_rank(const bitset_container_t *container, uint16_t x);
 
+// Bulk version of bitset_container_rank(); returns the number of consumed elements.
+uint32_t bitset_container_rank_many(const bitset_container_t *container, uint64_t start_rank, const uint32_t* begin, const uint32_t* end, uint64_t* ans);
+
 /* Returns the index of x , if not exsist return -1 */
 int bitset_container_get_index(const bitset_container_t *container, uint16_t x);
 
diff --git a/include/roaring/containers/containers.h b/include/roaring/containers/containers.h
index d011cc02e..0edab5573 100644
--- a/include/roaring/containers/containers.h
+++ b/include/roaring/containers/containers.h
@@ -2331,6 +2331,33 @@ static inline int container_rank(
     return false;
 }
 
+/**
+ * Bulk counterpart of container_rank(): forwards to the rank_many routine of
+ * the concrete container type and returns how many queries it consumed.
+ */
+static inline uint32_t container_rank_many(
+    const container_t *c, uint8_t type,
+    uint64_t start_rank, const uint32_t* begin, const uint32_t* end, uint64_t* ans
+){
+    c = container_unwrap_shared(c, &type);
+    uint32_t consumed = 0;
+    switch (type) {
+        case BITSET_CONTAINER_TYPE:
+            consumed = bitset_container_rank_many(const_CAST_bitset(c), start_rank, begin, end, ans);
+            break;
+        case ARRAY_CONTAINER_TYPE:
+            consumed = array_container_rank_many(const_CAST_array(c), start_rank, begin, end, ans);
+            break;
+        case RUN_CONTAINER_TYPE:
+            consumed = run_container_rank_many(const_CAST_run(c), start_rank, begin, end, ans);
+            break;
+        default:
+            assert(false);
+            roaring_unreachable;
+    }
+    return consumed;
+}
+
 // return the index of x, if not exsist return -1
 static inline int container_get_index(const container_t *c, uint8_t type,
                                       uint16_t x) {
diff --git 
a/include/roaring/containers/run.h b/include/roaring/containers/run.h
index f24a579a3..b8d9be0cb 100644
--- a/include/roaring/containers/run.h
+++ b/include/roaring/containers/run.h
@@ -561,6 +561,9 @@ inline uint16_t run_container_maximum(const run_container_t *run) {
 /* Returns the number of values equal or smaller than x */
 int run_container_rank(const run_container_t *arr, uint16_t x);
 
+// Bulk version of run_container_rank(); returns the number of consumed elements.
+uint32_t run_container_rank_many(const run_container_t *arr, uint64_t start_rank, const uint32_t* begin, const uint32_t* end, uint64_t* ans);
+
 /* Returns the index of x, if not exsist return -1 */
 int run_container_get_index(const run_container_t *arr, uint16_t x);
 
diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h
index b2476e7db..d32a08b24 100644
--- a/include/roaring/roaring.h
+++ b/include/roaring/roaring.h
@@ -815,6 +815,17 @@ bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank,
  */
 uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x);
 
+/**
+ * roaring_bitmap_rank_many is a bulk version of `roaring_bitmap_rank`:
+ * it stores the rank of each element of `[begin .. end)` into `ans[]`.
+ *
+ * The values in `[begin .. end)` must be sorted in ascending order.
+ * The caller is responsible for allocating enough memory for `ans`, e.g.
+ *
+ *     ans = malloc((end-begin) * sizeof(uint64_t));
+ */
+void roaring_bitmap_rank_many(const roaring_bitmap_t *r, const uint32_t* begin, const uint32_t* end, uint64_t* ans);
+
 /**
  * Returns the index of x in the given roaring bitmap.
  * If the roaring bitmap doesn't contain x , this function will return -1.
diff --git a/microbenchmarks/bench.cpp b/microbenchmarks/bench.cpp
index 3f013b8cd..fd2e45717 100644
--- a/microbenchmarks/bench.cpp
+++ b/microbenchmarks/bench.cpp
@@ -1,4 +1,5 @@
 #include "bench.h"
+#include <vector>
 
 
 struct successive_intersection {
@@ -154,6 +155,35 @@ struct compute_cardinality {
 auto ComputeCardinality = BasicBench<compute_cardinality>;
 BENCHMARK(ComputeCardinality);
 
+struct rank_many_slow {
+    static uint64_t run() {
+        std::vector<uint64_t> ranks(5);
+        for (size_t i = 0; i < count; ++i) {
+            ranks[0] = roaring_bitmap_rank(bitmaps[i], maxvalue/5);
+            ranks[1] = roaring_bitmap_rank(bitmaps[i], 2*maxvalue/5);
+            ranks[2] = roaring_bitmap_rank(bitmaps[i], 3*maxvalue/5);
+            ranks[3] = roaring_bitmap_rank(bitmaps[i], 4*maxvalue/5);
+            ranks[4] = roaring_bitmap_rank(bitmaps[i], maxvalue);
+        }
+        return ranks[0];
+    }
+};
+auto RankManySlow = BasicBench<rank_many_slow>;
+BENCHMARK(RankManySlow);
+
+struct rank_many {
+    static uint64_t run() {
+        std::vector<uint64_t> ranks(5);
+        std::vector<uint32_t> input{maxvalue/5, 2*maxvalue/5, 3*maxvalue/5, 4*maxvalue/5, maxvalue};
+        for (size_t i = 0; i < count; ++i) {
+            roaring_bitmap_rank_many(bitmaps[i],input.data(), input.data()+input.size(), ranks.data());
+        }
+        return ranks[0];
+    }
+};
+auto RankMany = BasicBench<rank_many>;
+BENCHMARK(RankMany);
+
 int main(int argc, char **argv) {
     const char *dir_name;
     if ((argc == 1) || (argc > 1 && argv[1][0] == '-')) {
diff --git a/src/containers/bitset.c b/src/containers/bitset.c
index 722eda1e3..5d38817c4 100644
--- a/src/containers/bitset.c
+++ b/src/containers/bitset.c
@@ -1232,6 +1232,34 @@ int bitset_container_rank(const bitset_container_t *container, uint16_t x) {
     return sum;
 }
 
+/* Bulk version of bitset_container_rank(): answers the leading queries of
+ * `[begin .. end)` that share the high 16 bits of `*begin` (which is read
+ * unconditionally) and returns how many were consumed. The word cursor and
+ * the running popcount only move forward, so the queries must be ascending. */
+uint32_t bitset_container_rank_many(const bitset_container_t *container, uint64_t start_rank, const uint32_t* begin, const uint32_t* end, uint64_t* ans){
+    const uint16_t high = (uint16_t)((*begin) >> 16);
+    int i = 0;
+    int sum = 0;
+    const uint32_t* iter = begin;
+    for(; iter != end; iter++) {
+        uint32_t x = *iter;
+        uint16_t xhigh = (uint16_t)(x >> 16);
+        if(xhigh != high) return iter - begin; // stop at next container
+
+        uint16_t xlow = (uint16_t)x;
+        // accumulate the popcount of every whole word below the word holding xlow
+        for(int count = xlow / 64; i < count; i++){
+            sum += roaring_hamming(container->words[i]);
+        }
+        uint64_t lastword = container->words[i];
+        uint64_t lastpos = UINT64_C(1) << (xlow % 64);
+        uint64_t mask = lastpos + lastpos - 1; // smear right
+        *(ans++) = start_rank + sum + roaring_hamming(lastword & mask);
+    }
+    return iter - begin;
+}
+
+
 /* Returns the index of x , if not exsist return -1 */
 int bitset_container_get_index(const bitset_container_t *container, uint16_t x) {
     if (bitset_container_get(container, x)) {
diff --git a/src/containers/run.c b/src/containers/run.c
index ddba08a3c..b3c90b06a 100644
--- a/src/containers/run.c
+++ b/src/containers/run.c
@@ -883,6 +883,44 @@ int run_container_rank(const run_container_t *container, uint16_t x) {
     }
     return sum;
 }
+/* Bulk version of run_container_rank(): answers the leading queries of
+ * `[begin .. end)` that share the high 16 bits of `*begin` (which is read
+ * unconditionally) and returns how many were consumed. The run cursor only
+ * moves forward, so the queries must be ascending. */
+uint32_t run_container_rank_many(const run_container_t *container, uint64_t start_rank, const uint32_t* begin, const uint32_t* end, uint64_t* ans){
+    const uint16_t high = (uint16_t)((*begin) >> 16);
+    const uint32_t* iter = begin;
+    int sum = 0;
+    int i = 0;
+    for(;iter != end; iter++) {
+        uint32_t x = *iter;
+        uint16_t xhigh = (uint16_t)(x >> 16);
+        if(xhigh != high) return iter - begin; // stop at next container
+
+        uint32_t x32 = x & 0xFFFF;
+        while(i < container->n_runs) {
+            uint32_t startpoint = container->runs[i].value;
+            uint32_t length = container->runs[i].length;
+            uint32_t endpoint = length + startpoint;
+            if (x32 <= endpoint) {
+                if (x32 < startpoint) {
+                    *(ans++) = start_rank + sum;
+                } else {
+                    *(ans++) = start_rank + sum + (x32 - startpoint) + 1;
+                }
+                break;
+            } else {
+                sum += length + 1; // the run covers length + 1 values
+                i++;
+            }
+        }
+        // x lies beyond the last run: every value counted so far ranks below it
+        if (i >= container->n_runs) *(ans++) = start_rank + sum;
+    }
+
+    return iter - begin;
+}
+
+
 int run_container_get_index(const run_container_t *container, uint16_t x) {
     if (run_container_contains(container, x)) {
diff --git a/src/roaring.c b/src/roaring.c
index c863aa531..d7edaa087 100644
--- a/src/roaring.c
+++ b/src/roaring.c
@@ -2796,6 +2796,45 @@ uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) {
     }
     return size;
 }
+/**
+ * Bulk version of roaring_bitmap_rank(): for each value of the ascending
+ * range `[begin .. end)`, writes into `ans` the number of values in the
+ * bitmap that are smaller than or equal to it.
+ */
+void roaring_bitmap_rank_many(const roaring_bitmap_t *bm, const uint32_t* begin, const uint32_t* end, uint64_t* ans) {
+    uint64_t size = 0;  // cardinality of the containers already passed
+
+    int i = 0;
+    const uint32_t* iter = begin;
+    while(i < bm->high_low_container.size && iter != end) {
+        uint32_t x = *iter;
+        uint32_t xhigh = x >> 16;
+        uint32_t key = bm->high_low_container.keys[i];
+        if (xhigh > key) {
+            // the whole container lies below x: count it and move on
+            size +=
+                container_get_cardinality(bm->high_low_container.containers[i],
+                                          bm->high_low_container.typecodes[i]);
+            i++;
+        } else if (xhigh == key) {
+            // the container answers every leading query that shares this key
+            uint32_t consumed = container_rank_many(bm->high_low_container.containers[i],
+                                                    bm->high_low_container.typecodes[i],
+                                                    size, iter, end, ans);
+            iter += consumed;
+            ans += consumed;
+        } else {
+            // no container holds this key: nothing is added to the rank
+            *(ans++) = size;
+            iter++;
+        }
+    }
+    // Queries left once the containers are exhausted are greater than every
+    // stored value, so their rank is the total cardinality.
+    for (; iter != end; iter++) {
+        *(ans++) = size;
+    }
+}
 
 /**
  * roaring_bitmap_get_index returns the index of x, if not exsist return -1.
diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp
index ecec3c83d..14e0c504f 100644
--- a/tests/cpp_unit.cpp
+++ b/tests/cpp_unit.cpp
@@ -978,6 +978,26 @@ DEFINE_TEST(test_cpp_add_many) {
     assert_true(r1 == r2);
 }
 
+DEFINE_TEST(test_cpp_rank_many) {
+    std::vector<uint32_t> values = {123, 9999, 9999, 0xFFFFFFF7, 0xFFFFFFFF};
+    Roaring r1;
+    r1.addMany(values.size(), values.data());
+
+    std::vector<uint64_t> ranks(values.size());
+    r1.rank_many(values.data(), values.data()+values.size(), ranks.data());
+    std::vector<uint64_t> expect_ranks{1,2,2,3,4};
+    assert_true(ranks == expect_ranks);
+
+    // queries beyond the last container must be answered as well
+    Roaring r2;
+    r2.add(5);
+    std::vector<uint32_t> queries = {5, 0x10000, 0xFFFFFFFF};
+    std::vector<uint64_t> tail_ranks(queries.size(), 99);
+    r2.rank_many(queries.data(), queries.data()+queries.size(), tail_ranks.data());
+    std::vector<uint64_t> expect_tail{1,1,1};
+    assert_true(tail_ranks == expect_tail);
+}
+
 DEFINE_TEST(test_cpp_add_many_64) {
     {
         // 32-bit integers
@@ -2002,6 +2022,7 @@ int main() {
         cmocka_unit_test(test_cpp_add_range_closed_combinatoric_64),
         cmocka_unit_test(test_cpp_add_bulk),
         cmocka_unit_test(test_cpp_contains_bulk),
+        cmocka_unit_test(test_cpp_rank_many),
         cmocka_unit_test(test_cpp_remove_range_closed_64),
         cmocka_unit_test(test_cpp_remove_range_64),
         cmocka_unit_test(test_run_compression_cpp_64_true),
diff --git a/tests/toplevel_unit.c 
b/tests/toplevel_unit.c
index ebbf47ffe..621c34079 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -3606,6 +3606,17 @@ DEFINE_TEST(test_rank) {
             if (truerank != computedrank)
                 printf("%d != %d \n", (int)truerank, (int)computedrank);
             assert_true(truerank == computedrank);
+            // bulk API: each roaring_bitmap_rank_many() result must match rank(ans, card, ...)
+            uint32_t input[] = {z, z+1, z+10, z+100, z+1000};
+            uint64_t output[5];
+            roaring_bitmap_rank_many(r, input, input+5, output);
+            for(uint32_t i = 0; i < 5; i++) {
+                truerank = rank(ans, card, input[i]);
+                computedrank = output[i];
+                if (truerank != computedrank)
+                    printf("%d != %d \n", (int)truerank, (int)computedrank);
+                assert_true(truerank == computedrank);
+            }
         }
         free(ans);
         // now bitmap
@@ -3622,6 +3633,17 @@ DEFINE_TEST(test_rank) {
             if (truerank != computedrank)
                 printf("%d != %d \n", (int)truerank, (int)computedrank);
             assert_true(truerank == computedrank);
+            // bulk API: each roaring_bitmap_rank_many() result must match rank(ans, card, ...)
+            uint32_t input[] = {z, z+1, z+10, z+100, z+1000};
+            uint64_t output[5];
+            roaring_bitmap_rank_many(r, input, input+5, output);
+            for(uint32_t i = 0; i < 5; i++) {
+                truerank = rank(ans, card, input[i]);
+                computedrank = output[i];
+                if (truerank != computedrank)
+                    printf("%d != %d \n", (int)truerank, (int)computedrank);
+                assert_true(truerank == computedrank);
+            }
         }
         free(ans);
         // now run
@@ -3639,6 +3661,17 @@ DEFINE_TEST(test_rank) {
             if (truerank != computedrank)
                 printf("%d != %d \n", (int)truerank, (int)computedrank);
             assert_true(truerank == computedrank);
+            // bulk API: each roaring_bitmap_rank_many() result must match rank(ans, card, ...)
+            uint32_t input[] = {z, z+1, z+10, z+100, z+1000};
+            uint64_t output[5];
+            roaring_bitmap_rank_many(r, input, input+5, output);
+            for(uint32_t i = 0; i < 5; i++) {
+                truerank = rank(ans, card, input[i]);
+                computedrank = output[i];
+                if (truerank != computedrank)
+                    printf("%d != %d \n", (int)truerank, (int)computedrank);
+                assert_true(truerank == computedrank);
+            }
         }
         free(ans);
 