Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add roaring_bitmap_rank_many(): get rank() values in Bulk #527

Merged
merged 3 commits into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions cpp/roaring.hh
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,14 @@ public:
return api::roaring_bitmap_rank(&roaring, x);
}

/**
 * Bulk variant of `rank()`: computes the rank of every value in
 * `[begin .. end)` and stores the results, in order, starting at `ans`.
 *
 * The input values must be sorted in ascending order, and `ans` must have
 * room for `end - begin` results.
 *
 * Equivalent to: for (auto* it = begin; it != end; ++it) *(ans++) = rank(*it);
 */
void rank_many(const uint32_t* begin, const uint32_t* end, uint64_t* ans) const noexcept {
    api::roaring_bitmap_rank_many(&roaring, begin, end, ans);
}

/**
* Returns the index of x in the set, index start from 0.
* If the set doesn't contain x , this function will return -1.
Expand Down
23 changes: 23 additions & 0 deletions include/roaring/containers/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,29 @@ inline int array_container_rank(const array_container_t *arr, uint16_t x) {
}
}

// Bulk version of array_container_rank().
//
// Starting at `begin`, consumes every query whose upper 16 bits match those
// of `*begin` and writes one result per consumed query to `ans`:
// `start_rank` plus the rank of the query's lower 16 bits inside `arr`.
// Consumption stops at `end` or at the first query that belongs to another
// container (different upper 16 bits). Queries must be in ascending order.
//
// Returns the number of consumed elements (0 when the range is empty).
inline uint32_t array_container_rank_many(const array_container_t *arr, uint64_t start_rank, const uint32_t* begin, const uint32_t* end, uint64_t* ans){
    if (begin == end) return 0;  // empty range: `*begin` must not be read

    const uint16_t high = (uint16_t)((*begin) >> 16);
    // Count of leading array entries already known to precede (or equal) the
    // current query. Because queries ascend, every later search can start
    // here, and `pos` only ever grows.
    uint32_t pos = 0;
    const uint32_t* iter = begin;
    for(; iter != end; iter++) {
        const uint32_t x = *iter;
        const uint16_t xhigh = (uint16_t)(x >> 16);
        if(xhigh != high) break;  // stop at next container

        // `idx` is relative to `arr->array + pos`, not to the array start.
        // A negative result is decoded as -(insertion point) - 1.
        const int32_t idx = binarySearch(arr->array+pos, arr->cardinality-pos, (uint16_t)x);
        if (idx >= 0) {
            // Found: everything up to and including the match is <= x.
            pos += (uint32_t)idx + 1;
        } else {
            // Absent: everything before the insertion point is < x.
            pos += (uint32_t)(-idx - 1);
        }
        *(ans++) = start_rank + pos;
    }
    return (uint32_t)(iter - begin);
}


/* Returns the index of x, or -1 if x does not exist */
inline int array_container_get_index(const array_container_t *arr, uint16_t x) {
const int32_t idx = binarySearch(arr->array, arr->cardinality, x);
Expand Down
3 changes: 3 additions & 0 deletions include/roaring/containers/bitset.h
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,9 @@ uint16_t bitset_container_maximum(const bitset_container_t *container);
/* Returns the number of values equal or smaller than x */
int bitset_container_rank(const bitset_container_t *container, uint16_t x);

// Bulk version of bitset_container_rank().
// Walks the queries in [begin, end) while their upper 16 bits match those of
// `*begin`; for each one, writes `start_rank` plus the rank of its lower
// 16 bits within `container` to `ans`.
// Returns the number of consumed elements.
uint32_t bitset_container_rank_many(const bitset_container_t *container, uint64_t start_rank, const uint32_t* begin, const uint32_t* end, uint64_t* ans);

/* Returns the index of x, or -1 if x does not exist */
int bitset_container_get_index(const bitset_container_t *container, uint16_t x);

Expand Down
22 changes: 22 additions & 0 deletions include/roaring/containers/containers.h
Original file line number Diff line number Diff line change
Expand Up @@ -2331,6 +2331,28 @@ static inline int container_rank(
return false;
}

// Bulk counterpart of container_rank(): resolves a possibly shared container
// to its concrete type and forwards to the matching *_container_rank_many()
// implementation. Returns the number of consumed elements.
static inline uint32_t container_rank_many(
    const container_t *c, uint8_t type,
    uint64_t start_rank, const uint32_t* begin, const uint32_t* end, uint64_t* ans
){
    const container_t *actual = container_unwrap_shared(c, &type);

    if (type == BITSET_CONTAINER_TYPE) {
        return bitset_container_rank_many(const_CAST_bitset(actual), start_rank, begin, end, ans);
    }
    if (type == ARRAY_CONTAINER_TYPE) {
        return array_container_rank_many(const_CAST_array(actual), start_rank, begin, end, ans);
    }
    if (type == RUN_CONTAINER_TYPE) {
        return run_container_rank_many(const_CAST_run(actual), start_rank, begin, end, ans);
    }

    // Unknown typecode: this point must never be reached.
    assert(false);
    roaring_unreachable;
    return 0;
}

// return the index of x, or -1 if x does not exist
static inline int container_get_index(const container_t *c, uint8_t type,
uint16_t x) {
Expand Down
3 changes: 3 additions & 0 deletions include/roaring/containers/run.h
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,9 @@ inline uint16_t run_container_maximum(const run_container_t *run) {
/* Returns the number of values equal or smaller than x */
int run_container_rank(const run_container_t *arr, uint16_t x);

// Bulk version of run_container_rank().
// Walks the queries in [begin, end) while their upper 16 bits match those of
// `*begin`; for each one, writes `start_rank` plus the rank of its lower
// 16 bits within the run container to `ans`.
// Returns the number of consumed elements.
uint32_t run_container_rank_many(const run_container_t *arr, uint64_t start_rank, const uint32_t* begin, const uint32_t* end, uint64_t* ans);

/* Returns the index of x, or -1 if x does not exist */
int run_container_get_index(const run_container_t *arr, uint16_t x);

Expand Down
11 changes: 11 additions & 0 deletions include/roaring/roaring.h
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,17 @@ bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank,
*/
uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x);

/**
 * roaring_bitmap_rank_many is a bulk version of `roaring_bitmap_rank`:
 * it stores the rank of each element of `[begin .. end)` into `ans[]`,
 * in the same order as the input.
 *
 * The values in `[begin .. end)` must be sorted in ascending order.
 * The caller is responsible for ensuring that enough memory is allocated
 * for the results, e.g.
 *
 * ans = malloc((end-begin) * sizeof(uint64_t));
 */
void roaring_bitmap_rank_many(const roaring_bitmap_t *r, const uint32_t* begin, const uint32_t* end, uint64_t* ans);

/**
* Returns the index of x in the given roaring bitmap.
* If the roaring bitmap doesn't contain x , this function will return -1.
Expand Down
30 changes: 30 additions & 0 deletions microbenchmarks/bench.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "bench.h"
#include <vector>


struct successive_intersection {
Expand Down Expand Up @@ -154,6 +155,35 @@ struct compute_cardinality {
auto ComputeCardinality = BasicBench<compute_cardinality>;
BENCHMARK(ComputeCardinality);

struct rank_many_slow {
static uint64_t run() {
std::vector<uint64_t> ranks(5);
for (size_t i = 0; i < count; ++i) {
ranks[0] = roaring_bitmap_rank(bitmaps[i], maxvalue/5);
ranks[1] = roaring_bitmap_rank(bitmaps[i], 2*maxvalue/5);
ranks[2] = roaring_bitmap_rank(bitmaps[i], 3*maxvalue/5);
ranks[3] = roaring_bitmap_rank(bitmaps[i], 4*maxvalue/5);
ranks[4] = roaring_bitmap_rank(bitmaps[i], maxvalue);
}
return ranks[0];
}
};
auto RankManySlow = BasicBench<rank_many_slow>;
BENCHMARK(RankManySlow);

// Bulk benchmark: same five probe values as rank_many_slow, but resolved by
// a single roaring_bitmap_rank_many() call per bitmap.
struct rank_many {
    static uint64_t run() {
        std::vector<uint32_t> input{maxvalue/5, 2*maxvalue/5, 3*maxvalue/5, 4*maxvalue/5, maxvalue};
        std::vector<uint64_t> ranks(5);
        const uint32_t *first = input.data();
        const uint32_t *last = first + input.size();
        for (size_t i = 0; i < count; ++i) {
            roaring_bitmap_rank_many(bitmaps[i], first, last, ranks.data());
        }
        return ranks.front();
    }
};
auto RankMany = BasicBench<rank_many>;
BENCHMARK(RankMany);

int main(int argc, char **argv) {
const char *dir_name;
if ((argc == 1) || (argc > 1 && argv[1][0] == '-')) {
Expand Down
23 changes: 23 additions & 0 deletions src/containers/bitset.c
Original file line number Diff line number Diff line change
Expand Up @@ -1232,6 +1232,29 @@ int bitset_container_rank(const bitset_container_t *container, uint16_t x) {
return sum;
}

// Bulk version of bitset_container_rank().
//
// Starting at `begin`, consumes every query whose upper 16 bits match those
// of `*begin` and writes one result per consumed query to `ans`:
// `start_rank` plus the number of set bits at positions <= the query's lower
// 16 bits. Consumption stops at `end` or at the first query with different
// upper 16 bits. Queries must be in ascending order, because the word cursor
// and the running popcount are carried from one query to the next and never
// move backwards.
//
// Returns the number of consumed elements (0 when the range is empty).
uint32_t bitset_container_rank_many(const bitset_container_t *container, uint64_t start_rank, const uint32_t* begin, const uint32_t* end, uint64_t* ans){
    if (begin == end) return 0;  // empty range: `*begin` must not be read

    const uint16_t high = (uint16_t)((*begin) >> 16);
    int i = 0;    // index of the first word not yet folded into `sum`
    int sum = 0;  // popcount of words[0 .. i)
    const uint32_t* iter = begin;
    for(; iter != end; iter++) {
        uint32_t x = *iter;
        uint16_t xhigh = (uint16_t)(x >> 16);
        if(xhigh != high) return (uint32_t)(iter - begin); // stop at next container

        uint16_t xlow = (uint16_t)x;
        // Fold in every whole word that lies strictly below x's word.
        for(int count = xlow / 64; i < count; i++){
            sum += roaring_hamming(container->words[i]);
        }
        uint64_t lastword = container->words[i];
        uint64_t lastpos = UINT64_C(1) << (xlow % 64);
        // Bits 0..(xlow % 64) set. For bit 63 the addition wraps to 0 and the
        // subtraction yields all ones, which is the intended mask.
        uint64_t mask = lastpos + lastpos - 1; // smear right
        *(ans++) = start_rank + sum + roaring_hamming(lastword & mask);
    }
    return (uint32_t)(iter - begin);
}


/* Returns the index of x , if not exsist return -1 */
int bitset_container_get_index(const bitset_container_t *container, uint16_t x) {
if (bitset_container_get(container, x)) {
Expand Down
33 changes: 33 additions & 0 deletions src/containers/run.c
Original file line number Diff line number Diff line change
Expand Up @@ -883,6 +883,39 @@ int run_container_rank(const run_container_t *container, uint16_t x) {
}
return sum;
}
// Bulk version of run_container_rank().
//
// Starting at `begin`, consumes every query whose upper 16 bits match those
// of `*begin` and writes one result per consumed query to `ans`:
// `start_rank` plus the number of values in the run container that are
// <= the query's lower 16 bits. Consumption stops at `end` or at the first
// query with different upper 16 bits. Queries must be in ascending order,
// because the run cursor and the running total are carried from one query to
// the next and never move backwards.
//
// Returns the number of consumed elements (0 when the range is empty).
uint32_t run_container_rank_many(const run_container_t *container, uint64_t start_rank, const uint32_t* begin, const uint32_t* end, uint64_t* ans){
    if (begin == end) return 0;  // empty range: `*begin` must not be read

    const uint16_t high = (uint16_t)((*begin) >> 16);
    const uint32_t* iter = begin;
    int sum = 0;  // number of values covered by runs[0 .. i)
    int i = 0;    // first run not yet known to end before the current query
    for(;iter != end; iter++) {
        uint32_t x = *iter;
        uint16_t xhigh = (uint16_t)(x >> 16);
        if(xhigh != high) return (uint32_t)(iter - begin); // stop at next container

        uint32_t x32 = x & 0xFFFF;
        while(i < container->n_runs) {
            uint32_t startpoint = container->runs[i].value;
            uint32_t length = container->runs[i].length;
            // Inclusive end: a run covers [value, value + length].
            uint32_t endpoint = length + startpoint;
            if (x32 <= endpoint) {
                if (x32 < startpoint) {
                    // x falls in the gap before run i.
                    *(ans++) = start_rank + sum;
                } else {
                    // x falls inside run i: count the run's values up to x.
                    *(ans++) = start_rank + sum + (x32 - startpoint) + 1;
                }
                break;
            } else {
                // Run i ends before x: count it fully and move on.
                sum += length + 1;
                i++;
            }
        }
        // All runs end before x; the break above was not taken for this query.
        if (i >= container->n_runs) *(ans++) = start_rank + sum;
    }

    return (uint32_t)(iter - begin);
}


int run_container_get_index(const run_container_t *container, uint16_t x) {
if (run_container_contains(container, x)) {
Expand Down
26 changes: 26 additions & 0 deletions src/roaring.c
Original file line number Diff line number Diff line change
Expand Up @@ -2796,6 +2796,32 @@ uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) {
}
return size;
}
/**
 * Bulk version of roaring_bitmap_rank(): for each value in `[begin .. end)`
 * (which must be sorted in ascending order) writes its rank into `ans`, in
 * input order. `ans` must have room for `end - begin` results.
 *
 * Containers and queries are walked together in a single forward pass, so
 * each container's cardinality is added at most once.
 */
void roaring_bitmap_rank_many(const roaring_bitmap_t *bm, const uint32_t* begin, const uint32_t* end, uint64_t* ans) {
    // Total cardinality of containers [0 .. i), i.e. of all containers whose
    // key is below the current query's key.
    uint64_t size = 0;

    int i = 0;
    const uint32_t* iter = begin;
    while(i < bm->high_low_container.size && iter != end) {
        uint32_t x = *iter;
        uint32_t xhigh = x >> 16;
        uint32_t key = bm->high_low_container.keys[i];
        if (xhigh > key) {
            // Container i lies entirely below x: count it and move on.
            size +=
                container_get_cardinality(bm->high_low_container.containers[i],
                                          bm->high_low_container.typecodes[i]);
            i++;
        } else if (xhigh == key) {
            // Let the container answer every query that shares this key.
            uint32_t consumed = container_rank_many(bm->high_low_container.containers[i],
                                                    bm->high_low_container.typecodes[i],
                                                    size, iter, end, ans);
            iter += consumed;
            ans += consumed;
        } else {
            // No container holds this key: the rank is what precedes it.
            *(ans++) = size;
            iter++;
        }
    }

    // Containers are exhausted (or the bitmap is empty) but queries remain.
    // They all lie above the last container, so their rank is the total
    // accumulated so far. Without this the tail of `ans` is never written.
    for (; iter != end; iter++) {
        *(ans++) = size;
    }
}

/**
* roaring_bitmap_get_index returns the index of x, or -1 if x does not exist.
Expand Down
12 changes: 12 additions & 0 deletions tests/cpp_unit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -978,6 +978,17 @@ DEFINE_TEST(test_cpp_add_many) {
assert_true(r1 == r2);
}

// rank_many() on a bitmap built from the query values themselves; the
// duplicated 9999 must yield the same rank twice.
DEFINE_TEST(test_cpp_rank_many) {
    const std::vector<uint32_t> queries{123, 9999, 9999, 0xFFFFFFF7, 0xFFFFFFFF};

    Roaring bitmap;
    bitmap.addMany(queries.size(), queries.data());

    std::vector<uint64_t> actual(queries.size());
    const uint32_t* first = queries.data();
    const uint32_t* last = first + queries.size();
    bitmap.rank_many(first, last, actual.data());

    const std::vector<uint64_t> expected{1, 2, 2, 3, 4};
    assert_true(actual == expected);
}

DEFINE_TEST(test_cpp_add_many_64) {
{
// 32-bit integers
Expand Down Expand Up @@ -2002,6 +2013,7 @@ int main() {
cmocka_unit_test(test_cpp_add_range_closed_combinatoric_64),
cmocka_unit_test(test_cpp_add_bulk),
cmocka_unit_test(test_cpp_contains_bulk),
cmocka_unit_test(test_cpp_rank_many),
cmocka_unit_test(test_cpp_remove_range_closed_64),
cmocka_unit_test(test_cpp_remove_range_64),
cmocka_unit_test(test_run_compression_cpp_64_true),
Expand Down
33 changes: 33 additions & 0 deletions tests/toplevel_unit.c
Original file line number Diff line number Diff line change
Expand Up @@ -3606,6 +3606,17 @@ DEFINE_TEST(test_rank) {
if (truerank != computedrank)
printf("%d != %d \n", (int)truerank, (int)computedrank);
assert_true(truerank == computedrank);

uint32_t input[] = {z, z+1, z+10, z+100, z+1000};
uint64_t output[5];
roaring_bitmap_rank_many(r, input, input+5, output);
for(uint32_t i = 0; i < 5; i++) {
truerank = rank(ans, card, input[i]);
computedrank = output[i];
if (truerank != computedrank)
printf("%d != %d \n", (int)truerank, (int)computedrank);
assert_true(truerank == computedrank);
}
}
free(ans);
// now bitmap
Expand All @@ -3622,6 +3633,17 @@ DEFINE_TEST(test_rank) {
if (truerank != computedrank)
printf("%d != %d \n", (int)truerank, (int)computedrank);
assert_true(truerank == computedrank);

uint32_t input[] = {z, z+1, z+10, z+100, z+1000};
uint64_t output[5];
roaring_bitmap_rank_many(r, input, input+5, output);
for(uint32_t i = 0; i < 5; i++) {
truerank = rank(ans, card, input[i]);
computedrank = output[i];
if (truerank != computedrank)
printf("%d != %d \n", (int)truerank, (int)computedrank);
assert_true(truerank == computedrank);
}
}
free(ans);
// now run
Expand All @@ -3639,6 +3661,17 @@ DEFINE_TEST(test_rank) {
if (truerank != computedrank)
printf("%d != %d \n", (int)truerank, (int)computedrank);
assert_true(truerank == computedrank);

uint32_t input[] = {z, z+1, z+10, z+100, z+1000};
uint64_t output[5];
roaring_bitmap_rank_many(r, input, input+5, output);
for(uint32_t i = 0; i < 5; i++) {
truerank = rank(ans, card, input[i]);
computedrank = output[i];
if (truerank != computedrank)
printf("%d != %d \n", (int)truerank, (int)computedrank);
assert_true(truerank == computedrank);
}
}
free(ans);

Expand Down