Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support vectors of logicals as an argument to filter_bitset. #199

Merged
merged 3 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ Suggests:
testthat (>= 2.1.0),
xml2,
bench
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
VignetteBuilder: knitr
LinkingTo:
Rcpp,
Expand Down
8 changes: 6 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,18 @@ bitset_to_vector <- function(b) {
.Call(`_individual_bitset_to_vector`, b)
}

filter_bitset_vector <- function(b, other) {
.Call(`_individual_filter_bitset_vector`, b, other)
filter_bitset_integer <- function(b, other) {
.Call(`_individual_filter_bitset_integer`, b, other)
}

filter_bitset_bitset <- function(b, other) {
.Call(`_individual_filter_bitset_bitset`, b, other)
}

filter_bitset_logical <- function(bitset, other) {
.Call(`_individual_filter_bitset_logical`, bitset, other)
}

bitset_choose <- function(b, k) {
invisible(.Call(`_individual_bitset_choose`, b, k))
}
Expand Down
19 changes: 12 additions & 7 deletions R/bitset.R
Original file line number Diff line number Diff line change
Expand Up @@ -246,23 +246,28 @@ all.equal.Bitset <- function(target, current, ...) {
#' @description This non-modifying function returns a new \code{\link{Bitset}}
#' object of the same maximum size as the original but which only contains
#' those values at the indices specified by the argument \code{other}.
#' Indices in \code{other} may be specified either as a vector of integers or as
#' another bitset. Please note that filtering by another bitset is not a
#' "bitwise and" intersection, and will have the same behavior as providing
#' an equivalent vector of integer indices.
#'
#' Indices in \code{other} may be specified either as a vector of logicals, a
#' vector of integers or as another bitset. If a vector of logicals is
#' specified, its length must equal the number of elements currently in the
#' bitset (\code{bitset$size()}), not the bitset's maximum size: the i-th
#' logical decides whether the i-th element of the bitset is kept. Please note
#' that filtering by another bitset is not a "bitwise and" intersection, and
#' will have the same behavior as providing an equivalent vector of integer
#' indices.
#' @param bitset the \code{\link{Bitset}} to filter
#' @param other the values to keep (may be a vector of intergers or another \code{\link{Bitset}})
#' @param other the values to keep (may be a vector of integers, logicals, or
#' another \code{\link{Bitset}})
#' @export
filter_bitset = function(bitset, other) {
if ( inherits(other, "Bitset")) {
if (inherits(other, "Bitset")) {
if (other$size() > 0) {
return(Bitset$new(from = filter_bitset_bitset(bitset$.bitset, other$.bitset)))
} else {
return(Bitset$new(size = bitset$max_size))
}
} else if (inherits(other, "logical")) {
return(Bitset$new(from = filter_bitset_logical(bitset$.bitset, other)))
} else {
if (length(other) > 0) {
return(Bitset$new(from = filter_bitset_vector(bitset$.bitset, as.integer(other))))
return(Bitset$new(from = filter_bitset_integer(bitset$.bitset, as.integer(other))))
} else {
return(Bitset$new(size = bitset$max_size))
}
Expand Down
13 changes: 8 additions & 5 deletions man/filter_bitset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/restore_object_state.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 18 additions & 5 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,15 +179,15 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// filter_bitset_vector
Rcpp::XPtr<individual_index_t> filter_bitset_vector(const Rcpp::XPtr<individual_index_t> b, std::vector<size_t> other);
RcppExport SEXP _individual_filter_bitset_vector(SEXP bSEXP, SEXP otherSEXP) {
// filter_bitset_integer
Rcpp::XPtr<individual_index_t> filter_bitset_integer(const Rcpp::XPtr<individual_index_t> b, std::vector<size_t> other);
RcppExport SEXP _individual_filter_bitset_integer(SEXP bSEXP, SEXP otherSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const Rcpp::XPtr<individual_index_t> >::type b(bSEXP);
Rcpp::traits::input_parameter< std::vector<size_t> >::type other(otherSEXP);
rcpp_result_gen = Rcpp::wrap(filter_bitset_vector(b, other));
rcpp_result_gen = Rcpp::wrap(filter_bitset_integer(b, other));
return rcpp_result_gen;
END_RCPP
}
Expand All @@ -203,6 +203,18 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// filter_bitset_logical
Rcpp::XPtr<individual_index_t> filter_bitset_logical(const Rcpp::XPtr<individual_index_t> bitset, Rcpp::LogicalVector other);
RcppExport SEXP _individual_filter_bitset_logical(SEXP bitsetSEXP, SEXP otherSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const Rcpp::XPtr<individual_index_t> >::type bitset(bitsetSEXP);
Rcpp::traits::input_parameter< Rcpp::LogicalVector >::type other(otherSEXP);
rcpp_result_gen = Rcpp::wrap(filter_bitset_logical(bitset, other));
return rcpp_result_gen;
END_RCPP
}
// bitset_choose
void bitset_choose(const Rcpp::XPtr<individual_index_t> b, const size_t k);
RcppExport SEXP _individual_bitset_choose(SEXP bSEXP, SEXP kSEXP) {
Expand Down Expand Up @@ -1469,8 +1481,9 @@ static const R_CallMethodDef CallEntries[] = {
{"_individual_bitset_sample", (DL_FUNC) &_individual_bitset_sample, 2},
{"_individual_bitset_sample_vector", (DL_FUNC) &_individual_bitset_sample_vector, 2},
{"_individual_bitset_to_vector", (DL_FUNC) &_individual_bitset_to_vector, 1},
{"_individual_filter_bitset_vector", (DL_FUNC) &_individual_filter_bitset_vector, 2},
{"_individual_filter_bitset_integer", (DL_FUNC) &_individual_filter_bitset_integer, 2},
{"_individual_filter_bitset_bitset", (DL_FUNC) &_individual_filter_bitset_bitset, 2},
{"_individual_filter_bitset_logical", (DL_FUNC) &_individual_filter_bitset_logical, 2},
{"_individual_bitset_choose", (DL_FUNC) &_individual_bitset_choose, 2},
{"_individual_create_categorical_variable", (DL_FUNC) &_individual_create_categorical_variable, 2},
{"_individual_categorical_variable_get_size", (DL_FUNC) &_individual_categorical_variable_get_size, 1},
Expand Down
24 changes: 23 additions & 1 deletion src/bitset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ std::vector<size_t> bitset_to_vector(const Rcpp::XPtr<individual_index_t> b) {
}

//[[Rcpp::export]]
Rcpp::XPtr<individual_index_t> filter_bitset_vector(
Rcpp::XPtr<individual_index_t> filter_bitset_integer(
const Rcpp::XPtr<individual_index_t> b,
std::vector<size_t> other
) {
Expand Down Expand Up @@ -163,6 +163,28 @@ Rcpp::XPtr<individual_index_t> filter_bitset_bitset(
);
}

// Filter a bitset with a positional vector of logicals.
//
// `other` must hold exactly one flag per element currently stored in `bitset`
// (i.e. `other.size() == bitset->size()`); the i-th flag decides whether the
// i-th element, in iteration order, is copied into the result. The result is
// a newly allocated bitset with the same maximum size as `bitset`; the input
// is not modified.
//
// Only flags that are TRUE keep an element. NA flags are treated like FALSE,
// which matches filtering by `which(other)` on the R side.
//
// Raises an R error (via Rcpp::stop) when the lengths differ.
//[[Rcpp::export]]
Rcpp::XPtr<individual_index_t> filter_bitset_logical(
    const Rcpp::XPtr<individual_index_t> bitset,
    Rcpp::LogicalVector other
    ) {
    // other.size() is a signed R_xlen_t; cast explicitly so the comparison
    // with the bitset's size is not a mixed signed/unsigned comparison.
    if (bitset->size() != static_cast<size_t>(other.size())) {
        Rcpp::stop("vector of logicals must equal the size of the bitset");
    }

    individual_index_t result(bitset->max_size());

    auto bitset_it = bitset->begin();
    auto other_it = other.begin();
    for (; bitset_it != bitset->end() && other_it != other.end(); ++bitset_it, ++other_it) {
        // Logical elements are stored as int: 0, 1 or NA_LOGICAL. NA_LOGICAL
        // is non-zero, so a plain truthiness test would wrongly keep it.
        const int flag = *other_it;
        if (flag != NA_LOGICAL && flag != 0) {
            result.insert(*bitset_it);
        }
    }

    return Rcpp::XPtr<individual_index_t>(new individual_index_t(std::move(result)), true);
}

//[[Rcpp::export]]
void bitset_choose(
const Rcpp::XPtr<individual_index_t> b,
Expand Down
4 changes: 3 additions & 1 deletion tests/performance/bench-bitset.R
Original file line number Diff line number Diff line change
Expand Up @@ -261,12 +261,14 @@ filter_bset <- bench::press(
index <- individual::Bitset$new(size = limit)$insert(1:limit)
vector_idx <- create_random_data(size = size, limit = limit)
bset_idx <- individual::Bitset$new(size = limit)$insert(vector_idx)
logical_idx <- 1:limit %in% vector_idx
bench::mark(
min_iterations = 50,
check = FALSE,
filter_gc = TRUE,
vector = {individual::filter_bitset(bitset = index, other = vector_idx)},
bset = {individual::filter_bitset(bitset = index, other = bset_idx)}
bset = {individual::filter_bitset(bitset = index, other = bset_idx)},
logical = {individual::filter_bitset(bitset = index, other = logical_idx)},
)
},
.grid = args_grid
Expand Down
14 changes: 14 additions & 0 deletions tests/testthat/test-bitset.R
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,20 @@ test_that("bitset filtering works for vector input", {
expect_equal(filter_bitset(b, f)$to_vector(), c(1, 6))
})

# A logical filter is positional: one flag per element held in the bitset,
# so flags (TRUE, FALSE, TRUE) over elements (1, 5, 6) keep 1 and 6.
test_that("bitset filtering works for logical input", {
  keep <- c(TRUE, FALSE, TRUE)
  population <- Bitset$new(10)$insert(c(1, 5, 6))
  kept <- filter_bitset(population, keep)
  expect_equal(kept$to_vector(), c(1, 6))
})

# The bitset holds three elements, so a two-element logical vector must be
# rejected rather than recycled or padded.
test_that("bitset filtering errors when logical vector is too short", {
  too_short <- c(TRUE, FALSE)
  population <- Bitset$new(10)$insert(c(1, 5, 6))
  expect_error(
    filter_bitset(population, too_short),
    "vector of logicals must equal the size of the bitset"
  )
})

test_that("bitset filtering works for vector input with jump at the start", {
b <- Bitset$new(10)$insert(c(1, 5, 6, 10))
f <- c(2, 4)
Expand Down
4 changes: 2 additions & 2 deletions vignettes/Performance.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ probs <- runif(n)

keep <- probs >= 0.5

stay <- filter_bitset(bitset = bset,other = which(keep))
leave <- filter_bitset(bitset = bset,other = which(!keep))
stay <- filter_bitset(bitset = bset, other = keep)
leave <- filter_bitset(bitset = bset, other = !keep)
```

This pattern is almost always slower than using the sample method with a set difference:
Expand Down
Loading