Skip to content

Commit

Permalink
Cache powerset creation/reduction
Browse files Browse the repository at this point in the history
Reducing powersets, i.e. reducing the number of set elements, is so
slow that most of the runtime for discover is repeat reductions, so
caching them speeds it up dramatically.
  • Loading branch information
CharnelMouse committed Nov 23, 2024
1 parent dd20dc5 commit a363b28
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 3 deletions.
17 changes: 15 additions & 2 deletions R/discover.r
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,9 @@ discover <- function(
"constructing powerset",
use_visited
)
# cache generated powerset and reductions, otherwise we spend a lot
# of time duplicating reduction work
all_powersets <- stats::setNames(list(powerset), max_n_lhs_attrs)
compute_partitions <- partition_computer(
unname(df[, nonfixed, drop = FALSE]),
accuracy,
Expand Down Expand Up @@ -247,7 +250,12 @@ discover <- function(
else
integer()
if (n_lhs_attrs > 0) {
nodes <- reduce_powerset(powerset, n_lhs_attrs)
if (n_lhs_attrs %in% names(all_powersets))
nodes <- all_powersets[[as.character(n_lhs_attrs)]]
else{
nodes <- reduce_powerset(powerset, n_lhs_attrs)
all_powersets[[as.character(n_lhs_attrs)]] <- nodes
}
simple_nodes <- to_nodes(seq_len(n_lhs_attrs), nodes)
lhss <- report$op(
rhs,
Expand Down Expand Up @@ -277,7 +285,12 @@ discover <- function(
}
valid_determinant_nonfixed_indices <- setdiff(valid_determinant_nonfixed_indices, rhs)
max_n_lhs_attrs <- max_n_lhs_attrs - 1L
powerset <- reduce_powerset(powerset, max_n_lhs_attrs)
if (max_n_lhs_attrs %in% names(all_powersets))
powerset <- all_powersets[[as.character(max_n_lhs_attrs)]]
else{
powerset <- reduce_powerset(powerset, max_n_lhs_attrs)
all_powersets[[as.character(max_n_lhs_attrs)]] <- powerset
}
}else
dependencies[[nonfixed[rhs]]] <- c(
dependencies[[nonfixed[rhs]]],
Expand Down
4 changes: 3 additions & 1 deletion R/powerset.r
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,9 @@ reduce_powerset <- function(powerset, cardinality) {
trimmed$bits
)
trimmed$bits <- lapply(trimmed$bits, utils::head, cardinality)
trimmed$parents <- lapply(trimmed$parents, \(x) match(x[x %in% keep], keep))
# updating parents is slow, and the main reason why caching powerset
# reductions in discover() saves a lot of time
trimmed$parents <- lapply(trimmed$parents, \(x) which(keep %in% x))
trimmed
}

Expand Down

0 comments on commit a363b28

Please sign in to comment.