Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Packer improvements #73

Merged
merged 6 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: monty
Title: Monte Carlo Models
Version: 0.2.6
Version: 0.2.7
Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"),
email = "[email protected]"),
person("Wes", "Hinsley", role = "aut"),
Expand Down
172 changes: 161 additions & 11 deletions R/packer.R
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,36 @@
##' This approach generalises to higher dimensional input, though we
##' suspect you'll spend a bit of time head-scratching if you use it.
##'
##' We do not currently offer the ability to pack this sort of output
##' back up, though it's not hard. Please let us know if you would
##' use this.
##' # Packing lists into vectors and matrices
##'
##' The unpacking operation is very common - an MCMC proceeds,
##' produces an unstructured vector, and you unpack it into a list in
##' order to be able to easily work with it. The reverse is much less
##' common, where we take a list and convert it into a vector (or
##' matrix, or multidimensional array). Use of this direction
##' ("packing") may be more common where using packers to work with
##' the output of state-space models (e.g. in
##' [odin2](https://mrc-ide.github.io/odin2) or
##' [dust2](https://mrc-ide.github.io/dust2), which use this
##' machinery).
##'
##' The input to `pack()` will be the shape that `unpack()` returned;
##' a named list of numerical vectors, matrices and arrays. The names
##' must correspond do the names if your packer (i.e., `scalar` and
richfitz marked this conversation as resolved.
Show resolved Hide resolved
##' the names of `array`). Each element has dimensions
##'
##' ```
##' <...object, ...residual>
##' ```
##'
##' where `...object` is the dimensions of the data itself and
##' `...residual` is the dimensions of the hypothetical input to
##' `pack`.
##'
##' There is an unfortunate ambiguity in R's lack of true scalar types
##' that we cannot avoid. It is hard to tell the difference packing a
##' vector vs packing an array where all dimensions are 1. See the
##' examples, and please let us know if the behaviour needs changing.
##'
##' @title Build a parameter packer
##'
Expand Down Expand Up @@ -211,6 +238,21 @@
##' # The processed elements are ignored on the return pack:
##' p$pack(list(a = 1, b_flat = 2:4, b = matrix(c(2, 3, 3, 4), 2, 2)))
##' p$pack(list(a = 1, b_flat = 2:4))
##'
##' # R lacks scalars, which means that some packers will unpack
##' # different inputs to the same outputs:
##' p <- monty_packer(c("a", "b"))
##' p$unpack(1:2)
##' p$unpack(cbind(1:2))
##'
##' # This means that we can't reliably pack these inputs in a way
##' # that guarantees round-tripping is possible. We have chosen to
##' # prioritise the case where a *single vector* is round-trippable:
##' p$pack(list(a = 1, b = 2))
##'
##' # This ambiguity goes away if unpacking matices with more than one
##' # column:
##' p$unpack(matrix(1:6, 2, 3))
monty_packer <- function(scalar = NULL, array = NULL, fixed = NULL,
process = NULL) {
call <- environment()
Expand Down Expand Up @@ -286,14 +328,25 @@ monty_packer <- function(scalar = NULL, array = NULL, fixed = NULL,
}

pack <- function(p) {
res <- numeric(length(parameters))
if (!all(lengths(p[names(idx)]) == lengths(idx))) {
## Not quite enough, because we should check the dimensions too.
## That ends up being quite hard with integer checks possibly
## because we really want to use identical() - this will do for now.
cli::cli_abort("Invalid structure to 'pack()'")
## We might want a more explicit error here?
assert_named(p, unique = TRUE)
## TODO: drop any processed and fixed things here, which means
## we're more concerned about finding things we can reshape than
## anything else.
shp <- pack_check_dimensions(p, scalar, shape, names(fixed), process)
ret <- matrix(NA_real_, len, prod(shp))
for (nm in c(scalar, names(shape))) {
ret[idx[[nm]], ] <- p[[nm]]
}
unlist(lapply(names(idx), function(el) p[el]), TRUE, FALSE)

drop <- length(shp) == 0 ||
(length(shp) == 1 && (length(shape) == 0 || all(lengths(shape) == 0)))
if (drop) {
dim(ret) <- NULL
} else if (length(shp) > 1) {
dim(ret) <- c(len, shp)
}
ret
}

ret <- list(parameters = parameters,
Expand All @@ -317,8 +370,9 @@ prepare_pack_array <- function(name, shape, call = NULL) {
"'{name}' is not"),
arg = "array", call = call)
}
shape <- as.integer(shape)
if (length(shape) == 0) {
return(list(names = name, shape = 1, n = 1L))
return(list(names = name, shape = integer(0), n = 1L))
}
if (any(shape <= 0)) {
cli::cli_abort(
Expand Down Expand Up @@ -442,3 +496,99 @@ unpack_array <- function(x, parameters, len, idx, shape, fixed, process) {

res
}


## Now, we do some annoying calculations to make sure that what we've
## been gven has the correct size, etc.
richfitz marked this conversation as resolved.
Show resolved Hide resolved
pack_check_dimensions <- function(p, scalar, shape, fixed, process,
call = parent.frame()) {
if (length(scalar) > 0) {
shape <- c(set_names(rep(list(integer()), length(scalar)), scalar),
shape)
}

msg <- setdiff(names(shape), names(p))
extra <- if (is.null(process)) setdiff(names(p), c(names(shape), fixed))

err <- setdiff(names(shape), names(p))
if (length(err) > 0) {
cli::cli_abort("Missing element{?s} from input to pack: {squote(err)}",
call = call)
}
if (length(extra) > 0) {
cli::cli_abort(
"Unexpected element{?s} present in input to pack: {squote(extra)}",
call = call)
}

## It's very tedious to check if we can map a set of inputs to a
## single pack. What we expect is if we have an element 'x' with
## dimensions <a, b, c, ...> and we expect that the shape 's' of
## this is <a, b> we need to validate that the first dimensions of
## 'x' are 's' and thern record the remainder - that's the shape of
richfitz marked this conversation as resolved.
Show resolved Hide resolved
## the rest of the eventual object (so if there is no dimension left
## then it was originally a vector, if there's one element left our
## original input was a matrix and so on). Then we check that this
## remainder is consistent across all elements in the list.
##
## There are a couple of additional wrinkles due to scalar variables
## (these are ones where 's' has length 0). First, We can't drop
richfitz marked this conversation as resolved.
Show resolved Hide resolved
## things from 'd' with d[-i] because that will drop everything.
## Second, when working out the residual dimensions of the packed
## data we might have a situation where there are some scalars for
## which we think the residual dimension is 1 and some non-scalars
## where we think there is no residual dimension. This is actually
## the same situation. This is explored a bit in the tests and the
## examples above.
res <- lapply(names(shape), function(nm) {
d <- dim2(p[[nm]])
s <- shape[[nm]]
i <- seq_along(s)
if (length(d) >= length(s) && identical(d[i], s)) {
list(success = TRUE, residual = if (length(i) == 0) d else d[-i])
} else {
list(success = FALSE, shape = d[i])
}
})

err <- !vlapply(res, "[[", "success")
if (any(err)) {
err_nms <- names(shape)[err]
detail <- sprintf(
"%s: expected <%s>, given <%s>",
err_nms,
vcapply(shape[err], paste, collapse = ", "),
vcapply(res[err], function(x) paste(x$shape, collapse = ", ")))
cli::cli_abort(
c("Incompatible dimensions in input for {squote(names(shape)[err])}",
set_names(detail, "x")),
call = call)
}

residual <- lapply(res, "[[", "residual")
is_scalar <- lengths(residual) == 0
is_vector_output <- any(is_scalar) &&
all(lengths(residual[!is_scalar]) == 1) &&
all(vnapply(residual[!is_scalar], identity) == 1)
if (is_vector_output) {
return(integer())
}

ret <- residual[[1]]
ok <- vlapply(residual, identical, ret)
if (!all(ok)) {
residual[is_scalar] <- 1L
hash <- vcapply(residual, rlang::hash)
detail <- vcapply(unique(hash), function(h) {
sprintf("%s: <...%s>",
paste(squote(names(shape)[hash == h]), collapse = ", "),
paste(residual[hash == h][[1]], collapse = ", "))
})
cli::cli_abort(
c("Inconsistent residual dimension in inputs",
set_names(detail, "x")),
call = call)
}

ret
}
2 changes: 1 addition & 1 deletion R/util.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ dim2 <- function(x) {


"dim2<-" <- function(x, value) {
if (length(value) != 1) {
if (length(value) > 1) {
dim(x) <- value
}
x
Expand Down
47 changes: 44 additions & 3 deletions man/monty_packer.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading