Skip to content

Commit

Permalink
remove estimate_coef, ame to amp
Browse files Browse the repository at this point in the history
  • Loading branch information
helske committed Sep 4, 2024
1 parent 5a7cdcd commit e97e0fe
Show file tree
Hide file tree
Showing 22 changed files with 490 additions and 461 deletions.
5 changes: 3 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ S3method(posterior_probs,hmm)
S3method(posterior_probs,mhmm)
S3method(posterior_probs,mnhmm)
S3method(posterior_probs,nhmm)
S3method(predict,mnhmm)
S3method(predict,nhmm)
S3method(print,hmm)
S3method(print,mhmm)
S3method(print,mnhmm)
Expand All @@ -47,14 +49,13 @@ S3method(vcov,mhmm)
export("cluster_names<-")
export("state_names<-")
export(alphabet)
export(average_marginal_effect)
export(average_marginal_prediction)
export(build_hmm)
export(build_lcm)
export(build_mhmm)
export(build_mm)
export(build_mmm)
export(cluster_names)
export(estimate_coef)
export(estimate_mnhmm)
export(estimate_nhmm)
export(fit_model)
Expand Down
18 changes: 11 additions & 7 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
seqHMM 1.3.0
seqHMM 2.0.0
==============
* Rewrote sequence visualization functions using `ggseqplot` and `patchwork`
packages. Old plotting functions are deprecated and will be removed in the
future.
* Added support for non-homogeneous HMMs where initial, transition, and
emission probabilities can depend on individual-specific covariates.
Penalized maximum likelihood estimation of these models is based on
L-BFGS-B with automatic differentiation with `Stan`.
emission probabilities can depend on individual-specific covariates.
Penalized maximum likelihood estimation of these models is based on
L-BFGS-B with automatic differentiation with `Stan`.
* Rewrote sequence visualization functions using `ggseqplot` and `patchwork`
packages. Old plotting functions are deprecated and will be removed in the
future.
* Warning and error messages were rewritten using `cli` package.
* Removed the function `estimate_coef`. A portion of the code was
accidentally commented out, rendering the function non-functional for
several years. Rather than correcting the code, the function was removed as it
was deemed unnecessary.
* Added automatic tests using `testthat` package.
* Internally switched from Rd syntax to Markdown in Roxygen documentation.

Expand Down
4 changes: 0 additions & 4 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,6 @@ objectivex <- function(transition, emission, init, obs, ANZ, BNZ, INZ, nSymbols,
.Call(`_seqHMM_objectivex`, transition, emission, init, obs, ANZ, BNZ, INZ, nSymbols, coef, X, numberOfStates, threads)
}

estimate_coefs <- function(transition, emission, init, obs, nSymbols, coef, X, numberOfStates, itermax, tol, trace, threads) {
.Call(`_seqHMM_estimate_coefs`, transition, emission, init, obs, nSymbols, coef, X, numberOfStates, itermax, tol, trace, threads)
}

softmax <- function(x, logspace) {
.Call(`_seqHMM_softmax`, x, logspace)
}
Expand Down
148 changes: 0 additions & 148 deletions R/average_marginal_effect.R

This file was deleted.

146 changes: 146 additions & 0 deletions R/average_marginal_prediction.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#' Average Marginal Predictions for Non-homogenous Hidden Markov Models
#'
#' @param model A Hidden Markov Model of class `nhmm` or `mnhmm`.
#' @param variable Name of the variable of interest.
#' @param values Vector containing one or two values for `variable`.
#' @param newdata Optional data frame which is used for marginalization.
#' @param nsim Non-negative integer defining the number of samples from the
#' normal approximation of the model parameters used in
#' computing the approximate quantiles of the estimates. If `0`, only point
#' estimates are returned.
#' @param probs Vector defining the quantiles of interest. Default is
#' `c(0.025, 0.5, 0.975)`.
#' @export
average_marginal_prediction <- function(
model, variable, values, marginalize_B_over = "sequences", newdata = NULL,
nsim = 0, probs = c(0.025, 0.5, 0.975)) {
marginalize_over <- match.arg(
marginalize_over, c("sequences", "states", "clusters"), several.ok = TRUE)
stopifnot_(
checkmate::test_count(nsim),
"Argument {.arg nsim} should be a single non-negative integer."
)
stopifnot_(
checkmate::test_string(x = variable),
"Argument {.arg variable} must be a single character string."
)
stopifnot_(
length(values) != 2,
"Argument {.arg values} should contain two values for
variable {.var variable}.")
if (is.null(newdata)) {
time <- model$time_variable
id <- model$id_variable
stopifnot_(
is.data.frame(newdata),
"Argument {.arg newdata} must be a {.cls data.frame} object."
)
stopifnot_(
!is.null(newdata[[id]]),
"Can't find grouping variable {.var {id}} in {.arg newdata}."
)
stopifnot_(
!is.null(newdata[[time]]),
"Can't find time index variable {.var {time}} in {.arg newdata}."
)
stopifnot_(
!is.null(newdata[[variable]]),
"Can't find time variable {.var {variable}} in {.arg newdata}."
)
} else {
stopifnot(
!is.null(model$data),
"Model does not contain original data and argument {.arg newdata} is
{.var NULL}."
)
newdata <- model$data
}
# use same RNG seed so that the same samples of coefficients are drawn
newdata[[variable]] <- values[1]
seed <- sample(.Machine$integer.max, 1)
set.seed(seed)
pred <- predict(model, newdata, nsim, return_samples = TRUE)
if (length(values) == 2) {
newdata[[variable]] <- values[2]
set.seed(seed)
pred2 <- predict(model, newdata, nsim, return_samples = TRUE)
pred <- mapply("-", pred, pred2, SIMPLIFY = FALSE)
}
T <- model$length_of_sequences
N <- model$n_sequences
S <- model$n_states
M <- model$n_symbols
C <- model$n_channels
D <- model$n_clusters
if (C == 1) {
ids <- rownames(model$observations)
times <- colnames(model$observations)
symbol_names <- list(model$symbol_names)
} else {
ids <- rownames(model$observations[[1]])
times <- colnames(model$observations[[1]])
symbol_names <- model$symbol_names
}
marginalize <- dplyr::recode(
marginalize_B_over,
"clusters" = "cluster",
"states" = "state",
"sequences" = "id")
id_var <- model$id_variable
time_var <- model$time_variable

pi <- data.frame(
cluster = rep(model$cluster_names, each = S * N),
id = rep(ids, each = S),
state = model$state_names,
estimate = unlist(pred$pi)
) |>
dplyr::group_by(.data[[marginalize]]) |>
summarise(estimate = mean(estimate))
colnames(pi)[2] <- id_var

A <- data.frame(
cluster = rep(model$cluster_names, each = S^2 * T * N),
id = rep(ids, each = S^2 * T),
time = rep(times, each = S^2),
state_from = model$state_names,
state_to = rep(model$state_names, each = S),
estimate = unlist(pred$A)
)
colnames(A)[2] <- id_var
colnames(A)[3] <- time_var

B <- data.frame(
cluster = rep(model$cluster_names, each = S * sum(M) * T * N),
id = unlist(lapply(seq_len(C), function(i) rep(ids, each = S * M[i] * T))),
time = unlist(lapply(seq_len(C), function(i) rep(times, each = S * M[i]))),
state = model$state_names,
channel = unlist(lapply(seq_len(C), function(i) {
rep(model$channel_names[i], each = S * M[i]* T * N)
})),
observation = unlist(lapply(seq_len(C), function(i) {
rep(symbol_names[[i]], each = S)
})),
estimate = unlist(pred$B)
) |>
dplyr::group_by(.data[[marginalize]]) |>
summarise(estimate = mean(estimate))
idx <- which(names(B) == "id")
if(length(idx) == 1) names(B)[idx] <- id_var
idx <- which(names(B) == "time")
if(length(idx) == 1) names(B)[idx] <- time_var
if (C == 1) emission_probs$channel <- NULL

if (D > 1) {
omega <- data.frame(
cluster = model$cluster_names,
id = rep(ids, each = D),
estimate = c(pred$omega)
)
out <- list(omega = omega, pi = pi, A = A, B = B)
} else {
out <- list(pi = pi, A = A, B = B)
}
class(out) <- "amp"
out
}
Loading

0 comments on commit e97e0fe

Please sign in to comment.