From 77f98a13a5ca93cbe34743ced1e16946a8771630 Mon Sep 17 00:00:00 2001 From: Jouni Helske Date: Sat, 21 Dec 2024 07:36:36 +0200 Subject: [PATCH] fix feedback and autoregression --- NAMESPACE | 1 + R/RcppExports.R | 4 ++ R/build_fanhmm.R | 24 +++++-- R/create_initial_values.R | 8 +-- R/dnm_fanhmm.R | 2 +- R/simulate_fanhmm.R | 132 ++++++++++++++++++++++++++++++++++++++ man/simulate_fanhmm.Rd | 68 ++++++++++++++++++++ src/RcppExports.cpp | 22 +++++++ src/nhmm_simulate.cpp | 57 +++++++++++++++- 9 files changed, 306 insertions(+), 12 deletions(-) create mode 100644 R/simulate_fanhmm.R create mode 100644 man/simulate_fanhmm.Rd diff --git a/NAMESPACE b/NAMESPACE index 87df9bf..b7fb41d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -108,6 +108,7 @@ export(separate_mhmm) export(seqdef) export(seqstatf) export(simulate_emission_probs) +export(simulate_fanhmm) export(simulate_hmm) export(simulate_initial_probs) export(simulate_mhmm) diff --git a/R/RcppExports.R b/R/RcppExports.R index 9f8e5f3..140e842 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -221,6 +221,10 @@ simulate_mnhmm_multichannel <- function(eta_pi, X_pi, eta_A, X_A, eta_B, X_B, et .Call(`_seqHMM_simulate_mnhmm_multichannel`, eta_pi, X_pi, eta_A, X_A, eta_B, X_B, eta_omega, X_omega, M) } +simulate_fanhmm_singlechannel <- function(eta_pi, X_pi, eta_A, X_A, eta_B, X_B, rho_A, W_A, rho_B, W_B, obs_0) { + .Call(`_seqHMM_simulate_fanhmm_singlechannel`, eta_pi, X_pi, eta_A, X_A, eta_B, X_B, rho_A, W_A, rho_B, W_B, obs_0) +} + viterbi_nhmm_singlechannel <- function(eta_pi, X_pi, eta_A, X_A, eta_B, X_B, obs, Ti, icpt_only_pi, icpt_only_A, icpt_only_B, iv_A, iv_B, tv_A, tv_B) { .Call(`_seqHMM_viterbi_nhmm_singlechannel`, eta_pi, X_pi, eta_A, X_A, eta_B, X_B, obs, Ti, icpt_only_pi, icpt_only_A, icpt_only_B, iv_A, iv_B, tv_A, tv_B) } diff --git a/R/build_fanhmm.R b/R/build_fanhmm.R index 22bd007..f3b1d9d 100644 --- a/R/build_fanhmm.R +++ b/R/build_fanhmm.R @@ -30,7 +30,7 @@ build_fanhmm <- function( stopifnot_( out$model$n_channels == 1L, "Currently only single-channel responses are supported for FAN-HMM.") - if(is.null(autoregression_formula)) { + if(is.null(feedback_formula)) { out$model$W_A <- array( 0, c(0L, out$model$length_of_sequences - 1, out$model$n_sequences) ) @@ -39,21 +39,24 @@ build_fanhmm <- function( ) np_rho_A <- 0 } else { - W_A <- model_matrix_autoregression_formula( - autoregression_formula, data, + W_A <- model_matrix_feedback_formula( + feedback_formula, data, out$model$n_sequences, out$model$length_of_sequences, n_states, out$model$n_symbols, time, id, out$model$sequence_lengths ) + x_attr <- attributes(W_A$X) out$model$W_A <- W_A$X[, -1, , drop = FALSE] + x_attr$dim[2] <- x_attr$dim[2] - 1L + attributes(out$model$W_A) <- x_attr out$model$rhos$A <- create_rho_A_inits( NULL, n_states, out$model$n_symbols, nrow(out$model$W_A), 0 ) out$extras$intercept_only <- FALSE np_rho_A <- W_A$n_pars } - if(is.null(feedback_formula)) { + if(is.null(autoregression_formula)) { out$model$W_B <- array( 0, c(0L, out$model$length_of_sequences - 1, out$model$n_sequences) ) @@ -62,14 +65,17 @@ build_fanhmm <- function( ) np_rho_B <- 0 } else { - W_B <- model_matrix_feedback_formula( - feedback_formula, data, + W_B <- model_matrix_autoregression_formula( + autoregression_formula, data, out$model$n_sequences, out$model$length_of_sequences, n_states, out$model$n_symbols, time, id, out$model$sequence_lengths ) + x_attr <- attributes(W_B$X) out$model$W_B <- W_B$X[, -1, , drop = FALSE] + x_attr$dim[2] <- x_attr$dim[2] - 1L + attributes(out$model$W_B) <- x_attr out$model$rhos$B <- create_rho_B_inits( NULL, n_states, out$model$n_symbols, nrow(out$model$W_B), 0 ) @@ -80,8 +86,14 @@ build_fanhmm <- function( out$model$observations <- out$model$observations[, -1] out$model$length_of_sequences <- out$model$length_of_sequences - 1 out$model$sequence_lengths <- out$model$sequence_lengths - 1 + x_attr <- attributes(out$model$X_A) out$model$X_A <- out$model$X_A[, -1, , drop = FALSE] + x_attr$dim[2] <- x_attr$dim[2] - 1L + attributes(out$model$X_A) <- x_attr + x_attr <- attributes(out$model$X_B) out$model$X_B <- out$model$X_B[, -1, , drop = FALSE] + x_attr$dim[2] <- x_attr$dim[2] - 1L + attributes(out$model$X_B) <- x_attr structure( c( out$model, diff --git a/R/create_initial_values.R b/R/create_initial_values.R index 5cf7f77..16f2973 100644 --- a/R/create_initial_values.R +++ b/R/create_initial_values.R @@ -307,13 +307,13 @@ create_rho_A_inits <- function(x, S, M, L, init_sd = 0) { create_rho_A(numeric((S - 1) * L * (M - 1) * S), S, M, L, init_sd) } else { stopifnot_( - length(x$rho_A) == (S - 1) * L * (M - 1) * S, + length(unlist(x$rho_A)) == (S - 1) * L * (M - 1) * S, paste0( "Number of initial values for {.val rho_A} is not equal to ", "(S - 1) * L * (M - 1) * S = {(S - 1) * L * (M - 1) * S}." ) ) - create_rho_A(x$rho_A, S, M, L, init_sd) + create_rho_A(unlist(x$rho_A), S, M, L, init_sd) } } @@ -329,13 +329,13 @@ create_rho_B_inits <- function(x, S, M, L, init_sd = 0) { create_rho_B(numeric((M - 1) * L * (M - 1) * S), S, M, L, init_sd) } else { stopifnot_( - length(x$rho_B) == (M - 1) * L * (M - 1) * S, + length(unlist(x$rho_B)) == (M - 1) * L * (M - 1) * S, paste0( "Number of initial values for {.val rho_B} is not equal to ", "(M - 1) * L * (M - 1) * S = {(M - 1) * L * (M - 1) * S}." ) ) - create_rho_B(x$rho_B, S, M, L, init_sd) + create_rho_B(unlist(x$rho_B), S, M, L, init_sd) } } diff --git a/R/dnm_fanhmm.R b/R/dnm_fanhmm.R index 939977c..613825f 100644 --- a/R/dnm_fanhmm.R +++ b/R/dnm_fanhmm.R @@ -11,6 +11,7 @@ dnm_fanhmm <- function(model, inits, init_sd, restarts, lambda, bound, control, n_a <- attr(model, "np_rho_B") icpt_only_pi <- attr(model$X_pi, "icpt_only") icpt_only_A <- attr(model$X_A, "icpt_only") + icpt_only_B <- attr(model$X_B, "icpt_only") iv_A <- attr(model$X_A, "iv") iv_B <- attr(model$X_B, "iv") tv_A <- attr(model$X_A, "tv") @@ -20,7 +21,6 @@ dnm_fanhmm <- function(model, inits, init_sd, restarts, lambda, bound, control, iv_A <- TRUE tv_A <- TRUE } - icpt_only_B <- attr(model$X_B, "icpt_only") if (!is.null(model$autoregression_formula)) { icpt_only_B <- FALSE iv_B <- TRUE diff --git a/R/simulate_fanhmm.R b/R/simulate_fanhmm.R new file mode 100644 index 0000000..b222d00 --- /dev/null +++ b/R/simulate_fanhmm.R @@ -0,0 +1,132 @@ +#' Simulate Non-homogeneous Hidden Markov Models +#' +#' Simulate sequences of observed and hidden states given the parameters of a +#' non-homogeneous hidden Markov model. +#' +#' @param n_sequences The number of sequences to simulate. +#' @param sequence_lengths The lengths of the simulated sequences. +#' Either a scalar or vector of length `n_sequences`. +#' @param n_symbols A scalar or vector of length `n_channels` giving the number +#' of observed symbols per channel. +#' @param n_states The number of hidden states. +#' @param coefs If `coefs = "random"` (default), random coefficient values are +#' used. Otherwise `coefs` should be named list of `eta_pi`, `eta_A`, +#' and `eta_B`. +#' @param init_sd Standard deviation of the normal distribution used to generate +#' random coefficient values. Default is `2`. +#' @inheritParams estimate_mnhmm +#' +#' @return A list with the model used in simulation as well as the simulated +#' hidden state sequences. +#' @export +simulate_fanhmm <- function( + n_sequences, sequence_lengths, n_symbols, n_states, + initial_formula = ~1, transition_formula = ~1, + emission_formula = ~1, autoregression_formula = ~1, + feedback_formula = ~1, obs_0, data, time, id, coefs = "random", init_sd = 2) { + + stopifnot_( + !missing(n_sequences) && checkmate::test_int(x = n_sequences, lower = 1L), + "Argument {.arg n_sequences} must be a single positive integer." + ) + stopifnot_( + !missing(sequence_lengths) && + checkmate::test_integerish(x = sequence_lengths, lower = 2L), + "Argument {.arg sequence_lengths} must contain positive integer(s) larger than 1." + ) + stopifnot_( + !missing(n_symbols) && checkmate::test_integerish(x = n_symbols, lower = 2L), + "Argument {.arg n_symbols} must contain positive integer(s) larger than 1." + ) + stopifnot_( + !missing(n_states) && checkmate::test_int(x = n_states, lower = 2L), + "Argument {.arg n_states} must be a single positive integer larger than 1." + ) + stopifnot_( + !missing(data), + "{.arg data} is missing, use {.fn simulate_mhmm} instead." + ) + sequence_lengths <- rep(sequence_lengths, length.out = n_sequences) + n_channels <- length(n_symbols) + stopifnot_( + n_channels == 1, + "Currently only single-channel responses are supported for FAN-HMM." + ) + stopifnot_( + !missing(obs_0) && + checkmate::test_integerish(x = obs_0, lower = 1L, upper = n_symbols), + "Argument {.arg obs_0} should be an integer vector of length {.val n_sequences}." + ) + symbol_names <- as.character(seq_len(n_symbols)) + T_ <- max(sequence_lengths) + obs <- suppressWarnings(suppressMessages( + seqdef(matrix(symbol_names[1], n_sequences, T_ + 1), + alphabet = symbol_names + ))) + data0 <- data[data[[time]] == min(data[[time]]), ] + data0[[time]] <- min(data[[time]]) - min(diff(sort(unique(data[[time]])))) + data <- rbind( + data0, + data + ) + model <- build_fanhmm( + obs, n_states, initial_formula, transition_formula, emission_formula, + autoregression_formula, feedback_formula, data, time, id + ) + if (identical(coefs, "random")) { + coefs <- list( + initial_probs = NULL, + transition_probs = NULL, + emission_probs = NULL) + } else { + if (is.null(coefs$initial_probs)) coefs$initial_probs <- NULL + if (is.null(coefs$transition_probs)) coefs$transition_probs <- NULL + if (is.null(coefs$emission_probs)) coefs$emission_probs <- NULL + } + model$etas <- setNames( + create_initial_values(coefs, model, init_sd), c("pi", "A", "B") + ) + model$gammas$pi <- eta_to_gamma_mat(model$etas$pi) + model$gammas$A <- eta_to_gamma_cube(model$etas$A) + model$gammas$B <- eta_to_gamma_cube(model$etas$B) + model$rhos$A <- create_rho_A_inits( + coefs$rho_A, n_states, n_symbols, nrow(model$W_A), init_sd + ) + model$rhos$B <- create_rho_B_inits( + coefs$rho_B, n_states, n_symbols, nrow(model$W_B), init_sd + ) + out <- simulate_fanhmm_singlechannel( + model$etas$pi, model$X_pi, + model$etas$A, model$X_A, + model$etas$B, model$X_B, + model$rhos$A, model$W_A, + model$rhos$B, model$W_B, + as.integer(obs_0) - 1 + ) + for (i in seq_len(model$n_sequences)) { + Ti <- sequence_lengths[i] + if (Ti < T_) { + out$states[(Ti + 1):T_, i] <- NA + out$observations[, (Ti + 1):T_, i] <- NA + } + } + state_names <- model$state_names + symbol_names <- model$symbol_names + out$states[] <- state_names[c(out$states) + 1] + states <- suppressWarnings(suppressMessages( + seqdef( + matrix( + t(out$states), + n_sequences, max(sequence_lengths) + ), + alphabet = state_names, cnames = seq_len(T_) + ) + )) + + out$observations[] <- symbol_names[c(out$observations) + 1] + model$observations <- suppressWarnings(suppressMessages( + seqdef(t(out$observations), alphabet = symbol_names, cnames = seq_len(T_)) + )) + + list(model = model, states = states) +} diff --git a/man/simulate_fanhmm.Rd b/man/simulate_fanhmm.Rd new file mode 100644 index 0000000..26eaebe --- /dev/null +++ b/man/simulate_fanhmm.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulate_fanhmm.R +\name{simulate_fanhmm} +\alias{simulate_fanhmm} +\title{Simulate Non-homogeneous Hidden Markov Models} +\usage{ +simulate_fanhmm( + n_sequences, + sequence_lengths, + n_symbols, + n_states, + initial_formula = ~1, + transition_formula = ~1, + emission_formula = ~1, + autoregression_formula = ~1, + feedback_formula = ~1, + obs_0, + data, + time, + id, + coefs = "random", + init_sd = 2 +) +} +\arguments{ +\item{n_sequences}{The number of sequences to simulate.} + +\item{sequence_lengths}{The lengths of the simulated sequences. +Either a scalar or vector of length \code{n_sequences}.} + +\item{n_symbols}{A scalar or vector of length \code{n_channels} giving the number +of observed symbols per channel.} + +\item{n_states}{The number of hidden states.} + +\item{initial_formula}{of class \code{\link[=formula]{formula()}} for the +initial state probabilities.} + +\item{transition_formula}{of class \code{\link[=formula]{formula()}} for the +state transition probabilities.} + +\item{emission_formula}{of class \code{\link[=formula]{formula()}} for the +state emission probabilities.} + +\item{data}{A data frame containing the variables used in the model +formulas. Can be omitted in case of model with no covariates and observations +given as \code{stslist} objects.} + +\item{time}{Name of the time index variable in \code{data}.} + +\item{id}{Name of the id variable in \code{data} identifying different +sequences.} + +\item{coefs}{If \code{coefs = "random"} (default), random coefficient values are +used. Otherwise \code{coefs} should be named list of \code{eta_pi}, \code{eta_A}, +and \code{eta_B}.} + +\item{init_sd}{Standard deviation of the normal distribution used to generate +random coefficient values. Default is \code{2}.} +} +\value{ +A list with the model used in simulation as well as the simulated +hidden state sequences. +} +\description{ +Simulate sequences of observed and hidden states given the parameters of a +non-homogeneous hidden Markov model. +} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 55e4ea0..6755937 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -1119,6 +1119,27 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// simulate_fanhmm_singlechannel +Rcpp::List simulate_fanhmm_singlechannel(const arma::mat& eta_pi, const arma::mat& X_pi, const arma::cube& eta_A, const arma::cube& X_A, const arma::cube& eta_B, const arma::cube& X_B, const arma::field& rho_A, const arma::cube& W_A, const arma::field& rho_B, const arma::cube& W_B, const arma::uvec& obs_0); +RcppExport SEXP _seqHMM_simulate_fanhmm_singlechannel(SEXP eta_piSEXP, SEXP X_piSEXP, SEXP eta_ASEXP, SEXP X_ASEXP, SEXP eta_BSEXP, SEXP X_BSEXP, SEXP rho_ASEXP, SEXP W_ASEXP, SEXP rho_BSEXP, SEXP W_BSEXP, SEXP obs_0SEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type eta_pi(eta_piSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type X_pi(X_piSEXP); + Rcpp::traits::input_parameter< const arma::cube& >::type eta_A(eta_ASEXP); + Rcpp::traits::input_parameter< const arma::cube& >::type X_A(X_ASEXP); + Rcpp::traits::input_parameter< const arma::cube& >::type eta_B(eta_BSEXP); + Rcpp::traits::input_parameter< const arma::cube& >::type X_B(X_BSEXP); + Rcpp::traits::input_parameter< const arma::field& >::type rho_A(rho_ASEXP); + Rcpp::traits::input_parameter< const arma::cube& >::type W_A(W_ASEXP); + Rcpp::traits::input_parameter< const arma::field& >::type rho_B(rho_BSEXP); + Rcpp::traits::input_parameter< const arma::cube& >::type W_B(W_BSEXP); + Rcpp::traits::input_parameter< const arma::uvec& >::type obs_0(obs_0SEXP); + rcpp_result_gen = Rcpp::wrap(simulate_fanhmm_singlechannel(eta_pi, X_pi, eta_A, X_A, eta_B, X_B, rho_A, W_A, rho_B, W_B, obs_0)); + return rcpp_result_gen; +END_RCPP +} // viterbi_nhmm_singlechannel Rcpp::List viterbi_nhmm_singlechannel(arma::mat& eta_pi, const arma::mat& X_pi, arma::cube& eta_A, const arma::cube& X_A, arma::cube& eta_B, const arma::cube& X_B, const arma::umat& obs, const arma::uvec Ti, const bool icpt_only_pi, const bool icpt_only_A, const bool icpt_only_B, const bool iv_A, const bool iv_B, const bool tv_A, const bool tv_B); RcppExport SEXP _seqHMM_viterbi_nhmm_singlechannel(SEXP eta_piSEXP, SEXP X_piSEXP, SEXP eta_ASEXP, SEXP X_ASEXP, SEXP eta_BSEXP, SEXP X_BSEXP, SEXP obsSEXP, SEXP TiSEXP, SEXP icpt_only_piSEXP, SEXP icpt_only_ASEXP, SEXP icpt_only_BSEXP, SEXP iv_ASEXP, SEXP iv_BSEXP, SEXP tv_ASEXP, SEXP tv_BSEXP) { @@ -1746,6 +1767,7 @@ static const R_CallMethodDef CallEntries[] = { {"_seqHMM_simulate_nhmm_multichannel", (DL_FUNC) &_seqHMM_simulate_nhmm_multichannel, 7}, {"_seqHMM_simulate_mnhmm_singlechannel", (DL_FUNC) &_seqHMM_simulate_mnhmm_singlechannel, 8}, {"_seqHMM_simulate_mnhmm_multichannel", (DL_FUNC) &_seqHMM_simulate_mnhmm_multichannel, 9}, + {"_seqHMM_simulate_fanhmm_singlechannel", (DL_FUNC) &_seqHMM_simulate_fanhmm_singlechannel, 11}, {"_seqHMM_viterbi_nhmm_singlechannel", (DL_FUNC) &_seqHMM_viterbi_nhmm_singlechannel, 15}, {"_seqHMM_viterbi_nhmm_multichannel", (DL_FUNC) &_seqHMM_viterbi_nhmm_multichannel, 15}, {"_seqHMM_viterbi_mnhmm_singlechannel", (DL_FUNC) &_seqHMM_viterbi_mnhmm_singlechannel, 18}, diff --git a/src/nhmm_simulate.cpp b/src/nhmm_simulate.cpp index 70cf989..0a8288f 100644 --- a/src/nhmm_simulate.cpp +++ b/src/nhmm_simulate.cpp @@ -3,6 +3,8 @@ #include "get_parameters.h" #include "eta_to_gamma.h" +#include "create_Q.h" + // [[Rcpp::export]] Rcpp::List simulate_nhmm_singlechannel( const arma::mat& eta_pi, const arma::mat& X_pi, @@ -51,7 +53,7 @@ Rcpp::List simulate_nhmm_multichannel( arma::ucube y(C, T, N); arma::umat z(T, N); - + arma::mat gamma_pi = eta_to_gamma(eta_pi); arma::cube gamma_A = eta_to_gamma(eta_A); arma::field gamma_B = eta_to_gamma(eta_B); @@ -181,3 +183,56 @@ Rcpp::List simulate_mnhmm_multichannel( Rcpp::Named("states") = Rcpp::wrap(z) ); } + +// [[Rcpp::export]] +Rcpp::List simulate_fanhmm_singlechannel( + const arma::mat& eta_pi, const arma::mat& X_pi, + const arma::cube& eta_A, const arma::cube& X_A, + const arma::cube& eta_B, const arma::cube& X_B, + const arma::field& rho_A, const arma::cube& W_A, + const arma::field& rho_B, const arma::cube& W_B, + const arma::uvec& obs_0) { + arma::uword N = X_A.n_slices; + arma::uword T = X_A.n_cols; + arma::uword S = eta_A.n_slices; + arma::uword M = eta_B.n_rows + 1; + arma::umat y(T, N); + arma::umat z(T, N); + arma::mat Qs = create_Q(S); + arma::mat Qm = create_Q(M); + arma::mat gamma_pi = eta_to_gamma(eta_pi, Qs); + arma::cube gamma_A = eta_to_gamma(eta_A, Qs); + arma::cube gamma_B = eta_to_gamma(eta_B, Qm); + arma::field phi_A = rho_to_phi(rho_A, Qs); + arma::field phi_B = rho_to_phi(rho_B, Qm); + arma::vec pi(S); + arma::vec A(S); + arma::vec B(S); + arma::uvec seqS = arma::linspace(0, S - 1, S); + arma::uvec seqM = arma::linspace(0, M - 1, M); + for (arma::uword i = 0; i < N; i++) { + pi = get_pi(gamma_pi, X_pi.col(i)); + z(0, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqS, 1, false, pi)); + B = softmax( + gamma_B.slice(z(0, i)) * X_B.slice(i).col(0) + + phi_B(z(0, i)).slice(obs_0(i)) * W_B.slice(i).col(0) + ); + y(0, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqM, 1, false, B)); + for (arma::uword t = 1; t < T; t++) { + A = softmax( + gamma_A.slice(z(t - 1, i)) * X_A.slice(i).col(t - 1) + + phi_A(z(t - 1, i)).slice(y(t - 1, i)) * W_A.slice(i).col(t - 1) + ); + z(t, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqS, 1, false, A)); + B = softmax( + gamma_B.slice(z(t, i)) * X_B.slice(i).col(t) + + phi_B(z(t, i)).slice(y(t - 1, i)) * W_B.slice(i).col(t) + ); + y(t, i) = arma::as_scalar(Rcpp::RcppArmadillo::sample(seqM, 1, false, B)); + } + } + return Rcpp::List::create( + Rcpp::Named("observations") = Rcpp::wrap(y), + Rcpp::Named("states") = Rcpp::wrap(z) + ); +}