diff --git a/NAMESPACE b/NAMESPACE
index feddd87a..c931c134 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -29,6 +29,8 @@ S3method(posterior_probs,hmm)
 S3method(posterior_probs,mhmm)
 S3method(posterior_probs,mnhmm)
 S3method(posterior_probs,nhmm)
+S3method(predict,mnhmm)
+S3method(predict,nhmm)
 S3method(print,hmm)
 S3method(print,mhmm)
 S3method(print,mnhmm)
@@ -47,14 +49,13 @@ S3method(vcov,mhmm)
 export("cluster_names<-")
 export("state_names<-")
 export(alphabet)
-export(average_marginal_effect)
+export(average_marginal_prediction)
 export(build_hmm)
 export(build_lcm)
 export(build_mhmm)
 export(build_mm)
 export(build_mmm)
 export(cluster_names)
-export(estimate_coef)
 export(estimate_mnhmm)
 export(estimate_nhmm)
 export(fit_model)
diff --git a/NEWS.md b/NEWS.md
index 2d2de5fb..a91a8aa9 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,13 +1,17 @@
-seqHMM 1.3.0
+seqHMM 2.0.0
 ==============
-  * Rewrote sequence visualization functions using `ggseqplot` and `patchwork` 
-  packages. Old plotting functions are deprecated and will be removed in the 
-  future.
   * Added support for non-homogeneous HMMs where initial, transition, and 
-  emission probabilities can depend on individual-specific covariates. 
-  Penalized maximum likelihood estimation of these models is based on 
-  L-BFGS-B with automatic differentiation with `Stan`.
+emission probabilities can depend on individual-specific covariates. 
+Penalized maximum likelihood estimation of these models is based on 
+L-BFGS-B with automatic differentiation with `Stan`.
+  * Rewrote sequence visualization functions using `ggseqplot` and `patchwork` 
+packages. Old plotting functions are deprecated and will be removed in the 
+future.
   * Warning and error messages were rewritten using `cli` package.
+  * Removed the function `estimate_coef`. A portion of the code was 
+accidentally commented out, rendering the function non-functional for 
+several years. Rather than correcting the code, the function was removed as it 
+was deemed unnecessary.
   * Added automatic tests using `testthat` package.
   * Internally switched from Rd syntax to Markdown in Roxygen documentation.
 
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 53f3d84a..12355e52 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -145,10 +145,6 @@ objectivex <- function(transition, emission, init, obs, ANZ, BNZ, INZ, nSymbols,
     .Call(`_seqHMM_objectivex`, transition, emission, init, obs, ANZ, BNZ, INZ, nSymbols, coef, X, numberOfStates, threads)
 }
 
-estimate_coefs <- function(transition, emission, init, obs, nSymbols, coef, X, numberOfStates, itermax, tol, trace, threads) {
-    .Call(`_seqHMM_estimate_coefs`, transition, emission, init, obs, nSymbols, coef, X, numberOfStates, itermax, tol, trace, threads)
-}
-
 softmax <- function(x, logspace) {
     .Call(`_seqHMM_softmax`, x, logspace)
 }
diff --git a/R/average_marginal_effect.R b/R/average_marginal_effect.R
deleted file mode 100644
index e008c92b..00000000
--- a/R/average_marginal_effect.R
+++ /dev/null
@@ -1,148 +0,0 @@
-#' Average Marginal Effects of Covariates of Non-homogenous Hidden Markov Models
-#' 
-#' @param model A Hidden Markov Model of class `nhmm` or `mnhmm`.
-#' @param variable Name of the variable of interest.
-#' @param values Vector containing two values for `variable`.
-#' @param newdata Optional data frame which is used for marginalization.
-#' @param nsim Non-negative integer defining the number of samples from the 
-#' normal approximation of the model parameters used in 
-#' computing the approximate quantiles of the estimates. If `0`, only point 
-#' estimates are returned.
-#' @param probs Vector defining the quantiles of interest. Default is 
-#' `c(0.025, 0.975)`.
-#' @export
-average_marginal_effect <- function(
-    model, variable, values, newdata = NULL, 
-    nsim = 0, 
-    probs = c(0.025, 0.975)) {
-  stopifnot_(
-    checkmate::test_count(nsim),
-    "Argument {.arg nsim} should be a single non-negative integer."
-  )
-  stopifnot_(
-    checkmate::test_string(x = variable), 
-    "Argument {.arg variable} must be a single character string."
-  )
-  stopifnot_(
-    length(values) != 2, 
-    "Argument {.arg values} should contain two values for 
-    variable {.var variable}.")
-  if (is.null(newdata)) {
-    time <- model$time_variable
-    id <- model$id_variable
-    stopifnot_(
-      is.data.frame(newdata), 
-      "Argument {.arg newdata} must be a {.cls data.frame} object."
-    )
-    stopifnot_(
-      !is.null(newdata[[id]]), 
-      "Can't find grouping variable {.var {id}} in {.arg newdata}."
-    )
-    stopifnot_(
-      !is.null(newdata[[time]]), 
-      "Can't find time index variable {.var {time}} in {.arg newdata}."
-    )
-    stopifnot_(
-      !is.null(newdata[[variable]]), 
-      "Can't find time variable {.var {variable}} in {.arg newdata}."
-    )
-  } else {
-    stopifnot(
-      !is.null(model$data),
-      "Model does not contain original data and argument {.arg newdata} is 
-      {.var NULL}."
-    )
-    newdata <- model$data
-  }
-  
-  beta_i_raw <- stan_to_cpp_initial(
-    model$estimation_results$parameters$beta_i_raw
-  )
-  beta_s_raw <- stan_to_cpp_transition(
-    model$estimation_results$parameters$beta_s_raw
-  )
-  beta_o_raw <- stan_to_cpp_emission(
-    model$estimation_results$parameters$beta_o_raw,
-    1,
-    C > 1
-  )
-  newdata[[variable]] <- values[1]
-  model <- update(model, newdata = newdata)
-  X_initial1 <- t(model$X_initial)
-  X_transition1 <- aperm(model$X_transition, c(3, 1, 2))
-  X_emission1 <- aperm(model$X_emission, c(3, 1, 2))
-  newdata[[variable]] <- values[2]
-  model <- update(model, newdata = newdata)
-  X_initial2 <- t(model$X_initial)
-  X_transition2 <- aperm(model$X_transition, c(3, 1, 2))
-  X_emission2 <- aperm(model$X_emission, c(3, 1, 2))
-  
-  ame_pi <- get_pi(beta_i_raw, X_initial1, 0) - 
-    get_pi(beta_i_raw, X_initial2, 0)
-  ame_A <- get_A(beta_s_raw, X_transition1, 0) - 
-    get_A(beta_s_raw, X_transition2, 0)
-  ame_B <- if (model$n_channels == 1) {
-    get_B(beta_o_raw, X_emission1, 0) - get_B(beta_o_raw, X_emission2, 0)
-  } else {
-    get_multichannel_B(
-      beta_o_raw, 
-      X_emission1, 
-      model$n_states, 
-      model$n_channels, 
-      model$n_symbols, 
-      0, 0) -
-      get_multichannel_B(
-        beta_o_raw, 
-        X_emission2, 
-        model$n_states, 
-        model$n_channels, 
-        model$n_symbols, 
-        0, 0)
-  }
-  if (nsim > 0) {
-    stopifnot_(
-      checkmate::test_numeric(
-        x = probs, lower = 0, upper = 1, any.missing = FALSE, min.len = 1L
-      ),
-      "Argument {.arg probs} must be a {.cls numeric} vector with values
-      between 0 and 1."
-    )
-    chol_precision <- chol(-model$estimation$hessian)
-    U <- backsolve(chol_precision, diag(ncol(chol_precision)))
-    x <- matrix(rnorm(nsim * ncol(U)), nrow = nsim) %*% U
-    x <- t(sweep(x, 2, c(beta_i_raw, beta_s_raw, beta_o_raw), "+"))
-    p_i <- length(beta_i_raw)
-    p_s <- length(beta_s_raw)
-    p_o <- length(beta_o_raw)
-    
-    pi_samples <- apply(
-      x[seq_len(p_i), ], 2, function(z) {
-        z <- array(z, dim = dim(beta_i_raw))
-        z <- stan_to_cpp_initial(z)
-        get_pi(z, X_initial1) - get_pi(z, X_initial2)
-      }
-    )
-    A_samples <- apply(
-      x[p_i + seq_len(p_s), ], 2, function(z) {
-        z <- array(z, dim = dim(beta_s_raw))
-        z <- stan_to_cpp_transition(z)
-        unlist(get_A(z, X_transition1)) - 
-          unlist(get_A(z, X_transition2))
-      }
-    )
-    B_samples <- apply(
-      x[p_i + p_s + seq_len(p_o), ], 2, function(z) {
-        z <- array(z, dim = dim(beta_o_raw))
-        z <- stan_to_cpp_emission(z, 1, model$n_channels > 1)
-        unlist(get_B(aperm(z, c(2, 3, 1)), X_emission1)) -
-          unlist(get_B(aperm(z, c(2, 3, 1)), X_emission2))
-      }
-    )
-    
-    quantiles <- fast_quantiles(samples, probs)
-    for(i in seq_along(probs)) {
-      transition_probs[paste0("q", 100 * probs[i])] <- quantiles[, i]
-    }
-  }
-  class(out) <- "ame"
-}
diff --git a/R/average_marginal_prediction.R b/R/average_marginal_prediction.R
new file mode 100644
index 00000000..d9d599d6
--- /dev/null
+++ b/R/average_marginal_prediction.R
@@ -0,0 +1,146 @@
+#' Average Marginal Predictions for Non-homogenous Hidden Markov Models
+#' 
+#' @param model A Hidden Markov Model of class `nhmm` or `mnhmm`.
+#' @param variable Name of the variable of interest.
+#' @param values Vector containing one or two values for `variable`.
+#' @param newdata Optional data frame which is used for marginalization.
+#' @param nsim Non-negative integer defining the number of samples from the 
+#' normal approximation of the model parameters used in 
+#' computing the approximate quantiles of the estimates. If `0`, only point 
+#' estimates are returned.
+#' @param probs Vector defining the quantiles of interest. Default is 
+#' `c(0.025, 0.5, 0.975)`.
+#' @export
+average_marginal_prediction <- function(
+    model, variable, values, marginalize_B_over = "sequences", newdata = NULL, 
+    nsim = 0, probs = c(0.025, 0.5, 0.975)) {
+  marginalize_over <- match.arg(
+    marginalize_over, c("sequences", "states", "clusters"), several.ok = TRUE)
+  stopifnot_(
+    checkmate::test_count(nsim),
+    "Argument {.arg nsim} should be a single non-negative integer."
+  )
+  stopifnot_(
+    checkmate::test_string(x = variable), 
+    "Argument {.arg variable} must be a single character string."
+  )
+  stopifnot_(
+    length(values) != 2, 
+    "Argument {.arg values} should contain two values for 
+    variable {.var variable}.")
+  if (is.null(newdata)) {
+    time <- model$time_variable
+    id <- model$id_variable
+    stopifnot_(
+      is.data.frame(newdata), 
+      "Argument {.arg newdata} must be a {.cls data.frame} object."
+    )
+    stopifnot_(
+      !is.null(newdata[[id]]), 
+      "Can't find grouping variable {.var {id}} in {.arg newdata}."
+    )
+    stopifnot_(
+      !is.null(newdata[[time]]), 
+      "Can't find time index variable {.var {time}} in {.arg newdata}."
+    )
+    stopifnot_(
+      !is.null(newdata[[variable]]), 
+      "Can't find time variable {.var {variable}} in {.arg newdata}."
+    )
+  } else {
+    stopifnot(
+      !is.null(model$data),
+      "Model does not contain original data and argument {.arg newdata} is 
+      {.var NULL}."
+    )
+    newdata <- model$data
+  }
+  # use same RNG seed so that the same samples of coefficients are drawn
+  newdata[[variable]] <- values[1]
+  seed <- sample(.Machine$integer.max, 1)
+  set.seed(seed)
+  pred <- predict(model, newdata, nsim, return_samples = TRUE)
+  if (length(values) == 2) {
+    newdata[[variable]] <- values[2]
+    set.seed(seed)
+    pred2 <- predict(model, newdata, nsim, return_samples = TRUE)
+    pred <- mapply("-", pred, pred2, SIMPLIFY = FALSE)
+  }
+  T <- model$length_of_sequences
+  N <- model$n_sequences
+  S <- model$n_states
+  M <- model$n_symbols
+  C <- model$n_channels
+  D <- model$n_clusters
+  if (C == 1) {
+    ids <- rownames(model$observations)
+    times <- colnames(model$observations)
+    symbol_names <- list(model$symbol_names)
+  } else {
+    ids <- rownames(model$observations[[1]])
+    times <- colnames(model$observations[[1]])
+    symbol_names <- model$symbol_names
+  }
+  marginalize <- dplyr::recode(
+    marginalize_B_over,
+    "clusters" = "cluster",
+    "states" = "state",
+    "sequences" = "id")
+  id_var <- model$id_variable
+  time_var <- model$time_variable
+  
+  pi <- data.frame(
+    cluster = rep(model$cluster_names, each = S * N),
+    id = rep(ids, each = S),
+    state = model$state_names,
+    estimate = unlist(pred$pi)
+  ) |> 
+    dplyr::group_by(.data[[marginalize]]) |>
+    summarise(estimate = mean(estimate))
+  colnames(pi)[2] <- id_var
+  
+  A <- data.frame(
+    cluster = rep(model$cluster_names, each = S^2 * T * N),
+    id = rep(ids, each = S^2 * T),
+    time = rep(times, each = S^2),
+    state_from = model$state_names,
+    state_to = rep(model$state_names, each = S),
+    estimate = unlist(pred$A)
+  )
+  colnames(A)[2] <- id_var
+  colnames(A)[3] <- time_var
+  
+  B <- data.frame(
+    cluster =  rep(model$cluster_names, each = S * sum(M) * T * N),
+    id = unlist(lapply(seq_len(C), function(i) rep(ids, each = S * M[i] * T))),
+    time = unlist(lapply(seq_len(C), function(i) rep(times, each = S * M[i]))),
+    state = model$state_names,
+    channel = unlist(lapply(seq_len(C), function(i) {
+      rep(model$channel_names[i], each = S * M[i]* T * N)
+    })),
+    observation = unlist(lapply(seq_len(C), function(i) {
+      rep(symbol_names[[i]], each = S)
+    })),
+    estimate = unlist(pred$B)
+  ) |> 
+    dplyr::group_by(.data[[marginalize]]) |>
+    summarise(estimate = mean(estimate))
+  idx <- which(names(B) == "id")
+  if(length(idx) == 1) names(B)[idx] <- id_var
+  idx <- which(names(B) == "time")
+  if(length(idx) == 1) names(B)[idx] <- time_var
+  if (C == 1) emission_probs$channel <- NULL
+  
+  if (D > 1) {
+    omega <- data.frame(
+      cluster = model$cluster_names,
+      id = rep(ids, each = D),
+      estimate = c(pred$omega)
+    )
+    out <- list(omega = omega, pi = pi, A = A, B = B)  
+  } else {
+    out <- list(pi = pi, A = A, B = B)
+  }
+  class(out) <- "amp"
+  out
+}
diff --git a/R/estimate_coef.R b/R/estimate_coef.R
deleted file mode 100644
index f94880ba..00000000
--- a/R/estimate_coef.R
+++ /dev/null
@@ -1,47 +0,0 @@
-#' Estimate Only the Regression Coefficients of Mixture Hidden Markov Models
-#'
-#' Function `estimate_coef` estimates the regression coefficients of 
-#' mixture hidden Markov models of class `mhmm` and its restricted variants 
-#' while keeping other parameters fixed.
-#'
-#' @export
-#' @param model An object of class `mhmm`.
-#' @param threads Number of threads to use in parallel computing. 
-#' The default is 1.
-estimate_coef <- function(model, threads = 1) {
-  stopifnot_(
-    inherits(model, "mhmm"),
-    "{.arg model} must be a {.cls mhmm} object."
-  )
-  stopifnot_(
-    checkmate::test_int(x = threads, lower = 1L), 
-    "Argument {.arg threads} must be a single positive integer."
-  )
-  df <- attr(model, "df")
-  nobs <- attr(model, "nobs")
-  original_model <- model
-  model <- .combine_models(model)
-  obsArray <- create_obsArray(model)
-  emissionArray <- create_emissionArray(model)
-
-  em.con <- list(print_level = 0, maxeval = 1000, reltol = 1e-10)
-
-  res <- estimate_coefs(
-    model$transition_probs, emissionArray, model$initial_probs, obsArray,
-    model$n_symbols, model$coefficients, model$X, model$n_states_in_clusters,
-    em.con$maxeval, em.con$reltol, em.con$print_level, threads
-  )
-  if (res$error != 0) {
-    err_msg <- switch(res$error,
-      "Scaling factors contain non-finite values.",
-      "Backward probabilities contain non-finite values.",
-      "Initial values of coefficients of covariates gives non-finite cluster probabilities.",
-      "Estimation of coefficients of covariates failed due to singular Hessian.",
-      "Estimation of coefficients of covariates failed due to non-finite cluster probabilities.",
-      "Non-finite log-likelihood"
-    )
-    stop_(paste("EM algorithm failed:", err_msg))
-  }
-  original_model$coefficients[] <- res$coefficients
-  original_model
-}
diff --git a/R/get_coefs.R b/R/get_coefs.R
index b133e511..a7a783b8 100644
--- a/R/get_coefs.R
+++ b/R/get_coefs.R
@@ -7,10 +7,10 @@
 #' computing the approximate quantiles of the estimates. If `0`, only point 
 #' estimates are returned.
 #' @param probs Vector defining the quantiles of interest. Default is 
-#' `c(0.025, 0.975)`.
+#' `c(0.025, 0.5, 0.975)`.
 #' @rdname coef
 #' @export
-coef.nhmm <- function(object, nsim = 0, probs = c(0.025, 0.975), ...) {
+coef.nhmm <- function(object, nsim = 0, probs = c(0.025, 0.5, 0.975), ...) {
   stopifnot_(
     checkmate::test_count(nsim),
     "Argument {.arg nsim} should be a single non-negative integer."
@@ -84,7 +84,7 @@ coef.nhmm <- function(object, nsim = 0, probs = c(0.025, 0.975), ...) {
 }
 #' @rdname coef
 #' @export
-coef.mnhmm <- function(object, nsim = 0, probs = c(0.025, 0.975), ...) {
+coef.mnhmm <- function(object, nsim = 0, probs = c(0.025, 0.5, 0.975), ...) {
   stopifnot_(
     checkmate::test_count(nsim),
     "Argument {.arg nsim} should be a single non-negative integer."
diff --git a/R/get_probs.R b/R/get_probs.R
index 4ad5b2d3..bc202acc 100644
--- a/R/get_probs.R
+++ b/R/get_probs.R
@@ -7,7 +7,7 @@
 #' computing the approximate quantiles of the estimates. If `0`, only point 
 #' estimates are returned.
 #' @param probs Vector defining the quantiles of interest. Default is 
-#' `c(0.025, 0.975)`.
+#' `c(0.025, 0.5, 0.975)`.
 #' @rdname get_probs
 #' @export
 get_probs <- function(model, ...) {
@@ -15,15 +15,15 @@ get_probs <- function(model, ...) {
 }
 #' @rdname get_probs
 #' @export
-get_probs.nhmm <- function(model, data = NULL, nsim = 0, 
-                           probs = c(0.025, 0.975), ...) {
+get_probs.nhmm <- function(model, newdata = NULL, nsim = 0, 
+                           probs = c(0.025, 0.5, 0.975), ...) {
   
   stopifnot_(
     checkmate::test_count(nsim),
     "Argument {.arg nsim} should be a single non-negative integer."
   )
-  if (!is.null(data)) {
-    model <- update(model, newdata = data)
+  if (!is.null(newdata)) {
+    model <- update(model, newdata = ne)
   }
   S <- model$n_states
   M <- model$n_symbols
@@ -125,16 +125,16 @@ get_probs.nhmm <- function(model, data = NULL, nsim = 0,
 }
 #' @rdname get_probs
 #' @export
-get_probs.mnhmm <- function(model, data = NULL, nsim = 0, 
-                            probs = c(0.025, 0.975), ...) {
+get_probs.mnhmm <- function(model, ne = NULL, nsim = 0, 
+                            probs = c(0.025, 0.5, 0.975), ...) {
   
   stopifnot_(
     checkmate::test_count(nsim),
     "Argument {.arg nsim} should be a single non-negative integer."
   )
   
-  if (!is.null(data)) {
-    model <- update(model, newdata = data)
+  if (!is.null(ne)) {
+    model <- update(model, newdata = ne)
   }
   
   T <- model$length_of_sequences
@@ -233,65 +233,18 @@ get_probs.mnhmm <- function(model, data = NULL, nsim = 0,
   )
   
   if (nsim > 0) {
-    stopifnot_(
-      checkmate::test_numeric(
-        x = probs, lower = 0, upper = 1, any.missing = FALSE, min.len = 1L
-      ),
-      "Argument {.arg probs} must be a {.cls numeric} vector with values
-      between 0 and 1."
-    )
-    chol_precision <- chol(-model$estimation$hessian)
-    U <- backsolve(chol_precision, diag(ncol(chol_precision)))
-    x <- matrix(rnorm(nsim * ncol(U)), nrow = nsim) %*% U
-    beta_i_raw <- model$estimation_results$parameters$beta_i_raw
-    beta_s_raw <- model$estimation_results$parameters$beta_s_raw
-    beta_o_raw <- model$estimation_results$parameters$beta_o_raw
-    theta_raw <- model$estimation_results$parameters$theta_raw
-    x <- t(sweep(x, 2, c(beta_i_raw, beta_s_raw, beta_o_raw, theta_raw), "+"))
-    p_i <- length(beta_i_raw)
-    p_s <- length(beta_s_raw)
-    p_o <- length(beta_o_raw)
-    p_d <- length(theta_raw)
-    
-    samples <- apply(
-      x[seq_len(p_i), ], 2, function(z) {
-        z <- array(z, dim = dim(beta_i_raw))
-        get_pi(z, X_initial)
-      }
-    )
-    quantiles <- fast_quantiles(samples, probs)
+    out <- sample_parameters(model, nsim, probs)
     for(i in seq_along(probs)) {
-      initial_probs[paste0("q", 100 * probs[i])] <- quantiles[, i]
+      initial_probs[paste0("q", 100 * probs[i])] <- out$quantiles_pi[, i]
     }
-    samples <- apply(
-      x[p_i + seq_len(p_s), ], 2, function(z) {
-        z <- array(z, dim = dim(beta_s_raw))
-        unlist(get_A(aperm(z, c(2, 3, 1)), X_transition))
-      }
-    )
-    quantiles <- fast_quantiles(samples, probs)
     for(i in seq_along(probs)) {
-      transition_probs[paste0("q", 100 * probs[i])] <- quantiles[, i]
+      transition_probs[paste0("q", 100 * probs[i])] <- out$quantiles_A[, i]
     }
-    samples <- apply(
-      x[p_i + p_s + seq_len(p_o), ], 2, function(z) {
-        z <- array(z, dim = dim(beta_o_raw))
-        unlist(get_B(aperm(z, c(2, 3, 1)), X_emission))
-      }
-    )
-    quantiles <- fast_quantiles(samples, probs)
     for(i in seq_along(probs)) {
-      emission_probs[paste0("q", 100 * probs[i])] <- quantiles[, i]
+      emission_probs[paste0("q", 100 * probs[i])] <- out$quantiles_B[, i]
     }
-    samples <- apply(
-      x[p_i + p_s + p_o + seq_len(p_d), ], 2, function(z) {
-        z <- array(z, dim = dim(theta_raw))
-        unlist(get_omega(z, X_cluster))
-      }
-    )
-    quantiles <- fast_quantiles(samples, probs)
     for(i in seq_along(probs)) {
-      cluster_probs[paste0("q", 100 * probs[i])] <- quantiles[, i]
+      cluster_probs[paste0("q", 100 * probs[i])] <- out$quantiles_omega[, i]
     }
   }
   list(
diff --git a/R/predict.R b/R/predict.R
new file mode 100644
index 00000000..b7cc2994
--- /dev/null
+++ b/R/predict.R
@@ -0,0 +1,163 @@
+#' Predict method for non-homogeneous hidden Markov models
+#' 
+#' @param model A Hidden Markov Model of class `nhmm` or `mnhmm`.
+#' @param newdata Optional data frame which is used for prediction.
+#' @param nsim Non-negative integer defining the number of samples from the
+#' normal approximation of the model coefficients.
+#' @param probs Vector defining the quantiles of interest.
+#' @param return_samples Logical indicating whether to return samples or
+#' quantiles.
+#' @export
+predict.nhmm <- function(
+    model, newdata = NULL, nsim = 0, 
+    probs = c(0.025, 0.5, 0.975), return_samples = FALSE) {
+  
+  stopifnot_(
+    checkmate::test_count(nsim),
+    "Argument {.arg nsim} should be a single non-negative integer."
+  )
+  if (!is.null(newdata)) {
+    time <- model$time_variable
+    id <- model$id_variable
+    stopifnot_(
+      is.data.frame(newdata), 
+      "Argument {.arg newdata} must be a {.cls data.frame} object."
+    )
+    stopifnot_(
+      !is.null(newdata[[id]]), 
+      "Can't find grouping variable {.var {id}} in {.arg newdata}."
+    )
+    stopifnot_(
+      !is.null(newdata[[time]]), 
+      "Can't find time index variable {.var {time}} in {.arg newdata}."
+    )
+    stopifnot_(
+      !is.null(newdata[[variable]]), 
+      "Can't find time variable {.var {variable}} in {.arg newdata}."
+    )
+  } else {
+    stopifnot(
+      !is.null(model$data),
+      "Model does not contain original data and argument {.arg newdata} is 
+      {.var NULL}."
+    )
+    model <- update(model, newdata = newdata)
+  }
+  
+  beta_i_raw <- stan_to_cpp_initial(
+    model$estimation_results$parameters$beta_i_raw
+  )
+  beta_s_raw <- stan_to_cpp_transition(
+    model$estimation_results$parameters$beta_s_raw
+  )
+  beta_o_raw <- stan_to_cpp_emission(
+    model$estimation_results$parameters$beta_o_raw,
+    1,
+    C > 1
+  )
+  X_initial <- t(model$X_initial)
+  X_transition <- aperm(model$X_transition, c(3, 1, 2))
+  X_emission <- aperm(model$X_emission, c(3, 1, 2))
+  out <- list()
+  out$pi <- get_pi(beta_i_raw, X_initial, 0)
+  out$A <- get_A(beta_s_raw, X_transition, 0)
+  out$B <- if (model$n_channels == 1) {
+    get_B(beta_o_raw, X_emission, 0)
+  } else {
+    get_multichannel_B(
+      beta_o_raw, 
+      X_emission1, 
+      model$n_states, 
+      model$n_channels, 
+      model$n_symbols, 
+      0, 0)
+  }
+  if (nsim > 0) {
+    samples <- sample_parameters(model, nsim, probs, return_samples)
+    if (return_samples) {
+      out$samples <- samples
+    } else {
+      out$quantiles <- samples
+    }
+  }
+  out
+}
+#' @export
+predict.mnhmm <- function(
+    model, newdata = NULL, nsim = 0, 
+    probs = c(0.025, 0.5, 0.975), return_samples = FALSE) {
+  
+  stopifnot_(
+    checkmate::test_count(nsim),
+    "Argument {.arg nsim} should be a single non-negative integer."
+  )
+  if (!is.null(newdata)) {
+    time <- model$time_variable
+    id <- model$id_variable
+    stopifnot_(
+      is.data.frame(newdata), 
+      "Argument {.arg newdata} must be a {.cls data.frame} object."
+    )
+    stopifnot_(
+      !is.null(newdata[[id]]), 
+      "Can't find grouping variable {.var {id}} in {.arg newdata}."
+    )
+    stopifnot_(
+      !is.null(newdata[[time]]), 
+      "Can't find time index variable {.var {time}} in {.arg newdata}."
+    )
+    stopifnot_(
+      !is.null(newdata[[variable]]), 
+      "Can't find time variable {.var {variable}} in {.arg newdata}."
+    )
+  } else {
+    stopifnot(
+      !is.null(model$data),
+      "Model does not contain original data and argument {.arg newdata} is 
+      {.var NULL}."
+    )
+    model <- update(model, newdata = newdata)
+  }
+  
+  beta_i_raw <- stan_to_cpp_initial(
+    model$estimation_results$parameters$beta_i_raw
+  )
+  beta_s_raw <- stan_to_cpp_transition(
+    model$estimation_results$parameters$beta_s_raw
+  )
+  beta_o_raw <- stan_to_cpp_emission(
+    model$estimation_results$parameters$beta_o_raw,
+    1,
+    C > 1
+  )
+  X_initial <- t(model$X_initial)
+  X_transition <- aperm(model$X_transition, c(3, 1, 2))
+  X_emission <- aperm(model$X_emission, c(3, 1, 2))
+  X_cluster <- t(model$X_cluster)
+  out <- list()
+  out$pi <- get_pi(beta_i_raw, X_initial, 0)
+  out$A <- get_A(beta_s_raw, X_transition, 0)
+  out$B <- if (model$n_channels == 1) {
+    get_B(beta_o_raw, X_emission, 0)
+  } else {
+    get_multichannel_B(
+      beta_o_raw, 
+      X_emission1, 
+      model$n_states, 
+      model$n_channels, 
+      model$n_symbols, 
+      0, 0)
+  }
+  out$omega <- get_omega(
+    model$estimation_results$parameters$theta_raw, X_cluster, 0
+  )
+  if (nsim > 0) {
+    samples <- sample_parameters(model, nsim, probs, return_samples)
+    if (return_samples) {
+      out$samples <- samples
+    } else {
+      out$quantiles <- samples
+    }
+  }
+  out
+}
\ No newline at end of file
diff --git a/R/sample_parameters.R b/R/sample_parameters.R
new file mode 100644
index 00000000..fa941a68
--- /dev/null
+++ b/R/sample_parameters.R
@@ -0,0 +1,85 @@
+#' Samples parameters of non-homegeneous Markov hidden models using the normal approximation.
+#' 
+#' @param model An object of class `nhmm` or `mnhmm`.
+#' @param nsim A non-negative integer defining the number of samples from the 
+#' normal approximation.
+#' @param probs A numeric vector defining the quantiles of interest.
+#' @param return_samples A logical indicating whether to return the samples 
+#' instead of quantiles.
+#' @noRd
+sample_parameters <- function(model, nsim, probs, return_samples = FALSE) {
+  stopifnot_(
+    checkmate::test_int(x = nsim, lower = 1L), 
+    "Argument {.arg nsim} must be a single positive integer."
+  )
+  stopifnot_(
+    !return_samples && checkmate::test_numeric(
+      x = probs, lower = 0, upper = 1, any.missing = FALSE, min.len = 1L
+    ),
+    "Argument {.arg probs} must be a {.cls numeric} vector with values
+      between 0 and 1."
+  )
+  mixture <- inherits(model, "mnhmm")
+  X_initial <- t(model$X_initial)
+  X_transition <- aperm(model$X_transition, c(3, 1, 2))
+  X_emission <- aperm(model$X_emission, c(3, 1, 2))
+  if (mixture) X_cluster <- t(model$X_cluster)
+  
+  chol_precision <- chol(-model$estimation$hessian)
+  U <- backsolve(chol_precision, diag(ncol(chol_precision)))
+  x <- matrix(rnorm(nsim * ncol(U)), nrow = nsim) %*% U
+  beta_i_raw <- model$estimation_results$parameters$beta_i_raw
+  beta_s_raw <- model$estimation_results$parameters$beta_s_raw
+  beta_o_raw <- model$estimation_results$parameters$beta_o_raw
+  pars <- c(beta_i_raw, beta_s_raw, beta_o_raw)
+  p_i <- length(beta_i_raw)
+  p_s <- length(beta_s_raw)
+  p_o <- length(beta_o_raw)
+  if (mixture) {
+    theta_raw <- model$estimation_results$parameters$theta_raw
+    pars <- c(pars, theta_raw)
+    if (mixture) p_d <- length(theta_raw)
+  }
+  x <- t(sweep(x, 2, pars, "+"))
+  samples_pi <- apply(
+    x[seq_len(p_i), ], 2, function(z) {
+      z <- array(z, dim = dim(beta_i_raw))
+      get_pi(z, X_initial)
+    }
+  )
+  samples_A <- apply(
+    x[p_i + seq_len(p_s), ], 2, function(z) {
+      z <- array(z, dim = dim(beta_s_raw))
+      unlist(get_A(aperm(z, c(2, 3, 1)), X_transition))
+    }
+  )
+  samples_B <- apply(
+    x[p_i + p_s + seq_len(p_o), ], 2, function(z) {
+      z <- array(z, dim = dim(beta_o_raw))
+      unlist(get_B(aperm(z, c(2, 3, 1)), X_emission))
+    }
+  )
+  if (mixture) {
+    samples_omega <- apply(
+      x[p_i + p_s + p_o + seq_len(p_d), ], 2, function(z) {
+        z <- array(z, dim = dim(theta_raw))
+        unlist(get_omega(z, X_cluster))
+      }
+    )
+  }
+  if (return_samples) {
+    list(
+      samples_pi = samples_pi,
+      samples_A = samples_A,
+      samples_B = samples_B,
+      samples_omega = if (mixture) samples_omega
+    )
+  } else {
+    list(
+      quantiles_pi = fast_quantiles(samples_pi, probs),
+      quantiles_A = fast_quantiles(samples_A, probs),
+      quantiles_B = fast_quantiles(samples_B, probs),
+      quantiles_omega = if (mixture) fast_quantiles(samples_omega, probs)
+    )
+  }
+}
\ No newline at end of file
diff --git a/R/summary.nhmm.R b/R/summary.nhmm.R
index b186ec3c..6fe5a98a 100644
--- a/R/summary.nhmm.R
+++ b/R/summary.nhmm.R
@@ -5,7 +5,7 @@
 #'
 #' @export
 #' @param object Non-homogeneous hidden Markov model of class `nhmm`.
-summary.nhmm <- function(object, nsim = 0, probs = c(0.025, 0.975), ...) {
+summary.nhmm <- function(object, nsim = 0, probs = c(0.025, 0.5, 0.975), ...) {
   ll <- logLik(object)
   out <- list(
     logLik = ll, BIC = BIC(ll),
@@ -21,7 +21,7 @@ summary.nhmm <- function(object, nsim = 0, probs = c(0.025, 0.975), ...) {
 #'
 #' @export
 #' @param object Non-homogeneous hidden Markov model of class `mnhmm`.
-summary.mnhmm <- function(object, nsim = 0, probs = c(0.025, 0.975), ...) {
+summary.mnhmm <- function(object, nsim = 0, probs = c(0.025, 0.5, 0.975), ...) {
   cf <- coef(object, nsim, probs)
   pr <- exp(object$X_cluster %*% object$estimation_results$parameters$theta_raw)
   prior_cluster_probabilities <- pr / rowSums(pr)
diff --git a/man/average_marginal_effect.Rd b/man/average_marginal_prediction.Rd
similarity index 57%
rename from man/average_marginal_effect.Rd
rename to man/average_marginal_prediction.Rd
index eb7cf5c3..48ef3f42 100644
--- a/man/average_marginal_effect.Rd
+++ b/man/average_marginal_prediction.Rd
@@ -1,16 +1,17 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/average_marginal_effect.R
-\name{average_marginal_effect}
-\alias{average_marginal_effect}
-\title{Average Marginal Effects of Covariates of Non-homogenous Hidden Markov Models}
+% Please edit documentation in R/average_marginal_prediction.R
+\name{average_marginal_prediction}
+\alias{average_marginal_prediction}
+\title{Average Marginal Predictions for Non-homogenous Hidden Markov Models}
 \usage{
-average_marginal_effect(
+average_marginal_prediction(
   model,
   variable,
   values,
+  marginalize_B_over = "sequences",
   newdata = NULL,
   nsim = 0,
-  probs = c(0.025, 0.975)
+  probs = c(0.025, 0.5, 0.975)
 )
 }
 \arguments{
@@ -18,7 +19,7 @@ average_marginal_effect(
 
 \item{variable}{Name of the variable of interest.}
 
-\item{values}{Vector containing two values for \code{variable}.}
+\item{values}{Vector containing one or two values for \code{variable}.}
 
 \item{newdata}{Optional data frame which is used for marginalization.}
 
@@ -28,8 +29,8 @@ computing the approximate quantiles of the estimates. If \code{0}, only point
 estimates are returned.}
 
 \item{probs}{Vector defining the quantiles of interest. Default is
-\code{c(0.025, 0.975)}.}
+\code{c(0.025, 0.5, 0.975)}.}
 }
 \description{
-Average Marginal Effects of Covariates of Non-homogenous Hidden Markov Models
+Average Marginal Predictions for Non-homogenous Hidden Markov Models
 }
diff --git a/man/coef.Rd b/man/coef.Rd
index b237fdf3..2ef115a4 100644
--- a/man/coef.Rd
+++ b/man/coef.Rd
@@ -6,9 +6,9 @@
 \title{Get the Estimated Regression Coefficients of Non-Homogeneous Hidden Markov
 Models}
 \usage{
-\method{coef}{nhmm}(object, nsim = 0, probs = c(0.025, 0.975), ...)
+\method{coef}{nhmm}(object, nsim = 0, probs = c(0.025, 0.5, 0.975), ...)
 
-\method{coef}{mnhmm}(object, nsim = 0, probs = c(0.025, 0.975), ...)
+\method{coef}{mnhmm}(object, nsim = 0, probs = c(0.025, 0.5, 0.975), ...)
 }
 \arguments{
 \item{object}{An object of class \code{nhmm} or \code{mnhmm}.}
@@ -19,7 +19,7 @@ computing the approximate quantiles of the estimates. If \code{0}, only point
 estimates are returned.}
 
 \item{probs}{Vector defining the quantiles of interest. Default is
-\code{c(0.025, 0.975)}.}
+\code{c(0.025, 0.5, 0.975)}.}
 }
 \description{
 Get the Estimated Regression Coefficients of Non-Homogeneous Hidden Markov
diff --git a/man/estimate_coef.Rd b/man/estimate_coef.Rd
deleted file mode 100644
index e76884c5..00000000
--- a/man/estimate_coef.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/estimate_coef.R
-\name{estimate_coef}
-\alias{estimate_coef}
-\title{Estimate Only the Regression Coefficients of Mixture Hidden Markov Models}
-\usage{
-estimate_coef(model, threads = 1)
-}
-\arguments{
-\item{model}{An object of class \code{mhmm}.}
-
-\item{threads}{Number of threads to use in parallel computing.
-The default is 1.}
-}
-\description{
-Function \code{estimate_coef} estimates the regression coefficients of
-mixture hidden Markov models of class \code{mhmm} and its restricted variants
-while keeping other parameters fixed.
-}
diff --git a/man/get_probs.Rd b/man/get_probs.Rd
index 72227fe5..42440daa 100644
--- a/man/get_probs.Rd
+++ b/man/get_probs.Rd
@@ -9,9 +9,9 @@ or MNHMM}
 \usage{
 get_probs(model, ...)
 
-\method{get_probs}{nhmm}(model, data = NULL, nsim = 0, probs = c(0.025, 0.975), ...)
+\method{get_probs}{nhmm}(model, newdata = NULL, nsim = 0, probs = c(0.025, 0.5, 0.975), ...)
 
-\method{get_probs}{mnhmm}(model, data = NULL, nsim = 0, probs = c(0.025, 0.975), ...)
+\method{get_probs}{mnhmm}(model, ne = NULL, nsim = 0, probs = c(0.025, 0.5, 0.975), ...)
 }
 \arguments{
 \item{model}{An object of class \code{nhmm} or \code{mnhmm}.}
@@ -22,7 +22,7 @@ computing the approximate quantiles of the estimates. If \code{0}, only point
 estimates are returned.}
 
 \item{probs}{Vector defining the quantiles of interest. Default is
-\code{c(0.025, 0.975)}.}
+\code{c(0.025, 0.5, 0.975)}.}
 }
 \description{
 Get the Estimated Initial, Transition, and Emission Probabilities for NHMM
diff --git a/man/predict.nhmm.Rd b/man/predict.nhmm.Rd
new file mode 100644
index 00000000..c6dd5881
--- /dev/null
+++ b/man/predict.nhmm.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/predict.R
+\name{predict.nhmm}
+\alias{predict.nhmm}
+\title{Predict method for non-homogeneous hidden Markov models}
+\usage{
+\method{predict}{nhmm}(
+  model,
+  newdata = NULL,
+  nsim = 0,
+  probs = c(0.025, 0.5, 0.975),
+  return_samples = FALSE
+)
+}
+\arguments{
+\item{model}{A Hidden Markov Model of class \code{nhmm} or \code{mnhmm}.}
+
+\item{newdata}{Optional data frame which is used for prediction.}
+
+\item{nsim}{Non-negative integer defining the number of samples from the
+normal approximation of the model coefficients.}
+
+\item{probs}{Vector defining the quantiles of interest.}
+
+\item{return_samples}{Logical indicating whether to return samples or
+quantiles.}
+}
+\description{
+Predict method for non-homogeneous hidden Markov models
+}
diff --git a/man/summary.mnhmm.Rd b/man/summary.mnhmm.Rd
index f57868a1..78333686 100644
--- a/man/summary.mnhmm.Rd
+++ b/man/summary.mnhmm.Rd
@@ -4,7 +4,7 @@
 \alias{summary.mnhmm}
 \title{Summary method for non-homogeneous hidden Markov models}
 \usage{
-\method{summary}{mnhmm}(object, nsim = 0, probs = c(0.025, 0.975), ...)
+\method{summary}{mnhmm}(object, nsim = 0, probs = c(0.025, 0.5, 0.975), ...)
 }
 \arguments{
 \item{object}{Non-homogeneous hidden Markov model of class \code{mnhmm}.}
diff --git a/man/summary.nhmm.Rd b/man/summary.nhmm.Rd
index dbcc9991..6f89932c 100644
--- a/man/summary.nhmm.Rd
+++ b/man/summary.nhmm.Rd
@@ -4,7 +4,7 @@
 \alias{summary.nhmm}
 \title{Summary of Non-homogeneous Hidden Markov Models}
 \usage{
-\method{summary}{nhmm}(object, nsim = 0, probs = c(0.025, 0.975), ...)
+\method{summary}{nhmm}(object, nsim = 0, probs = c(0.025, 0.5, 0.975), ...)
 }
 \arguments{
 \item{object}{Non-homogeneous hidden Markov model of class \code{nhmm}.}
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index b405cec6..914ad599 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -623,28 +623,6 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
-// estimate_coefs
-Rcpp::List estimate_coefs(const arma::mat& transition, const arma::cube& emission, const arma::vec& init, const arma::ucube& obs, const arma::uvec& nSymbols, arma::mat coef, const arma::mat& X, const arma::uvec& numberOfStates, int itermax, double tol, int trace, unsigned int threads);
-RcppExport SEXP _seqHMM_estimate_coefs(SEXP transitionSEXP, SEXP emissionSEXP, SEXP initSEXP, SEXP obsSEXP, SEXP nSymbolsSEXP, SEXP coefSEXP, SEXP XSEXP, SEXP numberOfStatesSEXP, SEXP itermaxSEXP, SEXP tolSEXP, SEXP traceSEXP, SEXP threadsSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< const arma::mat& >::type transition(transitionSEXP);
-    Rcpp::traits::input_parameter< const arma::cube& >::type emission(emissionSEXP);
-    Rcpp::traits::input_parameter< const arma::vec& >::type init(initSEXP);
-    Rcpp::traits::input_parameter< const arma::ucube& >::type obs(obsSEXP);
-    Rcpp::traits::input_parameter< const arma::uvec& >::type nSymbols(nSymbolsSEXP);
-    Rcpp::traits::input_parameter< arma::mat >::type coef(coefSEXP);
-    Rcpp::traits::input_parameter< const arma::mat& >::type X(XSEXP);
-    Rcpp::traits::input_parameter< const arma::uvec& >::type numberOfStates(numberOfStatesSEXP);
-    Rcpp::traits::input_parameter< int >::type itermax(itermaxSEXP);
-    Rcpp::traits::input_parameter< double >::type tol(tolSEXP);
-    Rcpp::traits::input_parameter< int >::type trace(traceSEXP);
-    Rcpp::traits::input_parameter< unsigned int >::type threads(threadsSEXP);
-    rcpp_result_gen = Rcpp::wrap(estimate_coefs(transition, emission, init, obs, nSymbols, coef, X, numberOfStates, itermax, tol, trace, threads));
-    return rcpp_result_gen;
-END_RCPP
-}
 // softmax
 arma::vec softmax(const arma::vec& x, const int logspace);
 RcppExport SEXP _seqHMM_softmax(SEXP xSEXP, SEXP logspaceSEXP) {
@@ -861,7 +839,6 @@ static const R_CallMethodDef CallEntries[] = {
     {"_seqHMM_log_objectivex", (DL_FUNC) &_seqHMM_log_objectivex, 12},
     {"_seqHMM_objective", (DL_FUNC) &_seqHMM_objective, 9},
     {"_seqHMM_objectivex", (DL_FUNC) &_seqHMM_objectivex, 12},
-    {"_seqHMM_estimate_coefs", (DL_FUNC) &_seqHMM_estimate_coefs, 12},
     {"_seqHMM_softmax", (DL_FUNC) &_seqHMM_softmax, 2},
     {"_seqHMM_varcoef", (DL_FUNC) &_seqHMM_varcoef, 2},
     {"_seqHMM_viterbi", (DL_FUNC) &_seqHMM_viterbi, 4},
diff --git a/src/sa_opt_coef.cpp b/src/sa_opt_coef.cpp
deleted file mode 100644
index a335155a..00000000
--- a/src/sa_opt_coef.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-//stand-alone coefficient estimation
-#include "optcoef.h"
-#include "forward_backward.h"
-#include "reparma.h"
-
-// [[Rcpp::export]]
-Rcpp::List estimate_coefs(const arma::mat& transition, const arma::cube& emission, 
-  const arma::vec& init, const arma::ucube& obs, const arma::uvec& nSymbols, 
-  arma::mat coef, const arma::mat& X, const arma::uvec& numberOfStates, 
-  int itermax, double tol, int trace, unsigned int threads) {
-
-  coef.col(0).zeros();
-  arma::mat weights = exp(X * coef).t();
-  if (!weights.is_finite()) {
-    return Rcpp::List::create(Rcpp::Named("error") = 3);
-  }
-  weights.each_row() /= sum(weights, 0);
-
-  arma::mat initk(emission.n_rows, obs.n_slices);
-  for (unsigned int k = 0; k < obs.n_slices; k++) {
-    initk.col(k) = init % reparma(weights.col(k), numberOfStates);
-  }
-
-  arma::cube alpha(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k
-  arma::cube beta(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k
-  arma::mat scales(obs.n_cols, obs.n_slices); //m,n,k
-
-  arma::sp_mat sp_trans(transition);
-  internalForwardx(sp_trans.t(), emission, initk, obs, alpha, scales, threads);
-  if(!scales.is_finite()) {
-    return Rcpp::List::create(Rcpp::Named("error") = 1);
-  }
-  internalBackwardx(sp_trans, emission, obs, beta, scales, threads);
-  if(!beta.is_finite()) {
-    return Rcpp::List::create(Rcpp::Named("error") = 2);
-  }
-  double max_sf = scales.max();
-  if (max_sf > 1e150) {
-    Rcpp::warning("Largest scaling factor was %e, results can be numerically unstable.", max_sf);
-  }
-
-  double sumlogLik = -arma::accu(log(scales));
-
-  if (trace > 0) {
-    Rcpp:: Rcout << "Log-likelihood of initial model: " << sumlogLik << std::endl;
-  }
-
-  double change = tol + 1.0;
-  int iter = 0;
-
-  arma::uvec cumsumstate = arma::cumsum(numberOfStates);
-
-  while ((change > tol) && (iter < itermax)) {
-    iter++;
-  //
-  // unsigned int error = optCoef(weights, obs, emission, initk, beta, scales, coef, X, cumsumstate,
-  //                                  numberOfStates, trace);
-  // if (error != 0) {
-  //   return Rcpp::List::create(Rcpp::Named("error") = error);
-  // }
-
-  for (unsigned int k = 0; k < obs.n_slices; k++) {
-    initk.col(k) = init % reparma(weights.col(k), numberOfStates);
-  }
-
-  internalForwardx(sp_trans.t(), emission, initk, obs, alpha, scales, threads);
-  if(!scales.is_finite()) {
-    return Rcpp::List::create(Rcpp::Named("error") = 1);
-  }
-  internalBackwardx(sp_trans, emission, obs, beta, scales, threads);
-  if(!beta.is_finite()) {
-    return Rcpp::List::create(Rcpp::Named("error") = 2);
-  }
-  double max_sf = scales.max();
-  if (max_sf > 1e150) {
-    Rcpp::warning("Largest scaling factor was %e, results can be numerically unstable.", max_sf);
-  }
-  double tmp = -arma::accu(log(scales));
-  change = (tmp - sumlogLik) / (std::abs(sumlogLik) + 0.1);
-  sumlogLik = tmp;
-  if (trace > 1) {
-    Rcpp::Rcout << "iter: " << iter;
-    Rcpp::Rcout << " logLik: " << sumlogLik;
-    Rcpp::Rcout << " relative change: " << change << std::endl;
-  }
-
-  }
-  if (trace > 0) {
-    if (iter == itermax) {
-      Rcpp::Rcout << "EM algorithm stopped after reaching the maximum number of " << iter
-                  << " iterations." << std::endl;
-    } else {
-      Rcpp::Rcout << "EM algorithm stopped after reaching the relative change of " << change;
-      Rcpp::Rcout << " after " << iter << " iterations." << std::endl;
-    }
-    Rcpp::Rcout << "Final log-likelihood: " << sumlogLik << std::endl;
-
-  }
-  return Rcpp::List::create(Rcpp::Named("coefficients") = Rcpp::wrap(coef), Rcpp::Named("error") = 0);
-}
diff --git a/tests/testthat/test-estimate_coef.R b/tests/testthat/test-estimate_coef.R
deleted file mode 100644
index 51d562cd..00000000
--- a/tests/testthat/test-estimate_coef.R
+++ /dev/null
@@ -1,26 +0,0 @@
-
-test_that("'estimate_coef' works for 'mhmm'", {
-  set.seed(123)
-  data("mhmm_biofam")
-  expect_error(
-    out <- estimate_coef(mhmm_biofam),
-    NA
-  )
-  expect_gte(out, logLik(mhmm_biofam))
-})
-
-test_that("'estimate_coef' errors for 'hmm'", {
-  set.seed(123)
-  data("hmm_biofam")
-  expect_error(
-    out <- estimate_coef(hmm_biofam),
-    "`model` must be a `mhmm` object."
-  )
-})
-test_that("'estimate_coef' errors for incorrect 'threads'", {
-  data("mhmm_biofam")
-  expect_error(
-    out <- estimate_coef(mhmm_biofam, threads = "a"),
-    "Argument `threads` must be a single positive integer"
-  )
-})
diff --git a/tests/testthat/test-simulate_mnhmm.R b/tests/testthat/test-simulate_mnhmm.R
index bd2ead34..f1c2cf31 100644
--- a/tests/testthat/test-simulate_mnhmm.R
+++ b/tests/testthat/test-simulate_mnhmm.R
@@ -1,6 +1,6 @@
 set.seed(1)
-p <- 100
-n <- 20
+p <- 50
+n <- 10
 d <- data.frame(
   person = rep(1:p, each = n),
   month = rep(1:n, p),
@@ -22,5 +22,18 @@ test_that("simulate_mnhmm works", {
       coefs = "random", init_sd = 2),
     NA
   )
+  expect_equal(
+    table(unlist(sim$states)),
+    structure(
+      c(`Cluster 1: State 1` = 305L, `Cluster 1: State 2` = 304L, 
+        `Cluster 2: State 1` = 355L, `Cluster 2: State 2` = 335L, 
+        `Cluster 3: State 1` = 380L, `Cluster 3: State 2` = 321L, 
+        `*` = 0L, `%` = 0L), dim = 8L, 
+      dimnames = structure(list(
+        c("Cluster 1: State 1", "Cluster 1: State 2", "Cluster 2: State 1", 
+          "Cluster 2: State 2", "Cluster 3: State 1", "Cluster 3: State 2", 
+          "*", "%")), names = ""), class = "table")
+    
+  )
 })