remove penalty

helske · Oct 21, 2024 · 96e51c2 · 96e51c2
1 parent c09e2a9
commit 96e51c2
Show file tree

Hide file tree

Showing 14 changed files with 74 additions and 91 deletions.
diff --git a/R/ame.R b/R/ame.R
@@ -216,7 +216,7 @@ ame.mnhmm <- function(
   newdata[[variable]] <- values[2]
   model2 <- update(model, newdata)
 
-  if (!attr(model$X_omega, "iv")) {
+  if (!attr(model$X_cluster, "iv")) {
     X1 <- model1$X_cluster[, 1L, drop = FALSE]
     X2 <- model2$X_cluster[, 1L, drop = FALSE]
   } else {

diff --git a/R/bootstrap.R b/R/bootstrap.R
@@ -67,8 +67,6 @@ permute_clusters <- function(model, pcp_mle) {
 #' nonparametric or parametric bootstrap should be used. The former samples 
 #' sequences with replacement, whereas the latter simulates new datasets based 
 #' on the model.
-#' @param penalty penalty term for model estimation. By default, same penalty is used 
-#' as was in model estimation by [estimate_nhmm()] or [estimate_mnhmm()].
 #' @param verbose Should the progress bar be displayed? Default is `FALSE`.
 #' @param ... Additional arguments to [nloptr()].
 #' @return The original model with additional element `model$boot`, which 
@@ -83,16 +81,13 @@ bootstrap_coefs <- function(model, ...) {
 #' @export
 bootstrap_coefs.nhmm <- function(model, B = 1000, 
                                  method = c("nonparametric", "parametric"),
-                                 penalty, verbose = FALSE, ...) {
+                                 verbose = FALSE, ...) {
   method <- match.arg(method)
   stopifnot_(
     checkmate::test_int(x = B, lower = 0L), 
     "Argument {.arg B} must be a single positive integer."
   )
   init <- model$etas
-  if (missing(penalty)) {
-    penalty <- model$estimation_results$penalty
-  }
   gammas_mle <- model$gammas
   gamma_pi <- replicate(B, gammas_mle$pi, simplify = FALSE)
   gamma_A <- replicate(B, gammas_mle$A, simplify = FALSE)
@@ -102,7 +97,7 @@ bootstrap_coefs.nhmm <- function(model, B = 1000,
     for (i in seq_len(B)) {
       mod <- bootstrap_model(model)
       fit <- fit_nhmm(mod, init, init_sd = 0, restarts = 0, threads = 1, 
-                      penalty = penalty, ...)
+                      ...)
       fit$gammas <- permute_states(fit$gammas, gammas_mle)
       gamma_pi[[i]] <- fit$gammas$pi
       gamma_A[[i]] <- fit$gammas$A
@@ -125,7 +120,7 @@ bootstrap_coefs.nhmm <- function(model, B = 1000,
         N, T_, M, S, formula_pi, formula_A, formula_B,
         data = d, time, id, init)$model
       fit <- fit_nhmm(mod, init, init_sd = 0, restarts = 0, threads = 1, 
-                       penalty = penalty, ...)
+                      ...)
       fit$gammas <- permute_states(fit$gammas, gammas_mle)
       gamma_pi[[i]] <- fit$gammas$pi
       gamma_A[[i]] <- fit$gammas$A
@@ -141,16 +136,13 @@ bootstrap_coefs.nhmm <- function(model, B = 1000,
 #' @export
 bootstrap_coefs.mnhmm <- function(model, B = 1000, 
                                   method = c("nonparametric", "parametric"),
-                                  penalty, verbose = FALSE, ...) {
+                                  verbose = FALSE, ...) {
   method <- match.arg(method)
   stopifnot_(
     checkmate::test_int(x = B, lower = 0L), 
     "Argument {.arg B} must be a single positive integer."
   )
   init <- model$etas
-  if (missing(penalty)) {
-    penalty <- model$estimation_results$penalty
-  }
   gammas_mle <- model$gammas
   pcp_mle <- posterior_cluster_probabilities(model)
   gamma_pi <- replicate(B, gammas_mle$pi, simplify = FALSE)
@@ -163,7 +155,7 @@ bootstrap_coefs.mnhmm <- function(model, B = 1000,
     for (i in seq_len(B)) {
       mod <- bootstrap_model(model)
       fit <- fit_mnhmm(mod, init, init_sd = 0, restarts = 0, threads = 1, 
-                       penalty = penalty, ...)
+                       ...)
       fit <- permute_clusters(fit, pcp_mle)
       for (j in seq_len(D)) {
         out <- permute_states(
@@ -197,7 +189,7 @@ bootstrap_coefs.mnhmm <- function(model, B = 1000,
         N, T_, M, S, D, formula_pi, formula_A, formula_B, formula_omega,
         data = d, time, id, init)$model
       fit <- fit_mnhmm(mod, init, init_sd = 0, restarts = 0, threads = 1, 
-                        penalty = penalty, ...)
+                       ...)
       fit <- permute_clusters(fit, pcp_mle)
       for (j in seq_len(D)) {
         out <- permute_states(

diff --git a/R/check_build_arguments.R b/R/check_build_arguments.R
@@ -122,14 +122,10 @@
   dim(sequence_lengths) <- length(sequence_lengths)
   if (n_channels > 1L) {
     nobs <- sum(sapply(x, function(x) {
-      sum(!(x == attr(x[[1]], "nr") |
-              x == attr(x[[1]], "void") |
-              is.na(x)))
+      sum(!(x == attr(x, "nr") | x == attr(x, "void") | is.na(x)))
     })) / n_channels
   } else {
-    nobs <- sum(!(x == attr(x, "nr") |
-                    x == attr(x, "void") |
-                    is.na(x)))
+    nobs <- sum(!(x == attr(x, "nr") | x == attr(x, "void") | is.na(x)))
   }
 
   attr(x, "n_channels") <- ifelse(

diff --git a/R/estimate_mnhmm.R b/R/estimate_mnhmm.R
@@ -44,7 +44,7 @@ estimate_mnhmm <- function(
     transition_formula = ~1, emission_formula = ~1, cluster_formula = ~1,
     data = NULL, time = NULL, id = NULL, state_names = NULL, 
     channel_names = NULL, cluster_names = NULL, inits = "random", init_sd = 2, 
-    restarts = 0L, threads = 1L, store_data = TRUE, penalty = 0, ...) {
+    restarts = 0L, threads = 1L, store_data = TRUE, ...) {
 
   call <- match.call()
   model <- build_mnhmm(
@@ -59,7 +59,7 @@ estimate_mnhmm <- function(
   if (store_data) {
     model$data <- data
   }
-  out <- fit_mnhmm(model, inits, init_sd, restarts, threads, penalty, ...)
+  out <- fit_mnhmm(model, inits, init_sd, restarts, threads, ...)
 
   attr(out, "call") <- call
   out

diff --git a/R/estimate_nhmm.R b/R/estimate_nhmm.R
@@ -5,7 +5,7 @@
 #' (potentially) depend on covariates.
 #' 
 #' By default, the model parameters are estimated using LBFGS algorithm of 
-#' [nloptr::nloptr()]. The log-likelihood or the penalized log-likelihood is 
+#' [nloptr::nloptr()]. The log-likelihood is 
 #' scaled by the number of non-missing observations (`nobs(model)`), and the 
 #' convergence is claimed when either the absolute or relative change of this 
 #' objective function is less than `1e-8`, or the absolute change of the 
@@ -51,8 +51,6 @@
 #' is stored to the model object. For large datasets, this can be set to 
 #' `FALSE`, in which case you might need to pass the data separately to some 
 #' post-prosessing functions.
-#' @param penalty Penalty for penalized maximum likelihood. Default is `0` 
-#' i.e. no penalization as the penalty is of form `0.5 * sum(pars^2) * penalty`.
 #' @param ... Additional arguments to [nloptr::nloptr()]. Most importantly,
 #' argument `maxeval` defines the maximum number of iterations for optimization.
 #' The default is `1000` for restarts and `10000` for the final optimization. 
@@ -81,7 +79,7 @@ estimate_nhmm <- function(
     transition_formula = ~1, emission_formula = ~1, 
     data = NULL, time = NULL, id = NULL, state_names = NULL, channel_names = NULL, 
     inits = "random", init_sd = 2, restarts = 0L, threads = 1L, 
-    store_data = TRUE, penalty = 0, ...) {
+    store_data = TRUE, ...) {
 
   call <- match.call()
 
@@ -97,7 +95,7 @@ estimate_nhmm <- function(
   if (store_data) {
     model$data <- data
   }
-  out <- fit_nhmm(model, inits, init_sd, restarts, threads, penalty, ...)
+  out <- fit_nhmm(model, inits, init_sd, restarts, threads, ...)
   attr(out, "call") <- call
   out
 }
diff --git a/R/fit_mnhmm.R b/R/fit_mnhmm.R
@@ -1,7 +1,7 @@
 #' Estimate a Mixture Non-homogeneous Hidden Markov Model
 #'
 #' @noRd
-fit_mnhmm <- function(model, inits, init_sd, restarts, threads, penalty, 
+fit_mnhmm <- function(model, inits, init_sd, restarts, threads, 
                       save_all_solutions = FALSE, ...) {
   stopifnot_(
     checkmate::test_int(x = threads, lower = 1L), 
@@ -11,14 +11,15 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, penalty,
     checkmate::test_int(x = restarts, lower = 0L), 
     "Argument {.arg restarts} must be a single integer."
   )
-  obs <- create_obsArray(model)
-  if (model$n_channels == 1) {
-    obs <- array(obs, dim(obs)[2:3])
-  }
   M <- model$n_symbols
   S <- model$n_states
   D <- model$n_clusters
   T_ <- model$length_of_sequences
+  C <- model$n_channels
+  obs <- create_obsArray(model)
+  if (C == 1) {
+    obs <- array(obs, dim(obs)[2:3])
+  }
   if (identical(inits, "random")) {
     inits <- list(
       initial_probs = NULL, 
@@ -37,7 +38,7 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, penalty,
   iv_pi <- attr(model$X_initial, "iv")
   iv_A <- attr(model$X_transition, "iv")
   iv_B <- attr(model$X_emission, "iv")
-  iv_omega <- attr(model$X_omega, "iv")
+  iv_omega <- attr(model$X_cluster, "iv")
   tv_A <- attr(model$X_transition, "tv")
   tv_B <- attr(model$X_emission, "tv")
   X_i <- model$X_initial
@@ -68,7 +69,7 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, penalty,
     model$gammas$A <- c(eta_to_gamma_cube_field(
       model$etas$A
     ))
-    if (model$n_channels == 1L) {
+    if (C == 1L) {
       model$etas$B <- create_eta_B_mnhmm(
         pars[n_i + n_s + seq_len(n_o)], S, M, K_o, D
       )
@@ -107,7 +108,7 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, penalty,
   if (is.null(dots$check_derivatives)) 
     dots$check_derivatives <- FALSE
 
-  if (model$n_channels == 1L) {
+  if (C == 1L) {
     Qs <- t(create_Q(S))
     Qm <- t(create_Q(M))
     Qd <- t(create_Q(D))
@@ -126,8 +127,10 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, penalty,
           eta_omega, X_d, obs, iv_pi, iv_A, iv_B, tv_A, tv_B, iv_omega, 
           Ti
         )
-        list(objective = - (out$loglik + 0.5 * sum(pars^2) * penalty) / n_obs,
-             gradient = - (unlist(out[-1]) + pars * penalty) / n_obs)
+        list(
+          objective = - out$loglik / n_obs, 
+          gradient = - unlist(out[-1]) / n_obs
+        )
       }
     } else {
       objectivef <- function(pars) {
@@ -144,7 +147,7 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, penalty,
           eta_omega, X_d, obs
         )
 
-        - (sum(apply(out[, T_, ], 2, logSumExp)) + 0.5 * sum(pars^2) * penalty) / n_obs
+        - sum(apply(out[, T_, ], 2, logSumExp)) / n_obs
       }
     }
   } else {
@@ -172,8 +175,10 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, penalty,
           eta_omega, X_d, obs, M, iv_pi, iv_A, iv_B, tv_A, tv_B, iv_omega,
           Ti
         )
-        list(objective = - (out$loglik + 0.5 * sum(pars^2) * penalty) / n_obs,
-             gradient = - (unlist(out[-1]) + pars * penalty) / n_obs)
+        list(
+          objective = - out$loglik / n_obs,
+          gradient = - unlist(out[-1]) / n_obs
+        )
       }
     } else {
       objectivef <- function(pars) {
@@ -196,8 +201,8 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, penalty,
           eta_B, X_o,
           eta_omega, X_d,
           obs, M)
-        
-        - (sum(apply(out[, T_, ], 2, logSumExp)) + 0.5 * sum(pars^2) * penalty) / n_obs
+
+        - sum(apply(out[, T_, ], 2, logSumExp)) / n_obs
       }
     }
   }
@@ -267,7 +272,7 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, penalty,
   model$gammas$A <- c(eta_to_gamma_cube_field(
     model$etas$A
   ))
-  if (model$n_channels == 1L) {
+  if (C == 1L) {
     model$etas$B <- create_eta_B_mnhmm(
       pars[n_i + n_s + seq_len(n_o)], S, M, K_o, D
     )
@@ -297,7 +302,6 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, penalty,
     logliks_of_restarts = if(restarts > 0L) logliks else NULL, 
     return_codes_of_restarts = if(restarts > 0L) return_codes else NULL,
     all_solutions = all_solutions,
-    penalty = penalty,
     time = end_time - start_time
   )
   model