refactored apes() and bias_corr()

pachadotdev · Jul 23, 2024 · 0f91f2c · 0f91f2c
1 parent 1cb54e7
commit 0f91f2c
Show file tree

Hide file tree

Showing 33 changed files with 254 additions and 157 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -73,8 +73,8 @@ importFrom(stats,rgamma)
 importFrom(stats,rlogis)
 importFrom(stats,rnorm)
 importFrom(stats,rpois)
-importFrom(stats,sd)
 importFrom(stats,terms)
+importFrom(stats,var)
 importFrom(stats,vcov)
 importFrom(utils,combn)
 useDynLib(capybara, .registration = TRUE)
diff --git a/NEWS.md b/NEWS.md
@@ -3,8 +3,7 @@
 * Moves all the heavy computation to C++ using Armadillo and it exports the 
   results to R. Previously, there were multiple data copies between R and C++
   that added overhead to the computations.
-* For a future release, I may rewrite the offset and APES computation to C++,
-  but with those the overhead is minimal.
+* The previous versions returned MX by default, now it has to be specified.
 
 # capybara 0.5.2
 

diff --git a/R/bias_corr.R b/R/bias_corr.R
@@ -16,7 +16,7 @@
 #'  weakly exogenous regressors, e.g. lagged outcome variables, we suggest to
 #'  choose a bandwidth between one and four. Note that the order of factors to
 #'  be partialed out is important for bandwidths larger than zero.
-#' @param panel.structure a string equal to \code{"classic"} or \code{"network"}
+#' @param panel_structure a string equal to \code{"classic"} or \code{"network"}
 #'  which determines the structure of the panel used. \code{"classic"} denotes
 #'  panel structures where for example the same cross-sectional units are
 #'  observed several times (this includes pseudo panels). \code{"network"}
@@ -59,28 +59,28 @@
 bias_corr <- function(
     object = NULL,
     L = 0L,
-    panel.structure = c("classic", "network")) {
+    panel_structure = c("classic", "network")) {
   # Check validity of 'object'
   if (is.null(object)) {
     stop("'object' has to be specified.", call. = FALSE)
   } else if (!inherits(object, "feglm")) {
     stop("'bias_corr' called on a non-'feglm' object.", call. = FALSE)
   }
 
-  # Check validity of 'panel.structure'
-  panel.structure <- match.arg(panel.structure)
+  # Check validity of 'panel_structure'
+  panel_structure <- match.arg(panel_structure)
 
   # Extract model information
-  beta.uncorr <- object[["coefficients"]]
+  beta_uncorr <- object[["coefficients"]]
   control <- object[["control"]]
   data <- object[["data"]]
   eps <- .Machine[["double.eps"]]
   family <- object[["family"]]
   formula <- object[["formula"]]
   lvls_k <- object[["lvls_k"]]
-  nms.sp <- names(beta.uncorr)
+  nms.sp <- names(beta_uncorr)
   nt <- object[["nobs"]][["nobs"]]
-  k.vars <- names(lvls_k)
+  k_vars <- names(lvls_k)
   k <- length(lvls_k)
 
   # Check if binary choice model
@@ -100,11 +100,11 @@ bias_corr <- function(
   }
 
   # Check if provided object matches requested panel structure
-  if (panel.structure == "classic") {
+  if (panel_structure == "classic") {
     if (!(k %in% c(1L, 2L))) {
       stop(
         paste(
-          "panel.structure == 'classic' expects a one- or two-way fixed",
+          "panel_structure == 'classic' expects a one- or two-way fixed",
           "effect model."
         ),
         call. = FALSE
@@ -114,7 +114,7 @@ bias_corr <- function(
     if (!(k %in% c(2L, 3L))) {
       stop(
         paste(
-          "panel.structure == 'network' expects a two- or three-way fixed",
+          "panel_structure == 'network' expects a two- or three-way fixed",
           "effects model."
         ),
         call. = FALSE
@@ -129,17 +129,17 @@ bias_corr <- function(
   wt <- object[["weights"]]
 
   # Generate auxiliary list of indexes for different sub panels
-  k.list <- get_index_list_(k.vars, data)
+  k_list <- get_index_list_(k_vars, data)
 
   # Compute derivatives and weights
   eta <- object[["eta"]]
   mu <- family[["linkinv"]](eta)
-  mu.eta <- family[["mu.eta"]](eta)
+  mu_eta <- family[["mu.eta"]](eta)
   v <- wt * (y - mu)
-  w <- wt * mu.eta
+  w <- wt * mu_eta
   z <- wt * partial_mu_eta_(eta, family, 2L)
   if (family[["link"]] != "logit") {
-    h <- mu.eta / family[["variance"]](mu)
+    h <- mu_eta / family[["variance"]](mu)
     v <- h * v
     w <- h * w
     z <- h * z
@@ -150,54 +150,54 @@ bias_corr <- function(
   if (control[["keep_mx"]]) {
     MX <- object[["MX"]]
   } else {
-    MX <- center_variables_(X, w, k.list, control[["center_tol"]], 10000L)
+    MX <- center_variables_r_(X, w, k_list, control[["center_tol"]], 10000L)
   }
 
   # Compute bias terms for requested bias correction
-  if (panel.structure == "classic") {
+  if (panel_structure == "classic") {
     # Compute \hat{B} and \hat{D}
-    b <- as.vector(group_sums_(MX * z, w, k.list[[1L]])) / 2.0 / nt
+    b <- as.vector(group_sums_(MX * z, w, k_list[[1L]])) / 2.0 / nt
     if (k > 1L) {
-      b <- b + as.vector(group_sums_(MX * z, w, k.list[[2L]])) / 2.0 / nt
+      b <- b + as.vector(group_sums_(MX * z, w, k_list[[2L]])) / 2.0 / nt
     }
 
     # Compute spectral density part of \hat{B}
     if (L > 0L) {
-      b <- (b + group_sums_spectral_(MX * w, v, w, L, k.list[[1L]])) / nt
+      b <- (b + group_sums_spectral_(MX * w, v, w, L, k_list[[1L]])) / nt
     }
   } else {
     # Compute \hat{D}_{1}, \hat{D}_{2}, and \hat{B}
-    b <- group_sums_(MX * z, w, k.list[[1L]]) / (2.0 * nt)
-    b <- (b + group_sums_(MX * z, w, k.list[[2L]])) / (2.0 * nt)
+    b <- group_sums_(MX * z, w, k_list[[1L]]) / (2.0 * nt)
+    b <- (b + group_sums_(MX * z, w, k_list[[2L]])) / (2.0 * nt)
     if (k > 2L) {
-      b <- (b + group_sums_(MX * z, w, k.list[[3L]])) / (2.0 * nt)
+      b <- (b + group_sums_(MX * z, w, k_list[[3L]])) / (2.0 * nt)
     }
 
     # Compute spectral density part of \hat{B}
     if (k > 2L && L > 0L) {
-      b <- (b + group_sums_spectral_(MX * w, v, w, L, k.list[[3L]])) / nt
+      b <- (b + group_sums_spectral_(MX * w, v, w, L, k_list[[3L]])) / nt
     }
   }
 
   # Compute bias-corrected structural parameters
-  beta <- beta.uncorr - solve(object[["hessian"]] / nt, b)
+  beta <- beta_uncorr - solve(object[["hessian"]] / nt, b)
   names(beta) <- nms.sp
 
   # Update \eta and first- and second-order derivatives
   eta <- feglm_offset_(object, X %*% beta)
   mu <- family[["linkinv"]](eta)
-  mu.eta <- family[["mu.eta"]](eta)
+  mu_eta <- family[["mu.eta"]](eta)
   v <- wt * (y - mu)
-  w <- wt * mu.eta
+  w <- wt * mu_eta
   if (family[["link"]] != "logit") {
-    h <- mu.eta / family[["variance"]](mu)
+    h <- mu_eta / family[["variance"]](mu)
     v <- h * v
     w <- h * w
     rm(h)
   }
 
   # Update centered regressor matrix
-  MX <- center_variables_r_(X, w, k.list, control[["center_tol"]], 10000L)
+  MX <- center_variables_r_(X, w, k_list, control[["center_tol"]], 10000L)
   colnames(MX) <- nms.sp
 
   # Update hessian
@@ -209,9 +209,9 @@ bias_corr <- function(
   object[["eta"]] <- eta
   if (control[["keep_mx"]]) object[["MX"]] <- MX
   object[["hessian"]] <- H
-  object[["coefficients.uncorr"]] <- beta.uncorr
-  object[["bias.term"]] <- b
-  object[["panel.structure"]] <- panel.structure
+  object[["coefficients_uncorr"]] <- beta_uncorr
+  object[["bias_term"]] <- b
+  object[["panel_structure"]] <- panel_structure
   object[["bandwidth"]] <- L
 
   # Add additional class to result list

diff --git a/R/capybara-package.R b/R/capybara-package.R
@@ -18,7 +18,7 @@
 #' @importFrom MASS negative.binomial theta.ml
 #' @importFrom rlang sym :=
 #' @importFrom stats as.formula binomial model.matrix na.omit gaussian poisson
-#'  pnorm printCoefmat rgamma rlogis rnorm rpois terms vcov predict sd
+#'  pnorm printCoefmat rgamma rlogis rnorm rpois terms vcov predict var
 #'  complete.cases
 #' @importFrom utils combn
 #' @useDynLib capybara, .registration = TRUE

diff --git a/R/cpp11.R b/R/cpp11.R
@@ -28,6 +28,10 @@ feglm_fit_ <- function(beta_r, eta_r, y_r, x_r, wt_r, theta, family, control, k_
   .Call(`_capybara_feglm_fit_`, beta_r, eta_r, y_r, x_r, wt_r, theta, family, control, k_list)
 }
 
+feglm_offset_fit_ <- function(eta_r, y_r, offset_r, wt_r, family, control, k_list) {
+  .Call(`_capybara_feglm_offset_fit_`, eta_r, y_r, offset_r, wt_r, family, control, k_list)
+}
+
 kendall_cor_ <- function(m) {
   .Call(`_capybara_kendall_cor_`, m)
 }

diff --git a/R/feglm.R b/R/feglm.R
@@ -157,19 +157,15 @@ feglm <- function(
   }
   dimnames(fit[["hessian"]]) <- list(nms_sp, nms_sp)
 
-  # Generate result list ----
-  reslist <- c(
-    fit, list(
-      nobs    = nobs,
-      lvls_k  = lvls_k,
-      nms_fe  = nms_fe,
-      formula = formula,
-      data    = data,
-      family  = family,
-      control = control
-    )
-  )
+  # Add to fit list ----
+  fit[["nobs"]] <- nobs
+  fit[["lvls_k"]] <- lvls_k
+  fit[["nms_fe"]] <- nms_fe
+  fit[["formula"]] <- formula
+  fit[["data"]] <- data
+  fit[["family"]] <- family
+  fit[["control"]] <- control
 
   # Return result list ----
-  structure(reslist, class = "feglm")
+  structure(fit, class = "feglm")
 }
diff --git a/R/feglm_control.R b/R/feglm_control.R
@@ -39,7 +39,7 @@ feglm_control <- function(
     limit = 10L,
     trace = FALSE,
     drop_pc = TRUE,
-    keep_mx = TRUE) {
+    keep_mx = FALSE) {
   # Check validity of tolerance parameters
   if (dev_tol <= 0.0 || center_tol <= 0.0) {
     stop(

diff --git a/R/feglm_offset.R b/R/feglm_offset.R
@@ -27,7 +27,7 @@ feglm_offset_ <- function(object, offset) {
   # Generate auxiliary list of indexes to project out the fixed effects
   k_list <- get_index_list_(k_vars, data)
 
-  # Compute starting guess for \eta
+  # Compute starting guess for eta
   if (family[["family"]] == "binomial") {
     eta <- rep(family[["linkfun"]](sum(wt * (y + 0.5) / 2.0) / sum(wt)), nt)
   } else if (family[["family"]] %in% c("Gamma", "inverse.gaussian")) {
@@ -36,54 +36,8 @@ feglm_offset_ <- function(object, offset) {
     eta <- rep(family[["linkfun"]](sum(wt * (y + 0.1)) / sum(wt)), nt)
   }
 
-  # Compute initial quantities for the maximization routine
-  mu <- family[["linkinv"]](eta)
-  dev <- sum(family[["dev.resids"]](y, mu, wt))
-  Myadj <- as.matrix(numeric(nt))
-
-  # Start maximization of the log-likelihood
-  for (iter in seq.int(iter_max)) {
-    # Store \eta, \beta, and deviance of the previous iteration
-    eta_old <- eta
-    dev_old <- dev
-
-    # Compute weights and dependent variable
-    mu.eta <- family[["mu.eta"]](eta)
-    w <- (wt * mu.eta^2) / family[["variance"]](mu)
-    yadj <- (y - mu) / mu.eta + eta - offset
-
-    # Centering dependent variable and compute \eta update
-    Myadj <- center_variables_r_(Myadj + yadj, w, k_list, center_tol, 10000L)
-    eta_upd <- yadj - drop(Myadj) + offset - eta
-
-    # Step-halving with three checks
-    # 1. finite deviance
-    # 2. valid \eta and \mu
-    # 3. improvement as in glm2
-    rho <- 1.0
-    for (inner.iter in seq.int(50L)) {
-      eta <- eta_old + rho * eta_upd
-      mu <- family[["linkinv"]](eta)
-      dev <- sum(family[["dev.resids"]](y, mu, wt))
-      dev.crit <- is.finite(dev)
-      val.crit <- family[["valideta"]](eta) && family[["validmu"]](mu)
-      imp.crit <- (dev - dev_old) / (0.1 + abs(dev)) <= -dev_tol
-      if (dev.crit && val.crit && imp.crit) break
-      rho <- rho / 2.0
-    }
-
-    # Check if step-halving failed
-    if (!dev.crit || !val.crit) {
-      stop("Inner loop failed; cannot correct step size.", call. = FALSE)
-    }
-
-    # Check termination condition
-    if (abs(dev - dev_old) / (0.1 + abs(dev)) < dev_tol) break
-
-    # Update starting guesses for acceleration
-    Myadj <- Myadj - yadj
-  }
-
   # Return eta
-  eta
+  if (is.integer(y)) { y <- as.numeric(y) }
+  feglm_offset_fit_(eta, y, offset, wt, family[["family"]], control,
+    k_list)
 }
diff --git a/R/helpers.R b/R/helpers.R
@@ -379,7 +379,7 @@ get_score_matrix_ <- function(object) {
     attr(X, "dimnames") <- NULL
 
     # Center variables
-    MX <- center_variables_(X, NA_real_, w, k.list, control[["center_tol"]], 10000L, FALSE)
+    MX <- center_variables_r_(X, w, k.list, control[["center_tol"]], 10000L)
     colnames(MX) <- nms_sp
   }
 

diff --git a/README.Rmd b/README.Rmd
@@ -125,7 +125,7 @@ Median time for the different models in the book
 |:------------|-------:|-----------------:|------------:|-----------------:|----------------------------:|--------------:|
 | Alpaca      |   0.4s |             2.6s |        1.6s |             2.0s |                        3.1s |          5.3s |
 | Base R      | 120.0s |             2.0m |     1380.0s |          1440.0s |                     1380.0s |       1500.0s |
-| **Capybara**|   0.3s |             2.2s |        1.4s |             1.7s |                        2.1s |          3.9s |
+| **Capybara**|   0.3s |             2.0s |        1.2s |             1.4s |                        1.7s |          3.4s |
 | Fixest      |   0.1s |             0.5s |        0.1s |             0.2s |                        0.3s |          0.5s |
 
 Memory allocation for the same models
@@ -134,7 +134,7 @@ Memory allocation for the same models
 |:------------|-------:|---------------:|-----------:|-----------------:|--------------------------:|-------------:|
 |Alpaca       |  307MB |          341MB |      306MB |            336MB |                     395MB |        541MB |
 |Base R       | 3000MB |         3000MB |    12000MB |          12000GB |                   12000GB |      12000MB |
-|**Capybara** |   29MB |           34MB |       21MB |             24MB |                      30MB |         47MB |
+|**Capybara** |   27MB |           32MB |       20MB |             23MB |                      29MB |         43MB |
 |Fixest       |   44MB |           36MB |       27MB |             32MB |                      41MB |         63MB |
 
 # Debugging

diff --git a/README.md b/README.md
@@ -121,7 +121,7 @@ Analysis](https://www.wto.org/english/res_e/publications_e/advancedguide2016_e.h
 | :----------- | -----: | --------------: | ----------: | ----------------: | -------------------------: | ------------: |
 | Alpaca       |   0.4s |            2.6s |        1.6s |              2.0s |                       3.1s |          5.3s |
 | Base R       | 120.0s |            2.0m |     1380.0s |           1440.0s |                    1380.0s |       1500.0s |
-| **Capybara** |   0.3s |            2.2s |        1.4s |              1.7s |                       2.1s |          3.9s |
+| **Capybara** |   0.3s |            2.0s |        1.2s |              1.4s |                       1.7s |          3.4s |
 | Fixest       |   0.1s |            0.5s |        0.1s |              0.2s |                       0.3s |          0.5s |
 
 Memory allocation for the same models
@@ -130,7 +130,7 @@ Memory allocation for the same models
 | :----------- | -----: | --------------: | ----------: | ----------------: | -------------------------: | ------------: |
 | Alpaca       |  307MB |           341MB |       306MB |             336MB |                      395MB |         541MB |
 | Base R       | 3000MB |          3000MB |     12000MB |           12000GB |                    12000GB |       12000MB |
-| **Capybara** |   29MB |            34MB |        21MB |              24MB |                       30MB |          47MB |
+| **Capybara** |   27MB |            32MB |        20MB |              23MB |                       29MB |          43MB |
 | Fixest       |   44MB |            36MB |        27MB |              32MB |                       41MB |          63MB |
 
 # Debugging

diff --git a/dev/benchmarks_tests_agtpa_capybara_only.R b/dev/benchmarks_tests_agtpa_capybara_only.R
@@ -1,7 +1,7 @@
 # this is not just about speed/memory, but also about obtaining the same
 # slopes as in base R
 
-library(alpaca)
+library(capybara)
 library(dplyr)
 library(tidyr)
 library(janitor)

diff --git a/dev/test-offset.R b/dev/test-offset.R
@@ -0,0 +1,18 @@
+test_that("offset works", {
+  m1 <- feglm(mpg ~ wt | cyl, data = mtcars, family = poisson())
+  y <- predict(m1, type = "response")
+  o1 <- feglm_offset_(m1, y)
+
+  # m2 <- alpaca::feglm(mpg ~ wt | cyl, data = mtcars, family = poisson())
+  # o2 <- drop(alpaca:::feglmOffset(m2, y)
+  # datapasta::vector_paste(round(o2, 4))
+  o2 <- c(
+    3.018703, 3.011154, 3.056387, 3.001613, 2.979713, 2.995091, 2.976723,
+    3.026537, 3.027809, 2.995612, 2.995612, 2.999650, 3.006936, 3.005836,
+    2.977558, 2.974679, 2.975975, 3.094682, 3.062526, 3.053450, 3.029361,
+    2.956144, 2.958109, 2.949010, 2.948902, 3.049442, 3.041447, 3.066858,
+    2.964431, 2.992499, 2.955002, 3.018302
+  )
+
+  expect_equal(round(o1, 4), round(o2, 4))
+})