v0.5.1

pachadotdev · Jun 1, 2024 · 8f4b2de · 8f4b2de
1 parent 342ceea
commit 8f4b2de
Show file tree

Hide file tree

Showing 49 changed files with 1,010 additions and 408 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,7 +2,7 @@ Package: capybara
 Type: Package
 Title: Fast and Memory Efficient Fitting of Linear Models With High-Dimensional
     Fixed Effects
-Version: 0.5.0
+Version: 0.5.1
 Authors@R: c(
     person(
         given = "Mauricio",
@@ -42,5 +42,6 @@ LinkingTo: cpp11, cpp11armadillo
 VignetteBuilder: knitr
 Config/testthat/edition: 3
 Remotes: 
+    pachadotdev/cpp11armadillo
     ropenscilabs/srr
 Roxygen: list(markdown = TRUE, roclets = c("namespace", "rd", "srr::srr_stats_roclet"))
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+# capybara 0.5.1
+
+* Uses `arma::field` consistently instead of `std::vector<std::vector<>>` for indices.
+* Changes the linear algebra routines, e.g., computes the inverse with `arma::inv` instead of solving via `arma::qr`.
+* Replaces multiple for loops with dedicated Armadillo functions.
+
 # capybara 0.5.0
 
 * Avoids for loops in the C++ code, and instead uses Armadillo's functions.

diff --git a/R/cpp11.R b/R/cpp11.R
@@ -32,20 +32,12 @@ gamma_ <- function(mx, hessian, j, ppsi, v, nt_full) {
   .Call(`_capybara_gamma_`, mx, hessian, j, ppsi, v, nt_full)
 }
 
-chol_crossprod_ <- function(x) {
-  .Call(`_capybara_chol_crossprod_`, x)
+inv_ <- function(h) {
+  .Call(`_capybara_inv_`, h)
 }
 
-chol2inv_ <- function(r) {
-  .Call(`_capybara_chol2inv_`, r)
-}
-
-chol_ <- function(x) {
-  .Call(`_capybara_chol_`, x)
-}
-
-qr_rank_ <- function(x) {
-  .Call(`_capybara_qr_rank_`, x)
+rank_ <- function(x) {
+  .Call(`_capybara_rank_`, x)
 }
 
 solve_bias_ <- function(beta_uncorr, hessian, nt, b) {
@@ -68,8 +60,8 @@ update_nu_ <- function(y, mu, mu_eta) {
   .Call(`_capybara_update_nu_`, y, mu, mu_eta)
 }
 
-solve_beta_ <- function(mx, mnu, wtilde, epsilon, weighted) {
-  .Call(`_capybara_solve_beta_`, mx, mnu, wtilde, epsilon, weighted)
+solve_beta_ <- function(mx, mnu, wtilde, weighted) {
+  .Call(`_capybara_solve_beta_`, mx, mnu, wtilde, weighted)
 }
 
 solve_eta_ <- function(mx, mnu, nu, beta) {

diff --git a/R/feglm.R b/R/feglm.R
@@ -76,7 +76,6 @@ feglm <- function(
   check_data_(data)
 
   # Check validity of family ----
-  # TODO: Add quasi families later
   check_family_(family)
 
   # Check validity of control + Extract control list ----

diff --git a/R/generics_vcov.R b/R/generics_vcov.R
@@ -55,32 +55,25 @@ vcov.feglm <- function(
   H <- object[["Hessian"]]
   p <- ncol(H)
   if (type == "hessian") {
-    # Check if the Hessian is invertible and compute its inverse
-    R <- try(chol_(H), silent = TRUE)
-    if (inherits(R, "try-error")) {
+    # If the Hessian is invertible, compute its inverse
+    V <- try(inv_(H), silent = TRUE)
+    if (inherits(V, "try-error")) {
       V <- matrix(Inf, p, p)
-    } else {
-      V <- chol2inv_(R)
     }
   } else {
     G <- getScoreMatrix(object)
     if (type == "outer.product") {
       # Check if the OPG is invertible and compute its inverse
-      R <- try(chol_crossprod_(G), silent = TRUE)
-      if (inherits(R, "try-error")) {
+      V <- try(inv_(G), silent = TRUE)
+      if (inherits(V, "try-error")) {
         V <- matrix(Inf, p, p)
-      } else {
-        V <- chol2inv_(R)
       }
     } else {
       # Check if the Hessian is invertible and compute its inverse
-      R <- try(chol_(H), silent = TRUE)
-      if (inherits(R, "try-error")) {
+      V <- try(inv_(H), silent = TRUE)
+      if (inherits(V, "try-error")) {
         V <- matrix(Inf, p, p)
       } else {
-        # Compute the inverse of the empirical Hessian
-        A <- chol2inv_(R)
-
         # Compute inner part of the sandwich formula
         if (type == "sandwich") {
           B <- crossprod_(G, NA_real_, FALSE, FALSE)
@@ -102,7 +95,7 @@ vcov.feglm <- function(
             stop(
               paste(
                 "At least one cluster variable was not found.",
-                "Ensure to pass variables that are not part of the model",
+                "Ensure to pass variables that are not part of the model",
                 "itself, but are required to compute clustered standard errors",
                 "to 'feglm'. This can be done via 'formula'. See documentation",
                 "for details."
@@ -148,7 +141,7 @@ vcov.feglm <- function(
         }
 
         # Sandwich formula
-        V <- sandwich_(A, B)
+        V <- sandwich_(V, B)
       }
     }
   }

diff --git a/R/helpers.R b/R/helpers.R
@@ -229,7 +229,7 @@ model_response_ <- function(data, formula) {
 }
 
 check_linear_dependence_ <- function(X, p) {
-  if (qr_rank_(X) < p) {
+  if (rank_(X) < p) {
     stop("Linear dependent terms detected.", call. = FALSE)
   }
 }

diff --git a/R/internals.R b/R/internals.R
@@ -25,7 +25,7 @@ felm_fit_ <- function(y, X, wt, k.list, control) {
 
   # Compute the OLS estimate
   # beta <- as.vector(qr.solve(MX, y, epsilon))
-  beta <- solve_beta_(MX, y, NA_real_, epsilon, FALSE)
+  beta <- solve_beta_(MX, y, NA_real_, FALSE)
 
   # Generate result list
   reslist <- list(
@@ -81,7 +81,7 @@ feglm_fit_ <- function(beta, eta, y, X, wt, k.list, family, control) {
     # Compute update step and update eta
     # beta.upd <- as.vector(qr.solve(MX * w.tilde, Mnu * w.tilde, epsilon))
     # eta.upd <- nu - as.vector(Mnu - MX %*% beta.upd)
-    beta.upd <- solve_beta_(MX, Mnu, w.tilde, epsilon, TRUE)
+    beta.upd <- solve_beta_(MX, Mnu, w.tilde, TRUE)
     eta.upd <- solve_eta_(MX, Mnu, nu, beta.upd)
 
     # Step-halving with three checks

diff --git a/README.Rmd b/README.Rmd
@@ -123,19 +123,19 @@ Median time for the different models in the book
 
 |package      |    PPML| Trade Diversion| Endogeneity| Reverse Causality| Non-linear/Phasing Effects| Globalization|
 |:------------|-------:|---------------:|-----------:|-----------------:|--------------------------:|-------------:|
-|Alpaca       | 346.4ms|           2.52s|       1.51s|            1.9s  |                      2.96s|         5.57s|
-|Base R       |   1.5m |           1.53m|      23.43m|           23.52m |                     23.16m|        24.85m|
-|**Capybara** |   440ms|           2.86s|       1.92s|            2.29s |                      2.94s|         4.46s|
-|Fixest       |  64.9ms|           503ms|    106.14ms|          145.04ms|                   243.61ms|       524.7ms|
+|Alpaca       |   261ms|             2s |         2s |               2s |                        3s |           6s |
+|Base R       |     2m |             2m |        23m |              24m |                       23m |          25m |
+|**Capybara** |   364ms|             3s |         1s |               2s |                        2s |           4s |
+|Fixest       |    69ms|           488ms|       125ms|             148ms|                      251ms|         497ms|
 
 Memory allocation for the same models
 
 |package      |    PPML| Trade Diversion| Endogeneity| Reverse Causality| Non-linear/Phasing Effects| Globalization|
 |:------------|-------:|---------------:|-----------:|-----------------:|--------------------------:|-------------:|
-|Alpaca       |   306MB|         340.8MB|     306.4MB|           335.9MB|                    394.6MB|       541.3MB|
-|Base R       |   2.7GB|           2.6GB|      11.9GB|           11.92GB|                    11.95GB|       11.97GB|
-|**Capybara** |   210MB|           235MB|       241MB|             249MB|                      263MB|         299MB|
-|Fixest       |  44.4MB|          36.4MB|      27.9MB|            32.2MB|                     40.9MB|        62.7MB|
+|Alpaca       |   306MB|           341MB|       306MB|             336MB|                      395MB|         541MB|
+|Base R       |     3GB|             3GB|        12GB|              12GB|                       12GB|          12GB|
+|**Capybara** |   211MB|           235MB|       243MB|             250MB|                      265MB|         302MB|
+|Fixest       |    44MB|            36MB|        27MB|              32MB|                       41MB|          63MB|
 
 # Debugging