Skip to content

Commit

Permalink
v0.3
Browse files Browse the repository at this point in the history
  • Loading branch information
pachadotdev committed Mar 3, 2024
1 parent 2634f5c commit d998c04
Show file tree
Hide file tree
Showing 46 changed files with 456 additions and 140 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: capybara
Type: Package
Title: Fast and Memory Efficient Fitting of Linear Models With High-Dimensional
Fixed Effects
Version: 0.2
Version: 0.3
Authors@R: c(
person(
given = "Mauricio",
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# capybara 0.3

* Reduces memory footprint ~45% by moving some computation to Armadillo's side

# capybara 0.2

* Includes pseudo R2 (same as Stata) for Poisson models
Expand Down
10 changes: 5 additions & 5 deletions R/apes.R
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ apes <- function(
if (control[["keep.mx"]]) {
MX <- object[["MX"]]
} else {
MX <- center_variables_(X, w, k.list, control[["center.tol"]], 100000)
MX <- center_variables_(X, NA_real_, w, k.list, control[["center.tol"]], 100000L, FALSE)
}

# Compute average partial effects, derivatives, and Jacobian
Expand Down Expand Up @@ -243,7 +243,7 @@ apes <- function(

# Compute projection and residual projection of \Psi
Psi <- -Delta1 / w
MPsi <- center_variables_(Psi, w, k.list, control[["center.tol"]], 100000L)
MPsi <- center_variables_(Psi, NA_real_, w, k.list, control[["center.tol"]], 100000L, FALSE)
PPsi <- Psi - MPsi
rm(Delta1, Psi)

Expand Down Expand Up @@ -303,18 +303,18 @@ apes <- function(
# Compute covariance matrix
WinvJ <- solve(object[["Hessian"]] / nt.full, J)
Gamma <- (MX %*% WinvJ - PPsi) * v / nt.full
V <- crossprod(Gamma)
V <- crossprod_(Gamma, NA_real_, FALSE, FALSE)
if (adj > 0.0) {
# Simplify covariance if sampling assumptions are imposed
if (sampling.fe == "independence") {
V <- V + adj * group_sums_var_(Delta, k.list[[1L]])
if (k > 1L) {
V <- V + adj * (group_sums_var_(Delta, k.list[[2L]]) - crossprod(Delta))
V <- V + adj * (group_sums_var_(Delta, k.list[[2L]]) - crossprod_(Delta, NA_real_, FALSE, FALSE))
}
if (panel.structure == "network") {
if (k > 2L) {
V <- V + adj * (group_sums_var_(Delta, k.list[[3L]]) -
crossprod(Delta))
crossprod_(Delta, NA_real_, FALSE, FALSE))
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions R/bias_corr.R
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ bias_corr <- function(
if (control[["keep.mx"]]) {
MX <- object[["MX"]]
} else {
MX <- center_variables_(X, w, k.list, control[["center.tol"]], 100000L)
MX <- center_variables_(X, NA_real_, w, k.list, control[["center.tol"]], 100000L, FALSE)
}

# Compute bias terms for requested bias correction
Expand Down Expand Up @@ -200,11 +200,11 @@ bias_corr <- function(
}

# Update centered regressor matrix
MX <- center_variables_(X, w, k.list, control[["center.tol"]], 100000L)
MX <- center_variables_(X, NA_real_, w, k.list, control[["center.tol"]], 100000L, FALSE)
colnames(MX) <- nms.sp

# Update Hessian
H <- crossprod(MX * sqrt(w))
H <- crossprod_(MX, w, TRUE, TRUE)
dimnames(H) <- list(nms.sp, nms.sp)

# Update result list
Expand Down
28 changes: 26 additions & 2 deletions R/cpp11.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,33 @@
# Generated by cpp11: do not edit by hand

center_variables_ <- function(V_r, w_r, klist, tol, maxiter) {
.Call(`_capybara_center_variables_`, V_r, w_r, klist, tol, maxiter)
center_variables_ <- function(V_r, v_sum_r, w_r, klist, tol, maxiter, sum_v) {
.Call(`_capybara_center_variables_`, V_r, v_sum_r, w_r, klist, tol, maxiter, sum_v)
}

solve_beta_ <- function(mx, mnu, wtilde, epsilon, weighted) {
.Call(`_capybara_solve_beta_`, mx, mnu, wtilde, epsilon, weighted)
}

get_alpha_ <- function(p_r, klist, tol) {
.Call(`_capybara_get_alpha_`, p_r, klist, tol)
}

solve_eta_ <- function(mx, mnu, nu, beta) {
.Call(`_capybara_solve_eta_`, mx, mnu, nu, beta)
}

solve_eta2_ <- function(yadj, myadj, offset, eta) {
.Call(`_capybara_solve_eta2_`, yadj, myadj, offset, eta)
}

crossprod_ <- function(x, w, weighted, root_weights) {
.Call(`_capybara_crossprod_`, x, w, weighted, root_weights)
}

chol_crossprod_ <- function(x) {
.Call(`_capybara_chol_crossprod_`, x)
}

group_sums_ <- function(M_r, w_r, jlist) {
.Call(`_capybara_group_sums_`, M_r, w_r, jlist)
}
Expand All @@ -27,3 +47,7 @@ group_sums_cov_ <- function(M_r, N_r, jlist) {
pairwise_cor_ <- function(y, yhat) {
.Call(`_capybara_pairwise_cor_`, y, yhat)
}

qr_rank_ <- function(x) {
.Call(`_capybara_qr_rank_`, x)
}
12 changes: 0 additions & 12 deletions R/cpp11_default_parameters.R

This file was deleted.

9 changes: 5 additions & 4 deletions R/generics_vcov.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ vcov.feglm <- function(
G <- getScoreMatrix(object)
if (type == "outer.product") {
# Check if the OPG is invertible and compute its inverse
R <- try(chol(crossprod(G)), silent = TRUE)
R <- try(chol_crossprod_(G), silent = TRUE)
if (inherits(R, "try-error")) {
V <- matrix(Inf, p, p)
} else {
Expand All @@ -83,7 +83,7 @@ vcov.feglm <- function(

# Compute inner part of the sandwich formula
if (type == "sandwich") {
B <- crossprod(G)
B <- crossprod_(G, NA_real_, FALSE, FALSE)
} else {
if (isFALSE(k >= 1L)) {
stop(
Expand Down Expand Up @@ -128,10 +128,11 @@ vcov.feglm <- function(
B.r <- matrix(0.0, p, p)
for (j in seq.int(ncol(cl.combn))) {
cl <- cl.combn[, j]
B.r <- B.r + crossprod(
B.r <- B.r + crossprod_(
as.matrix(
G[, lapply(.SD, sum), by = mget(cl), .SDcols = sp.vars][, (cl) := NULL]
)
),
NA_real_, FALSE, FALSE
)
}

Expand Down
2 changes: 1 addition & 1 deletion R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ model_response_ <- function(data, formula) {
}

check_linear_dependence_ <- function(X, p) {
if (qr(X)[["rank"]] < p) {
if (qr_rank_(X) < p) {
stop("Linear dependent terms detected.", call. = FALSE)
}

Expand Down
28 changes: 15 additions & 13 deletions R/internals.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ felm_fit_ <- function(y, X, wt, k.list, control) {
MX <- X

# Centering variables
MX <- center_variables_(MX, wt, k.list, center.tol, 10000L)
MX <- center_variables_(MX, NA_real_, wt, k.list, center.tol, 10000L, FALSE)

# Compute the OLS estimate
beta <- as.vector(qr.solve(MX, y, epsilon))
beta <- solve_beta_(MX, y, NA_real_, epsilon, FALSE)

# Generate result list
reslist <- list(
Expand Down Expand Up @@ -74,18 +74,20 @@ feglm_fit_ <- function(beta, eta, y, X, wt, k.list, family, control) {
nu <- (y - mu) / mu.eta

# Centering variables
Mnu <- center_variables_((Mnu + nu), w, k.list, center.tol, 10000L)
MX <- center_variables_(MX, w, k.list, center.tol, 10000L)
Mnu <- center_variables_(Mnu, nu, w, k.list, center.tol, 10000L, TRUE)
MX <- center_variables_(MX, NA_real_, w, k.list, center.tol, 10000L, FALSE)

# Compute update step and update \eta
beta.upd <- as.vector(qr.solve(MX * w.tilde, Mnu * w.tilde, epsilon))
eta.upd <- nu - as.vector(Mnu - MX %*% beta.upd)
# Compute update step and update eta
beta.upd <- solve_beta_(MX, Mnu, w.tilde, epsilon, TRUE)

eta.upd <- solve_eta_(MX, Mnu, nu, beta.upd)

# Step-halving with three checks
# 1. finite deviance
# 2. valid \eta and \mu
# 3. improvement as in glm2
rho <- 1.0

for (inner.iter in seq.int(50L)) {
eta <- eta.old + rho * eta.upd
beta <- beta.old + rho * beta.upd
Expand Down Expand Up @@ -141,9 +143,9 @@ feglm_fit_ <- function(beta, eta, y, X, wt, k.list, family, control) {
w <- (wt * mu.eta^2) / family[["variance"]](mu)

# Center variables
MX <- center_variables_(X, w, k.list, center.tol, 10000L)
MX <- center_variables_(X, NA_real_, w, k.list, center.tol, 10000L, FALSE)
# Recompute Hessian
H <- crossprod(MX * sqrt(w))
H <- crossprod_(MX, w, TRUE, TRUE)

# Generate result list
reslist <- list(
Expand Down Expand Up @@ -219,8 +221,9 @@ feglm_offset_ <- function(object, offset) {
yadj <- (y - mu) / mu.eta + eta - offset

# Centering dependent variable and compute \eta update
Myadj <- center_variables_((Myadj + yadj), w, k.list, center.tol, 10000L)
eta.upd <- yadj - as.vector(Myadj) + offset - eta
Myadj <- center_variables_(Myadj, yadj, w, k.list, center.tol, 10000L, TRUE)
# eta.upd <- yadj - as.vector(Myadj) + offset - eta
eta.upd <- solve_eta2_(yadj, Myadj, offset, eta)

# Step-halving with three checks
# 1. finite deviance
Expand Down Expand Up @@ -297,7 +300,7 @@ getScoreMatrix <- function(object) {
attr(X, "dimnames") <- NULL

# Center variables
MX <- center_variables_(X, w, k.list, control[["center.tol"]], 10000L)
MX <- center_variables_(X, NA_real_, w, k.list, control[["center.tol"]], 10000L, FALSE)
colnames(MX) <- nms.sp
}

Expand Down Expand Up @@ -340,7 +343,6 @@ partial_mu_eta_ <- function(eta, family, order) {
}
}


# Returns suitable name for a temporary variable
temp_var_ <- function(data) {
repeat {
Expand Down
10 changes: 5 additions & 5 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ knitr::opts_chunk$set(
<!-- badges: start -->
[![R-CMD-check](https://github.com/pachadotdev/capybara/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/pachadotdev/capybara/actions/workflows/R-CMD-check.yaml)
[![codecov](https://codecov.io/gh/pachadotdev/capybara/graph/badge.svg?token=kDP0pWmfRk)](https://codecov.io/gh/pachadotdev/capybara)
[![BuyMeACoffee](https://raw.githubusercontent.com/pachadotdev/buymeacoffee-badges/main/bmc-donate-white.svg)](https://www.buymeacoffee.com/pacha)
[![BuyMeACoffee](https://raw.githubusercontent.com/pachadotdev/buymeacoffee-badges/main/bmc-donate-yellow.svg)](https://www.buymeacoffee.com/pacha)
<!-- badges: end -->

## About
Expand Down Expand Up @@ -123,17 +123,17 @@ Median time for the different models in the book
|package | PPML| Trade Diversion| Endogeneity| Reverse Causality| Non-linear/Phasing Effects| Globalization|
|:------------|-------:|---------------:|-----------:|-----------------:|--------------------------:|-------------:|
|Alpaca | 213.4ms| 2.3s| 1.35s| 1.86s| 2.59s| 4.96s|
|Base R | 1.5m| 1.53m| 23.43m| 23.52m| 23.16m| 24.85m|
|**Capybara** | 371ms| 3s| 1.34s| 1.71s| 2.46s| 4.64s|
|Base R | 1.5m | 1.53m| 23.43m| 23.52m| 23.16m| 24.85m|
|**Capybara** | 263ms | 2.98s| 1.43s| 1.78s| 2.58s| 4.51s|
|Fixest | 67.4ms| 477.08ms| 95.88ms| 136.21ms| 206.12ms| 415.31ms|

Memory allocation for the same models

|package | PPML| Trade Diversion| Endogeneity| Reverse Causality| Non-linear/Phasing Effects| Globalization|
|:------------|--------:|---------------:|-----------:|-----------------:|--------------------------:|-------------:|
|Alpaca | 304.8MB| 339.8MB| 306.3MB| 335.61MB| 393.86MB| 539.49MB|
|Alpaca | 304.8MB | 339.8MB| 306.3MB| 335.61MB| 393.86MB| 539.49MB|
|Base R | 2.73GB| 2.6GB| 11.9GB| 11.94GB| 11.95GB| 11.97GB|
|**Capybara** | 307MB| 341MB| 306MB| 336MB| 395MB| 541MB|
|**Capybara** | 207MB | 231MB| 237MB| 244MB| 258MB| 293MB|
|Fixest | 44.59MB| 36.59MB| 28.1MB| 32.43MB| 41.12MB| 62.87MB|

# Debugging
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

[![R-CMD-check](https://github.com/pachadotdev/capybara/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/pachadotdev/capybara/actions/workflows/R-CMD-check.yaml)
[![codecov](https://codecov.io/gh/pachadotdev/capybara/graph/badge.svg?token=kDP0pWmfRk)](https://codecov.io/gh/pachadotdev/capybara)
[![BuyMeACoffee](https://raw.githubusercontent.com/pachadotdev/buymeacoffee-badges/main/bmc-donate-white.svg)](https://www.buymeacoffee.com/pacha)
[![BuyMeACoffee](https://raw.githubusercontent.com/pachadotdev/buymeacoffee-badges/main/bmc-donate-yellow.svg)](https://www.buymeacoffee.com/pacha)
<!-- badges: end -->

## About
Expand Down
24 changes: 12 additions & 12 deletions dev/benchmarks_tests_agtpa.R
Original file line number Diff line number Diff line change
Expand Up @@ -175,41 +175,41 @@ bench_phasing <- readRDS("dev/bench_phasing.rds")
bench_globalization <- readRDS("dev/bench_globalization.rds")

bench_ppml %>%
mutate(package = c("Base R", "Capybara", "Fixest", "Alpaca")) %>%
mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>%
mutate(model = "PPML") %>%
select(model, package, median) %>%
pivot_wider(names_from = model, values_from = median) %>%
left_join(
bench_trade_diversion %>%
mutate(package = c("Base R", "Capybara", "Fixest", "Alpaca")) %>%
mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>%
mutate(model = "Trade Diversion") %>%
select(model, package, median) %>%
pivot_wider(names_from = model, values_from = median)
) %>%
left_join(
bench_endogeneity %>%
mutate(package = c("Base R", "Capybara", "Fixest", "Alpaca")) %>%
mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>%
mutate(model = "Endogeneity") %>%
select(model, package, median) %>%
pivot_wider(names_from = model, values_from = median)
) %>%
left_join(
bench_reverse_causality %>%
mutate(package = c("Base R", "Capybara", "Fixest", "Alpaca")) %>%
mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>%
mutate(model = "Reverse Causality") %>%
select(model, package, median) %>%
pivot_wider(names_from = model, values_from = median)
) %>%
left_join(
bench_phasing %>%
mutate(package = c("Base R", "Capybara", "Fixest", "Alpaca")) %>%
mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>%
mutate(model = "Non-linear/Phasing Effects") %>%
select(model, package, median) %>%
pivot_wider(names_from = model, values_from = median)
) %>%
left_join(
bench_globalization %>%
mutate(package = c("Base R", "Capybara", "Fixest", "Alpaca")) %>%
mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>%
mutate(model = "Globalization") %>%
select(model, package, median) %>%
pivot_wider(names_from = model, values_from = median)
Expand All @@ -218,41 +218,41 @@ bench_ppml %>%
kable()

bench_ppml %>%
mutate(package = c("Base R", "Capybara", "Fixest", "Alpaca")) %>%
mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>%
mutate(model = "PPML") %>%
select(model, package, mem_alloc) %>%
pivot_wider(names_from = model, values_from = mem_alloc) %>%
left_join(
bench_trade_diversion %>%
mutate(package = c("Base R", "Capybara", "Fixest", "Alpaca")) %>%
mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>%
mutate(model = "Trade Diversion") %>%
select(model, package, mem_alloc) %>%
pivot_wider(names_from = model, values_from = mem_alloc)
) %>%
left_join(
bench_endogeneity %>%
mutate(package = c("Base R", "Capybara", "Fixest", "Alpaca")) %>%
mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>%
mutate(model = "Endogeneity") %>%
select(model, package, mem_alloc) %>%
pivot_wider(names_from = model, values_from = mem_alloc)
) %>%
left_join(
bench_reverse_causality %>%
mutate(package = c("Base R", "Capybara", "Fixest", "Alpaca")) %>%
mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>%
mutate(model = "Reverse Causality") %>%
select(model, package, mem_alloc) %>%
pivot_wider(names_from = model, values_from = mem_alloc)
) %>%
left_join(
bench_phasing %>%
mutate(package = c("Base R", "Capybara", "Fixest", "Alpaca")) %>%
mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>%
mutate(model = "Non-linear/Phasing Effects") %>%
select(model, package, mem_alloc) %>%
pivot_wider(names_from = model, values_from = mem_alloc)
) %>%
left_join(
bench_globalization %>%
mutate(package = c("Base R", "Capybara", "Fixest", "Alpaca")) %>%
mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>%
mutate(model = "Globalization") %>%
select(model, package, mem_alloc) %>%
pivot_wider(names_from = model, values_from = mem_alloc)
Expand Down
Loading

0 comments on commit d998c04

Please sign in to comment.