Skip to content

Commit

Permalink
v0.4 using C API
Browse files Browse the repository at this point in the history
  • Loading branch information
pachadotdev committed Mar 4, 2024
1 parent a77f5ce commit 8c3c46b
Show file tree
Hide file tree
Showing 24 changed files with 417 additions and 293 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: capybara
Type: Package
Title: Fast and Memory Efficient Fitting of Linear Models With High-Dimensional
Fixed Effects
Version: 0.3.5
Version: 0.4
Authors@R: c(
person(
given = "Mauricio",
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# capybara 0.4

* Uses R's C API efficiently to add a few more memory optimizations

# capybara 0.3.5

* Uses Mat<T> consistently for all matrix operations (avoids vectors)
Expand Down
36 changes: 24 additions & 12 deletions R/cpp11.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,10 @@ center_variables_ <- function(V_r, v_sum_r, w_r, klist, tol, maxiter, sum_v) {
.Call(`_capybara_center_variables_`, V_r, v_sum_r, w_r, klist, tol, maxiter, sum_v)
}

# Thin R-side binding for the compiled routine `_capybara_solve_beta_`.
# Forwards mx, mnu, wtilde, epsilon and weighted unchanged through .Call() and
# returns whatever the native code returns.
# NOTE(review): R/internals.R calls this where it previously used
# qr.solve(MX * w.tilde, Mnu * w.tilde, epsilon); the unweighted caller passes
# wtilde = NA_real_ and weighted = FALSE. Confirm details in the C++ source.
solve_beta_ <- function(mx, mnu, wtilde, epsilon, weighted) {
.Call(`_capybara_solve_beta_`, mx, mnu, wtilde, epsilon, weighted)
}

# Thin R-side binding for the compiled routine `_capybara_get_alpha_`.
# Forwards p_r, klist and tol unchanged through .Call() and returns whatever
# the native code returns. No argument checking happens on the R side.
get_alpha_ <- function(p_r, klist, tol) {
.Call(`_capybara_get_alpha_`, p_r, klist, tol)
}

# Thin R-side binding for the compiled routine `_capybara_solve_eta_`.
# Forwards mx, mnu, nu and beta unchanged through .Call() and returns whatever
# the native code returns.
# NOTE(review): R/internals.R calls this where it previously computed
# nu - as.vector(Mnu - MX %*% beta.upd). Confirm against the C++ source.
solve_eta_ <- function(mx, mnu, nu, beta) {
.Call(`_capybara_solve_eta_`, mx, mnu, nu, beta)
}

# Thin R-side binding for the compiled routine `_capybara_solve_eta2_`.
# Forwards yadj, myadj, offset and eta unchanged through .Call() and returns
# whatever the native code returns. No argument checking happens on the R side.
solve_eta2_ <- function(yadj, myadj, offset, eta) {
.Call(`_capybara_solve_eta2_`, yadj, myadj, offset, eta)
}

# Thin R-side binding for the compiled routine `_capybara_group_sums_`.
# Forwards M_r, w_r and jlist unchanged through .Call() and returns whatever
# the native code returns. No argument checking happens on the R side.
group_sums_ <- function(M_r, w_r, jlist) {
.Call(`_capybara_group_sums_`, M_r, w_r, jlist)
}
Expand Down Expand Up @@ -72,6 +60,30 @@ sandwich_ <- function(a, b) {
.Call(`_capybara_sandwich_`, a, b)
}

# Thin R-side binding for the compiled routine `_capybara_update_beta_eta_`.
# Forwards old, upd and param unchanged through .Call() and returns whatever
# the native code returns.
# NOTE(review): R/internals.R calls this where it previously computed
# eta.old + rho * eta.upd (and the same for beta), i.e. presumably
# old + param * upd. Confirm against the C++ source.
update_beta_eta_ <- function(old, upd, param) {
.Call(`_capybara_update_beta_eta_`, old, upd, param)
}

# Thin R-side binding for the compiled routine `_capybara_update_nu_`.
# Forwards y, mu and mu_eta unchanged through .Call() and returns whatever the
# native code returns.
# NOTE(review): R/internals.R calls this where it previously computed
# (y - mu) / mu.eta. Confirm against the C++ source.
update_nu_ <- function(y, mu, mu_eta) {
.Call(`_capybara_update_nu_`, y, mu, mu_eta)
}

# Thin R-side binding for the compiled routine `_capybara_solve_beta_`.
# Forwards mx, mnu, wtilde, epsilon and weighted unchanged through .Call() and
# returns whatever the native code returns.
# NOTE(review): R/internals.R calls this where it previously used
# qr.solve(MX * w.tilde, Mnu * w.tilde, epsilon); the unweighted caller passes
# wtilde = NA_real_ and weighted = FALSE. Confirm details in the C++ source.
solve_beta_ <- function(mx, mnu, wtilde, epsilon, weighted) {
.Call(`_capybara_solve_beta_`, mx, mnu, wtilde, epsilon, weighted)
}

# Thin R-side binding for the compiled routine `_capybara_solve_eta_`.
# Forwards mx, mnu, nu and beta unchanged through .Call() and returns whatever
# the native code returns.
# NOTE(review): R/internals.R calls this where it previously computed
# nu - as.vector(Mnu - MX %*% beta.upd). Confirm against the C++ source.
solve_eta_ <- function(mx, mnu, nu, beta) {
.Call(`_capybara_solve_eta_`, mx, mnu, nu, beta)
}

# Thin R-side binding for the compiled routine `_capybara_solve_eta2_`.
# Forwards yadj, myadj, offset and eta unchanged through .Call() and returns
# whatever the native code returns. No argument checking happens on the R side.
solve_eta2_ <- function(yadj, myadj, offset, eta) {
.Call(`_capybara_solve_eta2_`, yadj, myadj, offset, eta)
}

# Thin R-side binding for the compiled routine `_capybara_sqrt_`.
# Forwards w unchanged through .Call() and returns whatever the native code
# returns.
# NOTE(review): R/internals.R calls this where it previously used base sqrt(w)
# to build w.tilde. Confirm NA/negative handling against the C++ source.
sqrt_ <- function(w) {
.Call(`_capybara_sqrt_`, w)
}

# Thin R-side binding for the compiled routine `_capybara_pairwise_cor_`.
# Forwards y and yhat unchanged through .Call() and returns whatever the
# native code returns. No argument checking happens on the R side.
pairwise_cor_ <- function(y, yhat) {
.Call(`_capybara_pairwise_cor_`, y, yhat)
}
18 changes: 12 additions & 6 deletions R/internals.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ felm_fit_ <- function(y, X, wt, k.list, control) {
MX <- center_variables_(MX, NA_real_, wt, k.list, center.tol, 10000L, FALSE)

# Compute the OLS estimate
# beta <- as.vector(qr.solve(MX, y, epsilon))
beta <- solve_beta_(MX, y, NA_real_, epsilon, FALSE)

# Generate result list
Expand Down Expand Up @@ -70,16 +71,17 @@ feglm_fit_ <- function(beta, eta, y, X, wt, k.list, family, control) {
# Compute weights and dependent variable
mu.eta <- family[["mu.eta"]](eta)
w <- (wt * mu.eta^2) / family[["variance"]](mu)
w.tilde <- sqrt(w)
w.tilde <- sqrt_(w)
nu <- (y - mu) / mu.eta

# Centering variables
Mnu <- center_variables_(Mnu, nu, w, k.list, center.tol, 10000L, TRUE)
MX <- center_variables_(MX, NA_real_, w, k.list, center.tol, 10000L, FALSE)

# Compute update step and update eta
# beta.upd <- as.vector(qr.solve(MX * w.tilde, Mnu * w.tilde, epsilon))
# eta.upd <- nu - as.vector(Mnu - MX %*% beta.upd)
beta.upd <- solve_beta_(MX, Mnu, w.tilde, epsilon, TRUE)

eta.upd <- solve_eta_(MX, Mnu, nu, beta.upd)

# Step-halving with three checks
Expand All @@ -89,8 +91,10 @@ feglm_fit_ <- function(beta, eta, y, X, wt, k.list, family, control) {
rho <- 1.0

for (inner.iter in seq.int(50L)) {
eta <- eta.old + rho * eta.upd
beta <- beta.old + rho * beta.upd
# eta <- eta.old + rho * eta.upd
# beta <- beta.old + rho * beta.upd
eta <- update_beta_eta_(eta.old, eta.upd, rho)
beta <- update_beta_eta_(beta.old, beta.upd, rho)
mu <- family[["linkinv"]](eta)
dev <- sum(family[["dev.resids"]](y, mu, wt))
dev.crit <- is.finite(dev)
Expand Down Expand Up @@ -231,7 +235,8 @@ feglm_offset_ <- function(object, offset) {
# 3. improvement as in glm2
rho <- 1.0
for (inner.iter in seq.int(50L)) {
eta <- eta.old + rho * eta.upd
# eta <- eta.old + rho * eta.upd
eta <- update_beta_eta_(eta.old, eta.upd, rho)
mu <- family[["linkinv"]](eta)
dev <- sum(family[["dev.resids"]](y, mu, wt))
dev.crit <- is.finite(dev)
Expand Down Expand Up @@ -281,7 +286,8 @@ getScoreMatrix <- function(object) {
mu <- family[["linkinv"]](eta)
mu.eta <- family[["mu.eta"]](eta)
w <- (wt * mu.eta^2) / family[["variance"]](mu)
nu <- (y - mu) / mu.eta
# nu <- (y - mu) / mu.eta
nu <- update_nu_(y, mu, mu.eta)

# Center regressor matrix (if required)
if (control[["keep.mx"]]) {
Expand Down
4 changes: 2 additions & 2 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ Median time for the different models in the book
|:------------|-------:|---------------:|-----------:|-----------------:|--------------------------:|-------------:|
|Alpaca | 213.4ms| 2.3s| 1.35s| 1.86s| 2.59s| 4.96s|
|Base R | 1.5m | 1.53m| 23.43m| 23.52m| 23.16m| 24.85m|
|**Capybara** | 263ms | 2.98s| 1.43s| 1.78s| 2.58s| 4.51s|
|**Capybara** | 323ms | 3.24s| 1.47s| 1.84s| 2.44s| 4.56s|
|Fixest | 67.4ms| 477.08ms| 95.88ms| 136.21ms| 206.12ms| 415.31ms|

Memory allocation for the same models
Expand All @@ -133,7 +133,7 @@ Memory allocation for the same models
|:------------|--------:|---------------:|-----------:|-----------------:|--------------------------:|-------------:|
|Alpaca | 304.8MB | 339.8MB| 306.3MB| 335.61MB| 393.86MB| 539.49MB|
|Base R | 2.73GB| 2.6GB| 11.9GB| 11.94GB| 11.95GB| 11.97GB|
|**Capybara** | 207MB | 231MB| 237MB| 244MB| 258MB| 293MB|
|**Capybara** | 204MB | 231MB| 237MB| 244MB| 258MB| 293MB|
|Fixest | 44.59MB| 36.59MB| 28.1MB| 32.43MB| 41.12MB| 62.87MB|

# Debugging
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ Analysis](https://www.wto.org/english/res_e/publications_e/advancedguide2016_e.h
| :----------- | ------: | --------------: | ----------: | ----------------: | -------------------------: | ------------: |
| Alpaca | 213.4ms | 2.3s | 1.35s | 1.86s | 2.59s | 4.96s |
| Base R | 1.5m | 1.53m | 23.43m | 23.52m | 23.16m | 24.85m |
| **Capybara** | 263ms | 2.98s | 1.43s | 1.78s | 2.58s | 4.51s |
| **Capybara** | 323ms | 3.24s | 1.47s | 1.84s | 2.44s | 4.56s |
| Fixest | 67.4ms | 477.08ms | 95.88ms | 136.21ms | 206.12ms | 415.31ms |

Memory allocation for the same models
Expand All @@ -127,7 +127,7 @@ Memory allocation for the same models
| :----------- | ------: | --------------: | ----------: | ----------------: | -------------------------: | ------------: |
| Alpaca | 304.8MB | 339.8MB | 306.3MB | 335.61MB | 393.86MB | 539.49MB |
| Base R | 2.73GB | 2.6GB | 11.9GB | 11.94GB | 11.95GB | 11.97GB |
| **Capybara** | 207MB | 231MB | 237MB | 244MB | 258MB | 293MB |
| **Capybara** | 204MB | 231MB | 237MB | 244MB | 258MB | 293MB |
| Fixest | 44.59MB | 36.59MB | 28.1MB | 32.43MB | 41.12MB | 62.87MB |

# Debugging
Expand Down
7 changes: 5 additions & 2 deletions dev/benchmarks_tests_agtpa_capybara_only.R
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ bench_phasing <- readRDS("dev/bench_phasing.rds")

bench_globalization <- readRDS("dev/bench_globalization.rds")

bench_ppml %>%
t1 <- bench_ppml %>%
mutate(package = "**Capybara**") %>%
mutate(model = "PPML") %>%
select(model, package, median) %>%
Expand Down Expand Up @@ -200,7 +200,7 @@ bench_ppml %>%
arrange(package) %>%
kable()

bench_ppml %>%
t2 <- bench_ppml %>%
mutate(package = "**Capybara**") %>%
mutate(model = "PPML") %>%
select(model, package, mem_alloc) %>%
Expand Down Expand Up @@ -242,3 +242,6 @@ bench_ppml %>%
) %>%
arrange(package) %>%
kable()

t1
t2
32 changes: 32 additions & 0 deletions dev/check_bottlenecks.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Profiling script: simulates a large binary-outcome data set and runs
# feglm() under profvis to see where the fitting time is spent.
# Intended to be run from the package root.

# Load the development version of the package. The call is namespace-qualified
# so the script does not depend on devtools being attached in the session.
devtools::load_all()

# d <- trade_panel
# d$trade_100 <- ifelse(d$trade > 100, 1L, 0L)

# simulate a data frame with 1,000,000 rows with:
# trade_100: 0/1
# lang: 0/1
# clny: 0/1
# rta: 0/1
# year: 2000-2010
n_obs <- 1e6 # single place to change the size of the simulated data
set.seed(200100)
d <- data.frame(
  trade_100 = sample(0:1, n_obs, replace = TRUE),
  lang = sample(0:1, n_obs, replace = TRUE),
  clny = sample(0:1, n_obs, replace = TRUE),
  rta = sample(0:1, n_obs, replace = TRUE),
  year = sample(2000:2010, n_obs, replace = TRUE)
)

# Print the distinct values of each column as a quick sanity check
unique(d$trade_100)
unique(d$lang)
unique(d$clny)
unique(d$rta)
unique(d$year)

# Fit 'feglm()'
# NOTE(review): the package is loaded a second time here, presumably so this
# section can be re-run on its own after editing the sources — confirm.
devtools::load_all()
profvis::profvis(
  feglm(trade_100 ~ lang + clny + rta | year, d, family = binomial())
)

# Compute average partial effects
# bench::mark(apes(mod))
14 changes: 7 additions & 7 deletions docs/index.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ pkgdown: 2.0.7
pkgdown_sha: ~
articles:
intro: intro.html
last_built: 2024-03-03T21:12Z
last_built: 2024-03-04T01:22Z

2 changes: 1 addition & 1 deletion docs/reference/apes.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/reference/bias_corr.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/reference/feglm.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/reference/felm.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/reference/fenegbin.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/reference/fepoisson.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

File renamed without changes.
38 changes: 0 additions & 38 deletions src/02_solve_beta.cpp

This file was deleted.

File renamed without changes.
34 changes: 0 additions & 34 deletions src/03_solve_eta.cpp

This file was deleted.

Loading

0 comments on commit 8c3c46b

Please sign in to comment.