diff --git a/.gitignore b/.gitignore index 90e6405..d66d03b 100644 --- a/.gitignore +++ b/.gitignore @@ -10,5 +10,6 @@ src/*.dll inst/doc dev/1-s2.0-S0014292116300630-mmc1 dev/armadillo-codes +dev/*.rds README.html pkgdown diff --git a/DESCRIPTION b/DESCRIPTION index f325983..afd1596 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: capybara Type: Package Title: Fast and Memory Efficient Fitting of Linear Models With High-Dimensional Fixed Effects -Version: 0.5.2 +Version: 0.6.0 Authors@R: c( person( given = "Mauricio", diff --git a/NAMESPACE b/NAMESPACE index bba62c5..6dbe056 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -73,8 +73,8 @@ importFrom(stats,rgamma) importFrom(stats,rlogis) importFrom(stats,rnorm) importFrom(stats,rpois) -importFrom(stats,sd) importFrom(stats,terms) +importFrom(stats,var) importFrom(stats,vcov) importFrom(utils,combn) useDynLib(capybara, .registration = TRUE) diff --git a/NEWS.md b/NEWS.md index 8d5b44f..1c31911 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# capybara 0.6.0 + +* Moves all the heavy computation to C++ using Armadillo and exports the + results to R. Previous versions made multiple data copies between R and C++, + which added overhead to the computations. +* Previous versions returned the centered regressor matrix (MX) by default; + it now has to be requested explicitly with `feglm_control(keep_mx = TRUE)`. + # capybara 0.5.2 * Uses an O(n log(n)) algorithm to compute the Kendall correlation for the diff --git a/R/apes.R b/R/apes.R index e4c09f4..5e2b44a 100644 --- a/R/apes.R +++ b/R/apes.R @@ -13,27 +13,27 @@ #' #' @param object an object of class \code{"bias_corr"} or \code{"feglm"}; #' currently restricted to \code{\link[stats]{binomial}}. -#' @param n.pop unsigned integer indicating a finite population correction for +#' @param n_pop unsigned integer indicating a finite population correction for #' the estimation of the covariance matrix of the average partial effects #' proposed by Cruz-Gonzalez, Fernández-Val, and Weidner (2017). The correction #' factor is computed as follows: -#' \eqn{(n^{\ast} - n) / (n^{\ast} - 1)}{(n.pop - n) / (n.pop - 1)}, -#' where \eqn{n^{\ast}}{n.pop} and \eqn{n}{n} are the sizes of the entire +#' \eqn{(n^{\ast} - n) / (n^{\ast} - 1)}{(n_pop - n) / (n_pop - 1)}, +#' where \eqn{n^{\ast}}{n_pop} and \eqn{n}{n} are the sizes of the entire #' population and the full sample. Default is \code{NULL}, which refers to #' a factor of zero and a covariance obtained by the delta method. -#' @param panel.structure a string equal to \code{"classic"} or \code{"network"} +#' @param panel_structure a string equal to \code{"classic"} or \code{"network"} #' which determines the structure of the panel used. \code{"classic"} denotes #' panel structures where, for example, the same cross-sectional units are #' observed several times (this includes pseudo panels). \code{"network"} #' denotes panel structures where, for example, bilateral trade flows are #' observed for several time periods. Default is \code{"classic"}. -#' @param sampling.fe a string equal to \code{"independence"} or +#' @param sampling_fe a string equal to \code{"independence"} or #' \code{"unrestricted"} which imposes sampling assumptions about the #' unobserved effects. \code{"independence"} imposes that all unobserved #' effects are independent sequences. \code{"unrestricted"} does not impose any #' sampling assumptions. Note that this option only affects the optional finite #' population correction. Default is \code{"independence"}. 
-#' @param weak.exo logical indicating if some of the regressors are assumed to +#' @param weak_exo logical indicating if some of the regressors are assumed to #' be weakly exogenous (e.g. predetermined). If object is of class #' \code{"bias_corr"}, the option will be automatically set to \code{TRUE} if #' the chosen bandwidth parameter is larger than zero. Note that this option @@ -83,10 +83,10 @@ #' @export apes <- function( object = NULL, - n.pop = NULL, - panel.structure = c("classic", "network"), - sampling.fe = c("independence", "unrestricted"), - weak.exo = FALSE) { + n_pop = NULL, + panel_structure = c("classic", "network"), + sampling_fe = c("independence", "unrestricted"), + weak_exo = FALSE) { # Check validity of 'object' if (is.null(object)) { stop("'object' has to be specified.", call. = FALSE) @@ -95,22 +95,22 @@ apes <- function( } # Extract prior information if available or check validity of - # 'panel.structure' + # 'panel_structure' bias_corr <- inherits(object, "bias_corr") if (bias_corr) { - panel.structure <- object[["panel.structure"]] + panel_structure <- object[["panel_structure"]] L <- object[["bandwidth"]] if (L > 0L) { - weak.exo <- TRUE + weak_exo <- TRUE } else { - weak.exo <- FALSE + weak_exo <- FALSE } } else { - panel.structure <- match.arg(panel.structure) + panel_structure <- match.arg(panel_structure) } - # Check validity of 'sampling.fe' - sampling.fe <- match.arg(sampling.fe) + # Check validity of 'sampling_fe' + sampling_fe <- match.arg(sampling_fe) # Extract model information beta <- object[["coefficients"]] @@ -119,11 +119,11 @@ apes <- function( eps <- .Machine[["double.eps"]] family <- object[["family"]] formula <- object[["formula"]] - lvls.k <- object[["lvls.k"]] + lvls_k <- object[["lvls_k"]] nt <- object[["nobs"]][["nobs"]] - nt.full <- object[["nobs"]][["nobs.full"]] - k <- length(lvls.k) - k.vars <- names(lvls.k) + nt.full <- object[["nobs"]][["nobs_full"]] + k <- length(lvls_k) + k_vars <- names(lvls_k) p <- length(beta) # Check if binary choice model @@ -134,11 +134,11 @@ apes <- function( } # Check if provided object matches requested panel structure - if (panel.structure == "classic") { + if (panel_structure == "classic") { if (!(k %in% c(1L, 2L))) { stop( paste( - "panel.structure == 'classic' expects a one- or two-way fixed", + "panel_structure == 'classic' expects a one- or two-way fixed", "effects model." ), call. = FALSE @@ -148,7 +148,7 @@ apes <- function( if (!(k %in% c(2L, 3L))) { stop( paste( - "panel.structure == 'network' expects a two- or three-way fixed", + "panel_structure == 'network' expects a two- or three-way fixed", "effects model." ), call. = FALSE @@ -156,11 +156,11 @@ apes <- function( } } - # Check validity of 'n.pop' + # Check validity of 'n_pop' # Note: Default option is no adjustment i.e. 
only delta method covariance - if (!is.null(n.pop)) { - n.pop <- as.integer(n.pop) - if (n.pop < nt.full) { + if (!is.null(n_pop)) { + n_pop <- as.integer(n_pop) + if (n_pop < nt.full) { warning( paste( "Size of the entire population is lower than the full sample size.", @@ -170,7 +170,7 @@ apes <- function( ) adj <- 0.0 } else { - adj <- (n.pop - nt.full) / (n.pop - 1L) + adj <- (n_pop - nt.full) / (n_pop - 1L) } } else { adj <- 0.0 @@ -187,7 +187,7 @@ apes <- function( binary <- apply(X, 2L, function(x) all(x %in% c(0.0, 1.0))) # Generate auxiliary list of indexes for different sub panels - k.list <- get_index_list_(k.vars, data) + k_list <- get_index_list_(k_vars, data) # Compute derivatives and weights eta <- object[["eta"]] @@ -205,10 +205,10 @@ apes <- function( } # Center regressor matrix (if required) - if (control[["keep.mx"]]) { + if (control[["keep_mx"]]) { MX <- object[["MX"]] } else { - MX <- center_variables_(X, NA_real_, w, k.list, control[["center.tol"]], 100000L, FALSE) + MX <- center_variables_r_(X, w, k_list, control[["center_tol"]], 10000L) } # Compute average partial effects, derivatives, and Jacobian @@ -243,7 +243,7 @@ apes <- function( # Compute projection and residual projection of \Psi Psi <- -Delta1 / w - MPsi <- center_variables_(Psi, NA_real_, w, k.list, control[["center.tol"]], 100000L, FALSE) + MPsi <- center_variables_r_(Psi, w, k_list, control[["center_tol"]], 10000L) PPsi <- Psi - MPsi rm(Delta1, Psi) @@ -264,28 +264,28 @@ apes <- function( } # Compute bias terms for requested bias correction - if (panel.structure == "classic") { + if (panel_structure == "classic") { # Compute \hat{B} and \hat{D} - b <- group_sums_(Delta2 + PPsi * z, w, k.list[[1L]]) / (2.0 * nt) + b <- group_sums_(Delta2 + PPsi * z, w, k_list[[1L]]) / (2.0 * nt) if (k > 1L) { - b <- (b + group_sums_(Delta2 + PPsi * z, w, k.list[[2L]])) / (2.0 * nt) + b <- (b + group_sums_(Delta2 + PPsi * z, w, k_list[[2L]])) / (2.0 * nt) } # Compute spectral density part of \hat{B} if (L > 0L) { - b <- (b - group_sums_spectral_(MPsi * w, v, w, L, k.list[[1L]])) / nt + b <- (b - group_sums_spectral_(MPsi * w, v, w, L, k_list[[1L]])) / nt } } else { # Compute \hat{D}_{1}, \hat{D}_{2}, and \hat{B} - b <- group_sums_(Delta2 + PPsi * z, w, k.list[[1L]]) / (2.0 * nt) - b <- (b + group_sums_(Delta2 + PPsi * z, w, k.list[[2L]])) / (2.0 * nt) + b <- group_sums_(Delta2 + PPsi * z, w, k_list[[1L]]) / (2.0 * nt) + b <- (b + group_sums_(Delta2 + PPsi * z, w, k_list[[2L]])) / (2.0 * nt) if (k > 2L) { - b <- (b + group_sums_(Delta2 + PPsi * z, w, k.list[[3L]])) / (2.0 * nt) + b <- (b + group_sums_(Delta2 + PPsi * z, w, k_list[[3L]])) / (2.0 * nt) } # Compute spectral density part of \hat{B} if (k > 2L && L > 0L) { - b <- (b - group_sums_spectral_(MPsi * w, v, w, L, k.list[[3L]])) / nt + b <- (b - group_sums_spectral_(MPsi * w, v, w, L, k_list[[3L]])) / nt } } rm(Delta2) @@ -296,33 +296,33 @@ apes <- function( rm(eta, w, z, MPsi) # Compute covariance matrix - Gamma <- gamma_(MX, object[["Hessian"]], J, PPsi, v, nt.full) - V <- crossprod_(Gamma, NA_real_, FALSE, FALSE) + Gamma <- gamma_(MX, object[["hessian"]], J, PPsi, v, nt.full) + V <- crossprod(Gamma) if (adj > 0.0) { # Simplify covariance if sampling assumptions are imposed - if (sampling.fe == "independence") { - V <- V + adj * group_sums_var_(Delta, k.list[[1L]]) + if (sampling_fe == "independence") { + V <- V + adj * group_sums_var_(Delta, k_list[[1L]]) if (k > 1L) { - V <- V + adj * (group_sums_var_(Delta, k.list[[2L]]) - crossprod_(Delta, NA_real_, FALSE, 
FALSE)) + V <- V + adj * (group_sums_var_(Delta, k_list[[2L]]) - crossprod(Delta)) } - if (panel.structure == "network") { + if (panel_structure == "network") { if (k > 2L) { - V <- V + adj * (group_sums_var_(Delta, k.list[[3L]]) - - crossprod_(Delta, NA_real_, FALSE, FALSE)) + V <- V + adj * (group_sums_var_(Delta, k_list[[3L]]) - + crossprod(Delta)) } } } # Add covariance in case of weak exogeneity - if (weak.exo) { - if (panel.structure == "classic") { - C <- group_sums_cov_(Delta, Gamma, k.list[[1L]]) + if (weak_exo) { + if (panel_structure == "classic") { + C <- group_sums_cov_(Delta, Gamma, k_list[[1L]]) V <- V + adj * (C + t(C)) rm(C) } else { if (k > 2L) { - C <- group_sums_cov_(Delta, Gamma, k.list[[3L]]) + C <- group_sums_cov_(Delta, Gamma, k_list[[3L]]) V <- V + adj * (C + t(C)) rm(C) } @@ -338,9 +338,9 @@ apes <- function( reslist <- list( delta = delta, vcov = V, - panel.structure = panel.structure, - sampling.fe = sampling.fe, - weak.exo = weak.exo + panel_structure = panel_structure, + sampling_fe = sampling_fe, + weak_exo = weak_exo ) # Update result list diff --git a/R/bias_corr.R b/R/bias_corr.R index a728833..1fa900a 100644 --- a/R/bias_corr.R +++ b/R/bias_corr.R @@ -16,7 +16,7 @@ #' weakly exogenous regressors, e.g. lagged outcome variables, we suggest #' choosing a bandwidth between one and four. Note that the order of factors to #' be partialed out is important for bandwidths larger than zero. -#' @param panel.structure a string equal to \code{"classic"} or \code{"network"} +#' @param panel_structure a string equal to \code{"classic"} or \code{"network"} #' which determines the structure of the panel used. \code{"classic"} denotes #' panel structures where, for example, the same cross-sectional units are #' observed several times (this includes pseudo panels). \code{"network"} @@ -59,7 +59,7 @@ bias_corr <- function( object = NULL, L = 0L, - panel.structure = c("classic", "network")) { + panel_structure = c("classic", "network")) { # Check validity of 'object' if (is.null(object)) { stop("'object' has to be specified.", call. = FALSE) @@ -67,21 +67,21 @@ } - # Check validity of 'panel.structure' - panel.structure <- match.arg(panel.structure) + # Check validity of 'panel_structure' + panel_structure <- match.arg(panel_structure) # Extract model information - beta.uncorr <- object[["coefficients"]] + beta_uncorr <- object[["coefficients"]] control <- object[["control"]] data <- object[["data"]] eps <- .Machine[["double.eps"]] family <- object[["family"]] formula <- object[["formula"]] - lvls.k <- object[["lvls.k"]] - nms.sp <- names(beta.uncorr) + lvls_k <- object[["lvls_k"]] + nms.sp <- names(beta_uncorr) nt <- object[["nobs"]][["nobs"]] - k.vars <- names(lvls.k) - k <- length(lvls.k) + k_vars <- names(lvls_k) + k <- length(lvls_k) # Check if binary choice model if (family[["family"]] != "binomial") { @@ -92,7 +92,7 @@ bias_corr <- function( } # Check if the number of FEs is > 3 - if (length(lvls.k) > 3) { + if (length(lvls_k) > 3) { stop( "bias_corr() only supports models with up to three-way fixed effects.", call. 
= FALSE @@ -100,11 +100,11 @@ bias_corr <- function( } # Check if provided object matches requested panel structure - if (panel.structure == "classic") { + if (panel_structure == "classic") { if (!(k %in% c(1L, 2L))) { stop( paste( - "panel.structure == 'classic' expects a one- or two-way fixed", + "panel_structure == 'classic' expects a one- or two-way fixed", "effect model." ), call. = FALSE @@ -114,7 +114,7 @@ bias_corr <- function( if (!(k %in% c(2L, 3L))) { stop( paste( - "panel.structure == 'network' expects a two- or three-way fixed", + "panel_structure == 'network' expects a two- or three-way fixed", "effects model." ), call. = FALSE @@ -129,17 +129,17 @@ bias_corr <- function( wt <- object[["weights"]] # Generate auxiliary list of indexes for different sub panels - k.list <- get_index_list_(k.vars, data) + k_list <- get_index_list_(k_vars, data) # Compute derivatives and weights eta <- object[["eta"]] mu <- family[["linkinv"]](eta) - mu.eta <- family[["mu.eta"]](eta) + mu_eta <- family[["mu.eta"]](eta) v <- wt * (y - mu) - w <- wt * mu.eta + w <- wt * mu_eta z <- wt * partial_mu_eta_(eta, family, 2L) if (family[["link"]] != "logit") { - h <- mu.eta / family[["variance"]](mu) + h <- mu_eta / family[["variance"]](mu) v <- h * v w <- h * w z <- h * z @@ -147,71 +147,71 @@ bias_corr <- function( } # Center regressor matrix (if required) - if (control[["keep.mx"]]) { + if (control[["keep_mx"]]) { MX <- object[["MX"]] } else { - MX <- center_variables_(X, NA_real_, w, k.list, control[["center.tol"]], 100000L, FALSE) + MX <- center_variables_r_(X, w, k_list, control[["center_tol"]], 10000L) } # Compute bias terms for requested bias correction - if (panel.structure == "classic") { + if (panel_structure == "classic") { # Compute \hat{B} and \hat{D} - b <- as.vector(group_sums_(MX * z, w, k.list[[1L]])) / 2.0 / nt + b <- as.vector(group_sums_(MX * z, w, k_list[[1L]])) / 2.0 / nt if (k > 1L) { - b <- b + as.vector(group_sums_(MX * z, w, k.list[[2L]])) / 2.0 / nt + b <- b + as.vector(group_sums_(MX * z, w, k_list[[2L]])) / 2.0 / nt } # Compute spectral density part of \hat{B} if (L > 0L) { - b <- (b + group_sums_spectral_(MX * w, v, w, L, k.list[[1L]])) / nt + b <- (b + group_sums_spectral_(MX * w, v, w, L, k_list[[1L]])) / nt } } else { # Compute \hat{D}_{1}, \hat{D}_{2}, and \hat{B} - b <- group_sums_(MX * z, w, k.list[[1L]]) / (2.0 * nt) - b <- (b + group_sums_(MX * z, w, k.list[[2L]])) / (2.0 * nt) + b <- group_sums_(MX * z, w, k_list[[1L]]) / (2.0 * nt) + b <- (b + group_sums_(MX * z, w, k_list[[2L]])) / (2.0 * nt) if (k > 2L) { - b <- (b + group_sums_(MX * z, w, k.list[[3L]])) / (2.0 * nt) + b <- (b + group_sums_(MX * z, w, k_list[[3L]])) / (2.0 * nt) } # Compute spectral density part of \hat{B} if (k > 2L && L > 0L) { - b <- (b + group_sums_spectral_(MX * w, v, w, L, k.list[[3L]])) / nt + b <- (b + group_sums_spectral_(MX * w, v, w, L, k_list[[3L]])) / nt } } # Compute bias-corrected structural parameters - beta <- solve_bias_(beta.uncorr, object[["Hessian"]], nt, -b) + beta <- beta_uncorr - solve(object[["hessian"]] / nt, b) names(beta) <- nms.sp # Update \eta and first- and second-order derivatives - eta <- feglm_offset_(object, solve_y_(X, beta)) + eta <- feglm_offset_(object, X %*% beta) mu <- family[["linkinv"]](eta) - mu.eta <- family[["mu.eta"]](eta) + mu_eta <- family[["mu.eta"]](eta) v <- wt * (y - mu) - w <- wt * mu.eta + w <- wt * mu_eta if (family[["link"]] != "logit") { - h <- mu.eta / family[["variance"]](mu) + h <- mu_eta / family[["variance"]](mu) v <- h * v w <- h * w 
rm(h) } # Update centered regressor matrix - MX <- center_variables_(X, NA_real_, w, k.list, control[["center.tol"]], 100000L, FALSE) + MX <- center_variables_r_(X, w, k_list, control[["center_tol"]], 10000L) colnames(MX) <- nms.sp - # Update Hessian - H <- crossprod_(MX, w, TRUE, TRUE) + # Update hessian + H <- crossprod(MX * sqrt(w)) dimnames(H) <- list(nms.sp, nms.sp) # Update result list object[["coefficients"]] <- beta object[["eta"]] <- eta - if (control[["keep.mx"]]) object[["MX"]] <- MX - object[["Hessian"]] <- H - object[["coefficients.uncorr"]] <- beta.uncorr - object[["bias.term"]] <- b - object[["panel.structure"]] <- panel.structure + if (control[["keep_mx"]]) object[["MX"]] <- MX + object[["hessian"]] <- H + object[["coefficients_uncorr"]] <- beta_uncorr + object[["bias_term"]] <- b + object[["panel_structure"]] <- panel_structure object[["bandwidth"]] <- L # Add additional class to result list diff --git a/R/capybara-package.R b/R/capybara-package.R index 3bd6236..1981179 100644 --- a/R/capybara-package.R +++ b/R/capybara-package.R @@ -18,7 +18,7 @@ #' @importFrom MASS negative.binomial theta.ml #' @importFrom rlang sym := #' @importFrom stats as.formula binomial model.matrix na.omit gaussian poisson -#' pnorm printCoefmat rgamma rlogis rnorm rpois terms vcov predict sd +#' pnorm printCoefmat rgamma rlogis rnorm rpois terms vcov predict var #' complete.cases #' @importFrom utils combn #' @useDynLib capybara, .registration = TRUE diff --git a/R/cpp11.R b/R/cpp11.R index 8bf0895..68b05aa 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -1,7 +1,7 @@ # Generated by cpp11: do not edit by hand -center_variables_ <- function(V_r, v_sum_r, w_r, klist, tol, maxiter, sum_v) { - .Call(`_capybara_center_variables_`, V_r, v_sum_r, w_r, klist, tol, maxiter, sum_v) +center_variables_r_ <- function(V_r, w_r, klist, tol, maxiter) { + .Call(`_capybara_center_variables_r_`, V_r, w_r, klist, tol, maxiter) } get_alpha_ <- function(p_r, klist, tol) { @@ -24,52 +24,12 @@ group_sums_cov_ <- function(M_r, N_r, jlist) { .Call(`_capybara_group_sums_cov_`, M_r, N_r, jlist) } -crossprod_ <- function(x, w, weighted, root_weights) { - .Call(`_capybara_crossprod_`, x, w, weighted, root_weights) +feglm_fit_ <- function(beta_r, eta_r, y_r, x_r, wt_r, theta, family, control, k_list) { + .Call(`_capybara_feglm_fit_`, beta_r, eta_r, y_r, x_r, wt_r, theta, family, control, k_list) } -gamma_ <- function(mx, hessian, j, ppsi, v, nt_full) { - .Call(`_capybara_gamma_`, mx, hessian, j, ppsi, v, nt_full) -} - -inv_ <- function(h) { - .Call(`_capybara_inv_`, h) -} - -rank_ <- function(x) { - .Call(`_capybara_rank_`, x) -} - -solve_bias_ <- function(beta_uncorr, hessian, nt, b) { - .Call(`_capybara_solve_bias_`, beta_uncorr, hessian, nt, b) -} - -solve_y_ <- function(a, x) { - .Call(`_capybara_solve_y_`, a, x) -} - -sandwich_ <- function(a, b) { - .Call(`_capybara_sandwich_`, a, b) -} - -update_beta_eta_ <- function(old, upd, param) { - .Call(`_capybara_update_beta_eta_`, old, upd, param) -} - -update_nu_ <- function(y, mu, mu_eta) { - .Call(`_capybara_update_nu_`, y, mu, mu_eta) -} - -solve_beta_ <- function(mx, mnu, wtilde, weighted) { - .Call(`_capybara_solve_beta_`, mx, mnu, wtilde, weighted) -} - -solve_eta_ <- function(mx, mnu, nu, beta) { - .Call(`_capybara_solve_eta_`, mx, mnu, nu, beta) -} - -solve_eta2_ <- function(yadj, myadj, offset, eta) { - .Call(`_capybara_solve_eta2_`, yadj, myadj, offset, eta) +feglm_offset_fit_ <- function(eta_r, y_r, offset_r, wt_r, family, control, k_list) { + 
.Call(`_capybara_feglm_offset_fit_`, eta_r, y_r, offset_r, wt_r, family, control, k_list) } kendall_cor_ <- function(m) { diff --git a/R/feglm.R b/R/feglm.R index 60fe7e6..37dce94 100644 --- a/R/feglm.R +++ b/R/feglm.R @@ -21,10 +21,10 @@ #' details of family functions. #' @param weights an optional string with the name of the 'prior weights' #' variable in \code{data}. -#' @param beta.start an optional vector of starting values for the structural +#' @param beta_start an optional vector of starting values for the structural #' parameters in the linear predictor. Default is #' \eqn{\boldsymbol{\beta} = \mathbf{0}}{\beta = 0}. -#' @param eta.start an optional vector of starting values for the linear +#' @param eta_start an optional vector of starting values for the linear #' predictor. #' @param control a named list of parameters for controlling the fitting #' process. See \code{\link{feglm_control}} for details. @@ -68,8 +68,8 @@ feglm <- function( data = NULL, family = gaussian(), weights = NULL, - beta.start = NULL, - eta.start = NULL, + beta_start = NULL, + eta_start = NULL, control = NULL) { # Check validity of formula ---- check_formula_(formula) @@ -88,32 +88,32 @@ feglm <- function( # Generate model.frame lhs <- NA # just to avoid global variable warning - nobs.na <- NA - nobs.full <- NA + nobs_na <- NA + nobs_full <- NA model_frame_(data, formula, weights) # Ensure that model response is in line with the chosen model ---- check_response_(data, lhs, family) # Get names of the fixed effects variables and sort ---- - k.vars <- attr(terms(formula, rhs = 2L), "term.labels") - k <- length(k.vars) + k_vars <- attr(terms(formula, rhs = 2L), "term.labels") + k <- length(k_vars) # Generate temporary variable ---- tmp.var <- temp_var_(data) # Drop observations that do not contribute to the log likelihood ---- - data <- drop_by_link_type_(data, lhs, family, tmp.var, k.vars, control) + data <- drop_by_link_type_(data, lhs, family, tmp.var, k_vars, control) # Transform fixed effects and clusters to factors ---- - data <- transform_fe_(data, formula, k.vars) + data <- transform_fe_(data, formula, k_vars) # Determine the number of dropped observations ---- nt <- nrow(data) - nobs <- nobs_(nobs.full, nobs.na, nt) + nobs <- nobs_(nobs_full, nobs_na, nt) # Extract model response and regressor matrix ---- - nms.sp <- NA + nms_sp <- NA p <- NA model_response_(data, formula) @@ -131,44 +131,41 @@ feglm <- function( check_weights_(wt) # Compute and check starting guesses ---- - start_guesses_(beta.start, eta.start, y, X, beta, nt, wt, p, family) + start_guesses_(beta_start, eta_start, y, X, beta, nt, wt, p, family) # Get names and number of levels in each fixed effects category ---- - nms.fe <- lapply(select(data, all_of(k.vars)), levels) - lvls.k <- vapply(nms.fe, length, integer(1)) + nms_fe <- lapply(select(data, all_of(k_vars)), levels) + lvls_k <- vapply(nms_fe, length, integer(1)) # Generate auxiliary list of indexes for different sub panels ---- - k.list <- get_index_list_(k.vars, data) + k_list <- get_index_list_(k_vars, data) # Fit generalized linear model ---- + if (is.integer(y)) { y <- as.numeric(y) } fit <- feglm_fit_( - beta, eta, y, X, wt, k.list, family, control + beta, eta, y, X, wt, 0.0, family[["family"]], control, k_list ) y <- NULL X <- NULL eta <- NULL - # Add names to beta, Hessian, and MX (if provided) ---- - names(fit[["coefficients"]]) <- nms.sp - if (control[["keep.mx"]]) { - colnames(fit[["MX"]]) <- nms.sp + # Add names to beta, hessian, and MX (if provided) ---- + 
names(fit[["coefficients"]]) <- nms_sp + if (control[["keep_mx"]]) { + colnames(fit[["MX"]]) <- nms_sp } - dimnames(fit[["Hessian"]]) <- list(nms.sp, nms.sp) - - # Generate result list ---- - reslist <- c( - fit, list( - nobs = nobs, - lvls.k = lvls.k, - nms.fe = nms.fe, - formula = formula, - data = data, - family = family, - control = control - ) - ) + dimnames(fit[["hessian"]]) <- list(nms_sp, nms_sp) + + # Add to fit list ---- + fit[["nobs"]] <- nobs + fit[["lvls_k"]] <- lvls_k + fit[["nms_fe"]] <- nms_fe + fit[["formula"]] <- formula + fit[["data"]] <- data + fit[["family"]] <- family + fit[["control"]] <- control # Return result list ---- - structure(reslist, class = "feglm") + structure(fit, class = "feglm") } diff --git a/R/feglm_control.R b/R/feglm_control.R index 838baef..31288f9 100644 --- a/R/feglm_control.R +++ b/R/feglm_control.R @@ -3,28 +3,28 @@ #' @description Set and change parameters used for fitting \code{\link{feglm}}. #' Termination conditions are similar to \code{\link[stats]{glm}}. #' -#' @param dev.tol tolerance level for the first stopping condition of the +#' @param dev_tol tolerance level for the first stopping condition of the #' maximization routine. The stopping condition is based on the relative change #' of the deviance in iteration \eqn{r} and can be expressed as follows: #' \eqn{|dev_{r} - dev_{r - 1}| / (0.1 + |dev_{r}|) < tol}{|dev - devold| / #' (0.1 + |dev|) < tol}. The default is \code{1.0e-08}. -#' @param center.tol tolerance level for the stopping condition of the centering +#' @param center_tol tolerance level for the stopping condition of the centering #' algorithm. The stopping condition is based on the relative change of the #' centered variable similar to the \code{'lfe'} package. The default is #' \code{1.0e-08}. -#' @param iter.max unsigned integer indicating the maximum number of iterations +#' @param iter_max unsigned integer indicating the maximum number of iterations #' in the maximization routine. The default is \code{25L}. #' @param limit unsigned integer indicating the maximum number of iterations of #' \code{\link[MASS]{theta.ml}}. The default is \code{10L}. #' @param trace logical indicating if output should be produced in each #' iteration. Default is \code{FALSE}. -#' @param drop.pc logical indicating to drop observations that are perfectly +#' @param drop_pc logical indicating to drop observations that are perfectly #' classified/separated and hence do not contribute to the log-likelihood. This #' option is useful to reduce the computational costs of the maximization #' problem and improves the numerical stability of the algorithm. Note that #' dropping perfectly separated observations does not affect the estimates. #' The default is \code{TRUE}. -#' @param keep.mx logical indicating if the centered regressor matrix should be +#' @param keep_mx logical indicating if the centered regressor matrix should be #' stored. The centered regressor matrix is required for some covariance #' estimators, bias corrections, and average partial effects. This option saves #' some computation time at the cost of memory. The default is \code{TRUE}. 
@@ -33,24 +33,24 @@ #' #' @seealso \code{\link{feglm}} feglm_control <- function( - dev.tol = 1.0e-08, - center.tol = 1.0e-08, - iter.max = 25L, + dev_tol = 1.0e-08, + center_tol = 1.0e-08, + iter_max = 25L, limit = 10L, trace = FALSE, - drop.pc = TRUE, - keep.mx = TRUE) { + drop_pc = TRUE, + keep_mx = FALSE) { # Check validity of tolerance parameters - if (dev.tol <= 0.0 || center.tol <= 0.0) { + if (dev_tol <= 0.0 || center_tol <= 0.0) { stop( "All tolerance parameters should be greater than zero.", call. = FALSE ) } - # Check validity of 'iter.max' - iter.max <- as.integer(iter.max) - if (iter.max < 1L) { + # Check validity of 'iter_max' + iter_max <- as.integer(iter_max) + if (iter_max < 1L) { stop( "Maximum number of iterations should be at least one.", call. = FALSE @@ -65,12 +65,12 @@ feglm_control <- function( # Return list with control parameters list( - dev.tol = dev.tol, - center.tol = center.tol, - iter.max = iter.max, + dev_tol = dev_tol, + center_tol = center_tol, + iter_max = iter_max, limit = limit, trace = as.logical(trace), - drop.pc = as.logical(drop.pc), - keep.mx = as.logical(keep.mx) + drop_pc = as.logical(drop_pc), + keep_mx = as.logical(keep_mx) ) } diff --git a/R/feglm_offset.R b/R/feglm_offset.R new file mode 100644 index 0000000..b46ca70 --- /dev/null +++ b/R/feglm_offset.R @@ -0,0 +1,43 @@ +# Efficient offset algorithm to update the linear predictor ---- + +feglm_offset_ <- function(object, offset) { + # Check validity of 'object' + if (!inherits(object, "feglm")) { + stop("'feglm_offset_' called on a non-'feglm' object.") + } + + # Extract required quantities from result list + control <- object[["control"]] + data <- object[["data"]] + wt <- object[["weights"]] + family <- object[["family"]] + formula <- object[["formula"]] + lvls_k <- object[["lvls_k"]] + nt <- object[["nobs"]][["nobs"]] + k_vars <- names(lvls_k) + + # Extract dependent variable + y <- data[[1L]] + + # Extract control arguments + center_tol <- control[["center_tol"]] + dev_tol <- control[["dev_tol"]] + iter_max <- control[["iter_max"]] + + # Generate auxiliary list of indexes to project out the fixed effects + k_list <- get_index_list_(k_vars, data) + + # Compute starting guess for eta + if (family[["family"]] == "binomial") { + eta <- rep(family[["linkfun"]](sum(wt * (y + 0.5) / 2.0) / sum(wt)), nt) + } else if (family[["family"]] %in% c("Gamma", "inverse.gaussian")) { + eta <- rep(family[["linkfun"]](sum(wt * y) / sum(wt)), nt) + } else { + eta <- rep(family[["linkfun"]](sum(wt * (y + 0.1)) / sum(wt)), nt) + } + + # Return eta + if (is.integer(y)) { y <- as.numeric(y) } + feglm_offset_fit_(eta, y, offset, wt, family[["family"]], control, + k_list) +} diff --git a/R/felm.R b/R/felm.R index 8eecdba..c939d0b 100644 --- a/R/felm.R +++ b/R/felm.R @@ -35,7 +35,7 @@ felm <- function(formula = NULL, data = NULL, weights = NULL) { names(reslist)[which(names(reslist) == "eta")] <- "fitted.values" - # reslist[["Hessian"]] <- NULL + # reslist[["hessian"]] <- NULL reslist[["family"]] <- NULL reslist[["deviance"]] <- NULL diff --git a/R/fenegbin.R b/R/fenegbin.R index 3211a96..7f690eb 100644 --- a/R/fenegbin.R +++ b/R/fenegbin.R @@ -2,7 +2,7 @@ #' effects #' @description A routine that uses the same internals as \code{\link{feglm}}. #' @inheritParams feglm -#' @param init.theta an optional initial value for the theta parameter (see +#' @param init_theta an optional initial value for the theta parameter (see #' \code{\link[MASS]{glm.nb}}). #' @param link the link function. 
Must be one of \code{"log"}, \code{"sqrt"}, or #' \code{"identity"}. @@ -22,9 +22,9 @@ fenegbin <- function( formula = NULL, data = NULL, weights = NULL, - beta.start = NULL, - eta.start = NULL, - init.theta = NULL, + beta_start = NULL, + eta_start = NULL, + init_theta = NULL, link = c("log", "identity", "sqrt"), control = NULL) { # Check validity of formula ---- @@ -44,36 +44,36 @@ fenegbin <- function( # Generate model.frame lhs <- NA # just to avoid global variable warning - nobs.na <- NA - nobs.full <- NA + nobs_na <- NA + nobs_full <- NA model_frame_(data, formula, weights) # Check starting guess of theta ---- - family <- init_theta_(init.theta, link) - rm(init.theta) + family <- init_theta_(init_theta, link) + rm(init_theta) # Ensure that model response is in line with the chosen model ---- check_response_(data, lhs, family) # Get names of the fixed effects variables and sort ---- - k.vars <- attr(terms(formula, rhs = 2L), "term.labels") - k <- length(k.vars) + k_vars <- attr(terms(formula, rhs = 2L), "term.labels") + k <- length(k_vars) # Generate temporary variable ---- tmp.var <- temp_var_(data) # Drop observations that do not contribute to the log likelihood ---- - data <- drop_by_link_type_(data, lhs, family, tmp.var, k.vars, control) + data <- drop_by_link_type_(data, lhs, family, tmp.var, k_vars, control) # Transform fixed effects and clusters to factors ---- - data <- transform_fe_(data, formula, k.vars) + data <- transform_fe_(data, formula, k_vars) # Determine the number of dropped observations ---- nt <- nrow(data) - nobs <- nobs_(nobs.full, nobs.na, nt) + nobs <- nobs_(nobs_full, nobs_na, nt) # Extract model response and regressor matrix ---- - nms.sp <- NA + nms_sp <- NA p <- NA model_response_(data, formula) @@ -91,29 +91,23 @@ fenegbin <- function( check_weights_(wt) # Compute and check starting guesses ---- - start_guesses_(beta.start, eta.start, y, X, beta, nt, wt, p, family) + start_guesses_(beta_start, eta_start, y, X, beta, nt, wt, p, family) # Get names and number of levels in each fixed effects category ---- - nms.fe <- lapply(select(data, all_of(k.vars)), levels) - lvls.k <- vapply(nms.fe, length, integer(1)) + nms_fe <- lapply(select(data, all_of(k_vars)), levels) + lvls_k <- vapply(nms_fe, length, integer(1)) # Generate auxiliary list of indexes for different sub panels ---- - k.list <- get_index_list_(k.vars, data) + k_list <- get_index_list_(k_vars, data) # Extract control arguments ---- - tol <- control[["dev.tol"]] + tol <- control[["dev_tol"]] limit <- control[["limit"]] - iter.max <- control[["iter.max"]] + iter_max <- control[["iter_max"]] trace <- control[["trace"]] # Initial negative binomial fit ---- - fit <- feglm_fit_( - beta, eta, y, X, wt, k.list, family, control - ) - beta <- fit[["coefficients"]] - eta <- fit[["eta"]] - dev <- fit[["deviance"]] theta <- suppressWarnings( theta.ml( y = y, @@ -124,17 +118,21 @@ fenegbin <- function( ) ) + fit <- feglm_fit_( + beta, eta, y, X, wt, theta, family[["family"]], control, k_list + ) + + beta <- fit[["coefficients"]] + eta <- fit[["eta"]] + dev <- fit[["deviance"]] + # Alternate between fitting glm and \theta ---- conv <- FALSE - for (iter in seq.int(iter.max)) { + for (iter in seq.int(iter_max)) { # Fit negative binomial model dev.old <- dev theta.old <- theta family <- negative.binomial(theta, link) - fit <- feglm_fit_(beta, eta, y, X, wt, k.list, family, control) - beta <- fit[["coefficients"]] - eta <- fit[["eta"]] - dev <- fit[["deviance"]] theta <- suppressWarnings( theta.ml( y = y, @@ -144,6 
+142,10 @@ fenegbin <- function( trace = trace ) ) + fit <- feglm_fit_(beta, eta, y, X, wt, theta, family[["family"]], control, k_list) + beta <- fit[["coefficients"]] + eta <- fit[["eta"]] + dev <- fit[["deviance"]] # Progress information if (trace) { @@ -171,12 +173,12 @@ fenegbin <- function( # Information if convergence failed ---- if (!conv && trace) cat("Algorithm did not converge.\n") - # Add names to beta, Hessian, and MX (if provided) ---- - names(fit[["coefficients"]]) <- nms.sp - if (control[["keep.mx"]]) { - colnames(fit[["MX"]]) <- nms.sp + # Add names to beta, hessian, and MX (if provided) ---- + names(fit[["coefficients"]]) <- nms_sp + if (control[["keep_mx"]]) { + colnames(fit[["MX"]]) <- nms_sp } - dimnames(fit[["Hessian"]]) <- list(nms.sp, nms.sp) + dimnames(fit[["hessian"]]) <- list(nms_sp, nms_sp) # Generate result list ---- reslist <- c( @@ -185,8 +187,8 @@ fenegbin <- function( iter.outer = iter, conv.outer = conv, nobs = nobs, - lvls.k = lvls.k, - nms.fe = nms.fe, + lvls_k = lvls_k, + nms_fe = nms_fe, formula = formula, data = data, family = family, diff --git a/R/fepoisson.R b/R/fepoisson.R index 3a9ad24..0c6a03e 100644 --- a/R/fepoisson.R +++ b/R/fepoisson.R @@ -18,35 +18,11 @@ fepoisson <- function( formula = NULL, data = NULL, weights = NULL, - beta.start = NULL, - eta.start = NULL, + beta_start = NULL, + eta_start = NULL, control = NULL) { feglm( formula = formula, data = data, weights = weights, family = poisson(), - beta.start = beta.start, eta.start = eta.start, control = control + beta_start = beta_start, eta_start = eta_start, control = control ) } - -# fequasipoisson <- function( -# formula = NULL, -# data = NULL, -# weights = NULL, -# beta.start = NULL, -# eta.start = NULL, -# control = NULL) { -# # Fit the model using standard Poisson assumptions -# fit <- feglm( -# formula = formula, data = data, weights = weights, family = poisson(), -# beta.start = beta.start, eta.start = eta.start, control = control -# ) - -# # Estimate the dispersion parameter (phi) -# fitted_values <- predict(object, type = "response") -# residuals <- unlist(object$data[, 1], use.names = FALSE) - fitted_values -# phi <- sum((residuals^2) / fitted_values) / fit$df.residual? - -# # Adjust model diagnostics for Quasi Poisson -# fit$std.errors <- sqrt(phi) * fit$std.errors - -# return(fit) -# } diff --git a/R/fixed_effects.R b/R/fixed_effects.R index ffcbd07..b127902 100644 --- a/R/fixed_effects.R +++ b/R/fixed_effects.R @@ -4,7 +4,7 @@ #' function has to be applied to our solution to get meaningful estimates of #' the fixed effects. #' @param object an object of class \code{"feglm"}. -#' @param alpha.tol tolerance level for the stopping condition. The algorithm is +#' @param alpha_tol tolerance level for the stopping condition. The algorithm is #' stopped at iteration \eqn{i} if \eqn{||\boldsymbol{\alpha}_{i} - #' \boldsymbol{\alpha}_{i - 1}||_{2} < tol ||\boldsymbol{\alpha}_{i - 1}||_{2}}{||\Delta \alpha|| < tol ||\alpha_old||}. Default is \code{1.0e-08}. @@ -23,7 +23,7 @@ #' #' fixed_effects(mod) #' @export -fixed_effects <- function(object = NULL, alpha.tol = 1.0e-08) { +fixed_effects <- function(object = NULL, alpha_tol = 1.0e-08) { # Check validity of 'object' if (is.null(object)) { stop("'object' has to be specified.", call. 
= FALSE) @@ -39,31 +39,31 @@ fixed_effects <- function(object = NULL, alpha.tol = 1.0e-08) { beta <- object[["coefficients"]] data <- object[["data"]] formula <- object[["formula"]] - lvls.k <- object[["lvls.k"]] - nms.fe <- object[["nms.fe"]] - k.vars <- names(lvls.k) - k <- length(lvls.k) + lvls_k <- object[["lvls_k"]] + nms_fe <- object[["nms_fe"]] + k_vars <- names(lvls_k) + k <- length(lvls_k) eta <- object[["eta"]] # Extract regressor matrix X <- model.matrix(formula, data, rhs = 1L)[, -1L, drop = FALSE] - nms.sp <- attr(X, "dimnames")[[2L]] + nms_sp <- attr(X, "dimnames")[[2L]] attr(X, "dimnames") <- NULL # Generate auxiliary list of indexes for different sub panels - k.list <- get_index_list_(k.vars, data) + k_list <- get_index_list_(k_vars, data) # Recover fixed effects by alternating the solutions of normal equations - pie <- eta - solve_y_(X, beta) - fe.list <- as.list(get_alpha_(pie, k.list, alpha.tol)) + pie <- eta - X %*% beta + fe_list <- as.list(get_alpha_(pie, k_list, alpha_tol)) # Assign names to the different fixed effects categories for (i in seq.int(k)) { - colnames(fe.list[[i]]) <- k.vars[i] - rownames(fe.list[[i]]) <- nms.fe[[i]] + colnames(fe_list[[i]]) <- k_vars[i] + rownames(fe_list[[i]]) <- nms_fe[[i]] } - names(fe.list) <- k.vars + names(fe_list) <- k_vars # Return list of estimated fixed effects - fe.list + fe_list } diff --git a/R/generics_glance.R b/R/generics_glance.R index 3c80200..7dbdf70 100644 --- a/R/generics_glance.R +++ b/R/generics_glance.R @@ -9,10 +9,10 @@ glance.feglm <- function(x, ...) { summary(x), data.frame( deviance = deviance, - null.deviance = null.deviance, - nobs.full = nobs["nobs.full"], - nobs.na = nobs["nobs.na"], - nobs.pc = nobs["nobs.pc"], + null_deviance = null_deviance, + nobs_full = nobs["nobs_full"], + nobs_na = nobs["nobs_na"], + nobs_pc = nobs["nobs_pc"], nobs = nobs["nobs"] ) ) @@ -29,9 +29,9 @@ glance.felm <- function(x, ...) { tibble( r.squared = r.squared, adj.r.squared = adj.r.squared, - nobs.full = nobs["nobs.full"], - nobs.na = nobs["nobs.na"], - nobs.pc = nobs["nobs.pc"], + nobs_full = nobs["nobs_full"], + nobs_na = nobs["nobs_na"], + nobs_pc = nobs["nobs_pc"], nobs = nobs["nobs"] ) ) diff --git a/R/generics_print.R b/R/generics_print.R index 5d6823e..257c16b 100644 --- a/R/generics_print.R +++ b/R/generics_print.R @@ -137,8 +137,8 @@ summary_nobs_ <- function(x) { cat( "\nNumber of observations:", paste0("Full ", x[["nobs"]][["nobs"]], ";"), - paste0("Missing ", x[["nobs"]][["nobs.na"]], ";"), - paste0("Perfect classification ", x[["nobs"]][["nobs.pc"]]), "\n" + paste0("Missing ", x[["nobs"]][["nobs_na"]], ";"), + paste0("Perfect classification ", x[["nobs"]][["nobs_pc"]]), "\n" ) } @@ -171,7 +171,7 @@ print.feglm <- function(x, digits = max(3L, getOption("digits") - 3L), ...) 
{ cat( sub("\\(.*\\)", "", x[["family"]][["family"]]), " - ", x[["family"]][["link"]], " link", - ", l= [", paste0(x[["lvls.k"]], collapse = ", "), "]\n\n", + ", l= [", paste0(x[["lvls_k"]], collapse = ", "), "]\n\n", sep = "" ) print(x[["coefficients"]], digits = digits) diff --git a/R/generics_summary.R b/R/generics_summary.R index ed8fe7c..7469acd 100644 --- a/R/generics_summary.R +++ b/R/generics_summary.R @@ -37,10 +37,10 @@ summary.feglm <- function( res <- list( cm = cm, deviance = object[["deviance"]], - null.deviance = object[["null.deviance"]], + null_deviance = object[["null_deviance"]], iter = object[["iter"]], nobs = object[["nobs"]], - lvls.k = object[["lvls.k"]], + lvls_k = object[["lvls_k"]], formula = object[["formula"]], family = object[["family"]] ) @@ -87,7 +87,7 @@ summary.felm <- function( e_sq <- (y - object[["fitted.values"]])^2 tss <- sum(w * ydemeaned_sq) rss <- sum(w * e_sq) - n <- unname(object[["nobs"]]["nobs.full"]) + n <- unname(object[["nobs"]]["nobs_full"]) k <- length(object[["coefficients"]]) + sum(vapply(object[["nms.fe"]], length, integer(1))) @@ -95,7 +95,7 @@ summary.felm <- function( res <- list( cm = cm, nobs = object[["nobs"]], - lvls.k = object[["lvls.k"]], + lvls_k = object[["lvls_k"]], formula = object[["formula"]], r.squared = 1 - (rss / tss), adj.r.squared = 1 - (rss / tss) * ((n - 1) / (n - k)) diff --git a/R/helpers.R b/R/helpers.R index 308ce71..57bde2d 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -10,6 +10,36 @@ check_factor_ <- function(x) { # Higher-order partial derivatives ---- +second_order_derivative_ <- function(eta, f, family) { + link <- family[["link"]] + linkinv_eta <- family[["linkinv"]](eta) + + if (link == "logit") { + return(f * (1.0 - 2.0 * linkinv_eta)) + } else if (link == "probit") { + return(-eta * f) + } else if (link == "cloglog") { + return(f * (1.0 - exp(eta))) + } else { + return(-2.0 * eta / (1.0 + eta^2) * f) + } +} + +third_order_derivative_ <- function(eta, f, family) { + link <- family[["link"]] + linkinv_eta <- family[["linkinv"]](eta) + + if (link == "logit") { + return(f * ((1.0 - 2.0 * linkinv_eta)^2 - 2.0 * f)) + } else if (link == "probit") { + return((eta^2 - 1.0) * f) + } else if (link == "cloglog") { + return(f * (1.0 - exp(eta)) * (2.0 - exp(eta)) - f) + } else { + return((6.0 * eta^2 - 2.0) / (1.0 + eta^2)^2 * f) + } +} + partial_mu_eta_ <- function(eta, family, order) { # Safeguard eta if necessary if (family[["link"]] != "logit") { @@ -19,27 +49,9 @@ partial_mu_eta_ <- function(eta, family, order) { f <- family[["mu.eta"]](eta) if (order == 2L) { - # Second-order derivative - if (family[["link"]] == "logit") { - f * (1.0 - 2.0 * family[["linkinv"]](eta)) - } else if (family[["link"]] == "probit") { - -eta * f - } else if (family[["link"]] == "cloglog") { - f * (1.0 - exp(eta)) - } else { - -2.0 * eta / (1.0 + eta^2) * f - } + return(second_order_derivative_(eta, f, family)) } else { - # Third-order derivative - if (family[["link"]] == "logit") { - f * ((1.0 - 2.0 * family[["linkinv"]](eta))^2 - 2.0 * f) - } else if (family[["link"]] == "probit") { - (eta^2 - 1.0) * f - } else if (family[["link"]] == "cloglog") { - f * (1.0 - exp(eta)) * (2.0 - exp(eta)) - f - } else { - (6.0 * eta^2 - 2.0) / (1.0 + eta^2)^2 * f - } + return(third_order_derivative_(eta, f, family)) } } @@ -92,6 +104,12 @@ check_family_ <- function(family) { } else if (startsWith(family[["family"]], "Negative Binomial")) { stop("Please use 'fenegbin' instead.", call. 
= FALSE) } + + if (family[["family"]] == "binomial" && family[["link"]] != "logit") { + stop("The current version only supports the logit link in the binomial family. + This is because I had to rewrite the links in C++ to use them with Armadillo. + Send me a pull request or open an issue if you need probit.", call. = FALSE) + } } update_formula_ <- function(formula) { @@ -112,17 +130,17 @@ model_frame_ <- function(data, formula, weights) { lhs <- names(data)[[1L]] - nobs.full <- nrow(data) + nobs_full <- nrow(data) data <- na.omit(data) - nobs.na <- nobs.full - nrow(data) - nobs.full <- nrow(data) + nobs_na <- nobs_full - nrow(data) + nobs_full <- nrow(data) assign("data", data, envir = parent.frame()) assign("lhs", lhs, envir = parent.frame()) - assign("nobs.na", nobs.na, envir = parent.frame()) - assign("nobs.full", nobs.full, envir = parent.frame()) + assign("nobs_na", nobs_na, envir = parent.frame()) + assign("nobs_full", nobs_full, envir = parent.frame()) } check_response_ <- function(data, lhs, family) { @@ -167,7 +185,7 @@ check_response_ <- function(data, lhs, family) { drop_by_link_type_ <- function(data, lhs, family, tmp.var, k.vars, control) { if (family[["family"]] %in% c("binomial", "poisson")) { - if (control[["drop.pc"]]) { + if (control[["drop_pc"]]) { repeat { # Drop observations that do not contribute to the log-likelihood ncheck <- nrow(data) @@ -206,11 +224,11 @@ transform_fe_ <- function(data, formula, k.vars) { data } -nobs_ <- function(nobs.full, nobs.na, nt) { +nobs_ <- function(nobs_full, nobs_na, nt) { c( - nobs.full = nobs.full, - nobs.na = nobs.na, - nobs.pc = nobs.full - nt, + nobs_full = nobs_full, + nobs_na = nobs_na, + nobs_pc = nobs_full - nt, nobs = nt ) } @@ -218,18 +236,18 @@ nobs_ <- function(nobs.full, nobs.na, nt) { model_response_ <- function(data, formula) { y <- data[[1L]] X <- model.matrix(formula, data, rhs = 1L)[, -1L, drop = FALSE] - nms.sp <- attr(X, "dimnames")[[2L]] + nms_sp <- attr(X, "dimnames")[[2L]] attr(X, "dimnames") <- NULL p <- ncol(X) assign("y", y, envir = parent.frame()) assign("X", X, envir = parent.frame()) - assign("nms.sp", nms.sp, envir = parent.frame()) + assign("nms_sp", nms_sp, envir = parent.frame()) assign("p", p, envir = parent.frame()) } check_linear_dependence_ <- function(X, p) { - if (rank_(X) < p) { + if (qr(X)$rank < p) { stop("Linearly dependent terms detected.", call. = FALSE) } } @@ -247,7 +265,7 @@ init_theta_ <- function(init.theta, link) { if (is.null(init.theta)) { family <- poisson(link) } else { - # Validity of input argument (beta.start) + # Validity of input argument (init.theta) if (length(init.theta) != 1L) { stop("'init.theta' has to be a scalar.", call. = FALSE) } else if (init.theta <= 0.0) { @@ -260,23 +278,23 @@ } start_guesses_ <- function( - beta.start, eta.start, y, X, beta, nt, wt, p, family) { - if (!is.null(beta.start) || !is.null(eta.start)) { - # If both are specified, ignore eta.start - if (!is.null(beta.start) && !is.null(eta.start)) { + beta_start, eta_start, y, X, beta, nt, wt, p, family) { + if (!is.null(beta_start) || !is.null(eta_start)) { + # If both are specified, ignore eta_start + if (!is.null(beta_start) && !is.null(eta_start)) { warning( - "'beta.start' and 'eta.start' are specified. Ignoring 'eta.start'.", + "'beta_start' and 'eta_start' are specified. Ignoring 'eta_start'.", call. 
= FALSE ) } # Compute and check starting guesses - if (!is.null(beta.start)) { - # Validity of input argument (beta.start) - if (length(beta.start) != p) { + if (!is.null(beta_start)) { + # Validity of input argument (beta_start) + if (length(beta_start) != p) { stop( paste( - "Length of 'beta.start' has to be equal to the number of", + "Length of 'beta_start' has to be equal to the number of", "structural parameters." ), call. = FALSE @@ -284,14 +302,14 @@ start_guesses_ <- function( } # Set starting guesses - beta <- beta.start - eta <- solve_y_(X, beta) + beta <- beta_start + eta <- X %*% beta } else { - # Validity of input argument (eta.start) - if (length(eta.start) != nt) { + # Validity of input argument (eta_start) + if (length(eta_start) != nt) { stop( paste( - "Length of 'eta.start' has to be equal to the number of", + "Length of 'eta_start' has to be equal to the number of", "observations." ), call. = FALSE @@ -300,7 +318,7 @@ start_guesses_ <- function( # Set starting guesses beta <- numeric(p) - eta <- eta.start + eta <- eta_start } } else { # Compute starting guesses if not user specified @@ -317,3 +335,73 @@ start_guesses_ <- function( assign("beta", beta, envir = parent.frame()) assign("eta", eta, envir = parent.frame()) } + +# Generate auxiliary list of indexes for different sub panels ---- + +get_index_list_ <- function(k.vars, data) { + indexes <- seq.int(0L, nrow(data) - 1L) + lapply(k.vars, function(x, indexes, data) { + split(indexes, data[[x]]) + }, indexes = indexes, data = data) +} + +# Compute score matrix ---- + +get_score_matrix_ <- function(object) { + # Extract required quantities from result list + control <- object[["control"]] + data <- object[["data"]] + eta <- object[["eta"]] + wt <- object[["weights"]] + family <- object[["family"]] + + # Update weights and dependent variable + y <- data[[1L]] + mu <- family[["linkinv"]](eta) + mu.eta <- family[["mu.eta"]](eta) + w <- (wt * mu.eta^2) / family[["variance"]](mu) + nu <- (y - mu) / mu.eta + + # Center regressor matrix (if required) + if (control[["keep_mx"]]) { + MX <- object[["MX"]] + } else { + # Extract additional required quantities from result list + formula <- object[["formula"]] + k.vars <- names(object[["lvls_k"]]) + + # Generate auxiliary list of indexes to project out the fixed effects + k.list <- get_index_list_(k.vars, data) + + # Extract regressor matrix + X <- model.matrix(formula, data, rhs = 1L)[, -1L, drop = FALSE] + nms_sp <- attr(X, "dimnames")[[2L]] + attr(X, "dimnames") <- NULL + + # Center variables + MX <- center_variables_r_(X, w, k.list, control[["center_tol"]], 10000L) + colnames(MX) <- nms_sp + } + + # Return score matrix + MX * (nu * w) +} + +# Suitable name for a temporary variable ---- + +temp_var_ <- function(data) { + repeat { + tmp.var <- paste0(sample(letters, 5L, replace = TRUE), collapse = "") + if (!(tmp.var %in% colnames(data))) { + break + } + } + tmp.var +} + +# Gamma computation (APES) ---- + +gamma_ <- function(MX, H, J, PPsi, v, nt) { + inv_nt <- 1.0 / nt + (MX %*% solve(H * inv_nt, J) - PPsi) * v * inv_nt +} diff --git a/R/internals.R b/R/internals.R deleted file mode 100644 index 68279b5..0000000 --- a/R/internals.R +++ /dev/null @@ -1,325 +0,0 @@ -# Transform factor ---- - -check_factor_ <- function(x) { - if (is.factor(x)) { - droplevels(x) - } else { - factor(x) - } -} - -# Fitting algorithm (similar to lm.fit) ---- - -felm_fit_ <- function(y, X, wt, k.list, control) { - # Extract control arguments - center.tol <- control[["center.tol"]] - epsilon <- 
max(1.0e-07, .Machine[["double.eps"]]) - keep.mx <- control[["keep.mx"]] - - # Generate temporary variables - nt <- length(y) - MX <- X - - # Centering variables - MX <- center_variables_(MX, NA_real_, wt, k.list, center.tol, 10000L, FALSE) - - # Compute the OLS estimate - # beta <- as.vector(qr.solve(MX, y, epsilon)) - beta <- solve_beta_(MX, y, NA_real_, FALSE) - - # Generate result list - reslist <- list( - coefficients = beta - ) - - # Update result list - if (keep.mx) reslist[["MX"]] <- MX - - # Return result list - reslist -} - -# Fitting algorithm (similar to glm.fit) ---- - -feglm_fit_ <- function(beta, eta, y, X, wt, k.list, family, control) { - # Extract control arguments - center.tol <- control[["center.tol"]] - dev.tol <- control[["dev.tol"]] - epsilon <- max(min(1.0e-07, dev.tol / 1000.0), .Machine[["double.eps"]]) - iter.max <- control[["iter.max"]] - trace <- control[["trace"]] - keep.mx <- control[["keep.mx"]] - - # Compute initial quantities for the maximization routine - nt <- length(y) - mu <- family[["linkinv"]](eta) - dev <- sum(family[["dev.resids"]](y, mu, wt)) - null.dev <- sum(family[["dev.resids"]](y, mean(y), wt)) - - # Generate temporary variables - Mnu <- as.matrix(numeric(nt)) - MX <- X - - # Start maximization of the log-likelihood - conv <- FALSE - for (iter in seq.int(iter.max)) { - # Store \eta, \beta, and deviance of the previous iteration - eta.old <- eta - beta.old <- beta - dev.old <- dev - - # Compute weights and dependent variable - mu.eta <- family[["mu.eta"]](eta) - w <- (wt * mu.eta^2) / family[["variance"]](mu) - nu <- (y - mu) / mu.eta - - # Centering variables - Mnu <- center_variables_(Mnu, nu, w, k.list, center.tol, 10000L, TRUE) - MX <- center_variables_(MX, NA_real_, w, k.list, center.tol, 10000L, FALSE) - - # Compute update step and update eta - # beta.upd <- as.vector(qr.solve(MX * w.tilde, Mnu * w.tilde, epsilon)) - # eta.upd <- nu - as.vector(Mnu - MX %*% beta.upd) - beta.upd <- solve_beta_(MX, Mnu, w, TRUE) - eta.upd <- solve_eta_(MX, Mnu, nu, beta.upd) - - # Step-halving with three checks - # 1. finite deviance - # 2. valid \eta and \mu - # 3. improvement as in glm2 - rho <- 1.0 - - for (inner.iter in seq.int(50L)) { - # eta <- eta.old + rho * eta.upd - # beta <- beta.old + rho * beta.upd - eta <- update_beta_eta_(eta.old, eta.upd, rho) - beta <- update_beta_eta_(beta.old, beta.upd, rho) - mu <- family[["linkinv"]](eta) - dev <- sum(family[["dev.resids"]](y, mu, wt)) - dev.crit <- is.finite(dev) - val.crit <- family[["valideta"]](eta) && family[["validmu"]](mu) - imp.crit <- (dev - dev.old) / (0.1 + abs(dev)) <= -dev.tol - if (dev.crit && val.crit && imp.crit) break - rho <- rho * 0.5 - } - - # Check if step-halving failed (deviance and invalid \eta or \mu) - if (!dev.crit || !val.crit) { - stop("Inner loop failed; cannot correct step size.", call. 
= FALSE) - } - - # Stop if we do not improve - if (!imp.crit) { - eta <- eta.old - beta <- beta.old - dev <- dev.old - mu <- family[["linkinv"]](eta) - } - - # Progress information - if (trace) { - cat( - "Deviance=", format(dev, digits = 5L, nsmall = 2L), "Iterations -", - iter, "\n" - ) - cat("Estimates=", format(beta, digits = 3L, nsmall = 2L), "\n") - } - - # Check convergence - dev.crit <- abs(dev - dev.old) / (0.1 + abs(dev)) - if (trace) cat("Stopping criterion=", dev.crit, "\n") - if (dev.crit < dev.tol) { - if (trace) cat("Convergence\n") - conv <- TRUE - break - } - - # Update starting guesses for acceleration - Mnu <- Mnu - nu - } - - # Information if convergence failed - if (!conv && trace) cat("Algorithm did not converge.\n") - - # Update weights and dependent variable - mu.eta <- family[["mu.eta"]](eta) - w <- (wt * mu.eta^2) / family[["variance"]](mu) - - # Center variables - MX <- center_variables_(X, NA_real_, w, k.list, center.tol, 10000L, FALSE) - # Recompute Hessian - H <- crossprod_(MX, w, TRUE, TRUE) - - # Generate result list - reslist <- list( - coefficients = beta, - eta = eta, - weights = wt, - Hessian = H, - deviance = dev, - null.deviance = null.dev, - conv = conv, - iter = iter - ) - - # Update result list - if (keep.mx) reslist[["MX"]] <- MX - - # Return result list - reslist -} - -# Efficient offset algorithm to update the linear predictor ---- - -feglm_offset_ <- function(object, offset) { - # Check validity of 'object' - if (!inherits(object, "feglm")) { - stop("'feglm_offset_' called on a non-'feglm' object.") - } - - # Extract required quantities from result list - control <- object[["control"]] - data <- object[["data"]] - wt <- object[["weights"]] - family <- object[["family"]] - formula <- object[["formula"]] - lvls.k <- object[["lvls.k"]] - nt <- object[["nobs"]][["nobs"]] - k.vars <- names(lvls.k) - - # Extract dependent variable - y <- data[[1L]] - - # Extract control arguments - center.tol <- control[["center.tol"]] - dev.tol <- control[["dev.tol"]] - iter.max <- control[["iter.max"]] - - # Generate auxiliary list of indexes to project out the fixed effects - k.list <- get_index_list_(k.vars, data) - - # Compute starting guess for \eta - if (family[["family"]] == "binomial") { - eta <- rep(family[["linkfun"]](sum(wt * (y + 0.5) / 2.0) / sum(wt)), nt) - } else if (family[["family"]] %in% c("Gamma", "inverse.gaussian")) { - eta <- rep(family[["linkfun"]](sum(wt * y) / sum(wt)), nt) - } else { - eta <- rep(family[["linkfun"]](sum(wt * (y + 0.1)) / sum(wt)), nt) - } - - # Compute initial quantities for the maximization routine - mu <- family[["linkinv"]](eta) - dev <- sum(family[["dev.resids"]](y, mu, wt)) - Myadj <- as.matrix(numeric(nt)) - - # Start maximization of the log-likelihood - for (iter in seq.int(iter.max)) { - # Store \eta, \beta, and deviance of the previous iteration - eta.old <- eta - dev.old <- dev - - # Compute weights and dependent variable - mu.eta <- family[["mu.eta"]](eta) - w <- (wt * mu.eta^2) / family[["variance"]](mu) - yadj <- (y - mu) / mu.eta + eta - offset - - # Centering dependent variable and compute \eta update - Myadj <- center_variables_(Myadj, yadj, w, k.list, center.tol, 10000L, TRUE) - # eta.upd <- yadj - drop(Myadj) + offset - eta - eta.upd <- solve_eta2_(yadj, Myadj, offset, eta) - - # Step-halving with three checks - # 1. finite deviance - # 2. valid \eta and \mu - # 3. 
improvement as in glm2 - rho <- 1.0 - for (inner.iter in seq.int(50L)) { - # eta <- eta.old + rho * eta.upd - eta <- update_beta_eta_(eta.old, eta.upd, rho) - mu <- family[["linkinv"]](eta) - dev <- sum(family[["dev.resids"]](y, mu, wt)) - dev.crit <- is.finite(dev) - val.crit <- family[["valideta"]](eta) && family[["validmu"]](mu) - imp.crit <- (dev - dev.old) / (0.1 + abs(dev)) <= -dev.tol - if (dev.crit && val.crit && imp.crit) break - rho <- rho / 2.0 - } - - # Check if step-halving failed - if (!dev.crit || !val.crit) { - stop("Inner loop failed; cannot correct step size.", call. = FALSE) - } - - # Check termination condition - if (abs(dev - dev.old) / (0.1 + abs(dev)) < dev.tol) break - - # Update starting guesses for acceleration - Myadj <- Myadj - yadj - } - - # Return \eta - eta -} - -# Generate auxiliary list of indexes for different sub panels ---- - -get_index_list_ <- function(k.vars, data) { - indexes <- seq.int(0L, nrow(data) - 1L) - lapply(k.vars, function(x, indexes, data) { - split(indexes, data[[x]]) - }, indexes = indexes, data = data) -} - -# Compute score matrix ---- - -get_score_matrix_ <- function(object) { - # Extract required quantities from result list - control <- object[["control"]] - data <- object[["data"]] - eta <- object[["eta"]] - wt <- object[["weights"]] - family <- object[["family"]] - - # Update weights and dependent variable - y <- data[[1L]] - mu <- family[["linkinv"]](eta) - mu.eta <- family[["mu.eta"]](eta) - w <- (wt * mu.eta^2) / family[["variance"]](mu) - # nu <- (y - mu) / mu.eta - nu <- update_nu_(y, mu, mu.eta) - - # Center regressor matrix (if required) - if (control[["keep.mx"]]) { - MX <- object[["MX"]] - } else { - # Extract additional required quantities from result list - formula <- object[["formula"]] - k.vars <- names(object[["lvls.k"]]) - - # Generate auxiliary list of indexes to project out the fixed effects - k.list <- get_index_list_(k.vars, data) - - # Extract regressor matrix - X <- model.matrix(formula, data, rhs = 1L)[, -1L, drop = FALSE] - nms.sp <- attr(X, "dimnames")[[2L]] - attr(X, "dimnames") <- NULL - - # Center variables - MX <- center_variables_(X, NA_real_, w, k.list, control[["center.tol"]], 10000L, FALSE) - colnames(MX) <- nms.sp - } - - # Return score matrix - MX * (nu * w) -} - -# Returns suitable name for a temporary variable -temp_var_ <- function(data) { - repeat { - tmp.var <- paste0(sample(letters, 5L, replace = TRUE), collapse = "") - if (!(tmp.var %in% colnames(data))) { - break - } - } - tmp.var -} diff --git a/README.Rmd b/README.Rmd index 7b03922..15a7f69 100644 --- a/README.Rmd +++ b/README.Rmd @@ -121,27 +121,27 @@ to test with testthat. Median time for the different models in the book [An Advanced Guide to Trade Policy Analysis](https://www.wto.org/english/res_e/publications_e/advancedguide2016_e.htm). 
-|package | PPML| Trade Diversion| Endogeneity| Reverse Causality| Non-linear/Phasing Effects| Globalization|
-|:------------|-------:|---------------:|-----------:|-----------------:|--------------------------:|-------------:|
-|Alpaca | 261ms| 2s | 2s | 2s | 3s | 6s |
-|Base R | 2m | 2m | 23m | 24m | 23m | 25m |
-|**Capybara** | 364ms| 3s | 1s | 2s | 2s | 4s |
-|Fixest | 69ms| 488ms| 125ms| 148ms| 251ms| 497ms|
+| package | PPML | Trade Diversion | Endogeneity | Reverse Causality | Non-linear/Phasing Effects | Globalization |
+|:------------|-------:|-----------------:|------------:|-----------------:|----------------------------:|--------------:|
+| Alpaca | 0.4s | 2.6s | 1.6s | 2.0s | 3.1s | 5.3s |
+| Base R | 120.0s | 120.0s | 1380.0s | 1440.0s | 1380.0s | 1500.0s |
+| **Capybara**| 0.3s | 2.0s | 1.2s | 1.4s | 1.7s | 3.4s |
+| Fixest | 0.1s | 0.5s | 0.1s | 0.2s | 0.3s | 0.5s |

Memory allocation for the same models

|package | PPML| Trade Diversion| Endogeneity| Reverse Causality| Non-linear/Phasing Effects| Globalization|
|:------------|-------:|---------------:|-----------:|-----------------:|--------------------------:|-------------:|
-|Alpaca | 306MB| 341MB| 306MB| 336MB| 395MB| 541MB|
-|Base R | 3GB| 3GB| 12GB| 12GB| 12GB| 12GB|
-|**Capybara** | 211MB| 235MB| 243MB| 250MB| 265MB| 302MB|
-|Fixest | 44MB| 36MB| 27MB| 32MB| 41MB| 63MB|
+|Alpaca | 307MB | 341MB | 306MB | 336MB | 395MB | 541MB |
+|Base R | 3000MB | 3000MB | 12000MB | 12000MB | 12000MB | 12000MB |
+|**Capybara** | 27MB | 32MB | 20MB | 23MB | 29MB | 43MB |
+|Fixest | 44MB | 36MB | 27MB | 32MB | 41MB | 63MB |

# Debugging

*This debugging is about code quality, not about statistical quality.*
*There is a full set of numerical tests for testthat to check the math.*
-*In this section of the test, I can write pi = 3 and if there are no memory
+*In this section of the test, I could write "pi = 3" and if there are no memory
leaks, it will pass the test.*

I run `r_valgrind "dev/test_get_alpha.r"` or the corresponding test from the
diff --git a/README.md b/README.md index 7155b6a..64db857 100644 --- a/README.md +++ b/README.md @@ -117,27 +117,27 @@
Median time for the different models in the book [An Advanced Guide to Trade Policy Analysis](https://www.wto.org/english/res_e/publications_e/advancedguide2016_e.htm).
-| package | PPML | Trade Diversion | Endogeneity | Reverse Causality | Non-linear/Phasing Effects | Globalization |
-| :----------- | ------: | --------------: | ----------: | ----------------: | -------------------------: | ------------: |
-| Alpaca | 346.4ms | 2.52s | 1.51s | 1.9s | 2.96s | 5.57s |
-| Base R | 1.5m | 1.53m | 23.43m | 23.52m | 23.16m | 24.85m |
-| **Capybara** | 440ms | 2.86s | 1.92s | 2.29s | 2.96s | 4.46s |
-| Fixest | 64.9ms | 503ms | 106.14ms | 145.04ms | 243.61ms | 524.7ms |
+| package | PPML | Trade Diversion | Endogeneity | Reverse Causality | Non-linear/Phasing Effects | Globalization |
+| :----------- | -----: | --------------: | ----------: | ----------------: | -------------------------: | ------------: |
+| Alpaca | 0.4s | 2.6s | 1.6s | 2.0s | 3.1s | 5.3s |
+| Base R | 120.0s | 120.0s | 1380.0s | 1440.0s | 1380.0s | 1500.0s |
+| **Capybara** | 0.3s | 2.0s | 1.2s | 1.4s | 1.7s | 3.4s |
+| Fixest | 0.1s | 0.5s | 0.1s | 0.2s | 0.3s | 0.5s |

Memory allocation for the same models

| package | PPML | Trade Diversion | Endogeneity | Reverse Causality | Non-linear/Phasing Effects | Globalization |
| :----------- | -----: | --------------: | ----------: | ----------------: | -------------------------: | ------------: |
-| Alpaca | 306MB | 340.8MB | 306.4MB | 335.9MB | 394.6MB | 541.3MB |
-| Base R | 2.7GB | 2.6GB | 11.9GB | 11.92GB | 11.95GB | 11.97GB |
-| **Capybara** | 210MB | 235MB | 241MB | 249MB | 263MB | 299MB |
-| Fixest | 44.4MB | 36.4MB | 27.9MB | 32.2MB | 40.9MB | 62.7MB |
+| Alpaca | 307MB | 341MB | 306MB | 336MB | 395MB | 541MB |
+| Base R | 3000MB | 3000MB | 12000MB | 12000MB | 12000MB | 12000MB |
+| **Capybara** | 27MB | 32MB | 20MB | 23MB | 29MB | 43MB |
+| Fixest | 44MB | 36MB | 27MB | 32MB | 41MB | 63MB |

# Debugging

*This debugging is about code quality, not about statistical quality.*
*There is a full set of numerical tests for testthat to check the math.*
-*In this section of the test, I can write pi = 3 and if there are no
+*In this section of the test, I could write “pi = 3” and if there are no
memory leaks, it will pass the test.*

I run `r_valgrind "dev/test_get_alpha.r"` or the corresponding test from
@@ -219,3 +219,10 @@ leaks.

When you are done testing, you need to remove `-UDEBUG` from
`src/Makevars`.
+
+## Code of Conduct
+
+Please note that the capybara project is released with a [Contributor
+Code of
+Conduct](https://contributor-covenant.org/version/2/1/CODE_OF_CONDUCT.html).
+By contributing to this project, you agree to abide by its terms.
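For context on how the table numbers are produced: each cell is a `bench::mark()` median (or memory allocation) over one AGTPA model, as in `dev/benchmarks_tests_agtpa.R`. A minimal hedged sketch of one such timing, assuming the AGTPA data has already been prepared into a data frame `d` as in that script:

```r
# Sketch only: assumes `d` is the prepared AGTPA data frame from
# dev/benchmarks_tests_agtpa.R (rows with importer != exporter)
library(bench)

form2 <- trade ~ log_dist + cntg + lang + clny + rta | exp_year + imp_year

# Rounding makes the two results identical, so mark()'s equality check passes
bench_ppml <- mark(
  round(capybara::fepoisson(form2, data = d)$coefficients["rta"], 3),
  round(fixest::fepois(form2, data = d)$coefficients["rta"], 3)
)

bench_ppml$median    # the timing columns above
bench_ppml$mem_alloc # the memory columns above
```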
diff --git a/dev/07_helpers.cpp b/dev/07_helpers.cpp new file mode 100644 index 0000000..fa49b96 --- /dev/null +++ b/dev/07_helpers.cpp @@ -0,0 +1,19 @@ +#include "00_main.h" + +// Generate auxiliary list of indexes for different sub panels + +[[cpp11::register]] list get_index_list_(const strings &k_vars, + const data_frame &data) { + writable::integers indexes(data.nrow()); + std::iota(indexes.begin(), indexes.end(), 0); + + writable::list out; + + auto split = cpp11::package("base")["split"]; + + for (const auto &k_var : k_vars) { + out.push_back(split(indexes, data[k_var])); + } + + return out; +} diff --git a/dev/benchmarks_tests_agtpa.R b/dev/benchmarks_tests_agtpa.R index c1a1ce0..abf1a49 100644 --- a/dev/benchmarks_tests_agtpa.R +++ b/dev/benchmarks_tests_agtpa.R @@ -42,9 +42,6 @@ form2 <- trade ~ log_dist + cntg + lang + clny + d <- filter(ch1_application3, importer != exporter) bench_ppml <- mark( - # round(glm(form, family = stats::quasipoisson(link = "log"), data = d)$coefficients["rta"], 3), - round(capybara::fepoisson(form2, data = d)$coefficients["rta"], 3), - round(fixest::fepois(form2, data = d)$coefficients["rta"], 3), round(alpaca::feglm(form2, data = d, family = poisson())$coefficients["rta"], 3) ) @@ -63,9 +60,6 @@ form2 <- trade ~ log_dist + cntg + lang + clny + d <- ch1_application3 bench_trade_diversion <- mark( - # round(glm(form, family = stats::quasipoisson(link = "log"), data = d)$coefficients["rta"], 3), - round(capybara::fepoisson(form2, data = d)$coefficients["rta"], 3), - round(fixest::fepois(form2, data = d)$coefficients["rta"], 3), round(alpaca::feglm(form2, data = d, family = poisson())$coefficients["rta"], 3) ) @@ -81,9 +75,6 @@ form2 <- trade ~ rta | exp_year + imp_year + pair_id_2 d <- filter(ch1_application3, sum_trade > 0) bench_endogeneity <- mark( - # round(glm(form, family = stats::quasipoisson(link = "log"), data = d)$coefficients["rta"], 3), - round(capybara::fepoisson(form2, data = d)$coefficients["rta"], 3), - round(fixest::fepois(form2, data = d)$coefficients["rta"], 3), round(alpaca::feglm(form2, data = d, family = poisson())$coefficients["rta"], 3) ) @@ -99,9 +90,6 @@ form2 <- trade ~ rta + rta_lead4 | exp_year + imp_year + pair_id_2 d <- filter(ch1_application3, sum_trade > 0) bench_reverse_causality <- mark( - # round(glm(form, family = stats::quasipoisson(link = "log"), data = d)$coefficients["rta"], 3), - round(capybara::fepoisson(form2, data = d)$coefficients["rta"], 3), - round(fixest::fepois(form2, data = d)$coefficients["rta"], 3), round(alpaca::feglm(form2, data = d, family = poisson())$coefficients["rta"], 3) ) @@ -120,9 +108,6 @@ form2 <- trade ~ rta + rta_lag4 + rta_lag8 + rta_lag12 | d <- filter(ch1_application3, sum_trade > 0) bench_phasing <- mark( - # round(glm(form, family = stats::quasipoisson(link = "log"), data = d)$coefficients["rta"], 3), - round(capybara::fepoisson(form2, data = d)$coefficients["rta"], 3), - round(fixest::fepois(form2, data = d)$coefficients["rta"], 3), round(alpaca::feglm(form2, data = d, family = poisson())$coefficients["rta"], 3) ) @@ -145,9 +130,6 @@ form2 <- trade ~ rta + rta_lag4 + rta_lag8 + rta_lag12 + d <- filter(ch1_application3, sum_trade > 0) bench_globalization <- mark( - # round(glm(form, family = stats::quasipoisson(link = "log"), data = d)$coefficients["rta"], 3), - round(capybara::fepoisson(form2, data = d)$coefficients["rta"], 3), - round(fixest::fepois(form2, data = d)$coefficients["rta"], 3), round(alpaca::feglm(form2, data = d, family = poisson())$coefficients["rta"], 3) ) @@ 
-176,14 +158,15 @@ bench_globalization <- readRDS("dev/bench_globalization.rds") bench_ppml %>% # mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>% - mutate(package = c("**Capybara**", "Fixest", "Alpaca")) %>% + mutate(package = c("Alpaca")) %>% + # mutate(package = c("**Capybara**", "Fixest", "Alpaca")) %>% mutate(model = "PPML") %>% select(model, package, median) %>% pivot_wider(names_from = model, values_from = median) %>% left_join( bench_trade_diversion %>% # mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>% - mutate(package = c("**Capybara**", "Fixest", "Alpaca")) %>% + mutate(package = c("Alpaca")) %>% mutate(model = "Trade Diversion") %>% select(model, package, median) %>% pivot_wider(names_from = model, values_from = median) @@ -191,7 +174,7 @@ bench_ppml %>% left_join( bench_endogeneity %>% # mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>% - mutate(package = c("**Capybara**", "Fixest", "Alpaca")) %>% + mutate(package = c("Alpaca")) %>% mutate(model = "Endogeneity") %>% select(model, package, median) %>% pivot_wider(names_from = model, values_from = median) @@ -199,7 +182,7 @@ bench_ppml %>% left_join( bench_reverse_causality %>% # mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>% - mutate(package = c("**Capybara**", "Fixest", "Alpaca")) %>% + mutate(package = c("Alpaca")) %>% mutate(model = "Reverse Causality") %>% select(model, package, median) %>% pivot_wider(names_from = model, values_from = median) @@ -207,7 +190,7 @@ bench_ppml %>% left_join( bench_phasing %>% # mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>% - mutate(package = c("**Capybara**", "Fixest", "Alpaca")) %>% + mutate(package = c("Alpaca")) %>% mutate(model = "Non-linear/Phasing Effects") %>% select(model, package, median) %>% pivot_wider(names_from = model, values_from = median) @@ -215,7 +198,7 @@ bench_ppml %>% left_join( bench_globalization %>% # mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>% - mutate(package = c("**Capybara**", "Fixest", "Alpaca")) %>% + mutate(package = c("Alpaca")) %>% mutate(model = "Globalization") %>% select(model, package, median) %>% pivot_wider(names_from = model, values_from = median) @@ -225,14 +208,14 @@ bench_ppml %>% bench_ppml %>% # mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>% - mutate(package = c("**Capybara**", "Fixest", "Alpaca")) %>% + mutate(package = c("Alpaca")) %>% mutate(model = "PPML") %>% select(model, package, mem_alloc) %>% pivot_wider(names_from = model, values_from = mem_alloc) %>% left_join( bench_trade_diversion %>% # mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>% - mutate(package = c("**Capybara**", "Fixest", "Alpaca")) %>% + mutate(package = c("Alpaca")) %>% mutate(model = "Trade Diversion") %>% select(model, package, mem_alloc) %>% pivot_wider(names_from = model, values_from = mem_alloc) @@ -240,7 +223,7 @@ bench_ppml %>% left_join( bench_endogeneity %>% # mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>% - mutate(package = c("**Capybara**", "Fixest", "Alpaca")) %>% + mutate(package = c("Alpaca")) %>% mutate(model = "Endogeneity") %>% select(model, package, mem_alloc) %>% pivot_wider(names_from = model, values_from = mem_alloc) @@ -248,7 +231,7 @@ bench_ppml %>% left_join( bench_reverse_causality %>% # mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>% - mutate(package = c("**Capybara**", "Fixest", "Alpaca")) %>% + mutate(package = c("Alpaca")) %>% 
mutate(model = "Reverse Causality") %>% select(model, package, mem_alloc) %>% pivot_wider(names_from = model, values_from = mem_alloc) @@ -256,7 +239,7 @@ bench_ppml %>% left_join( bench_phasing %>% # mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>% - mutate(package = c("**Capybara**", "Fixest", "Alpaca")) %>% + mutate(package = c("Alpaca")) %>% mutate(model = "Non-linear/Phasing Effects") %>% select(model, package, mem_alloc) %>% pivot_wider(names_from = model, values_from = mem_alloc) @@ -264,7 +247,7 @@ bench_ppml %>% left_join( bench_globalization %>% # mutate(package = c("Base R", "**Capybara**", "Fixest", "Alpaca")) %>% - mutate(package = c("**Capybara**", "Fixest", "Alpaca")) %>% + mutate(package = c("Alpaca")) %>% mutate(model = "Globalization") %>% select(model, package, mem_alloc) %>% pivot_wider(names_from = model, values_from = mem_alloc) diff --git a/dev/benchmarks_tests_agtpa_capybara_only copy.R b/dev/benchmarks_tests_agtpa_capybara_only copy.R deleted file mode 100644 index 1a546f3..0000000 --- a/dev/benchmarks_tests_agtpa_capybara_only copy.R +++ /dev/null @@ -1,180 +0,0 @@ -# this is not just about speed/memory, but also about obtaining the same -# slopes as in base R - -load_all() -library(dplyr) -library(tidyr) -library(janitor) -library(bench) - -rm(list = ls()) -gc() - -# data ---- - -ch1_application3 <- tradepolicy::agtpa_applications %>% - clean_names() %>% - filter(year %in% seq(1986, 2006, 4)) %>% - mutate( - exp_year = paste0(exporter, year), - imp_year = paste0(importer, year), - year = paste0("intl_border_", year), - log_trade = log(trade), - log_dist = log(dist), - intl_brdr = ifelse(exporter == importer, pair_id, "inter"), - intl_brdr_2 = ifelse(exporter == importer, 0, 1), - pair_id_2 = ifelse(exporter == importer, "0-intra", pair_id) - ) %>% - spread(year, intl_brdr_2, fill = 0) - -ch1_application3 <- ch1_application3 %>% - group_by(pair_id) %>% - mutate(sum_trade = sum(trade)) %>% - ungroup() - -# ppml ---- - -form <- trade ~ 0 + log_dist + cntg + lang + clny + - rta + exp_year + imp_year - -form2 <- trade ~ log_dist + cntg + lang + clny + - rta | exp_year + imp_year - -d <- filter(ch1_application3, importer != exporter) - -bench_ppml <- mark( - fepoisson(form2, data = d)$coefficients["rta"] -) - -formula = form2 -data = d -weights = NULL -beta.start = NULL -eta.start = NULL -control = NULL -family <- poisson() - -check_formula_(formula) -check_data_(data) -check_family_(family) -control <- check_control_(control) -formula <- update_formula_(formula) -lhs <- NA # just to avoid global variable warning -nobs.na <- NA -nobs.full <- NA -model_frame_(data, formula, weights) -check_response_(data, lhs, family) -k.vars <- attr(terms(formula, rhs = 2L), "term.labels") -k <- length(k.vars) -tmp.var <- temp_var_(data) -data <- drop_by_link_type_(data, lhs, family, tmp.var, k.vars, control) -data <- transform_fe_(data, formula, k.vars) -nt <- nrow(data) -nobs <- nobs_(nobs.full, nobs.na, nt) -nms.sp <- NA -p <- NA -model_response_(data, formula) - -p - -qr_(X, FALSE)$rank -out <- qr(X) -dim(out$qr) - -bench_ppml$median -bench_ppml$mem_alloc - -# rm(d) - -# trade diversion ---- - -form <- trade ~ 0 + log_dist + cntg + lang + clny + - rta + exp_year + imp_year + intl_brdr - -form2 <- trade ~ log_dist + cntg + lang + clny + - rta | exp_year + imp_year + intl_brdr - -d <- ch1_application3 - -bench_trade_diversion <- mark( - round(fepoisson(form2, data = d)$coefficients["rta"], 3) -) - -bench_trade_diversion$median -bench_trade_diversion$mem_alloc - 
-# rm(d) - -# endogeneity ---- - -# form <- trade ~ 0 + rta + exp_year + imp_year + pair_id_2 -# form2 <- trade ~ rta | exp_year + imp_year + pair_id_2 - -# d <- filter(ch1_application3, sum_trade > 0) - -# bench_endogeneity <- mark( -# round(fepoisson(form2, data = d)$coefficients["rta"], 3), -# iterations = 1000L -# ) - -bench_endogeneity - -# rm(d) - -# reverse causality ---- - -# form <- trade ~ 0 + rta + rta_lead4 + exp_year + imp_year + pair_id_2 -# form2 <- trade ~ rta + rta_lead4 | exp_year + imp_year + pair_id_2 - -# d <- filter(ch1_application3, sum_trade > 0) - -# bench_reverse_causality <- mark( -# round(fepoisson(form2, data = d)$coefficients["rta"], 3) -# ) - -# bench_reverse_causality - -# rm(d) - -# non-linear/phasing effects ---- - -# form <- trade ~ 0 + rta + rta_lag4 + rta_lag8 + rta_lag12 + -# exp_year + imp_year + pair_id_2 - -# form2 <- trade ~ rta + rta_lag4 + rta_lag8 + rta_lag12 | -# exp_year + imp_year + pair_id_2 - -# d <- filter(ch1_application3, sum_trade > 0) - -# bench_phasing <- mark( -# round(fepoisson(form2, data = d)$coefficients["rta"], 3) -# ) - -# bench_phasing - -# rm(d) - -# globalization ---- - -form <- trade ~ 0 + rta + rta_lag4 + rta_lag8 + rta_lag12 + - intl_border_1986 + intl_border_1990 + intl_border_1994 + - intl_border_1998 + intl_border_2002 + - exp_year + imp_year + pair_id_2 - -form2 <- trade ~ rta + rta_lag4 + rta_lag8 + rta_lag12 + - intl_border_1986 + intl_border_1990 + intl_border_1994 + - intl_border_1998 + intl_border_2002 | - exp_year + imp_year + pair_id_2 - -d <- filter(ch1_application3, sum_trade > 0) - -bench_globalization <- mark( - round(fepoisson(form2, data = d)$coefficients["rta"], 3) -) - -bench_globalization - -rm(d, form, ch1_application3) - -rm(list = ls()) -gc() diff --git a/dev/glmfit.r b/dev/glmfit.r new file mode 100644 index 0000000..5967848 --- /dev/null +++ b/dev/glmfit.r @@ -0,0 +1,129 @@ +# Fitting algorithm (similar to glm.fit) ---- + +feglm_fit_ <- function(beta, eta, y, X, wt, k.list, family, control) { + # Extract control arguments + center.tol <- control[["center.tol"]] + dev.tol <- control[["dev.tol"]] + epsilon <- max(min(1.0e-07, dev.tol / 1000.0), .Machine[["double.eps"]]) + iter.max <- control[["iter.max"]] + trace <- control[["trace"]] + keep.mx <- control[["keep.mx"]] + + # Compute initial quantities for the maximization routine + nt <- length(y) + mu <- family[["linkinv"]](eta) + dev <- sum(family[["dev.resids"]](y, mu, wt)) + null.dev <- sum(family[["dev.resids"]](y, mean(y), wt)) + + # Generate temporary variables + Mnu <- as.matrix(numeric(nt)) + MX <- X + + # Start maximization of the log-likelihood + conv <- FALSE + for (iter in seq.int(iter.max)) { + # Store \eta, \beta, and deviance of the previous iteration + eta.old <- eta + beta.old <- beta + dev.old <- dev + + # Compute weights and dependent variable + mu.eta <- family[["mu.eta"]](eta) + w <- (wt * mu.eta^2) / family[["variance"]](mu) + nu <- (y - mu) / mu.eta + + # Centering variables + Mnu <- center_variables_(Mnu, nu, w, k.list, center.tol, 10000L, TRUE) + MX <- center_variables_(MX, NA_real_, w, k.list, center.tol, 10000L, FALSE) + + # Compute update step and update eta + # beta.upd <- as.vector(qr.solve(MX * w.tilde, Mnu * w.tilde, epsilon)) + # eta.upd <- nu - as.vector(Mnu - MX %*% beta.upd) + beta.upd <- solve_beta_(MX, Mnu, w, TRUE) + eta.upd <- solve_eta_(MX, Mnu, nu, beta.upd) + + # Step-halving with three checks + # 1. finite deviance + # 2. valid \eta and \mu + # 3. 
improvement as in glm2 + rho <- 1.0 + + for (inner.iter in seq.int(50L)) { + # eta <- eta.old + rho * eta.upd + # beta <- beta.old + rho * beta.upd + eta <- update_beta_eta_(eta.old, eta.upd, rho) + beta <- update_beta_eta_(beta.old, beta.upd, rho) + mu <- family[["linkinv"]](eta) + dev <- sum(family[["dev.resids"]](y, mu, wt)) + dev.crit <- is.finite(dev) + val.crit <- family[["valideta"]](eta) && family[["validmu"]](mu) + imp.crit <- (dev - dev.old) / (0.1 + abs(dev)) <= -dev.tol + if (dev.crit && val.crit && imp.crit) break + rho <- rho * 0.5 + } + + # Check if step-halving failed (deviance and invalid \eta or \mu) + if (!dev.crit || !val.crit) { + stop("Inner loop failed; cannot correct step size.", call. = FALSE) + } + + # Stop if we do not improve + if (!imp.crit) { + eta <- eta.old + beta <- beta.old + dev <- dev.old + mu <- family[["linkinv"]](eta) + } + + # Progress information + if (trace) { + cat( + "Deviance=", format(dev, digits = 5L, nsmall = 2L), "Iterations -", + iter, "\n" + ) + cat("Estimates=", format(beta, digits = 3L, nsmall = 2L), "\n") + } + + # Check convergence + dev.crit <- abs(dev - dev.old) / (0.1 + abs(dev)) + if (trace) cat("Stopping criterion=", dev.crit, "\n") + if (dev.crit < dev.tol) { + if (trace) cat("Convergence\n") + conv <- TRUE + break + } + + # Update starting guesses for acceleration + Mnu <- Mnu - nu + } + + # Information if convergence failed + if (!conv && trace) cat("Algorithm did not converge.\n") + + # Update weights and dependent variable + mu.eta <- family[["mu.eta"]](eta) + w <- (wt * mu.eta^2) / family[["variance"]](mu) + + # Center variables + MX <- center_variables_(X, NA_real_, w, k.list, center.tol, 10000L, FALSE) + # Recompute Hessian + H <- crossprod_(MX, w, TRUE, TRUE) + + # Generate result list + reslist <- list( + coefficients = beta, + eta = eta, + weights = wt, + Hessian = H, + deviance = dev, + null.deviance = null.dev, + conv = conv, + iter = iter + ) + + # Update result list + if (keep.mx) reslist[["MX"]] <- MX + + # Return result list + reslist +} diff --git a/dev/test-helpers.R b/dev/test-helpers.R new file mode 100644 index 0000000..04729f8 --- /dev/null +++ b/dev/test-helpers.R @@ -0,0 +1,18 @@ +test_that("multiplication works", { + # get_index_list_r_ <- function(k.vars, data) { + # indexes <- seq.int(0L, nrow(data) - 1L) + # lapply(k.vars, function(x, indexes, data) { + # split(indexes, data[[x]]) + # }, indexes = indexes, data = data) + # } + + # expect_equal( + # get_index_list_(names(mtcars), mtcars), + # get_index_list_r_(names(mtcars), mtcars) + # ) + + # expect_equal( + # get_index_list_(names(iris), iris), + # get_index_list_r_(names(iris), iris) + # ) +}) diff --git a/dev/test-offset.R b/dev/test-offset.R new file mode 100644 index 0000000..e833619 --- /dev/null +++ b/dev/test-offset.R @@ -0,0 +1,18 @@ +test_that("offset works", { + m1 <- feglm(mpg ~ wt | cyl, data = mtcars, family = poisson()) + y <- predict(m1, type = "response") + o1 <- feglm_offset_(m1, y) + + # m2 <- alpaca::feglm(mpg ~ wt | cyl, data = mtcars, family = poisson()) + # o2 <- drop(alpaca:::feglmOffset(m2, y) + # datapasta::vector_paste(round(o2, 4)) + o2 <- c( + 3.018703, 3.011154, 3.056387, 3.001613, 2.979713, 2.995091, 2.976723, + 3.026537, 3.027809, 2.995612, 2.995612, 2.999650, 3.006936, 3.005836, + 2.977558, 2.974679, 2.975975, 3.094682, 3.062526, 3.053450, 3.029361, + 2.956144, 2.958109, 2.949010, 2.948902, 3.049442, 3.041447, 3.066858, + 2.964431, 2.992499, 2.955002, 3.018302 + ) + + expect_equal(round(o1, 4), round(o2, 4)) +}) diff 
--git a/docs/404.html b/docs/404.html index 6b35323..a945fa2 100644 --- a/docs/404.html +++ b/docs/404.html @@ -39,7 +39,7 @@
diff --git a/docs/CODE_OF_CONDUCT.html b/docs/CODE_OF_CONDUCT.html index dab6e73..4590326 100644 --- a/docs/CODE_OF_CONDUCT.html +++ b/docs/CODE_OF_CONDUCT.html @@ -17,7 +17,7 @@ diff --git a/docs/CONTRIBUTING.html b/docs/CONTRIBUTING.html index a8e0609..6184ced 100644 --- a/docs/CONTRIBUTING.html +++ b/docs/CONTRIBUTING.html @@ -17,7 +17,7 @@ diff --git a/docs/LICENSE.html b/docs/LICENSE.html index e2305ab..4f37ea2 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -17,7 +17,7 @@ diff --git a/docs/articles/index.html b/docs/articles/index.html index 9eabf7e..bc96bc8 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -17,7 +17,7 @@ diff --git a/docs/articles/intro.html b/docs/articles/intro.html index 9ef0b0b..0496226 100644 --- a/docs/articles/intro.html +++ b/docs/articles/intro.html @@ -40,7 +40,7 @@ diff --git a/docs/authors.html b/docs/authors.html index d61c09d..a177fc8 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@ @@ -74,14 +74,14 @@Vargas Sepulveda M (2024). capybara: Fast and Memory Efficient Fitting of Linear Models With High-Dimensional Fixed Effects. -R package version 0.5.2, https://github.com/pachadotdev/capybara, https://pacha.dev/capybara/. +R package version 0.6.0, https://github.com/pachadotdev/capybara, https://pacha.dev/capybara/.
@Manual{, title = {capybara: Fast and Memory Efficient Fitting of Linear Models With High-Dimensional Fixed Effects}, author = {Mauricio {Vargas Sepulveda}}, year = {2024}, - note = {R package version 0.5.2, https://github.com/pachadotdev/capybara}, + note = {R package version 0.6.0, https://github.com/pachadotdev/capybara}, url = {https://pacha.dev/capybara/}, }diff --git a/docs/index.html b/docs/index.html index dbb9e93..4b687da 100644 --- a/docs/index.html +++ b/docs/index.html @@ -46,7 +46,7 @@ @@ -140,13 +140,13 @@
-This debugging is about code quality, not about statistical quality. There is a full set of numerical tests for testthat to check the math. In this section of the test, I can write pi = 3 and if there are no memory leaks, it will pass the test.
+This debugging is about code quality, not about statistical quality. There is a full set of numerical tests for testthat to check the math. In this section of the test, I could write “pi = 3” and if there are no memory leaks, it will pass the test.
I run r_valgrind "dev/test_get_alpha.r"
or the corresponding test from the project’s root in a new terminal (bash).
This works because I previously defined this function in .bashrc;
to make it work, run source ~/.bashrc or reboot:
function r_debug_symbols () {
@@ -328,6 +328,11 @@ Debugging
+Code of Conduct
+
+Please note that the capybara project is released with a Contributor Code of Conduct. By contributing to this project, you agree to abide by its terms.
+
-unsigned integer indicating a finite population correction for the estimation of the covariance matrix of the average partial effects proposed by Cruz-Gonzalez, Fernández-Val, and Weidner (2017). The correction @@ -107,7 +107,7 @@
a string equal to "classic"
or "network"
which determines the structure of the panel used. "classic"
denotes
panel structures where for example the same cross-sectional units are
@@ -116,7 +116,7 @@
"classic"
.a string equal to "independence"
or
"unrestricted"
which imposes sampling assumptions about the
unobserved effects. "independence"
imposes that all unobserved
@@ -125,7 +125,7 @@
"independence"
.logical indicating if some of the regressors are assumed to
be weakly exogenous (e.g. predetermined). If object is of class
"bias_corr"
, the option will be automatically set to TRUE
if
@@ -178,7 +178,7 @@
-bias_corr(object = NULL, L = 0L, panel.structure = c("classic", "network"))
+bias_corr(object = NULL, L = 0L, panel_structure = c("classic", "network"))
a string equal to "classic"
or "network"
which determines the structure of the panel used. "classic"
denotes
panel structures where for example the same cross-sectional units are
@@ -146,7 +146,7 @@
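Putting the renamed arguments together, a typical bias-correction and APE workflow now reads as in the sketch below. This is a hedged example: `df`, `y`, `x1`, `x2`, `id`, and `time` are hypothetical names, and per the documentation above `weak_exo` is set automatically when `apes()` receives a `"bias_corr"` object.

```r
# Hypothetical two-way fixed effects logit on a panel data frame `df`
mod <- feglm(y ~ x1 + x2 | id + time, data = df, family = binomial())

# Analytical bias correction; L > 0 would flag weakly exogenous regressors
mod_bc <- bias_corr(mod, L = 0L, panel_structure = "classic")

# Average partial effects, with an optional finite population correction
apes(mod_bc, n_pop = 1000000L, sampling_fe = "independence")
```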
data.
an optional vector of starting values for the structural parameters in the linear predictor. Default is \(\boldsymbol{\beta} = \mathbf{0}\).
an optional vector of starting values for the linear predictor.
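Following the snake_case renaming pattern in this changeset, the starting-value arguments would be supplied as in the sketch below; the name `beta_start` is an assumption inferred from the renamed documentation entries, not confirmed by this diff.

```r
# Hypothetical warm start: reuse a previous fit's coefficients
m0 <- feglm(mpg ~ wt | cyl, data = mtcars, family = poisson())
m1 <- feglm(mpg ~ wt | cyl, data = mtcars, family = poisson(),
            beta_start = m0$coefficients)
```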
feglm Control Parameters
feglm_control(
- dev.tol = 1e-08,
- center.tol = 1e-08,
- iter.max = 25L,
+ dev_tol = 1e-08,
+ center_tol = 1e-08,
+ iter_max = 25L,
limit = 10L,
trace = FALSE,
- drop.pc = TRUE,
- keep.mx = TRUE
+ drop_pc = TRUE,
+ keep_mx = FALSE
)
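A hedged usage sketch for the new control defaults (the parameter descriptions follow below). Note that `keep_mx` now defaults to `FALSE`, so the centered regressor matrix must be requested explicitly; this assumes `feglm()` accepts the list returned by `feglm_control()` through its `control` argument.

```r
# keep_mx = FALSE is now the default, so request MX explicitly when a
# covariance estimator, bias correction, or APE computation will need it
ctrl <- feglm_control(dev_tol = 1e-08, center_tol = 1e-08,
                      iter_max = 25L, keep_mx = TRUE)
mod <- feglm(mpg ~ wt | cyl, data = mtcars,
             family = poisson(), control = ctrl)
```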
tolerance level for the first stopping condition of the
maximization routine. The stopping condition is based on the relative change
of the deviance in iteration \(r\) and can be expressed as follows:
\(|dev_{r} - dev_{r - 1}| / (0.1 + |dev_{r}|) < tol\). The default is 1.0e-08.
tolerance level for the stopping condition of the centering
algorithm. The stopping condition is based on the relative change of the
centered variable similar to the 'lfe' package. The default is 1.0e-08.
unsigned integer indicating the maximum number of iterations
in the maximization routine. The default is 25L.
Default is FALSE.
-logical indicating to drop observations that are perfectly classified/separated and hence do not contribute to the log-likelihood. This option is useful to reduce the computational costs of the maximization @@ -116,7 +116,7 @@
Default is TRUE.
logical indicating if the centered regressor matrix should be stored. The centered regressor matrix is required for some covariance estimators, bias corrections, and average partial effects. This option saves
diff --git a/docs/reference/felm.html index 53de5a3..82d4ea1 100644 --- a/docs/reference/felm.html +++ b/docs/reference/felm.html @@ -18,7 +18,7 @@
data.
-an optional vector of starting values for the structural parameters in the linear predictor. Default is \(\boldsymbol{\beta} = \mathbf{0}\).
an optional vector of starting values for the linear predictor.
an optional initial value for the theta parameter (see glm.nb).
data.
-an optional vector of starting values for the structural parameters in the linear predictor. Default is \(\boldsymbol{\beta} = \mathbf{0}\).
an optional vector of starting values for the linear predictor.
-fixed_effects(object = NULL, alpha.tol = 1e-08)
+fixed_effects(object = NULL, alpha_tol = 1e-08)
an object of class "feglm".
tolerance level for the stopping condition. The algorithm is stopped at iteration \(i\) if \(||\boldsymbol{\alpha}_{i} - \boldsymbol{\alpha}_{i - 1}||_{2} < tol \, ||\boldsymbol{\alpha}_{i - 1}||_{2}\).
diff --git a/docs/reference/index.html index 2c38e11..80d9012 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -17,7 +17,7 @@
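The renamed `alpha_tol` slots into the recovery of the fixed effects as sketched below, reusing the model from the package's own test files.

```r
# Recover the fixed effects after fitting; alpha_tol is the `tol` in the
# stopping condition quoted above
mod <- feglm(mpg ~ wt | cyl, data = mtcars, family = poisson())
fixed_effects(mod, alpha_tol = 1e-08)
```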
Covariance matrix for the estimator of the structural parameters
from objects returned by feglm. The covariance is computed
-from the Hessian, the scores, or a combination of both after convergence.
the type of covariance estimate required. "hessian" refers
-to the inverse of the negative expected Hessian after convergence and is the
+to the inverse of the negative expected hessian after convergence and is the
default option. "outer.product"
is the outer-product-of-the-gradient
estimator. "sandwich"
is the sandwich estimator (sometimes also
referred as robust estimator), and "clustered"
computes a clustered
@@ -121,11 +121,11 @@
Covariance matrix for the estimator of the structural parameters
-from objects returned by feglm. The covariance is computed
-from the Hessian, the scores, or a combination of both after convergence.
+from objects returned by felm. The covariance is computed
+from the hessian, the scores, or a combination of both after convergence.
-an object of class "feglm".
+an object of class "felm".
the type of covariance estimate required. "hessian" refers
-to the inverse of the negative expected Hessian after convergence and is the
+to the inverse of the negative expected hessian after convergence and is the
default option. "outer.product"
is the outer-product-of-the-gradient
estimator. "sandwich"
is the sandwich estimator (sometimes also
referred as robust estimator), and "clustered"
computes a clustered
@@ -121,11 +121,11 @@
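As a closing sketch of the covariance options documented above, assuming the `type` argument is accepted by the vcov methods as described on these reference pages (the "clustered" option additionally needs a cluster specification and is omitted here):

```r
mod <- feglm(mpg ~ wt | cyl, data = mtcars, family = poisson())
vcov(mod, type = "hessian")        # default: inverse negative expected Hessian
vcov(mod, type = "outer.product")  # outer-product-of-the-gradient estimator
vcov(mod, type = "sandwich")       # robust/sandwich estimator
```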