diff --git a/DESCRIPTION b/DESCRIPTION index 46b0ec4..b75d6bd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,9 +1,10 @@ Package: capybara Type: Package -Title: Capybaras are cool -Version: 2022.03.21 +Title: Fast and Memory Efficient Fitting of Linear Models With High-Dimensional + Fixed Effects +Version: 2024.01.15 Authors@R: c( - person("Mauricio", "Vargas", + person("Mauricio", "Vargas Sepulveda", role = c("aut", "cre"), email = "m.sepulveda@mail.utoronto.ca", c(ORCID = "0000-0003-1017-7574")) @@ -17,7 +18,11 @@ Suggests: knitr, rmarkdown Depends: R(>= 3.5.0) -Description: Add some description later. This is about linear algebra. +Description: Fast and user-friendly estimation of generalized linear models with + multiple fixed effects and cluster the standard errors. The method to obtain + the estimated fixed-effects coefficients is based on Stammann (2018) + and Gaure (2013) + . License: MIT + file LICENSE BugReports: https://github.com/pachadotdev/capybara/issues URL: https://pacha.dev/capybara/, https://github.com/pachadotdev/capybara diff --git a/NAMESPACE b/NAMESPACE index 0d83593..05824c4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,33 +1,35 @@ # Generated by roxygen2: do not edit by hand -S3method(coef,APEs) +S3method(coef,apes) S3method(coef,feglm) S3method(coef,felm) -S3method(coef,summary.APEs) +S3method(coef,summary.apes) S3method(coef,summary.feglm) +S3method(coef,summary.felm) S3method(fitted,feglm) S3method(fitted,felm) S3method(predict,feglm) -S3method(print,APEs) +S3method(predict,felm) +S3method(print,apes) S3method(print,feglm) S3method(print,felm) -S3method(print,summary.APEs) +S3method(print,summary.apes) S3method(print,summary.feglm) S3method(print,summary.felm) -S3method(summary,APEs) +S3method(summary,apes) S3method(summary,feglm) S3method(summary,felm) -S3method(vcov,APEs) +S3method(vcov,apes) S3method(vcov,feglm) S3method(vcov,felm) -export(biasCorr) +export(apes) +export(bias_corr) export(feglm) -export(feglm.control) -export(feglm.nb) -export(feglmControl) -export(getAPEs) -export(getFEs) -export(simGLM) +export(feglm_control) +export(felm) +export(fenegbin) +export(fepoisson) +export(fixed_effects) importFrom(Formula,Formula) importFrom(MASS,negative.binomial) importFrom(MASS,theta.ml) @@ -37,6 +39,7 @@ importFrom(data.table,setDT) importFrom(data.table,setkeyv) importFrom(stats,as.formula) importFrom(stats,binomial) +importFrom(stats,gaussian) importFrom(stats,model.matrix) importFrom(stats,na.omit) importFrom(stats,pnorm) diff --git a/README.Rmd b/README.Rmd index 1e53418..6145f9e 100644 --- a/README.Rmd +++ b/README.Rmd @@ -6,10 +6,10 @@ output: github_document ```{r, include = FALSE} knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - fig.path = "man/figures/README-", - out.width = "100%" + collapse = TRUE, + comment = "#>", + fig.path = "man/figures/README-", + out.width = "100%" ) ``` @@ -25,33 +25,12 @@ The goal of capybara is to ... You can install the development version of capybara like so: ``` r -# FILL THIS IN! HOW CAN PEOPLE INSTALL YOUR DEV PACKAGE? +remotes::install_github("pachadotdev/capybara") ``` -## Example +## Examples -This is a basic example which shows you how to solve a common problem: - -```{r example} -library(capybara) -## basic example code -``` - -What is special about using `README.Rmd` instead of just `README.md`? You can include R chunks like so: - -```{r cars} -summary(cars) -``` - -You'll still need to render `README.Rmd` regularly, to keep `README.md` up-to-date. `devtools::build_readme()` is handy for this. - -You can also embed plots, for example: - -```{r pressure, echo = FALSE} -plot(pressure) -``` - -In that case, don't forget to commit and push the resulting figure files, so they display on GitHub and CRAN. +See the documentation in progress: https://pacha.dev/capybara # Debugging diff --git a/README.md b/README.md new file mode 100644 index 0000000..0770413 --- /dev/null +++ b/README.md @@ -0,0 +1,110 @@ + + + +# capybara + + + + + +The goal of capybara is to … + +## Installation + +You can install the development version of capybara like so: + +``` r +remotes::install_github("pachadotdev/capybara") +``` + +## Examples + +See the documentation in progress: + +# Debugging + +*This debugging is about code quality, not about statistical quality.* +*There is a full set of numerical tests for testthat to check the math.* +*In this section of the test, I can write pi = 3 and if there are no +memory leaks, it will pass the test.* + +I run `r_valgrind "dev/test_get_alpha.r" "dev/test_get_alpha.txt"` or +the corresponding test from the project’s root in a new terminal (bash). + +This works because I previously defined this in `.bashrc`, to make it +work you need to run `source ~/.bashrc` or reboot: + + # create an alias for R + alias r="R" + + alias rvalgrind="R --vanilla -d 'valgrind -s --track-origins=yes'" + + function r_debug_symbols () { + # if src/Makevars does not exist, exit + if [ ! -f src/Makevars ]; then + echo "File src/Makevars does not exist" + return 1 + fi + + # if src/Makevars contains a line that says "PKG_CPPFLAGS" + # but there is no "-UDEBUG -g" on it + # then add "PKG_CPPFLAGS += -UDEBUG -g" at the end + if grep -q "PKG_CPPFLAGS" src/Makevars; then + if ! grep -q "PKG_CPPFLAGS.*-UDEBUG.*-g" src/Makevars; then + echo "PKG_CPPFLAGS += -UDEBUG -g" >> src/Makevars + fi + fi + + # if src/Makevars does not contain a line that reads + # PKG_CPPFLAGS ...something... -UDEBUG -g ...something... + # then add PKG_CPPFLAGS = -UDEBUG -g to it + if ! grep -q "PKG_CPPFLAGS.*-UDEBUG.*-g" src/Makevars; then + echo "PKG_CPPFLAGS = -UDEBUG -g" >> src/Makevars + fi + } + + function r_valgrind () { + # if no argument is provided, ask for a file + if [ -z "$1" ]; then + read -p "Enter the script to debug: " script + else + script=$1 + fi + + # if no output file is provided, use dev/valgrind.txt + if [ -z "$2" ]; then + output="dev/valgrind.txt" + else + output=$2 + fi + + # if the file does not exist, exit + if [ ! -f "$script" ]; then + echo "File $script does not exist" + return 1 + fi + + # if the file does not end in .R/.r, exit + shopt -s nocasematch + if [[ "$script" != *.R ]]; then + echo "File $script does not end in .R or .r" + return 1 + fi + shopt -u nocasematch + + # run R in debug mode, but after that we compiled with debug symbols + # see https://reside-ic.github.io/blog/debugging-memory-errors-with-valgrind-and-gdb/ + # R -d 'valgrind -s --leak-check=full --show-leak-kinds=all --track-origins=yes' -f $script 2>&1 | tee valgrind.txt + R --vanilla -d 'valgrind -s --track-origins=yes' -f $script 2>&1 | tee $output + } + +`r_debug_symbols()` makes everything slower, but makes sure that all +compiler optimizations are disabled and then valgrind will point us to +the lines that create memory leaks. + +`r_valgrind()` will run an R script and use Linux system tools to test +for initialized values and all kinds of problems that result in memory +leaks. + +When you are ready testing, you need to remove `-UDEGUG` from +`src/Makevars`. diff --git a/man/apes.Rd b/man/apes.Rd new file mode 100644 index 0000000..a477fc8 --- /dev/null +++ b/man/apes.Rd @@ -0,0 +1,109 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/apes.R +\name{apes} +\alias{apes} +\title{Compute average partial effects after fitting binary choice models + with a 1,2,3-way error component} +\usage{ +apes( + object = NULL, + n.pop = NULL, + panel.structure = c("classic", "network"), + sampling.fe = c("independence", "unrestricted"), + weak.exo = FALSE +) +} +\arguments{ +\item{object}{an object of class \code{"bias_corr"} or \code{"feglm"}; +currently restricted to \code{\link[stats]{binomial}}.} + +\item{n.pop}{unsigned integer indicating a finite population correction for +the estimation of the covariance matrix of the average partial effects +proposed by Cruz-Gonzalez, Fernández-Val, and Weidner (2017). The correction +factor is computed as follows: +\eqn{(n^{\ast} - n) / (n^{\ast} - 1)}{(n.pop - n) / (n.pop - 1)}, +where \eqn{n^{\ast}}{n.pop} and \eqn{n}{n} are the sizes of the entire +population and the full sample size. Default is \code{NULL}, which refers to +a factor of zero and a covariance obtained by the delta method.} + +\item{panel.structure}{a string equal to \code{"classic"} or \code{"network"} +which determines the structure of the panel used. \code{"classic"} denotes +panel structures where for example the same cross-sectional units are +observed several times (this includes pseudo panels). \code{"network"} +denotes panel structures where for example bilateral trade flows are +observed for several time periods. Default is \code{"classic"}.} + +\item{sampling.fe}{a string equal to \code{"independence"} or +\code{"unrestricted"} which imposes sampling assumptions about the +unobserved effects. \code{"independence"} imposes that all unobserved +effects are independent sequences. \code{"unrestricted"} does not impose any +sampling assumptions. Note that this option only affects the optional finite +population correction. Default is \code{"independence"}.} + +\item{weak.exo}{logical indicating if some of the regressors are assumed to +be weakly exogenous (e.g. predetermined). If object is of class +\code{"bias_corr"}, the option will be automatically set to \code{TRUE} if +the chosen bandwidth parameter is larger than zero. Note that this option +only affects the estimation of the covariance matrix. Default is +\code{FALSE}, which assumes that all regressors are strictly exogenous.} +} +\value{ +The function \code{\link{apes}} returns a named list of class + \code{"apes"}. +} +\description{ +\code{\link{apes}} is a post-estimation routine that can be used + to estimate average partial effects with respect to all covariates in the + model and the corresponding covariance matrix. The estimation of the + covariance is based on a linear approximation (delta method) plus an + optional finite population correction. Note that the command automatically + determines which of the regressors are binary or non-binary. + + \strong{Remark:} The routine currently does not allow to compute average + partial effects based on functional forms like interactions and polynomials. +} +\examples{ +trade_short <- trade_panel[trade_panel$year \%in\% 2002L:2006L, ] +trade_short$trade <- ifelse(trade_short$trade > 100, 1L, 0L) + +# Fit 'feglm()' +mod <- feglm(trade ~ lang | year, trade_short, family = binomial()) + +# Compute average partial effects +mod.ape <- apes(mod) +summary(mod.ape) + +# Apply analytical bias correction +mod.bc <- bias_corr(mod) +summary(mod.bc) + +# Compute bias-corrected average partial effects +mod.ape.bc <- apes(mod.bc) +summary(mod.ape.bc) +} +\references{ +Cruz-Gonzalez, M., I. Fernández-Val, and M. Weidner (2017). "Bias + corrections for probit and logit models with two-way fixed effects". The + Stata Journal, 17(3), 517-545. + +Czarnowske, D. and A. Stammann (2020). "Fixed Effects Binary + Choice Models: Estimation and Inference with Long Panels". ArXiv e-prints. + +Fernández-Val, I. and M. Weidner (2016). "Individual and time + effects in nonlinear panel models with large N, T". Journal of Econometrics, + 192(1), 291-312. + +Fernández-Val, I. and M. Weidner (2018). "Fixed effects + estimation of large-t panel data models". Annual Review of Economics, 10, + 109-138. + +Hinz, J., A. Stammann, and J. Wanner (2020). "State Dependence + and Unobserved Heterogeneity in the Extensive Margin of Trade". ArXiv + e-prints. + +Neyman, J. and E. L. Scott (1948). "Consistent estimates based on + partially consistent observations". Econometrica, 16(1), 1-32. +} +\seealso{ +\code{\link{bias_corr}}, \code{\link{feglm}} +} diff --git a/man/bias_corr.Rd b/man/bias_corr.Rd new file mode 100644 index 0000000..4c58fd4 --- /dev/null +++ b/man/bias_corr.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bias_corr.R +\name{bias_corr} +\alias{bias_corr} +\title{Asymptotic bias correction after fitting binary choice models with a + 1,2,3-way error component} +\usage{ +bias_corr(object = NULL, L = 0L, panel.structure = c("classic", "network")) +} +\arguments{ +\item{object}{an object of class \code{"feglm"}.} + +\item{L}{unsigned integer indicating a bandwidth for the estimation of +spectral densities proposed by Hahn and Kuersteiner (2011). The default is +zero, which should be used if all regressors are assumed to be strictly +exogenous with respect to the idiosyncratic error term. In the presence of +weakly exogenous regressors, e.g. lagged outcome variables, we suggest to +choose a bandwidth between one and four. Note that the order of factors to +be partialed out is important for bandwidths larger than zero.} + +\item{panel.structure}{a string equal to \code{"classic"} or \code{"network"} +which determines the structure of the panel used. \code{"classic"} denotes +panel structures where for example the same cross-sectional units are +observed several times (this includes pseudo panels). \code{"network"} +denotes panel structures where for example bilateral trade flows are +observed for several time periods. Default is \code{"classic"}.} +} +\value{ +A named list of classes \code{"bias_corr"} and \code{"feglm"}. +} +\description{ +Post-estimation routine to substantially reduce the incidental + parameter bias problem. Applies the analytical bias correction derived by + Fernández-Val and Weidner (2016) and Hinz, Stammann, and Wanner (2020) to + obtain bias-corrected estimates of the structural parameters and is + currently restricted to \code{\link[stats]{binomial}} with 1,2,3-way fixed + effects. +} +\examples{ +trade_short <- trade_panel[trade_panel$year \%in\% 2002L:2006L, ] +trade_short$trade <- ifelse(trade_short$trade > 100, 1L, 0L) + +# Fit 'feglm()' +mod <- feglm(trade ~ lang | year, trade_short, family = binomial()) + +# Apply analytical bias correction +mod.bc <- bias_corr(mod) +summary(mod.bc) + +} +\references{ +Czarnowske, D. and A. Stammann (2020). "Fixed Effects Binary + Choice Models: Estimation and Inference with Long Panels". ArXiv e-prints. + +Fernández-Val, I. and M. Weidner (2016). "Individual and time + effects in nonlinear panel models with large N, T". Journal of Econometrics, + 192(1), 291-312. + +Fernández-Val, I. and M. Weidner (2018). "Fixed effects + estimation of large-t panel data models". Annual Review of Economics, 10, + 109-138. + +Hahn, J. and G. Kuersteiner (2011). "Bias reduction for dynamic + nonlinear panel models with fixed effects". Econometric Theory, 27(6), + 1152-1191. + +Hinz, J., A. Stammann, and J. Wanner (2020). "State Dependence + and Unobserved Heterogeneity in the Extensive Margin of Trade". ArXiv + e-prints. + +Neyman, J. and E. L. Scott (1948). "Consistent estimates based on + partially consistent observations". Econometrica, 16(1), 1-32. +} +\seealso{ +\code{\link{feglm}} +} diff --git a/man/capybara-package.Rd b/man/capybara-package.Rd new file mode 100644 index 0000000..b885786 --- /dev/null +++ b/man/capybara-package.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/capybara-package.R +\docType{package} +\name{capybara-package} +\alias{capybara-package} +\title{Generalized Linear Models (GLMs) with high-dimensional k-way fixed + effects} +\description{ +Provides a routine to partial out factors with many levels during the +optimization of the log-likelihood function of the corresponding GLM. The +package is based on the algorithm described in Stammann (2018). It also +offers an efficient algorithm to recover estimates of the fixed effects in a +post-estimation routine and includes robust and multi-way clustered standard +errors. Further the package provides analytical bias corrections for binary +choice models derived by Fernández-Val and Weidner (2016) and Hinz, Stammann, +and Wanner (2020). +} diff --git a/man/feglm.Rd b/man/feglm.Rd new file mode 100644 index 0000000..0a01bc2 --- /dev/null +++ b/man/feglm.Rd @@ -0,0 +1,89 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/feglm.R +\name{feglm} +\alias{feglm} +\title{GLM fitting with high-dimensional k-way fixed effects} +\usage{ +feglm( + formula = NULL, + data = NULL, + family = binomial(), + weights = NULL, + beta.start = NULL, + eta.start = NULL, + control = NULL +) +} +\arguments{ +\item{formula}{an object of class \code{"formula"}: a symbolic description of +the model to be fitted. \code{formula} must be of type \code{y ~ x | k}, +where the second part of the formula refers to factors to be concentrated +out. It is also possible to pass additional variables to \code{\link{feglm}} +(e.g. to cluster standard errors). This can be done by specifying the third +part of the formula: \code{y ~ x | k | add}.} + +\item{data}{an object of class \code{"data.frame"} containing the variables +in the model.} + +\item{family}{a description of the error distribution and link function to be +used in the model. Similar to \code{\link[stats]{glm.fit}} this has to be +the result of a call to a family function. Default is \code{binomial()}. See +\code{\link[stats]{family}} for details of family functions.} + +\item{weights}{an optional string with the name of the 'prior weights' +variable in \code{data}.} + +\item{beta.start}{an optional vector of starting values for the structural +parameters in the linear predictor. Default is +\eqn{\boldsymbol{\beta} = \mathbf{0}}{\beta = 0}.} + +\item{eta.start}{an optional vector of starting values for the linear +predictor.} + +\item{control}{a named list of parameters for controlling the fitting +process. See \code{\link{feglm_control}} for details.} +} +\value{ +The function \code{\link{feglm}} returns a named list of class + \code{"feglm"}. +} +\description{ +\code{\link{feglm}} can be used to fit generalized linear models + with many high-dimensional fixed effects. The estimation procedure is based + on unconditional maximum likelihood and can be interpreted as a + \dQuote{weighted demeaning} approach that combines the work of Gaure (2013) + and Stammann et. al. (2016). For technical details see Stammann (2018). The + routine is well suited for large data sets that would be otherwise + infeasible to use due to memory limitations. + +\strong{Remark:} The term fixed effect is used in econometrician's sense of + having intercepts for each level in each category. +} +\details{ +If \code{\link{feglm}} does not converge this is often a sign of + linear dependence between one or more regressors and a fixed effects + category. In this case, you should carefully inspect your model + specification. +} +\examples{ +mod <- feglm( + trade ~ dist + lang + cntg + clny | exp_year + imp_year, + trade_panel, + family = poisson(link = "log") +) + +summary(mod) +} +\references{ +Gaure, S. (2013). "OLS with Multiple High Dimensional Category + Variables". Computational Statistics and Data Analysis, 66. + +Marschner, I. (2011). "glm2: Fitting generalized linear models + with convergence problems". The R Journal, 3(2). + +Stammann, A., F. Heiss, and D. McFadden (2016). "Estimating Fixed + Effects Logit Models with Large Panel Data". Working paper. + +Stammann, A. (2018). "Fast and Feasible Estimation of Generalized + Linear Models with High-Dimensional k-Way Fixed Effects". ArXiv e-prints. +} diff --git a/man/feglm_control.Rd b/man/feglm_control.Rd new file mode 100644 index 0000000..a4c246e --- /dev/null +++ b/man/feglm_control.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/feglm_control.R +\name{feglm_control} +\alias{feglm_control} +\title{Set \code{feglm} Control Parameters} +\usage{ +feglm_control( + dev.tol = 1e-08, + center.tol = 1e-08, + iter.max = 25L, + limit = 10L, + trace = FALSE, + drop.pc = TRUE, + keep.mx = TRUE +) +} +\arguments{ +\item{dev.tol}{tolerance level for the first stopping condition of the +maximization routine. The stopping condition is based on the relative change +of the deviance in iteration \eqn{r} and can be expressed as follows: +\eqn{|dev_{r} - dev_{r - 1}| / (0.1 + |dev_{r}|) < tol}{|dev - devold| / +(0.1 + |dev|) < tol}. The default is \code{1.0e-08}.} + +\item{center.tol}{tolerance level for the stopping condition of the centering +algorithm. The stopping condition is based on the relative change of the +centered variable similar to \link[lfe]{felm}. The default is +\code{1.0e-08}.} + +\item{iter.max}{unsigned integer indicating the maximum number of iterations +in the maximization routine. The default is \code{25L}.} + +\item{limit}{unsigned integer indicating the maximum number of iterations of +\code{\link[MASS]{theta.ml}}. The default is \code{10L}.} + +\item{trace}{logical indicating if output should be produced in each +iteration. Default is \code{FALSE}.} + +\item{drop.pc}{logical indicating to drop observations that are perfectly +classified/separated and hence do not contribute to the log-likelihood. This +option is useful to reduce the computational costs of the maximization +problem and improves the numerical stability of the algorithm. Note that +dropping perfectly separated observations does not affect the estimates. +The default is \code{TRUE}.} + +\item{keep.mx}{logical indicating if the centered regressor matrix should be +stored. The centered regressor matrix is required for some covariance +estimators, bias corrections, and average partial effects. This option saves +some computation time at the cost of memory. The default is \code{TRUE}.} +} +\value{ +A named list of control parameters. +} +\description{ +Set and change parameters used for fitting \code{\link{feglm}}. + Termination conditions are similar to \code{\link[stats]{glm}}. +} +\seealso{ +\code{\link{feglm}} +} diff --git a/man/felm.Rd b/man/felm.Rd new file mode 100644 index 0000000..c2f87d8 --- /dev/null +++ b/man/felm.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/felm.R +\name{felm} +\alias{felm} +\title{LM fitting with high-dimensional k-way fixed effects} +\usage{ +felm(formula = NULL, data = NULL, weights = NULL) +} +\arguments{ +\item{formula}{an object of class \code{"formula"}: a symbolic description of +the model to be fitted. \code{formula} must be of type \code{y ~ x | k}, +where the second part of the formula refers to factors to be concentrated +out. It is also possible to pass additional variables to \code{\link{feglm}} +(e.g. to cluster standard errors). This can be done by specifying the third +part of the formula: \code{y ~ x | k | add}.} + +\item{data}{an object of class \code{"data.frame"} containing the variables +in the model.} + +\item{weights}{an optional string with the name of the 'prior weights' +variable in \code{data}.} +} +\value{ +The function \code{\link{felm}} returns a named list of class + \code{"felm"}. +} +\description{ +A wrapper for \code{\link{feglm}} with + \code{family = gaussian()}. +} +\examples{ +mod <- felm( + log(trade) ~ dist + lang + cntg + clny | exp_year + imp_year, + trade_panel +) + +summary(mod) +} +\references{ +Gaure, S. (2013). "OLS with Multiple High Dimensional Category + Variables". Computational Statistics and Data Analysis, 66. + +Marschner, I. (2011). "glm2: Fitting generalized linear models + with convergence problems". The R Journal, 3(2). + +Stammann, A., F. Heiss, and D. McFadden (2016). "Estimating Fixed + Effects Logit Models with Large Panel Data". Working paper. + +Stammann, A. (2018). "Fast and Feasible Estimation of Generalized + Linear Models with High-Dimensional k-Way Fixed Effects". ArXiv e-prints. +} diff --git a/man/fenegbin.Rd b/man/fenegbin.Rd new file mode 100644 index 0000000..f7c93ee --- /dev/null +++ b/man/fenegbin.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fenegbin.R +\name{fenegbin} +\alias{fenegbin} +\title{Negative Binomial model fitting with high-dimensional k-way fixed + effects} +\usage{ +fenegbin( + formula = NULL, + data = NULL, + weights = NULL, + beta.start = NULL, + eta.start = NULL, + init.theta = NULL, + link = c("log", "identity", "sqrt"), + control = NULL +) +} +\arguments{ +\item{formula}{an object of class \code{"formula"}: a symbolic description of +the model to be fitted. \code{formula} must be of type \code{y ~ x | k}, +where the second part of the formula refers to factors to be concentrated +out. It is also possible to pass additional variables to \code{\link{feglm}} +(e.g. to cluster standard errors). This can be done by specifying the third +part of the formula: \code{y ~ x | k | add}.} + +\item{data}{an object of class \code{"data.frame"} containing the variables +in the model.} + +\item{weights}{an optional string with the name of the 'prior weights' +variable in \code{data}.} + +\item{beta.start}{an optional vector of starting values for the structural +parameters in the linear predictor. Default is +\eqn{\boldsymbol{\beta} = \mathbf{0}}{\beta = 0}.} + +\item{eta.start}{an optional vector of starting values for the linear +predictor.} + +\item{init.theta}{an optional initial value for the theta parameter (see +\code{\link[MASS]{glm.nb}}).} + +\item{link}{the link function. Must be one of \code{"log"}, \code{"sqrt"}, or +\code{"identity"}.} + +\item{control}{a named list of parameters for controlling the fitting +process. See \code{\link{feglm_control}} for details.} +} +\description{ +A routine that uses the same internals as \code{\link{feglm}}. +} +\examples{ +# same as the example in fepoisson but with overdispersion/underdispersion +mod <- fenegbin( + trade ~ dist + lang + cntg + clny | exp_year + imp_year, + trade_panel +) + +summary(mod) +} diff --git a/man/fepoisson.Rd b/man/fepoisson.Rd new file mode 100644 index 0000000..aff9ed0 --- /dev/null +++ b/man/fepoisson.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fepoisson.R +\name{fepoisson} +\alias{fepoisson} +\title{Poisson model fitting high-dimensional with k-way fixed effects} +\usage{ +fepoisson( + formula = NULL, + data = NULL, + weights = NULL, + beta.start = NULL, + eta.start = NULL, + control = NULL +) +} +\arguments{ +\item{formula}{an object of class \code{"formula"}: a symbolic description of +the model to be fitted. \code{formula} must be of type \code{y ~ x | k}, +where the second part of the formula refers to factors to be concentrated +out. It is also possible to pass additional variables to \code{\link{feglm}} +(e.g. to cluster standard errors). This can be done by specifying the third +part of the formula: \code{y ~ x | k | add}.} + +\item{data}{an object of class \code{"data.frame"} containing the variables +in the model.} + +\item{weights}{an optional string with the name of the 'prior weights' +variable in \code{data}.} + +\item{beta.start}{an optional vector of starting values for the structural +parameters in the linear predictor. Default is +\eqn{\boldsymbol{\beta} = \mathbf{0}}{\beta = 0}.} + +\item{eta.start}{an optional vector of starting values for the linear +predictor.} + +\item{control}{a named list of parameters for controlling the fitting +process. See \code{\link{feglm_control}} for details.} +} +\description{ +A wrapper for \code{\link{feglm}} with + \code{family = poisson()}. +} +\examples{ +# same as the example in feglm but with less typing +mod <- fepoisson( + trade ~ dist + lang + cntg + clny | exp_year + imp_year, + trade_panel +) + +summary(mod) +} diff --git a/man/fixed_effects.Rd b/man/fixed_effects.Rd new file mode 100644 index 0000000..d5c91f1 --- /dev/null +++ b/man/fixed_effects.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fixed_effects.R +\name{fixed_effects} +\alias{fixed_effects} +\title{Recover the estimates of the fixed effects after fitting GLMs} +\usage{ +fixed_effects(object = NULL, alpha.tol = 1e-08) +} +\arguments{ +\item{object}{an object of class \code{"feglm"}.} + +\item{alpha.tol}{tolerance level for the stopping condition. The algorithm is +stopped at iteration \eqn{i} if \eqn{||\boldsymbol{\alpha}_{i} - +\boldsymbol{\alpha}_{i - 1}||_{2} < tol ||\boldsymbol{\alpha}_{i - 1}|| +{2}}{||\Delta \alpha|| < tol ||\alpha_old||}. Default is \code{1.0e-08}.} +} +\value{ +A named list containing named vectors of estimated fixed effects. +} +\description{ +The system might not have a unique solution since we do not take + collinearity into account. If the solution is not unique, an estimable + function has to be applied to our solution to get meaningful estimates of + the fixed effects. +} +\references{ +Stammann, A. (2018). "Fast and Feasible Estimation of Generalized + Linear Models with High-Dimensional k-way Fixed Effects". ArXiv e-prints. + +Gaure, S. (n. d.). "Multicollinearity, identification, and + estimable functions". Unpublished. +} +\seealso{ +\code{\link{felm}}, \code{\link{feglm}} +} diff --git a/man/trade_panel.Rd b/man/trade_panel.Rd new file mode 100644 index 0000000..c054839 --- /dev/null +++ b/man/trade_panel.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/capybara-package.R +\docType{data} +\name{trade_panel} +\alias{trade_panel} +\title{Trade Panel 1986-2006} +\format{ +## `trade_panel` +A data frame with 14,285 rows and 7 columns: +\describe{ + \item{trade}{Nominal trade flows in current US dollars} + \item{dist}{Population-weighted bilateral distance between country 'i' and 'j', in kilometers} + \item{cntg}{Indicator. Equal to 1 if country 'i' and 'j' share a common border} + \item{lang}{Indicator. Equal to 1 if country 'i' and 'j' speak the same official language} + \item{clny}{Indicator. Equal to 1 if country 'i' and 'j' share a colonial relationship} + \item{year}{Year of observation} + \item{exp_year}{Exporter ISO country code and year} + \item{imp_year}{Importer ISO country code and year} +} +} +\source{ +Advanced Guide to Trade Policy Analysis (ISBN: 978-92-870-4367-2) +} +\usage{ +trade_panel +} +\description{ +Aggregated exports at origin-destination-year level for 1986-2006. +} +\keyword{datasets} diff --git a/man/vcov.apes.Rd b/man/vcov.apes.Rd new file mode 100644 index 0000000..e87ca13 --- /dev/null +++ b/man/vcov.apes.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics_vcov.R +\name{vcov.apes} +\alias{vcov.apes} +\title{Covariance matrix for APEs} +\usage{ +\method{vcov}{apes}(object, ...) +} +\arguments{ +\item{object}{an object of class \code{"apes"}.} + +\item{...}{other arguments.} +} +\value{ +A named matrix of covariance estimates. +} +\description{ +Covariance matrix for the estimator of the + average partial effects from objects returned by \code{\link{apes}}. +} +\seealso{ +\code{\link{apes}} +} diff --git a/man/vcov.feglm.Rd b/man/vcov.feglm.Rd new file mode 100644 index 0000000..f5953ec --- /dev/null +++ b/man/vcov.feglm.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics_vcov.R +\name{vcov.feglm} +\alias{vcov.feglm} +\title{Covariance matrix for GLMs} +\usage{ +\method{vcov}{feglm}( + object, + type = c("hessian", "outer.product", "sandwich", "clustered"), + cluster = NULL, + ... +) +} +\arguments{ +\item{object}{an object of class \code{"feglm"}.} + +\item{type}{the type of covariance estimate required. \code{"hessian"} refers +to the inverse of the negative expected Hessian after convergence and is the +default option. \code{"outer.product"} is the outer-product-of-the-gradient +estimator. \code{"sandwich"} is the sandwich estimator (sometimes also +referred as robust estimator), and \code{"clustered"} computes a clustered +covariance matrix given some cluster variables.} + +\item{cluster}{a symbolic description indicating the clustering of +observations.} + +\item{...}{other arguments.} +} +\value{ +A named matrix of covariance estimates. +} +\description{ +Covariance matrix for the estimator of the structural parameters + from objects returned by \code{\link{feglm}}. The covariance is computed +from the Hessian, the scores, or a combination of both after convergence. +} +\references{ +Cameron, C., J. Gelbach, and D. Miller (2011). "Robust Inference + With Multiway Clustering". Journal of Business & Economic Statistics 29(2). +} +\seealso{ +\code{\link{feglm}} +} diff --git a/man/vcov.felm.Rd b/man/vcov.felm.Rd new file mode 100644 index 0000000..5674671 --- /dev/null +++ b/man/vcov.felm.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics_vcov.R +\name{vcov.felm} +\alias{vcov.felm} +\title{Covariance matrix for GLMs} +\usage{ +\method{vcov}{felm}( + object, + type = c("hessian", "outer.product", "sandwich", "clustered"), + cluster = NULL, + ... +) +} +\arguments{ +\item{object}{an object of class \code{"feglm"}.} + +\item{type}{the type of covariance estimate required. \code{"hessian"} refers +to the inverse of the negative expected Hessian after convergence and is the +default option. \code{"outer.product"} is the outer-product-of-the-gradient +estimator. \code{"sandwich"} is the sandwich estimator (sometimes also +referred as robust estimator), and \code{"clustered"} computes a clustered +covariance matrix given some cluster variables.} + +\item{cluster}{a symbolic description indicating the clustering of +observations.} + +\item{...}{other arguments.} +} +\value{ +A named matrix of covariance estimates. +} +\description{ +Covariance matrix for the estimator of the structural parameters + from objects returned by \code{\link{feglm}}. The covariance is computed +from the Hessian, the scores, or a combination of both after convergence. +} +\references{ +Cameron, C., J. Gelbach, and D. Miller (2011). "Robust Inference + With Multiway Clustering". Journal of Business & Economic Statistics 29(2). +} +\seealso{ +\code{\link{felm}} +} diff --git a/src/cpp11.cpp b/src/cpp11.cpp index 7d849bd..c646a78 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -12,6 +12,13 @@ extern "C" SEXP _capybara_center_variables_(SEXP V, SEXP w, SEXP klist, SEXP tol return cpp11::as_sexp(center_variables_(cpp11::as_cpp&>>(V), cpp11::as_cpp>(w), cpp11::as_cpp>(klist), cpp11::as_cpp>(tol), cpp11::as_cpp>(maxiter))); END_CPP11 } +// 02_get_alpha.cpp +list get_alpha_(const doubles_matrix<>& p, const list& klist, const double tol); +extern "C" SEXP _capybara_get_alpha_(SEXP p, SEXP klist, SEXP tol) { + BEGIN_CPP11 + return cpp11::as_sexp(get_alpha_(cpp11::as_cpp&>>(p), cpp11::as_cpp>(klist), cpp11::as_cpp>(tol))); + END_CPP11 +} // 03_group_sums.cpp doubles_matrix<> group_sums_(const doubles_matrix<>& M, const doubles_matrix<>& w, const list& jlist); extern "C" SEXP _capybara_group_sums_(SEXP M, SEXP w, SEXP jlist) { @@ -44,6 +51,7 @@ extern "C" SEXP _capybara_group_sums_cov_(SEXP M, SEXP N, SEXP jlist) { extern "C" { static const R_CallMethodDef CallEntries[] = { {"_capybara_center_variables_", (DL_FUNC) &_capybara_center_variables_, 5}, + {"_capybara_get_alpha_", (DL_FUNC) &_capybara_get_alpha_, 3}, {"_capybara_group_sums_", (DL_FUNC) &_capybara_group_sums_, 3}, {"_capybara_group_sums_cov_", (DL_FUNC) &_capybara_group_sums_cov_, 3}, {"_capybara_group_sums_spectral_", (DL_FUNC) &_capybara_group_sums_spectral_, 5},