diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 000000000..1b4632f95 --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,94 @@ +name: Docs + +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + build-docs: + name: "Build Docs" + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9", "3.12"] + steps: + - name: Checkout source + uses: actions/checkout@v4 + + - name: Setup python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - uses: prefix-dev/setup-pixi@v0.8.3 + with: + pixi-version: v0.41.4 + cache: true + + - name: Set up Quarto + uses: quarto-dev/quarto-actions/setup@v2 + + - name: install tinytex + run: | + quarto install tinytex + + - name: Compile docstrings with quartodoc + run: | + pixi run docs-build + + - name: Render docs + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + pixi run docs-render -- --no-cache + else + pixi run docs-render + fi + + - name: Save docs artifact + uses: actions/upload-artifact@v4 + with: + name: docs-html-${{ matrix.python-version }} + path: docs/_site + + publish-docs: + name: "Publish Docs" + runs-on: ubuntu-latest + permissions: + contents: write + concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + needs: ["build-docs"] + if: github.ref == 'refs/heads/master' && github.event_name == 'push' + steps: + - name: Download docs artifact + uses: actions/download-artifact@v4 + with: + name: docs-html-3.12 + path: docs/_site + + - name: Publish docs to GitHub Pages + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: docs/_site + + preview-docs: + name: "Preview Docs" + runs-on: ubuntu-latest + needs: ["build-docs"] + if: github.event_name == 'pull_request' + steps: + - name: Download docs artifact + uses: actions/download-artifact@v4 + with: + name: docs-html-3.12 + path: docs/_site + - name: Upload 
docs artifact + uses: actions/upload-artifact@v4 + with: + name: docs-preview + path: docs/_site diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 000000000..025055b25 --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,21 @@ +name: Lint + +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + lint-docs: + name: "Lint Docs" + runs-on: ubuntu-latest + steps: + - name: Checkout source + uses: actions/checkout@v4 + - name: Install markdownlint-cli + run: npm install -g markdownlint-cli + - name: Lint markdown files + run: markdownlint docs/*.md diff --git a/docs/.gitignore b/docs/.gitignore index 9c3d6bdd0..7002672a0 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1,6 +1,4 @@ -/.quarto/ -site_libs/ _site/ -search.json -objects.json +.quarto/ reference/ +*.swp diff --git a/docs/_quarto.yml b/docs/_quarto.yml index b2e48dcf7..3375a0a8f 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -1,10 +1,10 @@ project: type: website output-dir: _site + freeze: auto -execute: - # point quarto to the correct python environment - python: "/pyfixest/.pixi/envs/docs/Scripts/python.exe" +exclude: + - "compare-fixest-pyfixest.qmd" metadata-files: - _sidebar.yml @@ -47,8 +47,6 @@ website: text: "On Small Sample Corrections" - file: quantile-regression.qmd text: "Quantile Regression" - - text: "Compare fixest & PyFixest" - file: compare-fixest-pyfixest.qmd - text: "Compare Stata & PyFixest" file: stata-2-pyfixest.qmd - text: "PyFixest on the GPU" @@ -103,7 +101,6 @@ quartodoc: - report.dtable - report.coefplot - report.iplot - - did.visualize.panelview - title: Misc / Utilities desc: | PyFixest internals and utilities diff --git a/docs/_sidebar.yml b/docs/_sidebar.yml index 304bf5da1..d0af77646 100644 --- a/docs/_sidebar.yml +++ b/docs/_sidebar.yml @@ -29,7 +29,6 @@ website: - reference/report.dtable.qmd - reference/report.coefplot.qmd - reference/report.iplot.qmd - - 
reference/did.visualize.panelview.qmd section: Summarize and Visualize - contents: - reference/estimation.demean.qmd diff --git a/docs/compare-fixest-pyfixest.qmd b/docs/compare-fixest-pyfixest.qmd deleted file mode 100644 index d160dc89c..000000000 --- a/docs/compare-fixest-pyfixest.qmd +++ /dev/null @@ -1,299 +0,0 @@ ---- -title: Does `PyFixest` match `fixest`? -toc: true -toc-title: "On this page" -toc-location: left ---- - -This vignette compares estimation results from `fixest` with `pyfixest` via the `rpy2` package. - -## Setup - - -```{python} -import pandas as pd -import rpy2.robjects as ro -from rpy2.robjects import pandas2ri -from rpy2.robjects.packages import importr - -import pyfixest as pf - -# Activate pandas2ri -pandas2ri.activate() - -# Import R packages -fixest = importr("fixest") -stats = importr("stats") -broom = importr("broom") - -# IPython magic commands for autoreloading -%load_ext autoreload -%autoreload 2 - -# Get data using pyfixest -data = pf.get_data(model="Feols", N=10_000, seed=99292) -``` - - -## Ordinary Least Squares (OLS) - -### IID Inference - -First, we estimate a model via `pyfixest. We compute "iid" standard errors. - - -```{python} -fit = pf.feols(fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="iid") -``` - -We estimate the same model with weights: - - -```{python} -fit_weights = pf.feols( - fml="Y ~ X1 + X2 | f1 + f2", data=data, weights="weights", vcov="iid" -) -``` - -Via `r-fixest` and `rpy2`, we get - - -```{python} -r_fit = fixest.feols( - ro.Formula("Y ~ X1 + X2 | f1 + f2"), - data=data, - vcov="iid", -) - -r_fit_weights = fixest.feols( - ro.Formula("Y ~ X1 + X2 | f1 + f2"), - data=data, - weights=ro.Formula("~weights"), - vcov="iid", -) -``` - - R[write to console]: NOTE: 3 observations removed because of NA values (LHS: 1, RHS: 1, Fixed-effects: 1). - - R[write to console]: NOTE: 3 observations removed because of NA values (LHS: 1, RHS: 1, Fixed-effects: 1). 
- - - -Let's compare how close the covariance matrices are: - - -```{python} -fit_vcov = fit._vcov -r_vcov = stats.vcov(r_fit) -fit_vcov - r_vcov -``` - - -And for WLS: - - -```{python} -fit_weights._vcov - stats.vcov(r_fit_weights) -``` - -We conclude by comparing all estimation results via the `tidy` methods: - -```{python} -fit.tidy() -``` - -```{python} -pd.DataFrame(broom.tidy_fixest(r_fit)).T -``` - -```{python} -fit_weights.tidy() -``` - -```{python} -pd.DataFrame(broom.tidy_fixest(r_fit_weights)).T -``` - - -### Heteroskedastic Errors - -We repeat the same exercise with heteroskedastic (HC1) errors: - - -```{python} -fit = pf.feols(fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="hetero") -fit_weights = pf.feols( - fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="hetero", weights="weights" -) -``` - - -```{python} -r_fit = fixest.feols( - ro.Formula("Y ~ X1 + X2 | f1 + f2"), - data=data, - vcov="hetero", -) - -r_fit_weights = fixest.feols( - ro.Formula("Y ~ X1 + X2 | f1 + f2"), - data=data, - weights=ro.Formula("~weights"), - vcov="hetero", -) -``` - -As before, we compare the variance covariance matrices: - - -```{python} -fit._vcov - stats.vcov(r_fit) -``` - -```{python} -fit_weights._vcov - stats.vcov(r_fit_weights) -``` - -We conclude by comparing all estimation results via the `tidy` methods: - -```{python} -fit.tidy() -``` - -```{python} -pd.DataFrame(broom.tidy_fixest(r_fit)).T -``` - -```{python} -fit_weights.tidy() -``` - -```{python} -pd.DataFrame(broom.tidy_fixest(r_fit_weights)).T -``` - - -### Cluster-Robust Errors - -We conclude with cluster robust errors. 
- - -```{python} -fit = pf.feols(fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov={"CRV1": "f1"}) -fit_weights = pf.feols( - fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov={"CRV1": "f1"}, weights="weights" -) - -r_fit = fixest.feols( - ro.Formula("Y ~ X1 + X2 | f1 + f2"), - data=data, - vcov=ro.Formula("~f1"), -) -r_fit_weights = fixest.feols( - ro.Formula("Y ~ X1 + X2 | f1 + f2"), - data=data, - weights=ro.Formula("~weights"), - vcov=ro.Formula("~f1"), -) -``` - -```{python} -fit._vcov - stats.vcov(r_fit) -``` - -```{python} -fit_weights._vcov - stats.vcov(r_fit_weights) -``` - -We conclude by comparing all estimation results via the `tidy` methods: - -```{python} -fit.tidy() -``` - -```{python} -pd.DataFrame(broom.tidy_fixest(r_fit)).T -``` - -```{python} -fit_weights.tidy() -``` - -```{python} -pd.DataFrame(broom.tidy_fixest(r_fit_weights)).T -``` - -## Poisson Regression - - -```{python} -data = pf.get_data(model="Fepois") -``` - - -```{python} -fit_iid = pf.fepois(fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="iid", iwls_tol=1e-10) -fit_hetero = pf.fepois( - fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="hetero", iwls_tol=1e-10 -) -fit_crv = pf.fepois( - fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov={"CRV1": "f1"}, iwls_tol=1e-10 -) - -fit_r_iid = fixest.fepois( - ro.Formula("Y ~ X1 + X2 | f1 + f2"), - data=data, - vcov="iid", -) - -fit_r_hetero = fixest.fepois( - ro.Formula("Y ~ X1 + X2 | f1 + f2"), - data=data, - vcov="hetero", -) - -fit_r_crv = fixest.fepois( - ro.Formula("Y ~ X1 + X2 | f1 + f2"), - data=data, - vcov=ro.Formula("~f1"), -) -``` - -```{python} -fit_iid._vcov - stats.vcov(fit_r_iid) -``` - -```{python} -fit_hetero._vcov - stats.vcov(fit_r_hetero) -``` - -```{python} -fit_crv._vcov - stats.vcov(fit_r_crv) -``` - -We conclude by comparing all estimation results via the `tidy` methods: - - -```{python} -fit_iid.tidy() -``` - -```{python} -pd.DataFrame(broom.tidy_fixest(fit_r_iid)).T -``` - -```{python} -fit_hetero.tidy() -``` - -```{python} 
-pd.DataFrame(broom.tidy_fixest(fit_r_hetero)).T -``` - -```{python} -fit_crv.tidy() -``` - -```{python} -pd.DataFrame(broom.tidy_fixest(fit_r_crv)).T -``` diff --git a/docs/pyfixest.md b/docs/pyfixest.md index 628bf001c..37f085a55 100644 --- a/docs/pyfixest.md +++ b/docs/pyfixest.md @@ -1,4 +1,6 @@ -![](figures/pyfixest-logo.png) +# PyFixest + +![PyFixest Logo](figures/pyfixest-logo.png) # PyFixest: Fast High-Dimensional Fixed Effects Regression in Python @@ -17,76 +19,95 @@ [![Citation](https://img.shields.io/badge/Cite%20as-PyFixest-blue)](https://github.com/py-econometrics/pyfixest?tab=readme-ov-file#how-to-cite) [![Donate | GiveDirectly](https://img.shields.io/static/v1?label=GiveDirectly&message=Donate&color=blue&style=flat-square)](https://github.com/py-econometrics/pyfixest?tab=readme-ov-file#support-pyfixest) -[pixi-badge]:https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/prefix-dev/pixi/main/assets/badge/v0.json&style=flat-square +[pixi-badge]: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/prefix-dev/pixi/main/assets/badge/v0.json&style=flat-square [pixi-url]: https://pixi.sh [chat-badge]: https://img.shields.io/discord/1259933360726216754.svg?label=&logo=discord&logoColor=ffffff&color=7389D8&labelColor=6A7EC2&style=flat-square [chat-url]: https://discord.gg/gBAydeDMVK -`PyFixest` is a Python implementation of the formidable [fixest](https://github.com/lrberge/fixest) package for fast high-dimensional fixed effects regression. +`PyFixest` is a Python implementation of the formidable +[fixest](https://github.com/lrberge/fixest) package for fast high-dimensional +fixed effects regression. -The package aims to mimic `fixest` syntax and functionality as closely as Python allows: if you know `fixest` well, the goal is that you won't have to read the docs to get started! In particular, this means that all of `fixest's` defaults are mirrored by `PyFixest`. 
+The package aims to mimic `fixest` syntax and functionality as closely as Python +allows: if you know `fixest` well, the goal is that you won't have to read the +docs to get started! In particular, this means that all of `fixest's` defaults +are mirrored by `PyFixest`. -Nevertheless, for a quick introduction, you can take a look at the [quickstart](https://py-econometrics.github.io/pyfixest/quickstart.html) or the regression chapter of [Arthur Turrell's](https://github.com/aeturrell) book on [Coding for Economists](https://aeturrell.github.io/coding-for-economists/econmt-regression.html#imports). +Nevertheless, for a quick introduction, you can take a look at the +[quickstart](https://py-econometrics.github.io/pyfixest/quickstart.html) or the +regression chapter of [Arthur Turrell's](https://github.com/aeturrell) book on +[Coding for Economists](https://aeturrell.github.io/coding-for-economists/econmt-regression.html#imports). -For questions on `PyFixest`, head on over to our [github discussions](https://github.com/py-econometrics/pyfixest/discussions), or (more informally) join our [Discord server](https://discord.gg/gBAydeDMVK). +For questions on `PyFixest`, head on over to our +[github discussions](https://github.com/py-econometrics/pyfixest/discussions), +or (more informally) join our [Discord server](https://discord.gg/gBAydeDMVK). ## Support PyFixest -If you enjoy using `PyFixest`, please consider donating to [GiveDirectly](https://donate.givedirectly.org/dedicate) and dedicating your donation to `pyfixest.dev@gmail.com`. -You can also leave a message through the donation form - your support and encouragement mean a lot to the developers! +If you enjoy using `PyFixest`, please consider donating to +[GiveDirectly](https://donate.givedirectly.org/dedicate) and dedicating your +donation to `pyfixest.dev@gmail.com`. You can also leave a message through the +donation form - your support and encouragement mean a lot to the developers! 
## Features -- **OLS**, **WLS** and **IV** Regression with Fixed-Effects Demeaning via [Frisch-Waugh-Lovell](https://bookdown.org/ts_robinson1994/10EconometricTheorems/frisch.html) -- **Poisson Regression** following the [pplmhdfe algorithm](https://journals.sagepub.com/doi/full/10.1177/1536867X20909691) -- Probit, Logit and Gaussian Family **GLMs** (currently without fixed effects demeaning, this is WIP) -- **Quantile Regression** using an Interior Point Solver -- Multiple Estimation Syntax -- Several **Robust** and **Cluster Robust Variance-Covariance** Estimators - -- **Wild Cluster Bootstrap** Inference (via - [wildboottest](https://github.com/py-econometrics/wildboottest)) -- **Difference-in-Differences** Estimators: - - The canonical Two-Way Fixed Effects Estimator - - [Gardner's two-stage - ("`Did2s`")](https://jrgcmu.github.io/2sdd_current.pdf) - estimator - - Basic Versions of the Local Projections estimator following - [Dube et al (2023)](https://www.nber.org/papers/w31184) - - The fully saturated Event-Study estimator following [Sun & Abraham (2021)](https://www.sciencedirect.com/science/article/abs/pii/S030440762030378X) -- **Multiple Hypothesis Corrections** following the Procedure by [Romano and Wolf](https://journals.sagepub.com/doi/pdf/10.1177/1536867X20976314) and **Simultaneous Confidence Intervals** using a **Multiplier Bootstrap** -- The **Causal Cluster Variance Estimator (CCV)** following [Abadie et al.](https://economics.mit.edu/sites/default/files/2022-09/When%20Should%20You%20Adjust%20Standard%20Errors%20for%20Clustering.pdf) -- Regression **Decomposition** following [Gelbach (2016)](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1425737) -- **Publication-ready tables** with [Great Tables](https://posit-dev.github.io/great-tables/articles/intro.html) or LaTex booktabs +- **OLS**, **WLS** and **IV** Regression with Fixed-Effects Demeaning via + [Frisch-Waugh-Lovell](https://bookdown.org/ts_robinson1994/10EconometricTheorems/frisch.html) 
+- **Poisson Regression** following the + [pplmhdfe algorithm](https://journals.sagepub.com/doi/full/10.1177/1536867X20909691) +- Probit, Logit and Gaussian Family **GLMs** (currently without fixed effects + demeaning, this is WIP) +- **Quantile Regression** using an Interior Point Solver +- Multiple Estimation Syntax +- Several **Robust** and **Cluster Robust Variance-Covariance** Estimators +- **Wild Cluster Bootstrap** Inference (via + [wildboottest](https://github.com/py-econometrics/wildboottest)) +- **Difference-in-Differences** Estimators: + - The canonical Two-Way Fixed Effects Estimator + - [Gardner's two-stage + ("`Did2s`")](https://jrgcmu.github.io/2sdd_current.pdf) estimator + - Basic Versions of the Local Projections estimator following + [Dube et al (2023)](https://www.nber.org/papers/w31184) + - The fully saturated Event-Study estimator following + [Sun & Abraham (2021)](https://www.sciencedirect.com/science/article/abs/pii/S030440762030378X) +- **Multiple Hypothesis Corrections** following the Procedure by + [Romano and Wolf](https://journals.sagepub.com/doi/pdf/10.1177/1536867X20976314) + and **Simultaneous Confidence Intervals** using a **Multiplier Bootstrap** +- The **Causal Cluster Variance Estimator (CCV)** following + [Abadie et al.](https://economics.mit.edu/sites/default/files/2022-09/When%20Should%20You%20Adjust%20Standard%20Errors%20for%20Clustering.pdf) +- Regression **Decomposition** following + [Gelbach (2016)](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1425737) +- **Publication-ready tables** with + [Great Tables](https://posit-dev.github.io/great-tables/articles/intro.html) or + LaTeX booktabs ## Installation You can install the release version from `PyPi` by running -``` py +```py pip install -U pyfixest ``` or the development version from github by running -``` py +```py pip install git+https://github.com/py-econometrics/pyfixest.git ``` ## Benchmarks -All benchmarks follow the [fixest
-benchmarks](https://github.com/lrberge/fixest/tree/master/_BENCHMARK). +All benchmarks follow the +[fixest benchmarks](https://github.com/lrberge/fixest/tree/master/_BENCHMARK). All non-pyfixest timings are taken from the `fixest` benchmarks. -![](figures/benchmarks_ols.svg) -![](figures/benchmarks_poisson.svg) -![](figures/quantreg_benchmarks.png) +![OLS Benchmarks](figures/benchmarks_ols.svg) +![Poisson Benchmarks](figures/benchmarks_poisson.svg) +![Quantile Regression Benchmarks](figures/quantreg_benchmarks.png) ## Quickstart - ```python import pyfixest as pf @@ -94,26 +115,25 @@ data = pf.get_data() pf.feols("Y ~ X1 | f1 + f2", data=data).summary() ``` - ### - - Estimation: OLS - Dep. var.: Y, Fixed effects: f1+f2 - Inference: CRV1 - Observations: 997 - - | Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% | - |:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:| - | X1 | -0.919 | 0.065 | -14.057 | 0.000 | -1.053 | -0.786 | - --- - RMSE: 1.441 R2: 0.609 R2 Within: 0.2 - +``` +### + +Estimation: OLS +Dep. var.: Y, Fixed effects: f1+f2 +Inference: CRV1 +Observations: 997 + +| Coefficient | Estimate | Std. 
Error | t value | Pr(>|t|) | 2.5% | 97.5% | +|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:| +| X1 | -0.919 | 0.065 | -14.057 | 0.000 | -1.053 | -0.786 | +--- +RMSE: 1.441 R2: 0.609 R2 Within: 0.2 +``` ### Multiple Estimation -You can estimate multiple models at once by using [multiple estimation -syntax](https://aeturrell.github.io/coding-for-economists/econmt-regression.html#multiple-regression-models): - - +You can estimate multiple models at once by using +[multiple estimation syntax](https://aeturrell.github.io/coding-for-economists/econmt-regression.html#multiple-regression-models): ```python # OLS Estimation: estimate multiple models at once @@ -122,6 +142,7 @@ fit = pf.feols("Y + Y2 ~X1 | csw0(f1, f2)", data = data, vcov = {'CRV1':'group_i fit.etable() ``` +``` est1 est2 est3 est4 est5 est6 ------------ ----------------- ----------------- ----------------- ----------------- ----------------- ----------------- depvar Y Y2 Y Y2 Y Y2 @@ -139,20 +160,18 @@ fit.etable() Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001 Format of coefficient cell: Coefficient (Std. 
Error) - - - +``` ### Adjust Standard Errors "on-the-fly" Standard Errors can be adjusted after estimation, "on-the-fly": - ```python fit1 = fit.fetch_model(0) fit1.vcov("hetero").summary() ``` +``` Model: Y~X1 ### @@ -167,18 +186,18 @@ fit1.vcov("hetero").summary() | X1 | -1.000 | 0.082 | -12.134 | 0.000 | -1.162 | -0.838 | --- RMSE: 2.158 R2: 0.123 - +``` ### Poisson Regression via `fepois()` You can estimate Poisson Regressions via the `fepois()` function: - ```python poisson_data = pf.get_data(model = "Fepois") pf.fepois("Y ~ X1 + X2 | f1 + f2", data = poisson_data).summary() ``` +``` ### Estimation: Poisson @@ -192,23 +211,22 @@ pf.fepois("Y ~ X1 + X2 | f1 + f2", data = poisson_data).summary() | X2 | -0.015 | 0.010 | -1.449 | 0.147 | -0.035 | 0.005 | --- Deviance: 1068.169 - +``` ### IV Estimation via three-part formulas -Last, `PyFixest` also supports IV estimation via three part formula -syntax: - +Last, `PyFixest` also supports IV estimation via three part formula syntax: ```python fit_iv = pf.feols("Y ~ 1 | f1 | X1 ~ Z1", data = data) fit_iv.summary() ``` +``` ### Estimation: IV - Dep. var.: Y, Fixed effects: f1 + Dep.var.: Y, Fixed effects: f1 Inference: CRV1 Observations: 997 @@ -216,6 +234,7 @@ fit_iv.summary() |:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:| | X1 | -1.025 | 0.115 | -8.930 | 0.000 | -1.259 | -0.790 | --- +``` ## Quantile Regression via `pf.quantreg` @@ -223,15 +242,18 @@ fit_iv.summary() fit_qr = pf.quantreg("Y ~ X1 + X2", data = data, quantile = 0.5) ``` - ## Call for Contributions Thanks for showing interest in contributing to `pyfixest`! We appreciate all -contributions and constructive feedback, whether that be reporting bugs, requesting -new features, or suggesting improvements to documentation. +contributions and constructive feedback, whether that be reporting bugs, +requesting new features, or suggesting improvements to documentation. 
-If you'd like to get involved, but are not yet sure how, please feel free to send us an [email](alexander-fischer1801@t-online.de). Some familiarity with -either Python or econometrics will help, but you really don't need to be a `numpy` core developer or have published in [Econometrica](https://onlinelibrary.wiley.com/journal/14680262) =) We'd be more than happy to invest time to help you get started! +If you'd like to get involved, but are not yet sure how, please feel free to +send us an [email](mailto:alexander-fischer1801@t-online.de). Some familiarity +with either Python or econometrics will help, but you really don't need to be a +`numpy` core developer or have published in +[Econometrica](https://onlinelibrary.wiley.com/journal/14680262) =) We'd be more +than happy to invest time to help you get started! ## Contributors ✨ @@ -259,14 +281,15 @@ Thanks goes to these wonderful people: -This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome! +This project follows the +[all-contributors](https://github.com/all-contributors/all-contributors) +specification. Contributions of any kind welcome! ## Acknowledgements We thank all institutions that have funded or supported work on PyFixest! - - +![AAI Institute Logo](../figures/aai-institute-logo.svg) ## How to Cite diff --git a/pyfixest/estimation/feiv_.py b/pyfixest/estimation/feiv_.py index 29d604a24..262bf4267 100644 --- a/pyfixest/estimation/feiv_.py +++ b/pyfixest/estimation/feiv_.py @@ -18,40 +18,58 @@ class Feiv(Feols): Non user-facing class to estimate an IV model using a 2SLS estimator. Inherits from the Feols class. Users should not directly instantiate this class, - but rather use the [feols()](/reference/estimation.feols.qmd) function. Note that + but rather use the [feols()](estimation.feols) function.
Note that no demeaning is performed in this class: demeaning is performed in the - [FixestMulti](/reference/estimation.fixest_multi.qmd) class (to allow for caching + [FixestMulti](estimation.fixest_multi) class (to allow for caching of demeaned variables for multiple estimation). Parameters ---------- - Y : np.ndarray - Dependent variable, a two-dimensional np.array. - X : np.ndarray - Independent variables, a two-dimensional np.array. - endgvar : np.ndarray - Endogenous Indenpendent variables, a two-dimensional np.array. - Z : np.ndarray - Instruments, a two-dimensional np.array. - weights : np.ndarray - Weights, a one-dimensional np.array. - coefnames_x : list - Names of the coefficients of X. - coefnames_z : list - Names of the coefficients of Z. + FixestFormula : FixestFormula + A formula object describing the model to be estimated. + data : pd.DataFrame + The dataframe containing the data. + ssc_dict : dict[str, Union[str, bool]] + A dictionary specifying the small sample correction to use. + drop_singletons : bool + Whether to drop singleton fixed effects. + drop_intercept : bool + Whether to drop the intercept. + weights : Optional[str] + The name of the weights column. + weights_type : Optional[str] + The type of weights to use. collin_tol : float - Tolerance for collinearity check. + The tolerance for collinearity detection. + fixef_tol : float + The tolerance for the fixed effects algorithm. + fixef_maxiter : int + The maximum number of iterations for the fixed effects algorithm. + lookup_demeaned_data : dict[str, pd.DataFrame] + A dictionary of demeaned data. solver: Literal["np.linalg.lstsq", "np.linalg.solve", "scipy.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"], default is "scipy.linalg.solve". Solver to use for the estimation. demeaner_backend: DemeanerBackendOptions, optional The backend to use for demeaning. Can be either "numba", "jax", or "rust". Defaults to "numba". - weights_name : Optional[str] - Name of the weights variable. 
- weights_type : Optional[str] - Type of the weights variable. Either "aweights" for analytic weights - or "fweights" for frequency weights. + store_data : bool, optional + Whether to store the data in the model object. Defaults to True. + copy_data : bool, optional + Whether to copy the data before estimation. Defaults to True. + lean : bool, optional + Whether to use a lean estimation, which stores less data in the model object. + Defaults to False. + context : int or Mapping[str, Any] + A dictionary containing additional context variables to be used by + formulaic during the creation of the model matrix. This can include + custom factorization functions, transformations, or any other + variables that need to be available in the formula environment. + sample_split_var : Optional[str], optional + The name of the variable to use for sample splitting. Defaults to None. + sample_split_value : Optional[Union[str, int]], optional + The value of the sample splitting variable to use for the current model. + Defaults to None. Attributes ---------- diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py index 503d0887c..4fc785d1a 100644 --- a/pyfixest/estimation/feols_.py +++ b/pyfixest/estimation/feols_.py @@ -68,37 +68,59 @@ class Feols: Non user-facing class to estimate a linear regression via OLS. Users should not directly instantiate this class, - but rather use the [feols()](/reference/estimation.feols.qmd) function. Note that + but rather use the [feols()](estimation.feols) function. Note that no demeaning is performed in this class: demeaning is performed in the - [FixestMulti](/reference/estimation.fixest_multi.qmd) class (to allow for caching + [FixestMulti](estimation.fixest_multi) class (to allow for caching of demeaned variables for multiple estimation). Parameters ---------- - Y : np.ndarray - Dependent variable, a two-dimensional numpy array. - X : np.ndarray - Independent variables, a two-dimensional numpy array. 
- weights : np.ndarray - Weights, a one-dimensional numpy array. - collin_tol : float - Tolerance level for collinearity checks. - coefnames : list[str] - Names of the coefficients (of the design matrix X). - weights_name : Optional[str] - Name of the weights variable. + FixestFormula : FixestFormula + A formula object describing the model to be estimated. + data : pd.DataFrame + The dataframe containing the data. + ssc_dict : dict[str, Union[str, bool]] + A dictionary specifying the small sample correction to use. + drop_singletons : bool + Whether to drop singleton fixed effects. + drop_intercept : bool + Whether to drop the intercept. + weights : Optional[str] + The name of the weights column. weights_type : Optional[str] - Type of the weights variable. Either "aweights" for analytic weights or - "fweights" for frequency weights. - solver : str, optional. + The type of weights to use. + collin_tol : float + The tolerance for collinearity detection. + fixef_tol : float + The tolerance for the fixed effects algorithm. + fixef_maxiter : int + The maximum number of iterations for the fixed effects algorithm. + lookup_demeaned_data : dict[str, pd.DataFrame] + A dictionary of demeaned data. + solver : str, optional The solver to use for the regression. Can be "np.linalg.lstsq", "np.linalg.solve", "scipy.linalg.solve", "scipy.sparse.linalg.lsqr" and "jax". Defaults to "scipy.linalg.solve". + demeaner_backend : str, optional + The backend to use for demeaning. Can be "numba", "jax", or "rust". + Defaults to "numba". + store_data : bool, optional + Whether to store the data in the model object. Defaults to True. + copy_data : bool, optional + Whether to copy the data before estimation. Defaults to True. + lean : bool, optional + Whether to use a lean estimation, which stores less data in the model object. + Defaults to False. context : int or Mapping[str, Any] A dictionary containing additional context variables to be used by formulaic during the creation of the model matrix.
This can include custom factorization functions, transformations, or any other variables that need to be available in the formula environment. + sample_split_var : Optional[str], optional + The name of the variable to use for sample splitting. Defaults to None. + sample_split_value : Optional[Union[str, int, float]], optional + The value of the sample splitting variable to use for the current model. + Defaults to None. Attributes ---------- @@ -535,11 +557,9 @@ def _get_predictors(self) -> None: def get_fit(self) -> None: """ - Fit an OLS model. + Fit the regression model. - Returns - ------- - None + This method estimates the regression coefficients and computes the residuals. """ if self._X_is_empty: self._u_hat = self._Y @@ -589,7 +609,7 @@ def vcov( Returns ------- Feols - An instance of class [Feols(/reference/Feols.qmd) with updated inference. + An instance of class Feols with updated inference. """ # Assuming `data` is the DataFrame in question @@ -1088,10 +1108,10 @@ def wald_test(self, R=None, q=None, distribution="F"): Parameters ---------- - R : array-like, optional + R : np.ndarray, optional The matrix R of the linear hypothesis. If None, defaults to an identity matrix. - q : array-like, optional + q : np.ndarray, optional The vector q of the linear hypothesis. If None, defaults to a vector of zeros. distribution : str, optional