From be6ed1df7af90b3900a0692b8866be21475eb832 Mon Sep 17 00:00:00 2001 From: alal Date: Mon, 14 Jul 2025 20:37:54 -0700 Subject: [PATCH 1/5] docs: improve quarto workflow and fix minor issues --- .github/workflows/docs.yaml | 101 ++++++++++++++++++++++++++++++++++++ docs/.gitignore | 7 +-- docs/_quarto.yml | 5 -- docs/_sidebar.yml | 1 - 4 files changed, 102 insertions(+), 12 deletions(-) create mode 100644 .github/workflows/docs.yaml diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 000000000..a42ddbe14 --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,101 @@ +name: Docs + +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + lint-docs: + name: "Lint Docs" + runs-on: ubuntu-latest + steps: + - name: Checkout source + uses: actions/checkout@v4 + - name: Install markdownlint-cli + run: npm install -g markdownlint-cli + - name: Lint markdown files + run: markdownlint docs/*.md + + build-docs: + name: "Build Docs" + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9", "3.12"] + steps: + - name: Checkout source + uses: actions/checkout@v4 + + - name: Setup python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - uses: prefix-dev/setup-pixi@v0.8.3 + with: + pixi-version: v0.41.4 + cache: true + + - name: Set up Quarto + uses: quarto-dev/quarto-actions/setup@v2 + + - name: install tinytex + run: | + quarto install tinytex + + - name: Compile docstrings with quartodoc + run: | + pixi run docs-build + + - name: Render docs + run: | + pixi run docs-render + + - name: Save docs artifact + uses: actions/upload-artifact@v4 + with: + name: docs-html + path: docs/_site + + publish-docs: + name: "Publish Docs" + runs-on: ubuntu-latest + permissions: + contents: write + concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + needs: ["build-docs"] + if: github.ref == 'refs/heads/master' && github.event_name == 'push' + steps: + - name: Download docs artifact + uses: actions/download-artifact@v4 + with: + name: docs-html + path: docs/_site + + - name: Publish docs to GitHub Pages + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: docs/_site + + preview-docs: + name: "Preview Docs" + runs-on: ubuntu-latest + needs: ["build-docs"] + if: github.event_name == 'pull_request' + steps: + - name: Download docs artifact + uses: actions/download-artifact@v4 + with: + name: docs-html + path: docs/_site + - name: Upload docs artifact + uses: actions/upload-artifact@v4 + with: + name: docs-preview + path: docs/_site diff --git a/docs/.gitignore b/docs/.gitignore index 9c3d6bdd0..f1fddd3ff 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1,6 +1 @@ -/.quarto/ -site_libs/ -_site/ -search.json -objects.json -reference/ +*.swp \ No newline at end of file diff --git a/docs/_quarto.yml b/docs/_quarto.yml index b2e48dcf7..33b4a7bbe 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -2,10 +2,6 @@ project: type: website output-dir: _site -execute: - # point quarto to the correct python environment - python: "/pyfixest/.pixi/envs/docs/Scripts/python.exe" - metadata-files: - _sidebar.yml @@ -103,7 +99,6 @@ quartodoc: - report.dtable - report.coefplot - report.iplot - - did.visualize.panelview - title: Misc / Utilities desc: | PyFixest internals and utilities diff --git a/docs/_sidebar.yml b/docs/_sidebar.yml index 304bf5da1..d0af77646 100644 --- a/docs/_sidebar.yml +++ b/docs/_sidebar.yml @@ -29,7 +29,6 @@ website: - reference/report.dtable.qmd - reference/report.coefplot.qmd - reference/report.iplot.qmd - - reference/did.visualize.panelview.qmd section: Summarize and Visualize - contents: - reference/estimation.demean.qmd From b6df3fe176e234abfba48f3bd686c7b2a0a87957 Mon Sep 17 00:00:00 2001 From: alal Date: Mon, 14 Jul 2025 20:38:50 -0700 Subject: [PATCH 2/5] refactor: separate linting into its own workflow --- .github/workflows/docs.yaml | 11 ----------- .github/workflows/lint.yaml | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 11 deletions(-) create mode 100644 .github/workflows/lint.yaml diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index a42ddbe14..00cfab43d 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -9,17 +9,6 @@ on: - master jobs: - lint-docs: - name: "Lint Docs" - runs-on: ubuntu-latest - steps: - - name: Checkout source - uses: actions/checkout@v4 - - name: Install markdownlint-cli - run: npm install -g markdownlint-cli - - name: Lint markdown files - run: markdownlint docs/*.md - build-docs: name: "Build Docs" runs-on: ubuntu-latest diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 000000000..025055b25 --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,21 @@ +name: Lint + +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + lint-docs: + name: "Lint Docs" + runs-on: ubuntu-latest + steps: + - name: Checkout source + uses: actions/checkout@v4 + - name: Install markdownlint-cli + run: npm install -g markdownlint-cli + - name: Lint markdown files + run: markdownlint docs/*.md From d34a45bceb5f513407b16ad814cff5125f2e7619 Mon Sep 17 00:00:00 2001 From: alal Date: Mon, 14 Jul 2025 20:39:06 -0700 Subject: [PATCH 3/5] ci: separate docs and linting workflows --- docs/_quarto.yml | 5 +++++ docs/_sidebar.yml | 1 + 2 files changed, 6 insertions(+) diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 33b4a7bbe..b2e48dcf7 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -2,6 +2,10 @@ project: type: website output-dir: _site +execute: + # point quarto to the correct python environment + python: "/pyfixest/.pixi/envs/docs/Scripts/python.exe" + metadata-files: - _sidebar.yml @@ -99,6 +103,7 @@ quartodoc: - report.dtable - report.coefplot - report.iplot + - did.visualize.panelview - title: Misc / Utilities desc: | PyFixest internals and utilities diff --git a/docs/_sidebar.yml b/docs/_sidebar.yml index d0af77646..304bf5da1 100644 --- a/docs/_sidebar.yml +++ b/docs/_sidebar.yml @@ -29,6 +29,7 @@ website: - reference/report.dtable.qmd - reference/report.coefplot.qmd - reference/report.iplot.qmd + - reference/did.visualize.panelview.qmd section: Summarize and Visualize - contents: - reference/estimation.demean.qmd From 7e6bde4e447a40908c8f934b1726624da1566fa0 Mon Sep 17 00:00:00 2001 From: alal Date: Mon, 14 Jul 2025 20:39:48 -0700 Subject: [PATCH 4/5] docs: improve quarto config and sidebar --- docs/_quarto.yml | 5 ----- docs/_sidebar.yml | 1 - 2 files changed, 6 deletions(-) diff --git a/docs/_quarto.yml b/docs/_quarto.yml index b2e48dcf7..33b4a7bbe 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -2,10 +2,6 @@ project: type: website output-dir: _site -execute: - # point quarto to the correct python environment - python: "/pyfixest/.pixi/envs/docs/Scripts/python.exe" - metadata-files: - _sidebar.yml @@ -103,7 +99,6 @@ quartodoc: - report.dtable - report.coefplot - report.iplot - - did.visualize.panelview - title: Misc / Utilities desc: | PyFixest internals and utilities diff --git a/docs/_sidebar.yml b/docs/_sidebar.yml index 304bf5da1..d0af77646 100644 --- a/docs/_sidebar.yml +++ b/docs/_sidebar.yml @@ -29,7 +29,6 @@ website: - reference/report.dtable.qmd - reference/report.coefplot.qmd - reference/report.iplot.qmd - - reference/did.visualize.panelview.qmd section: Summarize and Visualize - contents: - reference/estimation.demean.qmd From dcb41b1cdb1436c24aabf91cd635a875c63f5929 Mon Sep 17 00:00:00 2001 From: alal Date: Mon, 14 Jul 2025 23:47:44 -0700 Subject: [PATCH 5/5] feat(docs): improve and automate quarto documentation workflow This commit overhauls the Quarto documentation generation and CI process. - Automates the documentation build and deployment via GitHub Actions. - Implements Quarto's 'freeze' feature to significantly speed up builds by caching computational results. - Adds a markdown linting job to the CI to ensure documentation quality. - Fixes numerous docstrings and cross-reference links in the API documentation. - Updates the Quarto configuration to correctly build the reference documentation and ignore build artifacts (, ). --- .github/workflows/docs.yaml | 12 +- docs/.gitignore | 5 +- docs/_quarto.yml | 6 +- docs/compare-fixest-pyfixest.qmd | 299 ------------------------------- docs/pyfixest.md | 169 +++++++++-------- pyfixest/estimation/feiv_.py | 62 ++++--- pyfixest/estimation/feols_.py | 68 ++++--- 7 files changed, 196 insertions(+), 425 deletions(-) delete mode 100644 docs/compare-fixest-pyfixest.qmd diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 00cfab43d..1b4632f95 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -42,12 +42,16 @@ jobs: - name: Render docs run: | - pixi run docs-render + if [ "${{ github.event_name }}" == "pull_request" ]; then + pixi run docs-render -- --no-cache + else + pixi run docs-render + fi - name: Save docs artifact uses: actions/upload-artifact@v4 with: - name: docs-html + name: docs-html-${{ matrix.python-version }} path: docs/_site publish-docs: @@ -63,7 +67,7 @@ jobs: - name: Download docs artifact uses: actions/download-artifact@v4 with: - name: docs-html + name: docs-html-3.12 path: docs/_site - name: Publish docs to GitHub Pages @@ -81,7 +85,7 @@ jobs: - name: Download docs artifact uses: actions/download-artifact@v4 with: - name: docs-html + name: docs-html-3.12 path: docs/_site - name: Upload docs artifact uses: actions/upload-artifact@v4 diff --git a/docs/.gitignore b/docs/.gitignore index f1fddd3ff..7002672a0 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1 +1,4 @@ -*.swp \ No newline at end of file +_site/ +.quarto/ +reference/ +*.swp diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 33b4a7bbe..3375a0a8f 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -1,6 +1,10 @@ project: type: website output-dir: _site + freeze: auto + +exclude: + - "compare-fixest-pyfixest.qmd" metadata-files: - _sidebar.yml @@ -43,8 +47,6 @@ website: text: "On Small Sample Corrections" - file: quantile-regression.qmd text: "Quantile Regression" - - text: "Compare fixest & PyFixest" - file: compare-fixest-pyfixest.qmd - text: "Compare Stata & PyFixest" file: stata-2-pyfixest.qmd - text: "PyFixest on the GPU" diff --git a/docs/compare-fixest-pyfixest.qmd b/docs/compare-fixest-pyfixest.qmd deleted file mode 100644 index d160dc89c..000000000 --- a/docs/compare-fixest-pyfixest.qmd +++ /dev/null @@ -1,299 +0,0 @@ ---- -title: Does `PyFixest` match `fixest`? -toc: true -toc-title: "On this page" -toc-location: left ---- - -This vignette compares estimation results from `fixest` with `pyfixest` via the `rpy2` package. - -## Setup - - -```{python} -import pandas as pd -import rpy2.robjects as ro -from rpy2.robjects import pandas2ri -from rpy2.robjects.packages import importr - -import pyfixest as pf - -# Activate pandas2ri -pandas2ri.activate() - -# Import R packages -fixest = importr("fixest") -stats = importr("stats") -broom = importr("broom") - -# IPython magic commands for autoreloading -%load_ext autoreload -%autoreload 2 - -# Get data using pyfixest -data = pf.get_data(model="Feols", N=10_000, seed=99292) -``` - - -## Ordinary Least Squares (OLS) - -### IID Inference - -First, we estimate a model via `pyfixest. We compute "iid" standard errors. - - -```{python} -fit = pf.feols(fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="iid") -``` - -We estimate the same model with weights: - - -```{python} -fit_weights = pf.feols( - fml="Y ~ X1 + X2 | f1 + f2", data=data, weights="weights", vcov="iid" -) -``` - -Via `r-fixest` and `rpy2`, we get - - -```{python} -r_fit = fixest.feols( - ro.Formula("Y ~ X1 + X2 | f1 + f2"), - data=data, - vcov="iid", -) - -r_fit_weights = fixest.feols( - ro.Formula("Y ~ X1 + X2 | f1 + f2"), - data=data, - weights=ro.Formula("~weights"), - vcov="iid", -) -``` - - R[write to console]: NOTE: 3 observations removed because of NA values (LHS: 1, RHS: 1, Fixed-effects: 1). - - R[write to console]: NOTE: 3 observations removed because of NA values (LHS: 1, RHS: 1, Fixed-effects: 1). - - - -Let's compare how close the covariance matrices are: - - -```{python} -fit_vcov = fit._vcov -r_vcov = stats.vcov(r_fit) -fit_vcov - r_vcov -``` - - -And for WLS: - - -```{python} -fit_weights._vcov - stats.vcov(r_fit_weights) -``` - -We conclude by comparing all estimation results via the `tidy` methods: - -```{python} -fit.tidy() -``` - -```{python} -pd.DataFrame(broom.tidy_fixest(r_fit)).T -``` - -```{python} -fit_weights.tidy() -``` - -```{python} -pd.DataFrame(broom.tidy_fixest(r_fit_weights)).T -``` - - -### Heteroskedastic Errors - -We repeat the same exercise with heteroskedastic (HC1) errors: - - -```{python} -fit = pf.feols(fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="hetero") -fit_weights = pf.feols( - fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="hetero", weights="weights" -) -``` - - -```{python} -r_fit = fixest.feols( - ro.Formula("Y ~ X1 + X2 | f1 + f2"), - data=data, - vcov="hetero", -) - -r_fit_weights = fixest.feols( - ro.Formula("Y ~ X1 + X2 | f1 + f2"), - data=data, - weights=ro.Formula("~weights"), - vcov="hetero", -) -``` - -As before, we compare the variance covariance matrices: - - -```{python} -fit._vcov - stats.vcov(r_fit) -``` - -```{python} -fit_weights._vcov - stats.vcov(r_fit_weights) -``` - -We conclude by comparing all estimation results via the `tidy` methods: - -```{python} -fit.tidy() -``` - -```{python} -pd.DataFrame(broom.tidy_fixest(r_fit)).T -``` - -```{python} -fit_weights.tidy() -``` - -```{python} -pd.DataFrame(broom.tidy_fixest(r_fit_weights)).T -``` - - -### Cluster-Robust Errors - -We conclude with cluster robust errors. - - -```{python} -fit = pf.feols(fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov={"CRV1": "f1"}) -fit_weights = pf.feols( - fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov={"CRV1": "f1"}, weights="weights" -) - -r_fit = fixest.feols( - ro.Formula("Y ~ X1 + X2 | f1 + f2"), - data=data, - vcov=ro.Formula("~f1"), -) -r_fit_weights = fixest.feols( - ro.Formula("Y ~ X1 + X2 | f1 + f2"), - data=data, - weights=ro.Formula("~weights"), - vcov=ro.Formula("~f1"), -) -``` - -```{python} -fit._vcov - stats.vcov(r_fit) -``` - -```{python} -fit_weights._vcov - stats.vcov(r_fit_weights) -``` - -We conclude by comparing all estimation results via the `tidy` methods: - -```{python} -fit.tidy() -``` - -```{python} -pd.DataFrame(broom.tidy_fixest(r_fit)).T -``` - -```{python} -fit_weights.tidy() -``` - -```{python} -pd.DataFrame(broom.tidy_fixest(r_fit_weights)).T -``` - -## Poisson Regression - - -```{python} -data = pf.get_data(model="Fepois") -``` - - -```{python} -fit_iid = pf.fepois(fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="iid", iwls_tol=1e-10) -fit_hetero = pf.fepois( - fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="hetero", iwls_tol=1e-10 -) -fit_crv = pf.fepois( - fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov={"CRV1": "f1"}, iwls_tol=1e-10 -) - -fit_r_iid = fixest.fepois( - ro.Formula("Y ~ X1 + X2 | f1 + f2"), - data=data, - vcov="iid", -) - -fit_r_hetero = fixest.fepois( - ro.Formula("Y ~ X1 + X2 | f1 + f2"), - data=data, - vcov="hetero", -) - -fit_r_crv = fixest.fepois( - ro.Formula("Y ~ X1 + X2 | f1 + f2"), - data=data, - vcov=ro.Formula("~f1"), -) -``` - -```{python} -fit_iid._vcov - stats.vcov(fit_r_iid) -``` - -```{python} -fit_hetero._vcov - stats.vcov(fit_r_hetero) -``` - -```{python} -fit_crv._vcov - stats.vcov(fit_r_crv) -``` - -We conclude by comparing all estimation results via the `tidy` methods: - - -```{python} -fit_iid.tidy() -``` - -```{python} -pd.DataFrame(broom.tidy_fixest(fit_r_iid)).T -``` - -```{python} -fit_hetero.tidy() -``` - -```{python} -pd.DataFrame(broom.tidy_fixest(fit_r_hetero)).T -``` - -```{python} -fit_crv.tidy() -``` - -```{python} -pd.DataFrame(broom.tidy_fixest(fit_r_crv)).T -``` diff --git a/docs/pyfixest.md b/docs/pyfixest.md index 628bf001c..37f085a55 100644 --- a/docs/pyfixest.md +++ b/docs/pyfixest.md @@ -1,4 +1,6 @@ -![](figures/pyfixest-logo.png) +# PyFixest + +![PyFixest Logo](figures/pyfixest-logo.png) # PyFixest: Fast High-Dimensional Fixed Effects Regression in Python @@ -17,76 +19,95 @@ [![Citation](https://img.shields.io/badge/Cite%20as-PyFixest-blue)](https://github.com/py-econometrics/pyfixest?tab=readme-ov-file#how-to-cite) [![Donate | GiveDirectly](https://img.shields.io/static/v1?label=GiveDirectly&message=Donate&color=blue&style=flat-square)](https://github.com/py-econometrics/pyfixest?tab=readme-ov-file#support-pyfixest) -[pixi-badge]:https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/prefix-dev/pixi/main/assets/badge/v0.json&style=flat-square +[pixi-badge]: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/prefix-dev/pixi/main/assets/badge/v0.json&style=flat-square [pixi-url]: https://pixi.sh [chat-badge]: https://img.shields.io/discord/1259933360726216754.svg?label=&logo=discord&logoColor=ffffff&color=7389D8&labelColor=6A7EC2&style=flat-square [chat-url]: https://discord.gg/gBAydeDMVK -`PyFixest` is a Python implementation of the formidable [fixest](https://github.com/lrberge/fixest) package for fast high-dimensional fixed effects regression. +`PyFixest` is a Python implementation of the formidable +[fixest](https://github.com/lrberge/fixest) package for fast high-dimensional +fixed effects regression. -The package aims to mimic `fixest` syntax and functionality as closely as Python allows: if you know `fixest` well, the goal is that you won't have to read the docs to get started! In particular, this means that all of `fixest's` defaults are mirrored by `PyFixest`. +The package aims to mimic `fixest` syntax and functionality as closely as Python +allows: if you know `fixest` well, the goal is that you won't have to read the +docs to get started! In particular, this means that all of `fixest's` defaults +are mirrored by `PyFixest`. -Nevertheless, for a quick introduction, you can take a look at the [quickstart](https://py-econometrics.github.io/pyfixest/quickstart.html) or the regression chapter of [Arthur Turrell's](https://github.com/aeturrell) book on [Coding for Economists](https://aeturrell.github.io/coding-for-economists/econmt-regression.html#imports). +Nevertheless, for a quick introduction, you can take a look at the +[quickstart](https://py-econometrics.github.io/pyfixest/quickstart.html) or the +regression chapter of [Arthur Turrell's](https://github.com/aeturrell) book on +[Coding for Economists](https://aeturrell.github.io/coding-for-economists/econmt-regression.html#imports). -For questions on `PyFixest`, head on over to our [github discussions](https://github.com/py-econometrics/pyfixest/discussions), or (more informally) join our [Discord server](https://discord.gg/gBAydeDMVK). +For questions on `PyFixest`, head on over to our +[github discussions](https://github.com/py-econometrics/pyfixest/discussions), +or (more informally) join our [Discord server](https://discord.gg/gBAydeDMVK). ## Support PyFixest -If you enjoy using `PyFixest`, please consider donating to [GiveDirectly](https://donate.givedirectly.org/dedicate) and dedicating your donation to `pyfixest.dev@gmail.com`. -You can also leave a message through the donation form - your support and encouragement mean a lot to the developers! +If you enjoy using `PyFixest`, please consider donating to +[GiveDirectly](https://donate.givedirectly.org/dedicate) and dedicating your +donation to `pyfixest.dev@gmail.com`. You can also leave a message through the +donation form - your support and encouragement mean a lot to the developers! ## Features -- **OLS**, **WLS** and **IV** Regression with Fixed-Effects Demeaning via [Frisch-Waugh-Lovell](https://bookdown.org/ts_robinson1994/10EconometricTheorems/frisch.html) -- **Poisson Regression** following the [pplmhdfe algorithm](https://journals.sagepub.com/doi/full/10.1177/1536867X20909691) -- Probit, Logit and Gaussian Family **GLMs** (currently without fixed effects demeaning, this is WIP) -- **Quantile Regression** using an Interior Point Solver -- Multiple Estimation Syntax -- Several **Robust** and **Cluster Robust Variance-Covariance** Estimators - -- **Wild Cluster Bootstrap** Inference (via - [wildboottest](https://github.com/py-econometrics/wildboottest)) -- **Difference-in-Differences** Estimators: - - The canonical Two-Way Fixed Effects Estimator - - [Gardner's two-stage - ("`Did2s`")](https://jrgcmu.github.io/2sdd_current.pdf) - estimator - - Basic Versions of the Local Projections estimator following - [Dube et al (2023)](https://www.nber.org/papers/w31184) - - The fully saturated Event-Study estimator following [Sun & Abraham (2021)](https://www.sciencedirect.com/science/article/abs/pii/S030440762030378X) -- **Multiple Hypothesis Corrections** following the Procedure by [Romano and Wolf](https://journals.sagepub.com/doi/pdf/10.1177/1536867X20976314) and **Simultaneous Confidence Intervals** using a **Multiplier Bootstrap** -- The **Causal Cluster Variance Estimator (CCV)** following [Abadie et al.](https://economics.mit.edu/sites/default/files/2022-09/When%20Should%20You%20Adjust%20Standard%20Errors%20for%20Clustering.pdf) -- Regression **Decomposition** following [Gelbach (2016)](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1425737) -- **Publication-ready tables** with [Great Tables](https://posit-dev.github.io/great-tables/articles/intro.html) or LaTex booktabs +- **OLS**, **WLS** and **IV** Regression with Fixed-Effects Demeaning via + [Frisch-Waugh-Lovell](https://bookdown.org/ts_robinson1994/10EconometricTheorems/frisch.html) +- **Poisson Regression** following the + [pplmhdfe algorithm](https://journals.sagepub.com/doi/full/10.1177/1536867X20909691) +- Probit, Logit and Gaussian Family **GLMs** (currently without fixed effects + demeaning, this is WIP) +- **Quantile Regression** using an Interior Point Solver +- Multiple Estimation Syntax +- Several **Robust** and **Cluster Robust Variance-Covariance** Estimators +- **Wild Cluster Bootstrap** Inference (via + [wildboottest](https://github.com/py-econometrics/wildboottest)) +- **Difference-in-Differences** Estimators: + - The canonical Two-Way Fixed Effects Estimator + - [Gardner's two-stage + ("`Did2s`")](https://jrgcmu.github.io/2sdd_current.pdf) estimator + - Basic Versions of the Local Projections estimator following + [Dube et al (2023)](https://www.nber.org/papers/w31184) + - The fully saturated Event-Study estimator following + [Sun & Abraham (2021)](https://www.sciencedirect.com/science/article/abs/pii/S030440762030378X) +- **Multiple Hypothesis Corrections** following the Procedure by + [Romano and Wolf](https://journals.sagepub.com/doi/pdf/10.1177/1536867X20976314) + and **Simultaneous Confidence Intervals** using a **Multiplier Bootstrap** +- The **Causal Cluster Variance Estimator (CCV)** following + [Abadie et al.](https://economics.mit.edu/sites/default/files/2022-09/When%20Should%20You%20Adjust%20Standard%20Errors%20for%20Clustering.pdf) +- Regression **Decomposition** following + [Gelbach (2016)](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1425737) +- **Publication-ready tables** with + [Great Tables](https://posit-dev.github.io/great-tables/articles/intro.html) or + LaTex booktabs ## Installation You can install the release version from `PyPi` by running -``` py +```py pip install -U pyfixest ``` or the development version from github by running -``` py +```py pip install git+https://github.com/py-econometrics/pyfixest.git ``` ## Benchmarks -All benchmarks follow the [fixest -benchmarks](https://github.com/lrberge/fixest/tree/master/_BENCHMARK). +All benchmarks follow the +[fixest benchmarks](https://github.com/lrberge/fixest/tree/master/_BENCHMARK). All non-pyfixest timings are taken from the `fixest` benchmarks. -![](figures/benchmarks_ols.svg) -![](figures/benchmarks_poisson.svg) -![](figures/quantreg_benchmarks.png) +![OLS Benchmarks](figures/benchmarks_ols.svg) +![Poisson Benchmarks](figures/benchmarks_poisson.svg) +![Quantile Regression Benchmarks](figures/quantreg_benchmarks.png) ## Quickstart - ```python import pyfixest as pf @@ -94,26 +115,25 @@ data = pf.get_data() pf.feols("Y ~ X1 | f1 + f2", data=data).summary() ``` - ### - - Estimation: OLS - Dep. var.: Y, Fixed effects: f1+f2 - Inference: CRV1 - Observations: 997 - - | Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% | - |:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:| - | X1 | -0.919 | 0.065 | -14.057 | 0.000 | -1.053 | -0.786 | - --- - RMSE: 1.441 R2: 0.609 R2 Within: 0.2 - +``` +### + +Estimation: OLS +Dep. var.: Y, Fixed effects: f1+f2 +Inference: CRV1 +Observations: 997 + +| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% | +|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:| +| X1 | -0.919 | 0.065 | -14.057 | 0.000 | -1.053 | -0.786 | +--- +RMSE: 1.441 R2: 0.609 R2 Within: 0.2 +``` ### Multiple Estimation -You can estimate multiple models at once by using [multiple estimation -syntax](https://aeturrell.github.io/coding-for-economists/econmt-regression.html#multiple-regression-models): - - +You can estimate multiple models at once by using +[multiple estimation syntax](https://aeturrell.github.io/coding-for-economists/econmt-regression.html#multiple-regression-models): ```python # OLS Estimation: estimate multiple models at once @@ -122,6 +142,7 @@ fit = pf.feols("Y + Y2 ~X1 | csw0(f1, f2)", data = data, vcov = {'CRV1':'group_i fit.etable() ``` +``` est1 est2 est3 est4 est5 est6 ------------ ----------------- ----------------- ----------------- ----------------- ----------------- ----------------- depvar Y Y2 Y Y2 Y Y2 @@ -139,20 +160,18 @@ fit.etable() Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001 Format of coefficient cell: Coefficient (Std. Error) - - - +``` ### Adjust Standard Errors "on-the-fly" Standard Errors can be adjusted after estimation, "on-the-fly": - ```python fit1 = fit.fetch_model(0) fit1.vcov("hetero").summary() ``` +``` Model: Y~X1 ### @@ -167,18 +186,18 @@ fit1.vcov("hetero").summary() | X1 | -1.000 | 0.082 | -12.134 | 0.000 | -1.162 | -0.838 | --- RMSE: 2.158 R2: 0.123 - +``` ### Poisson Regression via `fepois()` You can estimate Poisson Regressions via the `fepois()` function: - ```python poisson_data = pf.get_data(model = "Fepois") pf.fepois("Y ~ X1 + X2 | f1 + f2", data = poisson_data).summary() ``` +``` ### Estimation: Poisson @@ -192,23 +211,22 @@ pf.fepois("Y ~ X1 + X2 | f1 + f2", data = poisson_data).summary() | X2 | -0.015 | 0.010 | -1.449 | 0.147 | -0.035 | 0.005 | --- Deviance: 1068.169 - +``` ### IV Estimation via three-part formulas -Last, `PyFixest` also supports IV estimation via three part formula -syntax: - +Last, `PyFixest` also supports IV estimation via three part formula syntax: ```python fit_iv = pf.feols("Y ~ 1 | f1 | X1 ~ Z1", data = data) fit_iv.summary() ``` +``` ### Estimation: IV - Dep. var.: Y, Fixed effects: f1 + Dep.var.: Y, Fixed effects: f1 Inference: CRV1 Observations: 997 @@ -216,6 +234,7 @@ fit_iv.summary() |:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:| | X1 | -1.025 | 0.115 | -8.930 | 0.000 | -1.259 | -0.790 | --- +``` ## Quantile Regression via `pf.quantreg` @@ -223,15 +242,18 @@ fit_iv.summary() fit_qr = pf.quantreg("Y ~ X1 + X2", data = data, quantile = 0.5) ``` - ## Call for Contributions Thanks for showing interest in contributing to `pyfixest`! We appreciate all -contributions and constructive feedback, whether that be reporting bugs, requesting -new features, or suggesting improvements to documentation. +contributions and constructive feedback, whether that be reporting bugs, +requesting new features, or suggesting improvements to documentation. -If you'd like to get involved, but are not yet sure how, please feel free to send us an [email](alexander-fischer1801@t-online.de). Some familiarity with -either Python or econometrics will help, but you really don't need to be a `numpy` core developer or have published in [Econometrica](https://onlinelibrary.wiley.com/journal/14680262) =) We'd be more than happy to invest time to help you get started! +If you'd like to get involved, but are not yet sure how, please feel free to +send us an [email](alexander-fischer1801@t-online.de). Some familiarity with +either Python or econometrics will help, but you really don't need to be a +`numpy` core developer or have published in +[Econometrica](https://onlinelibrary.wiley.com/journal/14680262) =) We'd be more +than happy to invest time to help you get started! ## Contributors ✨ @@ -259,14 +281,15 @@ Thanks goes to these wonderful people: -This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome! +This project follows the +[all-contributors](https://github.com/all-contributors/all-contributors) +specification. Contributions of any kind welcome! ## Acknowledgements We thank all institutions that have funded or supported work on PyFixest! - - +![AAI Institute Logo](../figures/aai-institute-logo.svg) ## How to Cite diff --git a/pyfixest/estimation/feiv_.py b/pyfixest/estimation/feiv_.py index 29d604a24..262bf4267 100644 --- a/pyfixest/estimation/feiv_.py +++ b/pyfixest/estimation/feiv_.py @@ -18,40 +18,58 @@ class Feiv(Feols): Non user-facing class to estimate an IV model using a 2SLS estimator. Inherits from the Feols class. Users should not directly instantiate this class, - but rather use the [feols()](/reference/estimation.feols.qmd) function. Note that + but rather use the [feols()](estimation.feols) function. Note that no demeaning is performed in this class: demeaning is performed in the - [FixestMulti](/reference/estimation.fixest_multi.qmd) class (to allow for caching + [FixestMulti](estimation.fixest_multi) class (to allow for caching of demeaned variables for multiple estimation). Parameters ---------- - Y : np.ndarray - Dependent variable, a two-dimensional np.array. - X : np.ndarray - Independent variables, a two-dimensional np.array. - endgvar : np.ndarray - Endogenous Indenpendent variables, a two-dimensional np.array. - Z : np.ndarray - Instruments, a two-dimensional np.array. - weights : np.ndarray - Weights, a one-dimensional np.array. - coefnames_x : list - Names of the coefficients of X. - coefnames_z : list - Names of the coefficients of Z. + FixestFormula : FixestFormula + A formula object describing the model to be estimated. + data : pd.DataFrame + The dataframe containing the data. + ssc_dict : dict[str, Union[str, bool]] + A dictionary specifying the small sample correction to use. + drop_singletons : bool + Whether to drop singleton fixed effects. + drop_intercept : bool + Whether to drop the intercept. + weights : Optional[str] + The name of the weights column. + weights_type : Optional[str] + The type of weights to use. collin_tol : float - Tolerance for collinearity check. + The tolerance for collinearity detection. + fixef_tol : float + The tolerance for the fixed effects algorithm. + fixef_maxiter : int + The maximum number of iterations for the fixed effects algorithm. + lookup_demeaned_data : dict[str, pd.DataFrame] + A dictionary of demeaned data. solver: Literal["np.linalg.lstsq", "np.linalg.solve", "scipy.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"], default is "scipy.linalg.solve". Solver to use for the estimation. demeaner_backend: DemeanerBackendOptions, optional The backend to use for demeaning. Can be either "numba", "jax", or "rust". Defaults to "numba". - weights_name : Optional[str] - Name of the weights variable. - weights_type : Optional[str] - Type of the weights variable. Either "aweights" for analytic weights - or "fweights" for frequency weights. + store_data : bool, optional + Whether to store the data in the model object. Defaults to True. + copy_data : bool, optional + Whether to copy the data before estimation. Defaults to True. + lean : bool, optional + Whether to use a lean estimation, which stores less data in the model object. + Defaults to False. + context : int or Mapping[str, Any] + A dictionary containing additional context variables to be used by + formulaic during the creation of the model matrix. This can include + custom factorization functions, transformations, or any other + variables that need to be available in the formula environment. + sample_split_var : Optional[str], optional + The name of the variable to use for sample splitting. Defaults to None. + sample_split_value : Optional[Union[str, int]], optional + The value of the sample splitting variable to use for the current model. + Defaults to None. Attributes ---------- diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py index 503d0887c..4fc785d1a 100644 --- a/pyfixest/estimation/feols_.py +++ b/pyfixest/estimation/feols_.py @@ -68,37 +68,59 @@ class Feols: Non user-facing class to estimate a linear regression via OLS. Users should not directly instantiate this class, - but rather use the [feols()](/reference/estimation.feols.qmd) function. Note that + but rather use the [feols()](estimation.feols) function. Note that no demeaning is performed in this class: demeaning is performed in the - [FixestMulti](/reference/estimation.fixest_multi.qmd) class (to allow for caching + [FixestMulti](estimation.fixest_multi) class (to allow for caching of demeaned variables for multiple estimation). Parameters ---------- - Y : np.ndarray - Dependent variable, a two-dimensional numpy array. - X : np.ndarray - Independent variables, a two-dimensional numpy array. - weights : np.ndarray - Weights, a one-dimensional numpy array. - collin_tol : float - Tolerance level for collinearity checks. - coefnames : list[str] - Names of the coefficients (of the design matrix X). - weights_name : Optional[str] - Name of the weights variable. + FixestFormula : FixestFormula + A formula object describing the model to be estimated. + data : pd.DataFrame + The dataframe containing the data. + ssc_dict : dict[str, Union[str, bool]] + A dictionary specifying the small sample correction to use. + drop_singletons : bool + Whether to drop singleton fixed effects. + drop_intercept : bool + Whether to drop the intercept. + weights : Optional[str] + The name of the weights column. weights_type : Optional[str] - Type of the weights variable. Either "aweights" for analytic weights or - "fweights" for frequency weights. - solver : str, optional. + The type of weights to use. + collin_tol : float + The tolerance for collinearity detection. + fixef_tol : float + The tolerance for the fixed effects algorithm. + fixef_maxiter : int + The maximum number of iterations for the fixed effects algorithm. + lookup_demeaned_data : dict[str, pd.DataFrame] + A dictionary of demeaned data. + solver : str, optional The solver to use for the regression. Can be "np.linalg.lstsq", "np.linalg.solve", "scipy.linalg.solve", "scipy.sparse.linalg.lsqr" and "jax". Defaults to "scipy.linalg.solve". + demeaner_backend : str, optional + The backend to use for demeaning. Can be "numba" or "jax". + Defaults to "numba". + store_data : bool, optional + Whether to store the data in the model object. Defaults to True. + copy_data : bool, optional + Whether to copy the data before estimation. Defaults to True. + lean : bool, optional + Whether to use a lean estimation, which stores less data in the model object. + Defaults to False. context : int or Mapping[str, Any] A dictionary containing additional context variables to be used by formulaic during the creation of the model matrix. This can include custom factorization functions, transformations, or any other variables that need to be available in the formula environment. + sample_split_var : Optional[str], optional + The name of the variable to use for sample splitting. Defaults to None. + sample_split_value : Optional[Union[str, int, float]], optional + The value of the sample splitting variable to use for the current model. + Defaults to None. Attributes ---------- @@ -535,11 +557,9 @@ def _get_predictors(self) -> None: def get_fit(self) -> None: """ - Fit an OLS model. + Fit the regression model. - Returns - ------- - None + This method estimates the regression coefficients and computes the residuals. """ if self._X_is_empty: self._u_hat = self._Y @@ -589,7 +609,7 @@ def vcov( Returns ------- Feols - An instance of class [Feols(/reference/Feols.qmd) with updated inference. + An instance of class Feols with updated inference. """ # Assuming `data` is the DataFrame in question @@ -1088,10 +1108,10 @@ def wald_test(self, R=None, q=None, distribution="F"): Parameters ---------- - R : array-like, optional + R : np.ndarray, optional The matrix R of the linear hypothesis. If None, defaults to an identity matrix. - q : array-like, optional + q : np.ndarray, optional The vector q of the linear hypothesis. If None, defaults to a vector of zeros. distribution : str, optional