py-econometrics · s3alfisc · Sep 27, 2025 · Sep 28, 2025 · Sep 28, 2025 · Sep 28, 2025
diff --git a/.gitignore b/.gitignore
@@ -39,3 +39,4 @@ target
 .idea/
 coverage.xml
 .DS_Store
+tests/refactor/data/cached_results/
diff --git a/pixi.lock b/pixi.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -10,18 +10,18 @@ authors = [
 readme = "README.md"
 license = { text = "MIT" }
 
-dependencies = [
-  "scipy>=1.6,<1.16",
-  "formulaic>=1.1.0",
-  "pandas>=1.1.0",
-  "numba>=0.58.0",
-  "seaborn>=0.13.2",
-  "tabulate>=0.9.0",
-  "tqdm>=4.0.0",
-  "great-tables>=0.10.0",
-  "numpy>=1.25.2",
-  "narwhals>=1.13.3",
-  "joblib>=1.4.2,<2",]
+[tool.pixi.dependencies]
+scipy = ">=1.6,<1.16"
+formulaic = ">=1.1.0"
+pandas = ">=1.1.0, <=2.2.0"
+numba = ">=0.58.0, <=0.61.0"
+seaborn = ">=0.13.2"
+tabulate = ">=0.9.0"
+tqdm = ">=4.0.0"
+great_tables = ">=0.15.0"
+numpy = ">=1.25.2"
+narwhals = ">=1.13.3"
+joblib = ">=1.4.2,<2"
 
 [tool.pixi.feature.dev.dependencies]
 rpy2 = "==3.5.11"
@@ -47,7 +47,8 @@ dev = [
   "pyarrow>=14.0",
   "jax>=0.4.15",
   "jaxlib>=0.4.15",
-  "pytest-benchmark>=5.1.0,<6"
+  "pytest-benchmark>=5.1.0,<6",
+  "pytest-sugar>=1.0.0"
 ]
 
 lint = [
@@ -121,6 +122,22 @@ update-test-data = "Rscript tests/r_test_comparisons.R"
 install-r-extended = "Rscript r_test_requirements.R"
 render-notebooks = "python scripts/run_notebooks.py"
 
+# Unified cached test framework tasks (recommended)
+test-cache-all-methods = "cd tests && python refactor/cache_manager.py list"
+test-cache-all-results = "cd tests && python refactor/cache_manager.py generate --n-jobs -1"
+test-cache-all-summary = "cd tests && python refactor/cache_manager.py summary"
+test-cache-all-clear = "cd tests && python refactor/cache_manager.py clear"
+test-run-all-cached = "pytest -rs tests/refactor/tests/ -n 6 --cov=pyfixest --cov-report=xml"
+test-cached-workflow-all = { depends-on = ["test-cache-all-results", "test-run-all-cached"] }
+
+# FEOLS-specific tasks (legacy, use unified tasks above)
+test-generate-params = "cd tests && python -c \"from refactor.config.feols.test_generator import generate_feols_test_cases; cases = generate_feols_test_cases(); print(f'Generated {len(cases)} test cases')\""
+test-cache-r-results = "cd tests && python refactor/manage_cache.py generate --n-jobs -1"
+test-run-cached = "pytest -rs tests/refactor/tests/test_feols_cached.py -n 4 --cov=pyfixest --cov-report=xml"
+test-cache-summary = "cd tests && python refactor/manage_cache.py summary"
+test-cache-clear = "cd tests && python refactor/manage_cache.py clear"
+test-cached-workflow = { depends-on = ["test-generate-params", "test-cache-r-results", "test-run-cached"] }
+
 [tool.pixi.feature.lint.tasks]
 pre-commit = "pre-commit run --all-files"
 

diff --git a/tests/.coverage b/tests/.coverage
diff --git a/tests/README_FEOLS_MIGRATION.md b/tests/README_FEOLS_MIGRATION.md
@@ -0,0 +1,174 @@
+# FEOLS Test Migration Framework
+
+This document explains the new cached testing framework for migrating `test_single_fit_feols` to use cached R results.
+
+## Overview
+
+The framework consists of:
+
+1. **Abstract Base Classes** (`config/test_cases.py`) - Define test case structure
+2. **FEOLS Test Cases** (`config/feols_tests.py`) - `TestSingleFitFeols` implementation
+3. **Test Generator** (`config/feols_test_generator.py`) - Generates all parameter combinations
+4. **R Cache Runner** (`r_cache/r_test_runner.py`) - Runs R tests and caches results
+5. **Cached Tests** (`test_feols_cached.py`) - New parametrized tests using cached R results
+
+## Key Benefits
+
+- **Speed**: R tests run once, Python tests compare against cached results
+- **Reproducibility**: Hash-based caching ensures consistency
+- **Maintainability**: Clear separation between test configuration and execution
+- **Type Safety**: Abstract base classes with validation
+- **Human Readable**: JSON cache files can be inspected and debugged easily
+- **Language Agnostic**: R writes JSON directly, no Python dependency
+
+## Usage
+
+### Using Pixi Tasks (Recommended)
+
+```bash
+# 1. Generate test hyperparameters (shows how many test cases will be created)
+pixi run test-generate-params
+
+# 2. Run R scripts and cache results (time-intensive, run once)
+pixi run test-cache-r-results
+
+# 3. Run fast Python tests against cached results
+pixi run test-run-cached
+
+# 4. Complete workflow (all three steps above)
+pixi run test-cached-workflow
+
+# 5. Management tasks
+pixi run test-cache-summary    # Show cache status
+pixi run test-cache-clear      # Clear all cached results
+```
+
+### Manual Usage (Alternative)
+
+```bash
+# Generate all R results and cache them
+cd tests
+python refactor/manage_cache.py generate
+
+# Check cache status
+python refactor/manage_cache.py summary
+
+# Run all cached FEOLS tests
+pytest refactor/tests/test_feols_cached.py -v
+
+# Run specific test cases
+pytest refactor/tests/test_feols_cached.py::test_feols_vs_cached_r -k "feols_00001"
+
+# Clear cache
+python refactor/manage_cache.py clear
+
+# Force refresh cache
+python refactor/manage_cache.py generate --force
+```
+
+## Framework Structure
+
+### Test Case Definition
+
+```python
+# Example test case for test_single_fit_feols migration
+test_case = TestSingleFitFeols(
+    test_id="feols_001",
+    formula="Y~X1+X2",
+    inference="hetero",
+    weights="weights",
+    dropna=True,
+    f3_type="categorical",
+    demeaner_backend="numba"
+)
+```
+
+### Naming Convention
+
+Test case classes follow the pattern `Test{OriginalFunctionName}`:
+- `TestSingleFitFeols` → migrates `test_single_fit_feols`
+- `TestSingleFitFepois` → will migrate `test_single_fit_fepois`
+- `TestSingleFitIv` → will migrate `test_single_fit_iv`
+- etc.
+
+This naming helps track which original test function each test case class replaces.
+
+### Parameter Combinations
+
+The framework generates all parameter combinations from the original `test_single_fit_feols`:
+
+- **Formulas**: All OLS formulas + OLS-but-not-Poisson formulas
+- **Inference**: "iid", "hetero", {"CRV1": "group_id"}
+- **Weights**: None, "weights"
+- **Dropna**: False, True
+- **F3 Types**: "str", "object", "int", "categorical", "float"
+- **Backends**: "numba", "jax", "rust"
+
+### Validation
+
+Each test case validates its parameters:
+- JAX/Rust backends only support string f3_type
+- Cluster variables must exist in data
+- All numeric parameters must be positive
+
+## Migration Path
+
+1. **Phase 1** (Current): Framework setup with FEOLS tests
+2. **Phase 2**: Extend to FEPOIS, FEIV, DID, etc.
+3. **Phase 3**: Gradually replace original tests
+4. **Phase 4**: Remove old test infrastructure
+
+## Files Created
+
+```
+tests/
+├── config/
+│   ├── __init__.py
+│   ├── test_cases.py           # Abstract base classes
+│   ├── test_registry.py        # Test registry
+│   ├── feols_tests.py          # TestSingleFitFeols class
+│   └── feols_test_generator.py # Parameter combination generator
+├── r_cache/
+│   ├── __init__.py
+│   └── r_test_runner.py        # R test execution and caching
+├── test_feols_cached.py        # New cached tests
+├── manage_feols_cache.py       # Cache management script
+└── README_FEOLS_MIGRATION.md   # This file
+```
+
+## Example Workflow
+
+```python
+# 1. Generate test cases for test_single_fit_feols migration
+from refactor.config.feols.test_generator import generate_feols_test_cases
+test_cases = generate_feols_test_cases()
+print(f"Generated {len(test_cases)} TestSingleFitFeols test cases")
+
+# 2. Run R tests and cache (one-time)
+from r_cache.r_test_runner import FeolsRTestRunner
+runner = FeolsRTestRunner()
+results = runner.run_all_tests(test_cases)
+
+# 3. Run Python tests against cache (fast) -- this is a shell command, not Python;
+#    run it from the tests/ directory:
+#    pytest refactor/tests/test_feols_cached.py
+```
+
+## Pixi Tasks Reference
+
+| Task | Description | Usage |
+|------|-------------|-------|
+| `test-generate-params` | Generate and count test hyperparameters | Shows how many test cases will be created |
+| `test-cache-r-results` | Run R scripts and cache all results | Time-intensive, run once or when parameters change |
+| `test-run-cached` | Run Python tests against cached R results | Fast execution, main testing command |
+| `test-cached-workflow` | Complete workflow (all three steps) | One-command setup for new environments |
+| `test-cache-summary` | Show cache status and statistics | Check what's cached and when |
+| `test-cache-clear` | Clear all cached R results | Clean slate for regeneration |
+
+## Next Steps
+
+1. Check how many test cases the generator produces: `pixi run test-generate-params`
+2. Generate initial cache: `pixi run test-cache-r-results`
+3. Run fast tests: `pixi run test-run-cached`
+4. Extend to other test types (FEPOIS, FEIV, etc.)
+5. Add more sophisticated caching strategies
+6. Integrate with CI/CD pipeline
diff --git a/tests/data/cached_results/feols_cache_summary.json b/tests/data/cached_results/feols_cache_summary.json
@@ -0,0 +1,7 @@
+{
+  "total_tests": 14,
+  "successful_tests": 14,
+  "failed_tests": 0,
+  "test_group": "feols",
+  "last_updated": "2025-09-28T14:37:08.595834"
+}
diff --git a/tests/refactor/README.md b/tests/refactor/README.md
@@ -0,0 +1,67 @@
+# Pyfixest Test Refactoring Framework
+
+This directory contains the refactored test framework for comparing pyfixest against R's fixest package.
+
+## Structure
+
+- `config/` - Test parameter definitions and generators
+  - `base/` - Shared base classes and utilities
+  - `feols/` - FEOLS-specific test configurations
+- `r_cache/` - R test execution and caching
+  - `base/` - Shared R execution utilities
+  - `feols/` - FEOLS-specific R scripts and runners
+- `tests/` - Python test files that compare against cached R results
+- `data/cached_results/` - Cached R test results organized by method
+- `cache_manager.py` - Unified cache management CLI (`manage_cache.py` is the legacy FEOLS-only script)
+
+## Migration Status
+
+- ✅ `test_single_fit_feols` - Completed
+- ✅ `test_single_fit_fepois` - Completed
+- ✅ `test_single_fit_iv` - Completed
+
+## Usage
+
+### Unified Commands (Recommended)
+
+```bash
+# List all available test methods
+pixi run test-cache-all-methods
+
+# Generate cached R results for all implemented methods
+pixi run test-cache-all-results
+
+# Run all cached tests
+pixi run test-run-all-cached
+
+# Show cache summary for all methods
+pixi run test-cache-all-summary
+
+# Complete workflow (generate + test)
+pixi run test-cached-workflow-all
+```
+
+### Method-Specific Commands
+
+```bash
+# Generate cache for specific method only
+cd tests && python refactor/cache_manager.py generate feols
+
+# Show summary for specific method
+cd tests && python refactor/cache_manager.py summary feols
+
+# Clear cache for specific method
+cd tests && python refactor/cache_manager.py clear feols
+```
+
+## Development
+
+```bash
+# Work on a specific method (FEOLS-only legacy tasks)
+pixi run test-cache-r-results
+pixi run test-run-cached
+
+# Cache management
+pixi run test-cache-all-summary
+pixi run test-cache-all-clear
+```
diff --git a/tests/refactor/__init__.py b/tests/refactor/__init__.py
@@ -0,0 +1 @@
+# Test refactoring framework for pyfixest