Move tox.ini and update-lockfile logic to Makefile
- Remove deprecated API_KEY_EIA envvar
- Add .gitignore in new environments directory
- Create Makefile and migrate tox.ini logic into it
- Replace spaces in Makefile with tabs
- Install pudl via pip install --no-deps to avoid contaminating
  the locked environment
- Move pip install and coverage XML logic from GHA workflow into
  the Makefile
- Increase the minimum versions of some dependencies.
- Move update-lockfile GHA workflow logic into Makefile
- Attempt to run slow tests in parallel, backgrounding them and using "wait"
  to prevent premature exit of the shell (see the sketch after this list)
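
The parallel-test pattern referenced in the last bullet backgrounds each slow
pytest invocation with "&" and then calls "wait", which blocks until every
background job has exited, so the shell running the recipe cannot return
before the tests finish. A minimal sketch of the pattern (the test paths are
illustrative):

    # Launch two slow test suites concurrently in the background.
    pytest test/integration &
    pytest test/validate &
    # Block until both background jobs have finished.
    wait
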
zaneselvans committed Oct 27, 2023
1 parent 3b39a36 commit 9767c39
Showing 19 changed files with 8,923 additions and 3,577 deletions.
30 changes: 13 additions & 17 deletions .github/workflows/tox-pytest.yml
@@ -10,19 +10,15 @@ on:
- ready_for_review

env:
-  PUDL_OUTPUT: /home/runner/pudl-work/output
-  PUDL_INPUT: /home/runner/pudl-work/data/
+  PUDL_OUTPUT: /home/runner/pudl-work/output/
+  PUDL_INPUT: /home/runner/pudl-work/input/
DAGSTER_HOME: /home/runner/pudl-work/dagster_home/

jobs:
-  ci-static:
+  ci-docs:
runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        tox-env:
-          - linters
-          - docs
defaults:
run:
shell: bash -l {0}
@@ -32,7 +28,7 @@ jobs:
with:
fetch-depth: 2

-      - name: Install Conda environment using mamba
+      - name: Install conda-lock environment with micromamba
uses: mamba-org/setup-micromamba@v1
with:
environment-file: environments/conda-lock.yml
@@ -48,13 +44,12 @@
conda config --show
printenv | sort
-      - name: Build ${{ matrix.tox-env }} with Tox
+      - name: Lint and build PUDL documentation with Sphinx
run: |
-          tox -e ${{ matrix.tox-env }}
+          make github-docs-build
- name: Upload coverage
uses: actions/upload-artifact@v3
-        if: ${{ matrix.tox-env == 'docs' }}
with:
name: coverage-docs
path: coverage.xml
@@ -72,7 +67,7 @@ jobs:
with:
fetch-depth: 2

-      - name: Install Conda environment using mamba
+      - name: Install conda-lock environment with micromamba
uses: mamba-org/setup-micromamba@v1
with:
environment-file: environments/conda-lock.yml
@@ -93,9 +88,9 @@
which sqlite3
sqlite3 --version
-      - name: Run unit tests with Tox
+      - name: Run PUDL unit tests and collect test coverage
run: |
-          tox -e unit -- --durations 0
+          make github-pytest-unit
- name: Upload coverage
uses: actions/upload-artifact@v3
@@ -171,7 +166,7 @@ jobs:

- name: Run integration tests, trying to use GCS cache if possible
run: |
-          tox -e integration -- --gcs-cache-path=gs://zenodo-cache.catalyst.coop --durations 0
+          make github-pytest-integration
- name: Upload coverage
uses: actions/upload-artifact@v3
@@ -185,13 +180,13 @@
ci-coverage:
runs-on: ubuntu-latest
needs:
+      - ci-docs
- ci-unit
- ci-integration
-      - ci-static
steps:
- uses: actions/checkout@v4
- name: Download coverage
-        id: download-unit
+        id: download-coverage
uses: actions/download-artifact@v3
with:
path: coverage
@@ -207,6 +202,7 @@
runs-on: ubuntu-latest
if: ${{ always() }}
needs:
+      - ci-docs
- ci-unit
- ci-integration
steps:
22 changes: 2 additions & 20 deletions .github/workflows/update-lockfile.yml
@@ -33,26 +33,8 @@ jobs:
- name: Run conda-lock to recreate lockfile from scratch
run: |
cd environments
-          rm conda-lock.yml
-          conda-lock \
-            --micromamba \
-            --file=../pyproject.toml \
-            --lockfile=conda-lock.yml
-          conda-lock render \
-            --kind explicit \
-            --kind env \
-            --dev-dependencies \
-            --extras docs \
-            --extras datasette \
-            conda-lock.yml
-          conda-lock render \
-            --kind env \
-            --extras docs \
-            --platform linux-64 \
-            --filename-template "readthedocs-{platform}.conda.lock" \
-            conda-lock.yml
-          cd ..
+          make conda-lock
- name: Open a pull request
uses: peter-evans/create-pull-request@v5
with:
2 changes: 1 addition & 1 deletion .readthedocs.yaml
@@ -14,7 +14,7 @@ build:

# Define the python environment using conda / mamba
conda:
-  environment: environments/readthedocs-linux-64.conda.lock.yml
+  environment: environments/conda-linux-64.lock.yml

# Build documentation in the docs/ directory with Sphinx
sphinx:
165 changes: 165 additions & 0 deletions Makefile
@@ -0,0 +1,165 @@
covargs := --append --source=src/pudl
gcs_cache_path := --gcs-cache-path=gs://zenodo-cache.catalyst.coop
pytest_covargs := --cov-append --cov=src/pudl --cov-report=xml
pytest_args := --durations 0 ${pytest_covargs} ${gcs_cache_path}
coverage_erase := coverage erase
coverage_report := coverage report --sort=cover
etl_fast_yml := src/pudl/package_data/settings/etl_fast.yml
etl_full_yml := src/pudl/package_data/settings/etl_full.yml
pip_install_pudl := pip install --no-deps --editable ./
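
All of the variables above are simply-expanded Make variables (assigned with
":="), so they are evaluated once when the Makefile is parsed and then spliced
into the recipes below via "${covargs}", "${pytest_args}", and so on.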

########################################################################################
# Conda lockfile generation
########################################################################################

ifdef GITHUB_ACTION
mamba := micromamba
else
mamba := mamba
endif
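
Since "mamba" here is an ordinary Make variable, the CI/local switch above can
also be bypassed from the command line (a command-line assignment overrides
assignments made inside the Makefile), e.g.:

    make conda-lock mamba=micromamba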

# Regenerate the conda lockfile and render platform-specific conda environments.
conda-lock:
rm -f environments/conda-lock.yml
conda-lock \
--${mamba} \
--file=pyproject.toml \
--lockfile=environments/conda-lock.yml
(cd environments && conda-lock render \
--kind env \
--dev-dependencies \
--extras docs \
--extras datasette \
conda-lock.yml)
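
With this target defined, regenerating the lockfile and the rendered
environment files locally becomes a single invocation from the repository
root:

    make conda-lock

It deletes the old environments/conda-lock.yml, re-solves the dependencies
declared in pyproject.toml, and renders the per-platform environment files
inside environments/.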

########################################################################################
# Build documentation (for local use)
########################################################################################

docs-clean:
rm -rf docs/_build

docs-build: docs-clean
doc8 docs/ README.rst
sphinx-build -W -b html docs docs/_build/html

########################################################################################
# Generic pytest commands for local use, without test coverage
########################################################################################

pytest-unit:
pytest --doctest-modules src/pudl test/unit

pytest-integration:
pytest test/integration

pytest-validate:
pytest --live-dbs test/validate
pudl_check_fks

########################################################################################
# More complex pytest commands for local use that collect test coverage
########################################################################################

# Run unit & integration tests on 1-2 years of data and collect test coverage data.
local-pytest-ci: docs-clean
${coverage_erase}
doc8 docs/ README.rst
coverage run ${covargs} -- ${CONDA_PREFIX}/bin/sphinx-build -W -b html docs docs/_build/html
pytest ${pytest_args} --doctest-modules src/pudl test/unit
pytest ${pytest_args} --etl-settings ${etl_fast_yml} test/integration
${coverage_report}

# Run unit & integration tests on ALL years of data and collect test coverage data.
# NOTE: This will take 1+ hours to run and the PUDL DB will not be retained.
local-pytest-ci-all-years: docs-clean
${coverage_erase}
doc8 docs/ README.rst
coverage run ${covargs} -- ${CONDA_PREFIX}/bin/sphinx-build -W -b html docs docs/_build/html
pytest ${pytest_args} --doctest-modules src/pudl test/unit
pytest ${pytest_args} --etl-settings ${etl_full_yml} test/integration
${coverage_report}

# Run the full ETL, generating new FERC & PUDL SQLite DBs and EPA CEMS Parquet files.
# Then run the full integration tests and data validations on all years of data.
# NOTE: This will clobber your existing databases and takes hours to run!!!
# Backgrounding the data validation and integration tests and using wait allows them to
# run in parallel.
nuke: docs-clean
${coverage_erase}
doc8 docs/ README.rst
coverage run ${covargs} -- ${CONDA_PREFIX}/bin/sphinx-build -W -b html docs docs/_build/html
pytest ${pytest_args} --doctest-modules src/pudl test/unit
pytest ${pytest_args} \
--etl-settings ${etl_fast_yml} \
test/integration
rm -f tox-nuke.log
coverage run ${covargs} -- \
src/pudl/convert/ferc_to_sqlite.py \
--logfile tox-nuke.log \
--clobber \
${gcs_cache_path} \
${etl_full_yml}
coverage run ${covargs} -- \
src/pudl/cli/etl.py \
--logfile tox-nuke.log \
--clobber \
${gcs_cache_path} \
${etl_full_yml}
pudl_check_fks
pytest ${pytest_args} --live-dbs --etl-settings ${etl_full_yml} test/integration & \
pytest ${pytest_args} --live-dbs test/validate & \
wait
${coverage_report}

########################################################################################
# Some miscellaneous test cases
########################################################################################

# Check that designated Jupyter notebooks can be run against the current DB
pytest-jupyter:
pytest --live-dbs test/integration/jupyter_notebooks_test.py

# Compare actual and expected number of rows in many tables:
pytest-minmax-rows:
pytest --live-dbs \
test/validate/epacamd_eia_test.py::test_minmax_rows \
test/validate/ferc1_test.py::test_minmax_rows \
test/validate/eia_test.py::test_minmax_rows \
test/validate/mcoe_test.py::test_minmax_rows_mcoe

# Build the FERC 1 and PUDL DBs, ignoring foreign key constraints.
# Identify any plant or utility IDs in the DBs that haven't yet been mapped
# NOTE: This probably needs to be turned into a script of some kind, not a test.
# In particular, it should build these DBs without checking FK constraints, in a
# location where they aren't going to clobber existing user DBs.
unmapped-ids:
pytest \
--save-unmapped-ids \
--ignore-foreign-key-constraints \
--etl-settings ${etl_full_yml} \
test/integration/glue_test.py

########################################################################################
# The github- prefixed targets are meant to be run by GitHub Actions
########################################################################################

github-docs-build: docs-clean
${pip_install_pudl}
${coverage_erase}
doc8 docs/ README.rst
coverage run ${covargs} -- ${CONDA_PREFIX}/bin/sphinx-build -W -b html docs docs/_build/html
${coverage_report}
coverage xml

github-pytest-unit:
${pip_install_pudl}
${coverage_erase}
pytest ${pytest_args} --doctest-modules src/pudl test/unit
${coverage_report}

github-pytest-integration:
${pip_install_pudl}
${coverage_erase}
pytest ${pytest_args} test/integration
${coverage_report}
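
With the github- prefixed targets above, each CI step in the tox-pytest.yml
diff earlier in this commit reduces to a single shell command, for example:

    make github-docs-build
    make github-pytest-unit
    make github-pytest-integration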
1 change: 1 addition & 0 deletions devtools/.gitignore
@@ -0,0 +1 @@
user-environment.yml
1 change: 0 additions & 1 deletion docker/docker-compose.yml
@@ -10,7 +10,6 @@ services:
pudl-etl:
platform: linux/x86_64 # Need to specify x86 because M1 chips default to ARM: https://stackoverflow.com/questions/68630526/lib64-ld-linux-x86-64-so-2-no-such-file-or-directory-error
environment:
-      - API_KEY_EIA
- GCP_BILLING_PROJECT
env_file:
- .env
24 changes: 15 additions & 9 deletions docker/gcp_pudl_etl.sh
@@ -20,23 +20,29 @@ function authenticate_gcp() {

function run_pudl_etl() {
send_slack_msg ":large_yellow_circle: Deployment started for $ACTION_SHA-$GITHUB_REF :floppy_disk:"
-    authenticate_gcp \
-        && alembic upgrade head \
-        && pudl_setup \
-        && ferc_to_sqlite \
+    authenticate_gcp && \
+        alembic upgrade head && \
+        pudl_setup && \
+        ferc_to_sqlite \
--loglevel=DEBUG \
--gcs-cache-path=gs://internal-zenodo-cache.catalyst.coop \
--workers=8 \
-        $PUDL_SETTINGS_YML \
-        && pudl_etl \
+        $PUDL_SETTINGS_YML && \
+        pudl_etl \
--loglevel DEBUG \
--max-concurrent 6 \
--gcs-cache-path gs://internal-zenodo-cache.catalyst.coop \
-        $PUDL_SETTINGS_YML \
-        && pytest \
+        $PUDL_SETTINGS_YML && \
+    # Run multiple pytest processes in the background and wait for them to exit
+    pytest \
--gcs-cache-path=gs://internal-zenodo-cache.catalyst.coop \
--etl-settings=$PUDL_SETTINGS_YML \
-        --live-dbs test
+        --live-dbs test/integration test/unit & \
+    pytest \
+        --gcs-cache-path=gs://internal-zenodo-cache.catalyst.coop \
+        --etl-settings=$PUDL_SETTINGS_YML \
+        --live-dbs test/validate & \
+    wait
}
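
One caveat with the backgrounded pytest runs above: a bare "wait" returns zero
once all children have exited, regardless of their exit codes, so a failing
test suite will not by itself make run_pudl_etl return nonzero. A sketch of a
stricter variant (an alternative, not what the script currently does) that
captures each PID and propagates failures:

    pytest --live-dbs test/integration test/unit & pid1=$!
    pytest --live-dbs test/validate & pid2=$!
    wait "$pid1"; status1=$?   # "wait PID" returns that job's exit status
    wait "$pid2"; status2=$?
    [ "$status1" -eq 0 ] && [ "$status2" -eq 0 ]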

function shutdown_vm() {