diff --git a/.github/workflows/bot-auto-merge.yml b/.github/workflows/bot-auto-merge.yml
index 5cc0e11880..56f5fbb6eb 100644
--- a/.github/workflows/bot-auto-merge.yml
+++ b/.github/workflows/bot-auto-merge.yml
@@ -4,25 +4,31 @@ name: bot-auto-merge
 on:
   workflow_run:
     types: [completed]
-    workflows: [tox-pytest]
+    workflows: ["tox-pytest"]
 
 jobs:
   bot-auto-merge:
     name: Auto-merge passing bot PRs
     runs-on: ubuntu-latest
     steps:
+      - name: Impersonate auto merge PR bot
+        uses: tibdex/github-app-token@v1
+        id: generate-token
+        with:
+          app_id: ${{ secrets.BOT_AUTO_MERGE_PRS_APP_ID }}
+          private_key: ${{ secrets.BOT_AUTO_MERGE_PRS_APP_KEY }}
       - name: Auto-merge passing dependabot PRs
         if: ${{ github.event.workflow_run.conclusion == 'success' }}
         uses: ridedott/merge-me-action@v2
         with:
-          # For clarity only. dependabot is default.
+          # For clarity only. dependabot is default login.
           GITHUB_LOGIN: dependabot
-          GITHUB_TOKEN: ${{ secrets.BOT_AUTO_MERGE_TOKEN }}
+          GITHUB_TOKEN: ${{ steps.generate-token.outputs.token }}
           ENABLED_FOR_MANUAL_CHANGES: "true"
       - name: Auto-merge passing pre-commit-ci PRs
         if: ${{ github.event.workflow_run.conclusion == 'success' }}
         uses: ridedott/merge-me-action@v2
         with:
           GITHUB_LOGIN: pre-commit-ci
-          GITHUB_TOKEN: ${{ secrets.BOT_AUTO_MERGE_TOKEN }}
+          GITHUB_TOKEN: ${{ steps.generate-token.outputs.token }}
           ENABLED_FOR_MANUAL_CHANGES: "true"
diff --git a/README.rst b/README.rst
index 887d366dd7..97cf0b9ccd 100644
--- a/README.rst
+++ b/README.rst
@@ -69,7 +69,7 @@ PUDL currently integrates data from:
 * `EIA Form 861 `__: 2001-2021
 * `EIA Form 923 `__: 2001-2021
 * `EPA Continuous Emissions Monitoring System (CEMS) `__: 1995-2021
-* `FERC Form 1 `__: 1994-2020
+* `FERC Form 1 `__: 1994-2021
 * `FERC Form 714 `__: 2006-2020
 * `US Census Demographic Profile 1 Geodatabase `__: 2010
 
diff --git a/devtools/data-release.sh b/devtools/data-release.sh
new file mode 100755
index 0000000000..83db1086a8
--- /dev/null
+++ b/devtools/data-release.sh
@@ -0,0 +1,84 @@
+#!/bin/sh
+# A script to compile a Dockerized data release based on PUDL nightly build outputs.
+#
+# Usage: data-release.sh PUDL_REF DOCKER_TAG RELEASE_DIR
+
+# All three positional arguments are required. Refuse to run without them: with an
+# empty RELEASE_DIR the cleanup below would expand to "rm -rf /.git*".
+if [ "$#" -lt 3 ] || [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
+    echo "Usage: $0 PUDL_REF DOCKER_TAG RELEASE_DIR" >&2
+    exit 1
+fi
+
+# Positional arguments:
+
+# First command line argument is the PUDL nightly build tag / ref. This indicates what
+# build outputs to use. E.g. "dev" or "v2022.11.30"
+PUDL_REF=$1
+
+# Docker tag to use in the archive, e.g. "latest" or "2022.11.30". Will be used to
+# pull the docker image using catalystcoop/pudl-jupyter:$DOCKER_TAG
+DOCKER_TAG=$2
+
+# Path to a local directory where the archive will be assembled. Should be in a place
+# with at least 20GB of disk space.
+# E.g. "./pudl-v2022.11.30"
+RELEASE_DIR=$3
+
+# Construct the GCS URL:
+GCS_ROOT="gs://intake.catalyst.coop"
+GCS_URL="$GCS_ROOT/$PUDL_REF"
+
+# Construct the Docker image name
+DOCKER_REPO="catalystcoop"
+DOCKER_NAME="pudl-jupyter"
+DOCKER_IMAGE="$DOCKER_REPO/$DOCKER_NAME:$DOCKER_TAG"
+
+echo "Started: $(date)"
+# Start with a clean slate:
+rm -rf "$RELEASE_DIR"
+mkdir -p "$RELEASE_DIR"
+# The release container / environment is based on the pudl-examples repo:
+git clone --depth 1 git@github.com:catalyst-cooperative/pudl-examples.git "$RELEASE_DIR"
+# Only the directory is quoted so that the .git* glob still expands:
+rm -rf "$RELEASE_DIR"/.git*
+# These directories are where the data will go. They're integrated with the
+# Docker container that's defined in the pudl-examples repo:
+mkdir -p "$RELEASE_DIR/pudl_data"
+mkdir -p "$RELEASE_DIR/user_data"
+
+# Make sure we have the specified version of the Docker container:
+docker pull "$DOCKER_IMAGE"
+# Freeze the version of the Docker container:
+sed -e "s/$DOCKER_NAME:latest/$DOCKER_NAME:$DOCKER_TAG/" "$RELEASE_DIR/docker-compose.yml" > "$RELEASE_DIR/new-docker-compose.yml"
+mv "$RELEASE_DIR/new-docker-compose.yml" "$RELEASE_DIR/docker-compose.yml"
+# Set up a skeleton PUDL environment in the release dir:
+pudl_setup "$RELEASE_DIR/pudl_data"
+
+# These are probably outdated now... see if they fail.
+rm -rf "$RELEASE_DIR/pudl_data/environment.yml"
+rm -rf "$RELEASE_DIR/pudl_data/notebook"
+rm -rf "$RELEASE_DIR/pudl_data/settings"
+
+# Copy over all of the pre-processed data
+echo "Copying SQLite databases..."
+mkdir -p "$RELEASE_DIR/pudl_data/sqlite/"
+gsutil -m cp "$GCS_URL/*.sqlite" "$GCS_URL/ferc*_xbrl_*.json" "$RELEASE_DIR/pudl_data/sqlite/"
+
+echo "Copying Parquet datasets..."
+mkdir -p "$RELEASE_DIR/pudl_data/parquet/epacems"
+gsutil -m cp -r "$GCS_URL/hourly_emissions_epacems/*" "$RELEASE_DIR/pudl_data/parquet/epacems"
+
+# Save the Docker image as a tarball so it can be archived with the data:
+echo "Saving Docker image: $DOCKER_IMAGE"
+docker save "$DOCKER_IMAGE" -o "$RELEASE_DIR/pudl-jupyter.tar"
+
+# List the high-level contents of the archive so we can see what it contains:
+echo "Archive contents:"
+find "$RELEASE_DIR" -maxdepth 3
+
+# Create the archive
+echo "Creating the archive tarball..."
+tar -czf "$RELEASE_DIR.tgz" "$RELEASE_DIR"
+
+echo "Finished: $(date)"
diff --git a/devtools/databeta.sh b/devtools/databeta.sh
deleted file mode 100755
index 105c94b112..0000000000
--- a/devtools/databeta.sh
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/bin/sh
-# A script to compile a Dockerized data release based on a user's local PUDL
-# data environment.
-
-# Name of the directory to create the data release archive in
-RELEASE_DIR=pudl-v0.6.0-2022-03-12
-# The PUDL working directory where we'll find the data to archive:
-PUDL_IN=$HOME/code/catalyst/pudl-work
-# Reference to an existing Docker image to pull
-DOCKER_TAG="2022.03.12"
-
-echo "Started:" `date`
-# Start with a clean slate:
-rm -rf $RELEASE_DIR
-mkdir -p $RELEASE_DIR
-# The release container / environment is based on the pudl-examples repo:
-git clone --depth 1 git@github.com:catalyst-cooperative/pudl-examples.git $RELEASE_DIR
-rm -rf $RELEASE_DIR/.git*
-# These directories are where the data will go. They're integrated with the
-# Docker container that's defined in the pudl-examples repo:
-mkdir -p $RELEASE_DIR/pudl_data
-mkdir -p $RELEASE_DIR/user_data
-
-# Make sure we have the specified version of the Docker container:
-docker pull catalystcoop/pudl-jupyter:$DOCKER_TAG
-# Freeze the version of the Docker container:
-cat $RELEASE_DIR/docker-compose.yml | sed -e "s/pudl-jupyter:latest/pudl-jupyter:$DOCKER_TAG/" > $RELEASE_DIR/new-docker-compose.yml
-mv $RELEASE_DIR/new-docker-compose.yml $RELEASE_DIR/docker-compose.yml
-# Set up a skeleton PUDL environment in the release dir:
-pudl_setup $RELEASE_DIR/pudl_data
-
-# These are probably outdated now... see if they fail.
-rm -rf $RELEASE_DIR/pudl_data/environment.yml
-rm -rf $RELEASE_DIR/pudl_data/notebook
-
-# Copy over all of the pre-processed data
-echo "Copying SQLite databases..."
-cp -v $PUDL_IN/sqlite/ferc1.sqlite $RELEASE_DIR/pudl_data/sqlite/
-cp -v $PUDL_IN/sqlite/pudl.sqlite $RELEASE_DIR/pudl_data/sqlite/
-cp -v $PUDL_IN/sqlite/censusdp1tract.sqlite $RELEASE_DIR/pudl_data/sqlite/
-echo "Copying Parquet datasets..."
-cp -r $PUDL_IN/parquet/epacems $RELEASE_DIR/pudl_data/parquet/
-
-# Save the Docker image as a tarball so it can be archived with the data:
-docker save catalystcoop/pudl-jupyter:$DOCKER_TAG -o $RELEASE_DIR/pudl-jupyter.tar
-
-# List the high-level contents of the archive so we can see what it contains:
-find $RELEASE_DIR -maxdepth 3
-
-# Create the archive
-tar -czf $RELEASE_DIR.tgz $RELEASE_DIR
-
-echo "Finished:" `date`
diff --git a/docs/release_notes.rst b/docs/release_notes.rst
index 41a9c78cca..7ebfc5dbf1 100644
--- a/docs/release_notes.rst
+++ b/docs/release_notes.rst
@@ -2,10 +2,10 @@
 PUDL Release Notes
 =======================================================================================
 
-.. _release-v2022.11.XX:
+.. _release-v2022.11.30:
 
 ---------------------------------------------------------------------------------------
-2022.11.XX
+v2022.11.30
 ---------------------------------------------------------------------------------------
 
 Data Coverage
diff --git a/pyproject.toml b/pyproject.toml
index 8a87236295..570d04cd67 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,6 +2,7 @@
 requires = [
     "setuptools<66",
     "setuptools_scm[toml]>=3.5.0",
+    "wheel",
 ]
 build-backend = "setuptools.build_meta"
 
diff --git a/setup.py b/setup.py
index 69d7b47045..1ff4b2c44b 100644
--- a/setup.py
+++ b/setup.py
@@ -47,9 +47,9 @@
     python_requires=">=3.10,<3.11",
     setup_requires=["setuptools_scm"],
     install_requires=[
-        "addfips>=0.3.1,<0.4.0",
+        "addfips>=0.4,<0.5",
         "catalystcoop.dbfread>=3.0,<3.1",
-        "catalystcoop.ferc-xbrl-extractor==0.5.0",
+        "catalystcoop.ferc-xbrl-extractor==0.6.1",
         "coloredlogs>=15.0,<15.1",
         "dask>=2021.8,<2022.11.2",
         "datapackage>=1.11,<1.16",  # Transition datastore to use frictionless.
@@ -68,9 +68,8 @@
         "pygeos>=0.10,<0.14",
         "pyyaml>=5,<6.1",
         "scikit-learn>=1.0,<1.2",
-        "Shapely!=1.8.3",  # Seems to have a bug or incompatibility
         "scipy>=1.6,<1.10",
-        "Shapely!=1.8.3",  # Bug or incompatibility in upstream dependencies
+        "Shapely>1.8.0,!=1.8.3,<2.1",  # Incompatibility in 1.8.3
         "sqlalchemy>=1.4,<1.4.45",
         "timezonefinder>=5,<6.2",
         "xlsxwriter>=3,<3.1",
@@ -115,7 +114,7 @@
             "pytest-mock>=3.0,<3.11",
             "responses>=0.14,<0.23",
             "rstcheck[sphinx]>=5.0,<6.2",
-            "tox>=3.20,<3.28",
+            "tox>=3.20,<4.0.0",
         ],
         "datasette": [
             "datasette>=0.60,<0.64",
diff --git a/src/pudl/analysis/spatial.py b/src/pudl/analysis/spatial.py
index d72cf99625..40c39fec1c 100644
--- a/src/pudl/analysis/spatial.py
+++ b/src/pudl/analysis/spatial.py
@@ -41,7 +41,7 @@ def check_gdf(gdf: gpd.GeoDataFrame) -> None:
         raise ValueError("Geometry contains (Multi)Polygon geometries with zero area")
     is_mpoly = gdf.geometry.geom_type == "MultiPolygon"
     for mpoly in gdf.geometry[is_mpoly]:
-        for poly in mpoly:
+        for poly in mpoly.geoms:
             if not poly.area:
                 raise ValueError(
                     "MultiPolygon contains Polygon geometries with zero area"
@@ -63,7 +63,7 @@ def polygonize(geom: BaseGeometry) -> Polygon | MultiPolygon:
     polys = []
     # Explode geometries to polygons
     if isinstance(geom, GeometryCollection):
-        for g in geom:
+        for g in geom.geoms:
             if isinstance(g, Polygon):
                 polys.append(g)
             elif isinstance(g, MultiPolygon):