Skip to content

Commit

Permalink
Merge pull request #2098 from catalyst-cooperative/v2022.11.30
Browse files Browse the repository at this point in the history
Packaging & release note tweaks for PUDL v2022.11.30
  • Loading branch information
zaneselvans authored Dec 13, 2022
2 parents 2ef87d4 + a1a3958 commit e46873c
Show file tree
Hide file tree
Showing 8 changed files with 94 additions and 67 deletions.
14 changes: 10 additions & 4 deletions .github/workflows/bot-auto-merge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,31 @@ name: bot-auto-merge
on:
workflow_run:
types: [completed]
workflows: [tox-pytest]
workflows: ["tox-pytest"]

jobs:
bot-auto-merge:
name: Auto-merge passing bot PRs
runs-on: ubuntu-latest
steps:
- name: Impersonate auto merge PR bot
uses: tibdex/github-app-token@v1
id: generate-token
with:
app_id: ${{ secrets.BOT_AUTO_MERGE_PRS_APP_ID }}
private_key: ${{ secrets.BOT_AUTO_MERGE_PRS_APP_KEY }}
- name: Auto-merge passing dependabot PRs
if: ${{ github.event.workflow_run.conclusion == 'success' }}
uses: ridedott/merge-me-action@v2
with:
# For clarity only. dependabot is default.
# For clarity only. dependabot is default login.
GITHUB_LOGIN: dependabot
GITHUB_TOKEN: ${{ secrets.BOT_AUTO_MERGE_TOKEN }}
GITHUB_TOKEN: ${{ steps.generate-token.outputs.token }}
ENABLED_FOR_MANUAL_CHANGES: "true"
- name: Auto-merge passing pre-commit-ci PRs
if: ${{ github.event.workflow_run.conclusion == 'success' }}
uses: ridedott/merge-me-action@v2
with:
GITHUB_LOGIN: pre-commit-ci
GITHUB_TOKEN: ${{ secrets.BOT_AUTO_MERGE_TOKEN }}
GITHUB_TOKEN: ${{ steps.generate-token.outputs.token }}
ENABLED_FOR_MANUAL_CHANGES: "true"
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ PUDL currently integrates data from:
* `EIA Form 861 <https://www.eia.gov/electricity/data/eia861/>`__: 2001-2021
* `EIA Form 923 <https://www.eia.gov/electricity/data/eia923/>`__: 2001-2021
* `EPA Continuous Emissions Monitoring System (CEMS) <https://campd.epa.gov/>`__: 1995-2021
* `FERC Form 1 <https://www.ferc.gov/industries-data/electric/general-information/electric-industry-forms/form-1-electric-utility-annual>`__: 1994-2020
* `FERC Form 1 <https://www.ferc.gov/industries-data/electric/general-information/electric-industry-forms/form-1-electric-utility-annual>`__: 1994-2021
* `FERC Form 714 <https://www.ferc.gov/industries-data/electric/general-information/electric-industry-forms/form-no-714-annual-electric/data>`__: 2006-2020
* `US Census Demographic Profile 1 Geodatabase <https://www.census.gov/geographies/mapping-files/2010/geo/tiger-data.html>`__: 2010

Expand Down
74 changes: 74 additions & 0 deletions devtools/data-release.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/bin/sh
# A script to compile a Dockerized data release based on PUDL nightly build outputs.
#
# Usage: data-release.sh PUDL_REF DOCKER_TAG RELEASE_DIR
#
# Abort immediately if any command fails, so we never ship a half-built archive:
set -e

# Positional arguments:

# All three arguments are required; bail out early with usage info otherwise.
# (An empty RELEASE_DIR would otherwise make the `rm -rf` below extremely unsafe.)
if [ "$#" -ne 3 ]; then
    echo "Usage: $0 PUDL_REF DOCKER_TAG RELEASE_DIR" >&2
    exit 1
fi

# First command line argument is the PUDL nightly build tag / ref. This indicates what
# build outputs to use. E.g. "dev" or "v2022.11.30"
PUDL_REF=$1

# Docker tag to use in the archive, e.g. "latest" or "2022.11.30". Will be used to
# pull the docker image using catalystcoop/pudl-jupyter:$DOCKER_TAG
DOCKER_TAG=$2

# Path to a local directory where the archive will be assembled. Should be in a place
# with at least 20GB of disk space.
# E.g. "./pudl-v2022.11.30"
RELEASE_DIR=$3

# Construct the GCS URL:
GCS_ROOT="gs://intake.catalyst.coop"
GCS_URL="$GCS_ROOT/$PUDL_REF"

# Construct the Docker image name
DOCKER_REPO="catalystcoop"
DOCKER_NAME="pudl-jupyter"
DOCKER_IMAGE="$DOCKER_REPO/$DOCKER_NAME:$DOCKER_TAG"

echo "Started: $(date)"
# Start with a clean slate:
rm -rf "$RELEASE_DIR"
mkdir -p "$RELEASE_DIR"
# The release container / environment is based on the pudl-examples repo:
git clone --depth 1 [email protected]:catalyst-cooperative/pudl-examples.git "$RELEASE_DIR"
rm -rf "$RELEASE_DIR"/.git*
# These directories are where the data will go. They're integrated with the
# Docker container that's defined in the pudl-examples repo:
mkdir -p "$RELEASE_DIR/pudl_data"
mkdir -p "$RELEASE_DIR/user_data"

# Make sure we have the specified version of the Docker container:
docker pull "$DOCKER_IMAGE"
# Freeze the version of the Docker container referenced in docker-compose.yml
# (sed reads the file directly; no need for cat, and writing to a temp file
# avoids sed reading and writing the same file at once):
sed -e "s/$DOCKER_NAME:latest/$DOCKER_NAME:$DOCKER_TAG/" \
    "$RELEASE_DIR/docker-compose.yml" > "$RELEASE_DIR/new-docker-compose.yml"
mv "$RELEASE_DIR/new-docker-compose.yml" "$RELEASE_DIR/docker-compose.yml"
# Set up a skeleton PUDL environment in the release dir:
pudl_setup "$RELEASE_DIR/pudl_data"

# These are probably outdated now... see if they fail.
rm -rf "$RELEASE_DIR/pudl_data/environment.yml"
rm -rf "$RELEASE_DIR/pudl_data/notebook"
rm -rf "$RELEASE_DIR/pudl_data/settings"

# Copy over all of the pre-processed data
echo "Copying SQLite databases..."
mkdir -p "$RELEASE_DIR/pudl_data/sqlite/"
gsutil -m cp "$GCS_URL/*.sqlite" "$GCS_URL/ferc*_xbrl_*.json" "$RELEASE_DIR/pudl_data/sqlite/"

echo "Copying Parquet datasets..."
mkdir -p "$RELEASE_DIR/pudl_data/parquet/epacems"
gsutil -m cp -r "$GCS_URL/hourly_emissions_epacems/*" "$RELEASE_DIR/pudl_data/parquet/epacems"

# Save the Docker image as a tarball so it can be archived with the data:
echo "Saving Docker image: $DOCKER_IMAGE"
docker save "$DOCKER_IMAGE" -o "$RELEASE_DIR/pudl-jupyter.tar"

# List the high-level contents of the archive so we can see what it contains:
echo "Archive contents:"
find "$RELEASE_DIR" -maxdepth 3

# Create the archive
echo "Creating the archive tarball..."
tar -czf "$RELEASE_DIR.tgz" "$RELEASE_DIR"

echo "Finished: $(date)"
53 changes: 0 additions & 53 deletions devtools/databeta.sh

This file was deleted.

4 changes: 2 additions & 2 deletions docs/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
PUDL Release Notes
=======================================================================================

.. _release-v2022.11.XX:
.. _release-v2022.11.30:

---------------------------------------------------------------------------------------
2022.11.XX
v2022.11.30
---------------------------------------------------------------------------------------

Data Coverage
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
requires = [
"setuptools<66",
"setuptools_scm[toml]>=3.5.0",
"wheel",
]
build-backend = "setuptools.build_meta"

Expand Down
9 changes: 4 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@
python_requires=">=3.10,<3.11",
setup_requires=["setuptools_scm"],
install_requires=[
"addfips>=0.3.1,<0.4.0",
"addfips>=0.4,<0.5",
"catalystcoop.dbfread>=3.0,<3.1",
"catalystcoop.ferc-xbrl-extractor==0.5.0",
"catalystcoop.ferc-xbrl-extractor==0.6.1",
"coloredlogs>=15.0,<15.1",
"dask>=2021.8,<2022.11.2",
"datapackage>=1.11,<1.16", # Transition datastore to use frictionless.
Expand All @@ -68,9 +68,8 @@
"pygeos>=0.10,<0.14",
"pyyaml>=5,<6.1",
"scikit-learn>=1.0,<1.2",
"Shapely!=1.8.3", # Seems to have a bug or incompatibility
"scipy>=1.6,<1.10",
"Shapely!=1.8.3", # Bug or incompatibility in upstream dependencies
"Shapely>1.8.0,!=1.8.3,<2.1", # Incompatibility in 1.8.3
"sqlalchemy>=1.4,<1.4.45",
"timezonefinder>=5,<6.2",
"xlsxwriter>=3,<3.1",
Expand Down Expand Up @@ -115,7 +114,7 @@
"pytest-mock>=3.0,<3.11",
"responses>=0.14,<0.23",
"rstcheck[sphinx]>=5.0,<6.2",
"tox>=3.20,<3.28",
"tox>=3.20,<4.0.0",
],
"datasette": [
"datasette>=0.60,<0.64",
Expand Down
4 changes: 2 additions & 2 deletions src/pudl/analysis/spatial.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def check_gdf(gdf: gpd.GeoDataFrame) -> None:
raise ValueError("Geometry contains (Multi)Polygon geometries with zero area")
is_mpoly = gdf.geometry.geom_type == "MultiPolygon"
for mpoly in gdf.geometry[is_mpoly]:
for poly in mpoly:
for poly in mpoly.geoms:
if not poly.area:
raise ValueError(
"MultiPolygon contains Polygon geometries with zero area"
Expand All @@ -63,7 +63,7 @@ def polygonize(geom: BaseGeometry) -> Polygon | MultiPolygon:
polys = []
# Explode geometries to polygons
if isinstance(geom, GeometryCollection):
for g in geom:
for g in geom.geoms:
if isinstance(g, Polygon):
polys.append(g)
elif isinstance(g, MultiPolygon):
Expand Down

0 comments on commit e46873c

Please sign in to comment.