diff --git a/docker/Dockerfile b/docker/Dockerfile index 2d763ed4e5..d1a1d86f83 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM mambaorg/micromamba:1.5.6 +FROM mambaorg/micromamba:1.5.7 ENV PGDATA=${CONTAINER_HOME}/pgdata diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 4932a5599b..b6cbd705fe 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -14,6 +14,11 @@ New Data Coverage :ref:`i_core_eia860__fgd_equipment`. Once harvested, these tables will eventually be removed from the database, but they are being published until then. See :issue:`3394` and :issue:`3392`, and :pr:`3403`. +* Added a new ``gridpathratk`` data source containing hourly wind and solar generation + profiles from the `GridPath Resource Adequacy Toolkit + <https://gridlab.org/gridpathratoolkit/>`__. See the `new Zenodo archive + <https://doi.org/10.5281/zenodo.10844662>`__, PR :pr:`3489` and `this PUDL archiver issue + `__. Data Cleaning ^^^^^^^^^^^^^ diff --git a/src/pudl/metadata/sources.py b/src/pudl/metadata/sources.py index 51264ffa70..37f9c6604c 100644 --- a/src/pudl/metadata/sources.py +++ b/src/pudl/metadata/sources.py @@ -676,6 +676,45 @@ "license_raw": LICENSES["us-govt"], "license_pudl": LICENSES["cc-by-4.0"], }, + "gridpathratk": { + "title": "PUDL Raw GridPath Resource Adequacy Toolkit Renewable Generation Profiles", + "path": "https://gridlab.org/gridpathratoolkit/", + "description": ( + "Hourly renewable generation profiles compiled for the Western United " + "States as part of the GridPath Resource Adequacy Toolkit. Profiles are " + "stated as a capacity factor (a fraction of nameplate capacity). 
There are " + "3 different levels of processing or aggregation provided, all at hourly " + "resolution: Individual plant (wind) or generator (solar) output, " + "capacity-weighted averages of wind and solar output aggregated to the " + "level of balancing authority territories (or transmission zones for " + "larger balancing authorities), and that same aggregated output but with " + "some problematic individual generator profiles modified such that they " + "match the overall production curve of the balancing authority they are " + "within. This data also contains some daily weather data from several " + "sites across the western US and tables describing the way in which " + "individual wind and solar projects were aggregated up to the level of " + "balancing authority or transmission zone." + ), + "keywords": sorted( + { + "solar", + "wind", + "time series", + "energy", + "electricity", + "generation", + "weather", + "capacity factor", + "hourly", + "united states", + "usa", + "resource adequacy", + "gridpath", + } + ), + "license_raw": LICENSES["cc-by-4.0"], + "license_pudl": LICENSES["cc-by-4.0"], + }, "mshamines": { "title": "Mine Safety and Health Administration (MSHA) Mines", "path": "https://arlweb.msha.gov/OpenGovernmentData/OGIMSHA.asp", diff --git a/src/pudl/workspace/datastore.py b/src/pudl/workspace/datastore.py index 5772bf5939..f850f9b2c2 100644 --- a/src/pudl/workspace/datastore.py +++ b/src/pudl/workspace/datastore.py @@ -60,8 +60,12 @@ def __init__(self, datapackage_json: dict, dataset: str, doi: ZenodoDoi): def get_resource_path(self, name: str) -> str: """Returns zenodo url that holds contents of given named resource.""" res = self._get_resource_metadata(name) - # remote_url is sometimes set on the local cached version of datapackage.json - # so we should be using that if it exists. 
+ # In older cached archives, "remote_url" was used to refer to the original path + # to the file, while the canonical "path" field was updated by the datastore + # to refer to the local path to the associated file relative to the location of + # datapackage.json. This behavior is deprecated and no longer used, but we need + # to retain this logic to support older cached archives, e.g. censusdp1tract + # which hasn't changed since 2020. resource_path = res.get("remote_url") or res.get("path") parsed_path = urlparse(resource_path) if parsed_path.path.startswith("/api/files"): @@ -122,10 +126,10 @@ def get_resources( """Returns series of PudlResourceKey identifiers for matching resources. Args: - name: if specified, find resource(s) with this name. - filters (dict): if specified, find resoure(s) matching these key=value constraints. - The constraints are matched against the 'parts' field of the resource - entry in the datapackage.json. + name: if specified, find resource(s) with this name. + filters (dict): if specified, find resource(s) matching these key=value + constraints. The constraints are matched against the 'parts' field of + the resource entry in the datapackage.json. """ for res in self.datapackage_json["resources"]: if name and res["name"] != name: @@ -201,6 +205,7 @@ class ZenodoDoiSettings(BaseSettings): ferc6: ZenodoDoi = "10.5281/zenodo.8326696" ferc60: ZenodoDoi = "10.5281/zenodo.8326695" ferc714: ZenodoDoi = "10.5281/zenodo.8326694" + gridpathratk: ZenodoDoi = "10.5281/zenodo.10844662" phmsagas: ZenodoDoi = "10.5281/zenodo.10493790" model_config = SettingsConfigDict(env_prefix="pudl_zenodo_doi_", env_file=".env")