Commit

Merge branch '3243-phmsa-tx-j' into phmsa-transmission-l
e-belfer authored Jan 23, 2024
2 parents 34e4747 + d380828 commit 6749ba2
Showing 24 changed files with 2,280 additions and 1,651 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yml
@@ -149,7 +149,7 @@ jobs:
| sed -e 's/",*$//g' | sed -e 's/^.*"//g' | sort > datastore-dois.txt

- name: Restore Zenodo datastore from cache if possible
uses: actions/cache@v3
uses: actions/cache@v4
id: cache-zenodo-datastore
with:
path: ${{ env.PUDL_INPUT }}
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -29,7 +29,7 @@ repos:
# Formatters: hooks that re-write Python & documentation files
####################################################################################
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.13
rev: v0.1.14
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
101 changes: 86 additions & 15 deletions devtools/debug-column-mapping.ipynb
@@ -18,28 +18,37 @@
"First, select the raw dataset you're going to be mapping and locate all relevant file directories."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pudl\n",
"from pudl.workspace.datastore import ZenodoDoiSettings\n",
"import os\n",
"import importlib\n",
"from pathlib import Path\n",
"import pandas as pd\n",
"from zipfile import ZipFile\n",
"import logging\n",
"import sys\n",
"import types\n",
"\n",
"import pudl\n",
"from pudl.workspace.datastore import ZenodoDoiSettings\n",
"from pudl.extract.phmsagas import Extractor\n",
"\n",
"logger = logging.getLogger()\n",
"logger.setLevel(logging.INFO)\n",
"handler = logging.StreamHandler(stream=sys.stdout)\n",
"formatter = logging.Formatter('%(message)s')\n",
"handler.setFormatter(formatter)\n",
"logger.handlers = [handler]"
"logger = pudl.logging_helpers.get_logger(\"__name__\")"
]
},
{
@@ -50,8 +59,19 @@
"source": [
"dataset = \"phmsagas\"\n",
"doi_path = getattr(ZenodoDoiSettings(), dataset).replace(\"/\", \"-\")\n",
"data_path = os.path.join(os.getenv(\"PUDL_INPUT\"),dataset,doi_path) # Get path to raw data\n",
"map_path = os.path.join(Path(pudl.package_data.__file__).parents[0], dataset) # Get path to mapping CSVs"
"pudl_paths = pudl.workspace.setup.PudlPaths()\n",
"data_path = os.path.join(pudl_paths.pudl_input,dataset,doi_path) # Get path to raw data\n",
"map_path = os.path.join(Path(pudl.package_data.__file__).parents[0], dataset) # Get path to mapping CSVs\n",
"ds = pudl.workspace.datastore.Datastore(pudl_paths.pudl_input)"
]
},
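For a quick sanity check on those paths, something like the snippet below can be run in a scratch cell. It is not part of the diff above; it assumes `data_path` and `map_path` from the cell just shown, and that the raw PHMSA data is cached as zip archives directly under `data_path`.

# Illustrative only: confirm the raw-data and mapping directories exist and
# list the zip archives the datastore has cached locally.
from pathlib import Path
from zipfile import ZipFile

for label, directory in [("raw data", data_path), ("mapping CSVs", map_path)]:
    print(f"{label}: {directory} (exists: {Path(directory).exists()})")

for archive in sorted(Path(data_path).glob("*.zip")):
    with ZipFile(archive) as zf:
        print(f"{archive.name}: {len(zf.namelist())} members")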
{
"cell_type": "markdown",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## File Check"
]
},
{
@@ -107,6 +127,13 @@
" )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Column Map Check"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -139,7 +166,7 @@
" continue\n",
" return match[0]\n",
"\n",
"ds = pudl.workspace.datastore.Datastore()\n",
"\n",
"for page in file_map.index:\n",
" if not table_subset or page in table_subset:\n",
" column_maps = pd.read_csv(\n",
@@ -178,11 +205,55 @@
"source": [
"Go back and fix any incorrectly labelled columns. Then run the cell above again, until all columns are correctly labelled."
]
},
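As a generic illustration of that labelling check (not part of this commit; the column names and values here are hypothetical), comparing a mapping series against the raw header row flags the bad labels:

import pandas as pd

# Hypothetical column map for one report year, plus the headers actually found
# in the raw spreadsheet; "fatalities" is mapped to a name that doesn't exist.
column_map = pd.Series(
    {"report_year": "YR", "operator_id": "OPID", "fatalities": "FTLS"}
)
raw_columns = {"YR", "OPID", "FATALITIES"}

# Any mapped raw-column name that is missing from the raw file is mislabelled.
bad_labels = column_map[~column_map.isin(raw_columns)]
print(bad_labels)  # shows the "fatalities" -> "FTLS" entry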
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Extractor Check"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## SETTINGS FOR EXTRACTOR\n",
"extractor_phmsagas = Extractor(ds=ds)\n",
"\n",
"# recommend changing the loglevel here to warning to only get the baddies\n",
"pudl.logging_helpers.configure_root_logger(loglevel=\"WARNING\")\n",
"\n",
"# IF you wanna restrict the years\n",
"working_years = list(range(1990,2023))\n",
"# IF you want to restrict the pages to extract here is a lil way to do that\n",
"# you give pages_you_want_to_extract a lil of pages you want to extract\n",
"# if pages_you_want_to_extract if nothing, you'll get the standard pages\n",
"pages_you_want_to_extract = []\n",
"all_pages = extractor_phmsagas._metadata.get_all_pages()\n",
"def _new_page_getter(self):\n",
" if pages_you_want_to_extract:\n",
" return pages_you_want_to_extract\n",
" else:\n",
" return all_pages\n",
"extractor_phmsagas._metadata.get_all_pages = types.MethodType(_new_page_getter, extractor_phmsagas)"
]
},
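The page-restriction trick in the settings cell works by rebinding `get_all_pages` on a single object at runtime. A minimal, self-contained sketch of that `types.MethodType` pattern follows; the class and page names are made up for illustration, not PUDL's own.

import types


class Metadata:
    """Stand-in for the extractor metadata object (hypothetical)."""

    def get_all_pages(self):
        return ["distribution", "transmission_gathering"]


meta = Metadata()
subset = ["transmission_gathering"]  # leave empty to keep the full page list


def _restricted_pages(self):
    # Fall back to the original class method when no subset is requested.
    return subset if subset else Metadata.get_all_pages(self)


# Rebind on this one instance only; other Metadata objects are unaffected.
meta.get_all_pages = types.MethodType(_restricted_pages, meta)
print(meta.get_all_pages())  # -> ['transmission_gathering']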
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## RUN THE EXTRACTOR\n",
"extracted_dfs = extractor_phmsagas.extract(year=working_years)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "pudl-dev",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -196,9 +267,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
13 changes: 11 additions & 2 deletions docs/release_notes.rst
@@ -34,8 +34,17 @@ Data Coverage
CEMS instead of the annual files. Integrates CEMS through 2023Q3. See issue
:issue:`2973` & PR :pr:`3096`.
* Began integration of PHMSA gas distribution and transmission tables into PUDL,
extracting raw data from 1990-present. See epic :issue:`2848`, and PRs :pr:`2932`,
:pr:`3242`, :pr:`3254`.
extracting raw data from 1990-present. See epic :issue:`2848`, and constituent PRs:

* :pr:`2932`
* :pr:`3242`
* :pr:`3254`
* :pr:`3260`
* :pr:`3262`
* :pr:`3266`
* :pr:`3267`
* :pr:`3269`

* Updated the EIA Bulk Electricity data archive so that the available data now runs
through 2023-10-01. See :pr:`3252`. Also added this dataset to the set of data that
will automatically generate archives each month. See `This PUDL Archiver PR
(The remaining 20 changed files are not shown.)
