Skip to content

Commit

Permalink
Integrate 2023 EIA 860 Early Release (#3681)
Browse files Browse the repository at this point in the history
* extract: column maps

* add the new proposed tab to the assets

* fix column names & add new bools & add field descriptions

* pudl id mapping, removing null generator ids, light updates to mapping notebook

* add in the less than 5 MW plants into the pudl id mapping sheet

* validation updates and pr responses

* fix asset_checks

* clean up helper function

* Update src/pudl/helpers.py

Co-authored-by: E. Belfer <[email protected]>

---------

Co-authored-by: E. Belfer <[email protected]>
  • Loading branch information
cmgosnell and e-belfer authored Jun 21, 2024
1 parent 3b859b4 commit 056961b
Show file tree
Hide file tree
Showing 50 changed files with 1,649 additions and 1,220 deletions.
47 changes: 31 additions & 16 deletions devtools/debug-column-mapping.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,16 @@
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# this is occasionally required for running the extractor check down below.\n",
"# ! pip install xlrd"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -46,7 +56,6 @@
"\n",
"import pudl\n",
"from pudl.workspace.datastore import ZenodoDoiSettings\n",
"from pudl.extract.phmsagas import Extractor\n",
"\n",
"logger = pudl.logging_helpers.get_logger(\"__name__\")"
]
Expand All @@ -57,7 +66,7 @@
"metadata": {},
"outputs": [],
"source": [
"dataset = \"phmsagas\"\n",
"dataset = \"eia860\"\n",
"doi_path = getattr(ZenodoDoiSettings(), dataset).replace(\"/\", \"-\")\n",
"pudl_paths = pudl.workspace.setup.PudlPaths()\n",
"data_path = os.path.join(pudl_paths.pudl_input,dataset,doi_path) # Get path to raw data\n",
Expand All @@ -67,9 +76,7 @@
},
{
"cell_type": "markdown",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"metadata": {},
"source": [
"## File Check"
]
Expand Down Expand Up @@ -102,7 +109,7 @@
"\n",
"for table_files in file_map.values.tolist(): # For each table with a list of files\n",
" for file in table_files: # For each file included in this table\n",
" if file not in str(all_files): # Search the list of files for the file text, flag if not.\n",
" if (file not in str(all_files)) and (file != \"-1\"): # Search the list of files for the file text, flag if not.\n",
" logger.warning(f\"File '{file}' not found in actual raw data. Check file name.\")"
]
},
Expand Down Expand Up @@ -192,11 +199,9 @@
" raw_missing = [col for col in raw_columns if col not in mapped_columns.values]\n",
" mapped_missing = [col for col in mapped_columns if col not in raw_columns.values]\n",
" if raw_missing and raw_check:\n",
" logger.warning(f\"Raw columns {raw_missing} from {file} are not mapped.\")\n",
" logger.warning(f\"{page}: Raw columns {raw_missing} from {file} are not mapped.\")\n",
" if mapped_missing:\n",
" logger.warning(f\"Mapped columns {mapped_missing} do not exist in the raw data file {file}\")\n",
" \n",
" "
" logger.warning(f\"{page}: Mapped columns {mapped_missing} do not exist in the raw data file {file}\")"
]
},
{
Expand All @@ -213,31 +218,41 @@
"## Extractor Check"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pudl.extract.eia860 import Extractor"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## SETTINGS FOR EXTRACTOR\n",
"extractor_phmsagas = Extractor(ds=ds)\n",
"# adjust to your desired data source extractor in the cell above\n",
"extractor = Extractor(ds=ds)\n",
"\n",
"# recommend changing the loglevel here to warning to only get the baddies\n",
"pudl.logging_helpers.configure_root_logger(loglevel=\"WARNING\")\n",
"\n",
"# IF you wanna restrict the years\n",
"working_years = list(range(1990,2023))\n",
"working_years = list(range(2020,2024))\n",
"# IF you want to restrict the pages to extract here is a lil way to do that\n",
"# you give pages_you_want_to_extract a list of pages you want to extract\n",
"# if pages_you_want_to_extract is empty, you'll get the standard pages\n",
"pages_you_want_to_extract = []\n",
"all_pages = extractor_phmsagas._metadata.get_all_pages()\n",
"all_pages = extractor._metadata.get_all_pages()\n",
"def _new_page_getter(self):\n",
" if pages_you_want_to_extract:\n",
" return pages_you_want_to_extract\n",
" else:\n",
" return all_pages\n",
"extractor_phmsagas._metadata.get_all_pages = types.MethodType(_new_page_getter, extractor_phmsagas)"
"extractor._metadata.get_all_pages = types.MethodType(_new_page_getter, extractor)"
]
},
{
Expand All @@ -247,7 +262,7 @@
"outputs": [],
"source": [
"## RUN THE EXTRACTOR\n",
"extracted_dfs = extractor_phmsagas.extract(year=working_years)"
"extracted_dfs = extractor.extract(year=working_years)"
]
}
],
Expand All @@ -267,7 +282,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
"version": "3.12.3"
}
},
"nbformat": 4,
Expand Down
10 changes: 10 additions & 0 deletions docs/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ EIA AEO
fuel type, and include both nominal USD per MMBtu as well as real 2022 USD
per MMBtu. See issue :issue:`3649` and PR :pr:`3656`.

EIA 860
~~~~~~~

* Added EIA 860 early release data from 2023. This included adding a new tab with
proposed energy storage generators as well as adding a number of new columns
regarding energy storage and solar generators. See issue :issue:`3676` and PR
:pr:`3681`.



.. _release-v2024.5.0:

---------------------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""add 2023 EIA 860ER storage and solar columns
Revision ID: b9b6cb1a5405
Revises: da38a41d7f99
Create Date: 2024-06-20 10:00:37.339814
"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = 'b9b6cb1a5405'
down_revision = 'da38a41d7f99'
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add the 2023 EIA 860ER energy storage and solar columns.

    Twelve columns are added to ``core_eia860__scd_generators_energy_storage``
    and one to ``core_eia860__scd_generators_solar``. Every new column is
    nullable.
    """
    # (column name, SQLAlchemy type class, column comment), in creation order.
    energy_storage_columns = (
        ("is_ac_coupled", sa.Boolean, "Indicates if this energy storage device is AC-coupled (means the energy storage device and the PV system are not installed on the same side of an inverter)."),
        ("is_dc_coupled", sa.Boolean, "Indicates if this energy storage device is DC-coupled (means the energy storage device and the PV system are on the same side of an inverter and the battery can still charge from the grid)."),
        ("id_dc_coupled_tightly", sa.Boolean, "Indicates if this energy storage device is DC tightly coupled (means the energy storage device and the PV system are on the same side of an inverter and the battery cannot charge from the grid)."),
        ("is_independent", sa.Boolean, "Indicates if this energy storage device is independent (not coupled with another generators)"),
        ("is_transmission_and_distribution_asset_support", sa.Boolean, "Indicate if the energy storage system is intended to support a specific substation, transmission or distribution asset."),
        ("is_direct_support", sa.Boolean, "Indicates if this energy storage device is intended for dedicated generator firming or storing excess generation of other units."),
        ("plant_id_eia_direct_support_1", sa.Float, "The EIA Plant ID of the primary unit whose generation this energy storage device is intended to firm or store."),
        ("generator_id_direct_support_1", sa.Text, "The EIA Generator ID of the primary unit whose generation this energy storage device is intended to firm or store."),
        ("plant_id_eia_direct_support_2", sa.Float, "The EIA Plant ID of the secondary unit whose generation this energy storage device is intended to firm or store."),
        ("generator_id_direct_support_2", sa.Text, "The EIA Generator ID of the secondary unit whose generation this energy storage device is intended to firm or store."),
        ("plant_id_eia_direct_support_3", sa.Float, "The EIA Plant ID of the tertiary unit whose generation this energy storage device is intended to firm or store."),
        ("generator_id_direct_support_3", sa.Text, "The EIA Generator ID of the tertiary unit whose generation this energy storage device is intended to firm or store."),
    )

    with op.batch_alter_table(
        "core_eia860__scd_generators_energy_storage", schema=None
    ) as batch_op:
        for column_name, column_type, column_comment in energy_storage_columns:
            batch_op.add_column(
                sa.Column(
                    column_name,
                    column_type(),
                    nullable=True,
                    comment=column_comment,
                )
            )

    with op.batch_alter_table(
        "core_eia860__scd_generators_solar", schema=None
    ) as batch_op:
        batch_op.add_column(
            sa.Column(
                "uses_bifacial_panels",
                sa.Boolean(),
                nullable=True,
                comment="Indicates whether bifacial solar panels are used at this solar generating unit.",
            )
        )


def downgrade() -> None:
    """Remove the 2023 EIA 860ER energy storage and solar columns.

    Drops the solar column first, then the energy storage columns in the
    reverse of the order in which ``upgrade`` adds them.
    """
    with op.batch_alter_table(
        "core_eia860__scd_generators_solar", schema=None
    ) as batch_op:
        batch_op.drop_column("uses_bifacial_panels")

    # Listed in drop order.
    energy_storage_columns = (
        "generator_id_direct_support_3",
        "plant_id_eia_direct_support_3",
        "generator_id_direct_support_2",
        "plant_id_eia_direct_support_2",
        "generator_id_direct_support_1",
        "plant_id_eia_direct_support_1",
        "is_direct_support",
        "is_transmission_and_distribution_asset_support",
        "is_independent",
        "id_dc_coupled_tightly",
        "is_dc_coupled",
        "is_ac_coupled",
    )
    with op.batch_alter_table(
        "core_eia860__scd_generators_energy_storage", schema=None
    ) as batch_op:
        for column_name in energy_storage_columns:
            batch_op.drop_column(column_name)
1 change: 1 addition & 0 deletions src/pudl/extract/eia860.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def get_dtypes(page, **partition):
"raw_eia860__generator_proposed",
"raw_eia860__generator_retired",
"raw_eia860__generator_energy_storage_existing",
"raw_eia860__generator_energy_storage_proposed",
"raw_eia860__generator_energy_storage_retired",
"raw_eia860__generator_solar_existing",
"raw_eia860__generator_solar_retired",
Expand Down
22 changes: 22 additions & 0 deletions src/pudl/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1476,6 +1476,28 @@ def dedupe_and_drop_nas(
)


def drop_records_with_null_in_column(
    df: pd.DataFrame, column: str, num_of_expected_nulls: int
) -> pd.DataFrame:
    """Drop a prescribed number of records with null values in a column.

    Args:
        df: table with column to check.
        column: name of column with potential null values.
        num_of_expected_nulls: the maximum number of records with null values
            in the column that may be dropped.

    Returns:
        A new dataframe without the records that are null in ``column``. The
        input ``df`` is not modified.

    Raises:
        AssertionError: If there are more nulls in the df than the
            num_of_expected_nulls.
    """
    null_records = df[df[column].isnull()]
    # Ensure there aren't more than the expected number of nulls before
    # dropping, so that newly introduced nulls are not discarded silently.
    if len(null_records) > num_of_expected_nulls:
        raise AssertionError(
            f"Expected {num_of_expected_nulls} or fewer records with a null value in "
            f"{column} but found {len(null_records)}:\n{null_records}"
        )
    return df.dropna(subset=[column])


def standardize_percentages_ratio(
frac_df: pd.DataFrame,
mixed_cols: list[str],
Expand Down
6 changes: 4 additions & 2 deletions src/pudl/metadata/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1648,8 +1648,10 @@ def enforce_schema(self, df: pd.DataFrame) -> pd.DataFrame:
raise ValueError(
f"{self.name} {len(dupes)}/{len(df)} duplicate primary keys ({pk=}) when enforcing schema."
)
if pk and df.loc[:, pk].isna().any(axis=None):
raise ValueError(f"{self.name} Null values found in primary key columns.")
if pk and not (nulls := df[df[pk].isna().any(axis=1)]).empty:
raise ValueError(
f"{self.name} Null values found in primary key columns.\n{nulls}"
)
return df

def aggregate_df(
Expand Down
89 changes: 89 additions & 0 deletions src/pudl/metadata/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -4825,6 +4825,95 @@
"disaggregated. See documentation for process: pudl.output.ferc1.disaggregate_null_or_total_tag"
),
},
"is_ac_coupled": {
"type": "boolean",
"description": (
"Indicates if this energy storage device is AC-coupled (means the energy storage device "
"and the PV system are not installed on the same side of an inverter)."
),
},
"is_dc_coupled": {
"type": "boolean",
"description": (
"Indicates if this energy storage device is DC-coupled (means the energy storage "
"device and the PV system are on the same side of an inverter and the battery can "
"still charge from the grid)."
),
},
"id_dc_coupled_tightly": {
"type": "boolean",
"description": (
"Indicates if this energy storage device is DC tightly coupled (means the energy "
"storage device and the PV system are on the same side of an inverter and the battery "
"cannot charge from the grid)."
),
},
"is_independent": {
"type": "boolean",
"description": "Indicates if this energy storage device is independent (not coupled with another generators)",
},
"is_direct_support": {
"type": "boolean",
"description": (
"Indicates if this energy storage device is intended for dedicated generator "
"firming or storing excess generation of other units."
),
},
"plant_id_eia_direct_support_1": {
"type": "number",
"description": (
"The EIA Plant ID of the primary unit whose generation this energy storage "
"device is intended to firm or store."
),
},
"generator_id_direct_support_1": {
"type": "string",
"description": (
"The EIA Generator ID of the primary unit whose generation this energy "
"storage device is intended to firm or store."
),
},
"plant_id_eia_direct_support_2": {
"type": "number",
"description": (
"The EIA Plant ID of the secondary unit whose generation this energy storage "
"device is intended to firm or store."
),
},
"generator_id_direct_support_2": {
"type": "string",
"description": (
"The EIA Generator ID of the secondary unit whose generation this energy "
"storage device is intended to firm or store."
),
},
"plant_id_eia_direct_support_3": {
"type": "number",
"description": (
"The EIA Plant ID of the tertiary unit whose generation this energy storage "
"device is intended to firm or store."
),
},
"generator_id_direct_support_3": {
"type": "string",
"description": (
"The EIA Generator ID of the tertiary unit whose generation this energy "
"storage device is intended to firm or store."
),
},
"is_transmission_and_distribution_asset_support": {
"type": "boolean",
"description": (
"Indicate if the energy storage system is intended to support a specific substation, "
"transmission or distribution asset."
),
},
"uses_bifacial_panels": {
"type": "boolean",
"description": (
"Indicates whether bifacial solar panels are used at this solar generating unit."
),
},
}
"""Field attributes by PUDL identifier (`field.name`).
Expand Down
13 changes: 13 additions & 0 deletions src/pudl/metadata/resources/eia860.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,7 @@
"uses_material_thin_film_cigs",
"uses_material_thin_film_other",
"uses_material_other",
"uses_bifacial_panels",
],
"primary_key": [
"plant_id_eia",
Expand Down Expand Up @@ -882,6 +883,18 @@
"served_transmission_and_distribution_deferral",
"served_voltage_or_reactive_power_support",
"stored_excess_wind_and_solar_generation",
"is_ac_coupled",
"is_dc_coupled",
"id_dc_coupled_tightly",
"is_independent",
"is_transmission_and_distribution_asset_support",
"is_direct_support",
"plant_id_eia_direct_support_1",
"generator_id_direct_support_1",
"plant_id_eia_direct_support_2",
"generator_id_direct_support_2",
"plant_id_eia_direct_support_3",
"generator_id_direct_support_3",
],
"primary_key": [
"plant_id_eia",
Expand Down
2 changes: 1 addition & 1 deletion src/pudl/metadata/sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@
CONTRIBUTORS["catalyst-cooperative"],
],
"working_partitions": {
"years": sorted(set(range(2001, 2023))),
"years": sorted(set(range(2001, 2024))),
},
"keywords": sorted(
set(
Expand Down
Loading

0 comments on commit 056961b

Please sign in to comment.