Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate 2023 EIA 860 Early Release #3681

Merged
merged 11 commits into from
Jun 21, 2024
47 changes: 31 additions & 16 deletions devtools/debug-column-mapping.ipynb
cmgosnell marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,16 @@
"%autoreload 2"
cmgosnell marked this conversation as resolved.
Show resolved Hide resolved
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# this is occasionally required for running the extractor check down below.\n",
"# ! pip install xlrd"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -46,7 +56,6 @@
"\n",
"import pudl\n",
"from pudl.workspace.datastore import ZenodoDoiSettings\n",
"from pudl.extract.phmsagas import Extractor\n",
"\n",
"logger = pudl.logging_helpers.get_logger(\"__name__\")"
]
Expand All @@ -57,7 +66,7 @@
"metadata": {},
"outputs": [],
"source": [
"dataset = \"phmsagas\"\n",
"dataset = \"eia860\"\n",
"doi_path = getattr(ZenodoDoiSettings(), dataset).replace(\"/\", \"-\")\n",
"pudl_paths = pudl.workspace.setup.PudlPaths()\n",
"data_path = os.path.join(pudl_paths.pudl_input,dataset,doi_path) # Get path to raw data\n",
Expand All @@ -67,9 +76,7 @@
},
{
"cell_type": "markdown",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"metadata": {},
"source": [
"## File Check"
]
Expand Down Expand Up @@ -102,7 +109,7 @@
"\n",
"for table_files in file_map.values.tolist(): # For each table with a list of files\n",
" for file in table_files: # For each file included in this table\n",
" if file not in str(all_files): # Search the list of files for the file text, flag if not.\n",
" if (file not in str(all_files)) and (file != \"-1\"): # Search the list of files for the file text, flag if not.\n",
" logger.warning(f\"File '{file}' not found in actual raw data. Check file name.\")"
]
},
Expand Down Expand Up @@ -192,11 +199,9 @@
" raw_missing = [col for col in raw_columns if col not in mapped_columns.values]\n",
" mapped_missing = [col for col in mapped_columns if col not in raw_columns.values]\n",
" if raw_missing and raw_check:\n",
" logger.warning(f\"Raw columns {raw_missing} from {file} are not mapped.\")\n",
" logger.warning(f\"{page}: Raw columns {raw_missing} from {file} are not mapped.\")\n",
" if mapped_missing:\n",
" logger.warning(f\"Mapped columns {mapped_missing} do not exist in the raw data file {file}\")\n",
" \n",
" "
" logger.warning(f\"{page}: Mapped columns {mapped_missing} do not exist in the raw data file {file}\")"
]
},
{
Expand All @@ -213,31 +218,41 @@
"## Extractor Check"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pudl.extract.eia860 import Extractor"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## SETTINGS FOR EXTRACTOR\n",
"extractor_phmsagas = Extractor(ds=ds)\n",
"# adjust to your desired data source extractor in the cell above\n",
"extractor = Extractor(ds=ds)\n",
"\n",
"# recommend changing the loglevel here to warning to only get the baddies\n",
"pudl.logging_helpers.configure_root_logger(loglevel=\"WARNING\")\n",
"\n",
"# IF you wanna restrict the years\n",
"working_years = list(range(1990,2023))\n",
"working_years = list(range(2020,2024))\n",
"# IF you want to restrict the pages to extract here is a lil way to do that\n",
"# you give pages_you_want_to_extract a lil of pages you want to extract\n",
"# if pages_you_want_to_extract if nothing, you'll get the standard pages\n",
"pages_you_want_to_extract = []\n",
"all_pages = extractor_phmsagas._metadata.get_all_pages()\n",
"all_pages = extractor._metadata.get_all_pages()\n",
"def _new_page_getter(self):\n",
" if pages_you_want_to_extract:\n",
" return pages_you_want_to_extract\n",
" else:\n",
" return all_pages\n",
"extractor_phmsagas._metadata.get_all_pages = types.MethodType(_new_page_getter, extractor_phmsagas)"
"extractor._metadata.get_all_pages = types.MethodType(_new_page_getter, extractor)"
]
},
{
Expand All @@ -247,7 +262,7 @@
"outputs": [],
"source": [
"## RUN THE EXTRACTOR\n",
"extracted_dfs = extractor_phmsagas.extract(year=working_years)"
"extracted_dfs = extractor.extract(year=working_years)"
]
}
],
Expand All @@ -267,7 +282,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
"version": "3.12.3"
}
},
"nbformat": 4,
Expand Down
10 changes: 10 additions & 0 deletions docs/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ EIA AEO
fuel type, and include both nominal USD per MMBtu as well as real 2022 USD
per MMBtu. See issue :issue:`3649` and PR :pr:`3656`.

EIA 860
~~~~~~~

* Added EIA 860 early release data from 2023. This included adding a new tab with
proposed energy storage generators as well as adding a number of new columns
regarding energy storage and solar generators. See issue :issue:`3676` and PR
:pr:`3681`.



.. _release-v2024.5.0:

---------------------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""add 2023 EIA 860ER storage and solar columns

Revision ID: b9b6cb1a5405
Revises: da38a41d7f99
Create Date: 2024-06-20 10:00:37.339814

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
# ``revision`` and ``down_revision`` mirror the "Revision ID" and "Revises"
# lines of the module docstring above; keep the two in sync.
revision = 'b9b6cb1a5405'
down_revision = 'da38a41d7f99'
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add the 2023 EIA 860ER energy storage and solar columns.

    Adds twelve nullable columns to ``core_eia860__scd_generators_energy_storage``
    and one nullable column to ``core_eia860__scd_generators_solar``. Every change
    is made through ``op.batch_alter_table``.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('core_eia860__scd_generators_energy_storage', schema=None) as batch_op:
        batch_op.add_column(sa.Column('is_ac_coupled', sa.Boolean(), nullable=True, comment='Indicates if this energy storage device is AC-coupled (means the energy storage device and the PV system are not installed on the same side of an inverter).'))
        batch_op.add_column(sa.Column('is_dc_coupled', sa.Boolean(), nullable=True, comment='Indicates if this energy storage device is DC-coupled (means the energy storage device and the PV system are on the same side of an inverter and the battery can still charge from the grid).'))
        # NOTE(review): the 'id_' prefix looks like a typo for 'is_' (the sibling
        # columns are is_ac_coupled / is_dc_coupled). The name is kept as-is because
        # downgrade() drops this exact name -- confirm before renaming anywhere.
        batch_op.add_column(sa.Column('id_dc_coupled_tightly', sa.Boolean(), nullable=True, comment='Indicates if this energy storage device is DC tightly coupled (means the energy storage device and the PV system are on the same side of an inverter and the battery cannot charge from the grid).'))
        batch_op.add_column(sa.Column('is_independent', sa.Boolean(), nullable=True, comment='Indicates if this energy storage device is independent (not coupled with another generators)'))
        batch_op.add_column(sa.Column('is_transmission_and_distribution_asset_support', sa.Boolean(), nullable=True, comment='Indicate if the energy storage system is intended to support a specific substation, transmission or distribution asset.'))
        batch_op.add_column(sa.Column('is_direct_support', sa.Boolean(), nullable=True, comment='Indicates if this energy storage device is intended for dedicated generator firming or storing excess generation of other units.'))
        batch_op.add_column(sa.Column('plant_id_eia_direct_support_1', sa.Float(), nullable=True, comment='The EIA Plant ID of the primary unit whose generation this energy storage device is intended to firm or store.'))
        batch_op.add_column(sa.Column('generator_id_direct_support_1', sa.Text(), nullable=True, comment='The EIA Generator ID of the primary unit whose generation this energy storage device is intended to firm or store.'))
        batch_op.add_column(sa.Column('plant_id_eia_direct_support_2', sa.Float(), nullable=True, comment='The EIA Plant ID of the secondary unit whose generation this energy storage device is intended to firm or store.'))
        batch_op.add_column(sa.Column('generator_id_direct_support_2', sa.Text(), nullable=True, comment='The EIA Generator ID of the secondary unit whose generation this energy storage device is intended to firm or store.'))
        batch_op.add_column(sa.Column('plant_id_eia_direct_support_3', sa.Float(), nullable=True, comment='The EIA Plant ID of the tertiary unit whose generation this energy storage device is intended to firm or store.'))
        batch_op.add_column(sa.Column('generator_id_direct_support_3', sa.Text(), nullable=True, comment='The EIA Generator ID of the tertiary unit whose generation this energy storage device is intended to firm or store.'))

    with op.batch_alter_table('core_eia860__scd_generators_solar', schema=None) as batch_op:
        batch_op.add_column(sa.Column('uses_bifacial_panels', sa.Boolean(), nullable=True, comment='Indicates whether bifacial solar panels are used at this solar generating unit.'))

    # ### end Alembic commands ###


def downgrade() -> None:
    """Drop the 2023 EIA 860ER columns again.

    The solar column is removed first, then the energy storage columns, i.e. the
    exact reverse of the order in which ``upgrade()`` adds them.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # (table name, columns to drop) pairs, listed in the order they are processed.
    columns_to_drop = (
        (
            'core_eia860__scd_generators_solar',
            ('uses_bifacial_panels',),
        ),
        (
            'core_eia860__scd_generators_energy_storage',
            (
                'generator_id_direct_support_3',
                'plant_id_eia_direct_support_3',
                'generator_id_direct_support_2',
                'plant_id_eia_direct_support_2',
                'generator_id_direct_support_1',
                'plant_id_eia_direct_support_1',
                'is_direct_support',
                'is_transmission_and_distribution_asset_support',
                'is_independent',
                'id_dc_coupled_tightly',
                'is_dc_coupled',
                'is_ac_coupled',
            ),
        ),
    )
    for table_name, column_names in columns_to_drop:
        # One batch context per table, exactly as in the autogenerated form.
        with op.batch_alter_table(table_name, schema=None) as batch_op:
            for column_name in column_names:
                batch_op.drop_column(column_name)

    # ### end Alembic commands ###
1 change: 1 addition & 0 deletions src/pudl/extract/eia860.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def get_dtypes(page, **partition):
"raw_eia860__generator_proposed",
"raw_eia860__generator_retired",
"raw_eia860__generator_energy_storage_existing",
"raw_eia860__generator_energy_storage_proposed",
"raw_eia860__generator_energy_storage_retired",
"raw_eia860__generator_solar_existing",
"raw_eia860__generator_solar_retired",
Expand Down
22 changes: 22 additions & 0 deletions src/pudl/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1476,6 +1476,28 @@ def dedupe_and_drop_nas(
)


def drop_records_with_null_pk(
    df: pd.DataFrame, primary_key_col: str, num_of_expected_nulls: int
) -> pd.DataFrame:
    """Drop records with a null primary key value, up to an expected number.

    This guards against silently discarding more data than anticipated: the
    records with a null ``primary_key_col`` are only dropped when there are no
    more of them than the caller says to expect.

    Args:
        df: table with a ``primary_key_col`` column.
        primary_key_col: name of the column which may contain null values.
        num_of_expected_nulls: the maximum number of records with a null
            ``primary_key_col`` that may be dropped.

    Returns:
        A new dataframe without the records whose ``primary_key_col`` is null.

    Raises:
        AssertionError: If there are more nulls in the df than
            ``num_of_expected_nulls``.
    """
    # Make sure there are not more null keys than expected before dropping them.
    null_records = df[df[primary_key_col].isnull()]
    if len(null_records) > num_of_expected_nulls:
        raise AssertionError(
            f"Expected at most {num_of_expected_nulls} records with a null "
            f"{primary_key_col} but found {len(null_records)}:\n{null_records}"
        )
    return df.dropna(subset=[primary_key_col])


def standardize_percentages_ratio(
frac_df: pd.DataFrame,
mixed_cols: list[str],
Expand Down
6 changes: 4 additions & 2 deletions src/pudl/metadata/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1648,8 +1648,10 @@ def enforce_schema(self, df: pd.DataFrame) -> pd.DataFrame:
raise ValueError(
f"{self.name} {len(dupes)}/{len(df)} duplicate primary keys ({pk=}) when enforcing schema."
)
if pk and df.loc[:, pk].isna().any(axis=None):
raise ValueError(f"{self.name} Null values found in primary key columns.")
if pk and not (nulls := df[df[pk].isna().any(axis=1)]).empty:
raise ValueError(
f"{self.name} Null values found in primary key columns.\n{nulls}"
)
cmgosnell marked this conversation as resolved.
Show resolved Hide resolved
return df

def aggregate_df(
Expand Down
89 changes: 89 additions & 0 deletions src/pudl/metadata/fields.py
cmgosnell marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -4825,6 +4825,95 @@
"disaggregated. See documentation for process: pudl.output.ferc1.disaggregate_null_or_total_tag"
),
},
"is_ac_coupled": {
cmgosnell marked this conversation as resolved.
Show resolved Hide resolved
"type": "boolean",
"description": (
"Indicates if this energy storage device is AC-coupled (means the energy storage device "
"and the PV system are not installed on the same side of an inverter)."
),
},
"is_dc_coupled": {
"type": "boolean",
"description": (
"Indicates if this energy storage device is DC-coupled (means the energy storage "
"device and the PV system are on the same side of an inverter and the battery can "
"still charge from the grid)."
),
},
"id_dc_coupled_tightly": {
"type": "boolean",
"description": (
"Indicates if this energy storage device is DC tightly coupled (means the energy "
"storage device and the PV system are on the same side of an inverter and the battery "
"cannot charge from the grid)."
),
},
"is_independent": {
"type": "boolean",
"description": "Indicates if this energy storage device is independent (not coupled with another generators)",
},
"is_direct_support": {
"type": "boolean",
"description": (
"Indicates if this energy storage device is intended for dedicated generator "
"firming or storing excess generation of other units."
),
},
"plant_id_eia_direct_support_1": {
"type": "number",
"description": (
"The EIA Plant ID of the primary unit whose generation this energy storage "
"device is intended to firm or store."
),
},
"generator_id_direct_support_1": {
"type": "string",
"description": (
"The EIA Generator ID of the primary unit whose generation this energy "
"storage device is intended to firm or store."
),
},
"plant_id_eia_direct_support_2": {
"type": "number",
"description": (
"The EIA Plant ID of the secondary unit whose generation this energy storage "
"device is intended to firm or store."
),
},
"generator_id_direct_support_2": {
"type": "string",
"description": (
"The EIA Generator ID of the secondary unit whose generation this energy "
"storage device is intended to firm or store."
),
},
"plant_id_eia_direct_support_3": {
"type": "number",
"description": (
"The EIA Plant ID of the tertiary unit whose generation this energy storage "
"device is intended to firm or store."
),
},
"generator_id_direct_support_3": {
"type": "string",
"description": (
"The EIA Generator ID of the tertiary unit whose generation this energy "
"storage device is intended to firm or store."
),
},
"is_transmission_and_distribution_asset_support": {
"type": "boolean",
"description": (
"Indicate if the energy storage system is intended to support a specific substation, "
"transmission or distribution asset."
),
},
"uses_bifacial_panels": {
"type": "boolean",
"description": (
"Indicates whether bifacial solar panels are used at this solar generating unit."
),
},
}
"""Field attributes by PUDL identifier (`field.name`).

Expand Down
13 changes: 13 additions & 0 deletions src/pudl/metadata/resources/eia860.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,7 @@
"uses_material_thin_film_cigs",
"uses_material_thin_film_other",
"uses_material_other",
"uses_bifacial_panels",
],
"primary_key": [
"plant_id_eia",
Expand Down Expand Up @@ -882,6 +883,18 @@
"served_transmission_and_distribution_deferral",
"served_voltage_or_reactive_power_support",
"stored_excess_wind_and_solar_generation",
"is_ac_coupled",
"is_dc_coupled",
"id_dc_coupled_tightly",
"is_independent",
"is_transmission_and_distribution_asset_support",
"is_direct_support",
cmgosnell marked this conversation as resolved.
Show resolved Hide resolved
"plant_id_eia_direct_support_1",
"generator_id_direct_support_1",
"plant_id_eia_direct_support_2",
"generator_id_direct_support_2",
"plant_id_eia_direct_support_3",
"generator_id_direct_support_3",
],
"primary_key": [
"plant_id_eia",
Expand Down
2 changes: 1 addition & 1 deletion src/pudl/metadata/sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@
CONTRIBUTORS["catalyst-cooperative"],
],
"working_partitions": {
"years": sorted(set(range(2001, 2023))),
"years": sorted(set(range(2001, 2024))),
},
"keywords": sorted(
set(
Expand Down
Loading