diff --git a/migrations/versions/11a43f756905_idk.py b/migrations/versions/11a43f756905_idk.py new file mode 100644 index 0000000000..039d6d78fb --- /dev/null +++ b/migrations/versions/11a43f756905_idk.py @@ -0,0 +1,244 @@ +"""idk + +Revision ID: 11a43f756905 +Revises: 273a78878b74 +Create Date: 2023-09-25 13:06:55.676082 + +""" +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "11a43f756905" +down_revision = "273a78878b74" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table( + "denorm_depreciation_amortization_summary_ferc1", schema=None + ) as batch_op: + batch_op.add_column( + sa.Column( + "utility_type", + sa.Text(), + nullable=True, + comment="Listing of utility plant types. Examples include Electric Utility, Gas Utility, and Other Utility.", + ) + ) + + with op.batch_alter_table( + "denorm_electric_operating_expenses_ferc1", schema=None + ) as batch_op: + batch_op.add_column( + sa.Column( + "utility_type", + sa.Text(), + nullable=True, + comment="Listing of utility plant types. Examples include Electric Utility, Gas Utility, and Other Utility.", + ) + ) + + with op.batch_alter_table( + "denorm_electric_operating_revenues_ferc1", schema=None + ) as batch_op: + batch_op.add_column( + sa.Column( + "utility_type", + sa.Text(), + nullable=True, + comment="Listing of utility plant types. Examples include Electric Utility, Gas Utility, and Other Utility.", + ) + ) + + with op.batch_alter_table( + "denorm_electric_plant_depreciation_functional_ferc1", schema=None + ) as batch_op: + batch_op.add_column( + sa.Column( + "depreciation_type", + sa.Text(), + nullable=True, + comment="Type of depreciation provision within FERC Account 108, including cost ofremoval, depreciation expenses, salvage, cost of retired plant, etc.", + ) + ) + batch_op.drop_column("ferc_account") + + with op.batch_alter_table("denorm_plant_in_service_ferc1", schema=None) as batch_op: + batch_op.add_column( + sa.Column( + "utility_type", + sa.Text(), + nullable=True, + comment="Listing of utility plant types. Examples include Electric Utility, Gas Utility, and Other Utility.", + ) + ) + + with op.batch_alter_table("denorm_purchased_power_ferc1", schema=None) as batch_op: + batch_op.add_column( + sa.Column( + "purchased_storage_mwh", + sa.Float(), + nullable=True, + comment="Number of megawatt hours purchased during the period for energy storage.", + ) + ) + batch_op.add_column( + sa.Column( + "purchased_other_than_storage_mwh", + sa.Float(), + nullable=True, + comment="Number of megawatt hours purchased during the period for other than energy storage.", + ) + ) + + with op.batch_alter_table( + "depreciation_amortization_summary_ferc1", schema=None + ) as batch_op: + batch_op.add_column( + sa.Column( + "utility_type", + sa.Text(), + nullable=True, + comment="Listing of utility plant types. Examples include Electric Utility, Gas Utility, and Other Utility.", + ) + ) + batch_op.add_column( + sa.Column( + "row_type_xbrl", + sa.Enum("calculated_value", "reported_value", "correction"), + nullable=True, + comment="Indicates whether the value reported in the row is calculated, or uniquely reported within the table.", + ) + ) + + with op.batch_alter_table( + "electric_operating_expenses_ferc1", schema=None + ) as batch_op: + batch_op.add_column( + sa.Column( + "utility_type", + sa.Text(), + nullable=True, + comment="Listing of utility plant types. Examples include Electric Utility, Gas Utility, and Other Utility.", + ) + ) + + with op.batch_alter_table( + "electric_operating_revenues_ferc1", schema=None + ) as batch_op: + batch_op.add_column( + sa.Column( + "utility_type", + sa.Text(), + nullable=True, + comment="Listing of utility plant types. Examples include Electric Utility, Gas Utility, and Other Utility.", + ) + ) + + with op.batch_alter_table( + "electric_plant_depreciation_functional_ferc1", schema=None + ) as batch_op: + batch_op.add_column( + sa.Column( + "depreciation_type", + sa.Text(), + nullable=True, + comment="Type of depreciation provision within FERC Account 108, including cost ofremoval, depreciation expenses, salvage, cost of retired plant, etc.", + ) + ) + batch_op.drop_column("ferc_account") + + with op.batch_alter_table("plant_in_service_ferc1", schema=None) as batch_op: + batch_op.add_column( + sa.Column( + "utility_type", + sa.Text(), + nullable=True, + comment="Listing of utility plant types. Examples include Electric Utility, Gas Utility, and Other Utility.", + ) + ) + + with op.batch_alter_table("purchased_power_ferc1", schema=None) as batch_op: + batch_op.add_column( + sa.Column( + "purchased_storage_mwh", + sa.Float(), + nullable=True, + comment="Number of megawatt hours purchased during the period for energy storage.", + ) + ) + batch_op.add_column( + sa.Column( + "purchased_other_than_storage_mwh", + sa.Float(), + nullable=True, + comment="Number of megawatt hours purchased during the period for other than energy storage.", + ) + ) + + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("purchased_power_ferc1", schema=None) as batch_op: + batch_op.drop_column("purchased_other_than_storage_mwh") + batch_op.drop_column("purchased_storage_mwh") + + with op.batch_alter_table("plant_in_service_ferc1", schema=None) as batch_op: + batch_op.drop_column("utility_type") + + with op.batch_alter_table( + "electric_plant_depreciation_functional_ferc1", schema=None + ) as batch_op: + batch_op.add_column(sa.Column("ferc_account", sa.TEXT(), nullable=True)) + batch_op.drop_column("depreciation_type") + + with op.batch_alter_table( + "electric_operating_revenues_ferc1", schema=None + ) as batch_op: + batch_op.drop_column("utility_type") + + with op.batch_alter_table( + "electric_operating_expenses_ferc1", schema=None + ) as batch_op: + batch_op.drop_column("utility_type") + + with op.batch_alter_table( + "depreciation_amortization_summary_ferc1", schema=None + ) as batch_op: + batch_op.drop_column("row_type_xbrl") + batch_op.drop_column("utility_type") + + with op.batch_alter_table("denorm_purchased_power_ferc1", schema=None) as batch_op: + batch_op.drop_column("purchased_other_than_storage_mwh") + batch_op.drop_column("purchased_storage_mwh") + + with op.batch_alter_table("denorm_plant_in_service_ferc1", schema=None) as batch_op: + batch_op.drop_column("utility_type") + + with op.batch_alter_table( + "denorm_electric_plant_depreciation_functional_ferc1", schema=None + ) as batch_op: + batch_op.add_column(sa.Column("ferc_account", sa.TEXT(), nullable=True)) + batch_op.drop_column("depreciation_type") + + with op.batch_alter_table( + "denorm_electric_operating_revenues_ferc1", schema=None + ) as batch_op: + batch_op.drop_column("utility_type") + + with op.batch_alter_table( + "denorm_electric_operating_expenses_ferc1", schema=None + ) as batch_op: + batch_op.drop_column("utility_type") + + with op.batch_alter_table( + "denorm_depreciation_amortization_summary_ferc1", schema=None + ) as batch_op: + batch_op.drop_column("utility_type") + + # ### end Alembic commands ### diff --git a/migrations/versions/273a78878b74_purchased_storage_mwh.py b/migrations/versions/273a78878b74_purchased_storage_mwh.py index 78c2d90c86..911c752c69 100644 --- a/migrations/versions/273a78878b74_purchased_storage_mwh.py +++ b/migrations/versions/273a78878b74_purchased_storage_mwh.py @@ -9,33 +9,61 @@ from alembic import op # revision identifiers, used by Alembic. -revision = '273a78878b74' -down_revision = 'b5226cb31143' +revision = "273a78878b74" +down_revision = "b5226cb31143" branch_labels = None depends_on = None def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('denorm_purchased_power_ferc1', schema=None) as batch_op: - batch_op.add_column(sa.Column('purchased_storage_mwh', sa.Float(), nullable=True, comment='Number of megawatt hours purchased for energy storage during the period.')) - batch_op.add_column(sa.Column('purchased_other_than_storage_mwh', sa.Float(), nullable=True, comment='Number of megawatt hours purchased for other than energy storage during the period.')) + with op.batch_alter_table("denorm_purchased_power_ferc1", schema=None) as batch_op: + batch_op.add_column( + sa.Column( + "purchased_storage_mwh", + sa.Float(), + nullable=True, + comment="Number of megawatt hours purchased for energy storage during the period.", + ) + ) + batch_op.add_column( + sa.Column( + "purchased_other_than_storage_mwh", + sa.Float(), + nullable=True, + comment="Number of megawatt hours purchased for other than energy storage during the period.", + ) + ) - with op.batch_alter_table('purchased_power_ferc1', schema=None) as batch_op: - batch_op.add_column(sa.Column('purchased_storage_mwh', sa.Float(), nullable=True, comment='Number of megawatt hours purchased for energy storage during the period.')) - batch_op.add_column(sa.Column('purchased_other_than_storage_mwh', sa.Float(), nullable=True, comment='Number of megawatt hours purchased for other than energy storage during the period.')) + with op.batch_alter_table("purchased_power_ferc1", schema=None) as batch_op: + batch_op.add_column( + sa.Column( + "purchased_storage_mwh", + sa.Float(), + nullable=True, + comment="Number of megawatt hours purchased for energy storage during the period.", + ) + ) + batch_op.add_column( + sa.Column( + "purchased_other_than_storage_mwh", + sa.Float(), + nullable=True, + comment="Number of megawatt hours purchased for other than energy storage during the period.", + ) + ) # ### end Alembic commands ### def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('purchased_power_ferc1', schema=None) as batch_op: - batch_op.drop_column('purchased_other_than_storage_mwh') - batch_op.drop_column('purchased_storage_mwh') + with op.batch_alter_table("purchased_power_ferc1", schema=None) as batch_op: + batch_op.drop_column("purchased_other_than_storage_mwh") + batch_op.drop_column("purchased_storage_mwh") - with op.batch_alter_table('denorm_purchased_power_ferc1', schema=None) as batch_op: - batch_op.drop_column('purchased_other_than_storage_mwh') - batch_op.drop_column('purchased_storage_mwh') + with op.batch_alter_table("denorm_purchased_power_ferc1", schema=None) as batch_op: + batch_op.drop_column("purchased_other_than_storage_mwh") + batch_op.drop_column("purchased_storage_mwh") # ### end Alembic commands ### diff --git a/src/pudl/analysis/ferc1_eia_train.py b/src/pudl/analysis/ferc1_eia_train.py index 2ff798bb8e..23f69c387c 100644 --- a/src/pudl/analysis/ferc1_eia_train.py +++ b/src/pudl/analysis/ferc1_eia_train.py @@ -163,7 +163,6 @@ def _prep_ferc1_eia(ferc1_eia, utils_eia860) -> pd.DataFrame: logger.debug("Prepping FERC-EIA table") # Only want to keep the plant_name_ppe field which replaces plant_name_eia ferc1_eia_prep = ferc1_eia.copy().drop(columns="plant_name_eia") - # Add utility_name_eia - this must happen before renaming the cols or else there # will be duplicate utility_name_eia columns. utils_eia860.loc[:, "report_year"] = utils_eia860.report_date.dt.year @@ -183,23 +182,24 @@ def _prep_ferc1_eia(ferc1_eia, utils_eia860) -> pd.DataFrame: ferc1_eia_prep = ferc1_eia_prep.rename(columns=RENAME_COLS_FERC1_EIA)[ list(RENAME_COLS_FERC1_EIA.values()) ] - # Add in pct diff values for pct_diff_col in [x for x in RENAME_COLS_FERC1_EIA.values() if "_pct_diff" in x]: ferc1_eia_prep = _pct_diff(ferc1_eia_prep, pct_diff_col) - # Add in fuel_type_code_pudl diff (qualitative bool) - ferc1_eia_prep.loc[ + ferc1_eia_prep["fuel_type_code_pudl_diff"] = False + ferc1_eia_prep_nona = ferc1_eia_prep[ ferc1_eia_prep.fuel_type_code_pudl_eia.notna() - & ferc1_eia_prep.fuel_type_code_pudl_ferc1.notna(), - "fuel_type_code_pudl_diff", - ] = ferc1_eia_prep.fuel_type_code_pudl_eia == ( - ferc1_eia_prep.fuel_type_code_pudl_ferc1 + & ferc1_eia_prep.fuel_type_code_pudl_ferc1.notna() + ].copy() + ferc1_eia_prep_nona["fuel_type_code_pudl_diff"] = ( + ferc1_eia_prep_nona.fuel_type_code_pudl_eia + == ferc1_eia_prep_nona.fuel_type_code_pudl_ferc1 ) + ferc1_eia_prep.update(ferc1_eia_prep_nona) # Add in installation_year diff (diff vs. pct_diff) ferc1_eia_prep.loc[ - :, "installation_year_ferc1" + ferc1_eia_prep.installation_year_ferc1.notna(), "installation_year_ferc1" ] = ferc1_eia_prep.installation_year_ferc1.astype("Int64") ferc1_eia_prep.loc[ @@ -212,7 +212,6 @@ def _prep_ferc1_eia(ferc1_eia, utils_eia860) -> pd.DataFrame: # Add best match col ferc1_eia_prep = _is_best_match(ferc1_eia_prep) - return ferc1_eia_prep diff --git a/src/pudl/package_data/glue/pudl_id_mapping.xlsx b/src/pudl/package_data/glue/pudl_id_mapping.xlsx index 1485284aeb..3e7f3cb92d 100644 Binary files a/src/pudl/package_data/glue/pudl_id_mapping.xlsx and b/src/pudl/package_data/glue/pudl_id_mapping.xlsx differ diff --git a/test/validate/ferc1_test.py b/test/validate/ferc1_test.py index ffebc697dc..cc42e1ee44 100644 --- a/test/validate/ferc1_test.py +++ b/test/validate/ferc1_test.py @@ -84,16 +84,16 @@ def test_no_null_cols_ferc1(pudl_out_ferc1, live_dbs, cols, df_name): @pytest.mark.parametrize( "df_name,expected_rows", [ - ("fbp_ferc1", 25_421), - ("fuel_ferc1", 48_841), - ("plant_in_service_ferc1", 315_206), - ("plants_all_ferc1", 54_284), + ("fbp_ferc1", 25_423), + ("fuel_ferc1", 48_843), + ("plant_in_service_ferc1", 315_208), + ("plants_all_ferc1", 54_384), ("plants_hydro_ferc1", 6_796), ("plants_pumped_storage_ferc1", 544), ("plants_small_ferc1", 16_235), - ("plants_steam_ferc1", 30_709), + ("plants_steam_ferc1", 30_809), ("pu_ferc1", 7_425), - ("purchased_power_ferc1", 197_523), + ("purchased_power_ferc1", 197_665), ], ) def test_minmax_rows(pudl_out_ferc1, live_dbs, expected_rows, df_name):