From 3a9548c43e0abbe60cac9534e04da69843a5a6c6 Mon Sep 17 00:00:00 2001
From: Christina Gosnell <cgosnell@catalyst.coop>
Date: Thu, 4 Jan 2024 13:25:19 -0500
Subject: [PATCH 01/17] ensure all the corrections get tags and add the
 beginning of a rate base asset

---
 src/pudl/output/ferc1.py                      | 39 ++++++++++++++++++-
 .../ferc1/xbrl_factoid_plant_status_tags.csv  | 10 -----
 2 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/src/pudl/output/ferc1.py b/src/pudl/output/ferc1.py
index edf6f68247..509b3f047e 100644
--- a/src/pudl/output/ferc1.py
+++ b/src/pudl/output/ferc1.py
@@ -1191,7 +1191,22 @@ def _out_ferc1__explosion_tags(table_dimensions_ferc1) -> pd.DataFrame:
         .reset_index()
         .drop(columns=["notes"])
     )
-    return tags_all
+    # Add the correction records to the tags with the same tags as the parent
+    idx = list(NodeId._fields)
+    correction_index = (
+        table_dimensions_ferc1[
+            ~table_dimensions_ferc1.xbrl_factoid.str.endswith("_correction")
+        ]
+        .set_index(idx)
+        .index
+    )
+    corrections = tags_all.set_index(idx)
+    corrections = (
+        corrections.loc[corrections.index.intersection(correction_index)]
+        .reset_index()
+        .assign(xbrl_factoid=lambda x: x.xbrl_factoid + "_correction")
+    )
+    return pd.concat([tags_all, corrections])
 
 
 def _get_tags(file_name: str, table_dimensions_ferc1: pd.DataFrame) -> pd.DataFrame:
@@ -1236,7 +1251,10 @@ def _aggregatable_dimension_tags(
         )
         .set_index(idx)
     )
-    table_dimensions_ferc1 = table_dimensions_ferc1.set_index(idx)
+    # don't include the corrections
+    table_dimensions_ferc1 = table_dimensions_ferc1[
+        ~table_dimensions_ferc1.xbrl_factoid.str.endswith("_correction")
+    ].set_index(idx)
     tags_df = pd.concat(
         [
             tags_df,
@@ -2774,3 +2792,20 @@ def nodes_to_df(calc_forest: nx.DiGraph, nodes: list[NodeId]) -> pd.DataFrame:
     except AttributeError:
         tags = pd.DataFrame()
     return pd.concat([index, tags], axis="columns")
+
+
+def out_ferc1__yearly_rate_base(
+    exploded_balance_sheet_assets_ferc1, exploded_balance_sheet_liabilities_ferc1
+):
+    """Make a table of only rate-base data."""
+    in_rate_base = pd.concat(
+        [
+            exploded_balance_sheet_assets_ferc1[
+                exploded_balance_sheet_assets_ferc1.tags_in_rate_base == "yes"
+            ],
+            exploded_balance_sheet_liabilities_ferc1[
+                exploded_balance_sheet_liabilities_ferc1.tags_in_rate_base == "yes"
+            ],
+        ]
+    ).drop(columns=["tags_in_rate_base"])
+    return in_rate_base
diff --git a/src/pudl/package_data/ferc1/xbrl_factoid_plant_status_tags.csv b/src/pudl/package_data/ferc1/xbrl_factoid_plant_status_tags.csv
index b7df41c279..59fb27acf9 100644
--- a/src/pudl/package_data/ferc1/xbrl_factoid_plant_status_tags.csv
+++ b/src/pudl/package_data/ferc1/xbrl_factoid_plant_status_tags.csv
@@ -26,14 +26,4 @@ core_ferc1__yearly_utility_plant_summary_sched200,depreciation_and_amortization_
 core_ferc1__yearly_utility_plant_summary_sched200,abandonment_of_leases,total
 core_ferc1__yearly_utility_plant_summary_sched200,amortization_of_plant_acquisition_adjustment,total
 core_ferc1__yearly_utility_plant_summary_sched200,utility_plant_in_service_classified_and_property_under_capital_leases,in_service
-core_ferc1__yearly_utility_plant_summary_sched200,utility_plant_in_service_plant_purchased_or_sold_correction,in_service
-core_ferc1__yearly_utility_plant_summary_sched200,utility_plant_in_service_experimental_plant_unclassified_correction,in_service
-core_ferc1__yearly_utility_plant_summary_sched200,utility_plant_in_service_classified_and_unclassified_correction,in_service
-core_ferc1__yearly_utility_plant_summary_sched200,utility_plant_and_construction_work_in_progress_correction,construction_work_in_progress
-core_ferc1__yearly_utility_plant_summary_sched200,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility_correction,total
-core_ferc1__yearly_utility_plant_summary_sched200,utility_plant_net_correction,total
-core_ferc1__yearly_utility_plant_summary_sched200,depreciation_utility_plant_in_service_correction,in_service
-core_ferc1__yearly_utility_plant_summary_sched200,depreciation_amortization_and_depletion_utility_plant_leased_to_others_correction,leased
-core_ferc1__yearly_utility_plant_summary_sched200,depreciation_and_amortization_utility_plant_held_for_future_use_correction,future
-core_ferc1__yearly_utility_plant_summary_sched200,utility_plant_in_service_classified_and_property_under_capital_leases_correction,in_service
 core_ferc1__yearly_utility_plant_summary_sched200,abandonment_of_leases,leased

From f33aa8217881a8faeb27702ee09e1f2dabc15fa0 Mon Sep 17 00:00:00 2001
From: Christina Gosnell <cgosnell@catalyst.coop>
Date: Wed, 10 Jan 2024 11:33:25 -0500
Subject: [PATCH 02/17] Add in cash on hand as an additional factoid into rate
 base table

---
 src/pudl/output/ferc1.py | 60 ++++++++++++++++++++++++++++++++--------
 1 file changed, 49 insertions(+), 11 deletions(-)

diff --git a/src/pudl/output/ferc1.py b/src/pudl/output/ferc1.py
index 509b3f047e..a51cacee80 100644
--- a/src/pudl/output/ferc1.py
+++ b/src/pudl/output/ferc1.py
@@ -2795,17 +2795,55 @@ def nodes_to_df(calc_forest: nx.DiGraph, nodes: list[NodeId]) -> pd.DataFrame:
 
 
 def out_ferc1__yearly_rate_base(
-    exploded_balance_sheet_assets_ferc1, exploded_balance_sheet_liabilities_ferc1
-):
+    exploded_balance_sheet_assets_ferc1: pd.DataFrame,
+    exploded_balance_sheet_liabilities_ferc1: pd.DataFrame,
+    core_ferc1__yearly_operating_expenses_sched320: pd.DataFrame,
+) -> pd.DataFrame:
     """Make a table of only rate-base data."""
-    in_rate_base = pd.concat(
-        [
-            exploded_balance_sheet_assets_ferc1[
-                exploded_balance_sheet_assets_ferc1.tags_in_rate_base == "yes"
-            ],
-            exploded_balance_sheet_liabilities_ferc1[
-                exploded_balance_sheet_liabilities_ferc1.tags_in_rate_base == "yes"
-            ],
+    # First grab the cash on hand out of the operating expense table.
+    xbrl_factoid_name = pudl.transform.ferc1.FERC1_TFR_CLASSES[
+        "core_ferc1__yearly_operating_expenses_sched320"
+    ]().params.xbrl_factoid_name
+    pks = pudl.metadata.classes.Resource.from_id(
+        "core_ferc1__yearly_operating_expenses_sched320"
+    ).schema.primary_key
+    cash_working_capital = (
+        core_ferc1__yearly_operating_expenses_sched320[
+            core_ferc1__yearly_operating_expenses_sched320[xbrl_factoid_name].isin(
+                [
+                    f"operations_and_maintenance_expenses_electric{suffix}"
+                    for suffix in ["", "_correction"]
+                ]
+            )
         ]
-    ).drop(columns=["tags_in_rate_base"])
+        .groupby(pks + ["utility_type"], as_index=False)[["dollar_value"]]
+        .sum(min_count=1)
+        .assign(
+            dollar_value=lambda x: x.dollar_value / 8,
+            xbrl_factoid="cash_on_hand",  # newly definied (do we need to add it anywhere?)
+            tags_rate_base_category="net_working_capital",
+            tags_aggregatable_utility_type="electric",
+            table_name="core_ferc1__yearly_operating_expenses_sched320",
+        )
+        .drop(columns=[xbrl_factoid_name])
+        .rename(columns={"dollar_value": "ending_balance"})
+    )
+    # then select only the leafy exploded records that are in rate base and concat
+    in_rate_base = (
+        pd.concat(
+            [
+                exploded_balance_sheet_assets_ferc1[
+                    exploded_balance_sheet_assets_ferc1.tags_in_rate_base == "yes"
+                ],
+                exploded_balance_sheet_liabilities_ferc1[
+                    exploded_balance_sheet_liabilities_ferc1.tags_in_rate_base == "yes"
+                ],
+                cash_working_capital,
+            ]
+        )
+        .drop(columns=["tags_in_rate_base"])
+        .sort_values(
+            by=["report_year", "utility_id_ferc1", "table_name"], ascending=False
+        )
+    )
     return in_rate_base

From ea8301e5853ca816af9b804c100e91e1f41be2eb Mon Sep 17 00:00:00 2001
From: Christina Gosnell <cgosnell@catalyst.coop>
Date: Wed, 10 Jan 2024 16:35:20 -0500
Subject: [PATCH 03/17] add documentation for rate base table

---
 src/pudl/output/ferc1.py | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/pudl/output/ferc1.py b/src/pudl/output/ferc1.py
index a51cacee80..bd4de0e425 100644
--- a/src/pudl/output/ferc1.py
+++ b/src/pudl/output/ferc1.py
@@ -2799,7 +2799,25 @@ def out_ferc1__yearly_rate_base(
     exploded_balance_sheet_liabilities_ferc1: pd.DataFrame,
     core_ferc1__yearly_operating_expenses_sched320: pd.DataFrame,
 ) -> pd.DataFrame:
-    """Make a table of only rate-base data."""
+    """Make a table of granular utility rate-base data.
+
+    This table contains granular data consisting of what utilities can
+    include in their rate bases. This information comes from two core
+    inputs: ``exploded_balance_sheet_assets_ferc1`` and
+    ``exploded_balance_sheet_liabilities_ferc1``. These tables include granular
+    data from the nested calculations that are build into the accounting tables.
+    See :class:`Exploder` for more details.
+
+    This rate base table also contains one specific addition from
+    :ref:`core_ferc1__yearly_operating_expenses_sched320`. In standard ratemaking
+    processes, utilities are enabled to include working capital - sometimes referred
+    to as cash on hand or cash reverves. A standard ratemaking process is to consider
+    the available rate-baseable working capital to be one eigth of the average
+    operations and maintenance expense. This function grabs that expense and
+    concatenates it with the rest of the assets and liabilities from the granular
+    exploded data.
+
+    """
     # First grab the cash on hand out of the operating expense table.
     xbrl_factoid_name = pudl.transform.ferc1.FERC1_TFR_CLASSES[
         "core_ferc1__yearly_operating_expenses_sched320"
@@ -2807,6 +2825,8 @@ def out_ferc1__yearly_rate_base(
     pks = pudl.metadata.classes.Resource.from_id(
         "core_ferc1__yearly_operating_expenses_sched320"
     ).schema.primary_key
+    # grab the factoid and its correction records - then group them together
+    # to produce on cash_on_hand factoid to concat
     cash_working_capital = (
         core_ferc1__yearly_operating_expenses_sched320[
             core_ferc1__yearly_operating_expenses_sched320[xbrl_factoid_name].isin(
@@ -2826,6 +2846,7 @@ def out_ferc1__yearly_rate_base(
             table_name="core_ferc1__yearly_operating_expenses_sched320",
         )
         .drop(columns=[xbrl_factoid_name])
+        # the assets/liabilites both use ending_balance for its main $$ column
         .rename(columns={"dollar_value": "ending_balance"})
     )
     # then select only the leafy exploded records that are in rate base and concat

From 24cf1cfb21554777b5b50b8a8e6fefc09e908478 Mon Sep 17 00:00:00 2001
From: Christina Gosnell <cgosnell@catalyst.coop>
Date: Wed, 10 Jan 2024 16:45:43 -0500
Subject: [PATCH 04/17] remove _correction record from the expense.

The correction corrects the calculations of the parent
(operating expense) and its child subcomponents. If we were calculating
the expense I would want to include the correction, but I don't want it if
we are just grabbing the reported value.
---
 src/pudl/output/ferc1.py | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/src/pudl/output/ferc1.py b/src/pudl/output/ferc1.py
index bd4de0e425..b5523440dd 100644
--- a/src/pudl/output/ferc1.py
+++ b/src/pudl/output/ferc1.py
@@ -2818,26 +2818,17 @@ def out_ferc1__yearly_rate_base(
     exploded data.
 
     """
-    # First grab the cash on hand out of the operating expense table.
+    # get the factoid name to grab the right part of the table
     xbrl_factoid_name = pudl.transform.ferc1.FERC1_TFR_CLASSES[
         "core_ferc1__yearly_operating_expenses_sched320"
     ]().params.xbrl_factoid_name
-    pks = pudl.metadata.classes.Resource.from_id(
-        "core_ferc1__yearly_operating_expenses_sched320"
-    ).schema.primary_key
-    # grab the factoid and its correction records - then group them together
-    # to produce on cash_on_hand factoid to concat
+    # First grab the cash on hand out of the operating expense table.
+    # then prep it for concating. Calculate cash on hand & add tags
     cash_working_capital = (
         core_ferc1__yearly_operating_expenses_sched320[
-            core_ferc1__yearly_operating_expenses_sched320[xbrl_factoid_name].isin(
-                [
-                    f"operations_and_maintenance_expenses_electric{suffix}"
-                    for suffix in ["", "_correction"]
-                ]
-            )
+            core_ferc1__yearly_operating_expenses_sched320[xbrl_factoid_name]
+            == "operations_and_maintenance_expenses_electric"
         ]
-        .groupby(pks + ["utility_type"], as_index=False)[["dollar_value"]]
-        .sum(min_count=1)
         .assign(
             dollar_value=lambda x: x.dollar_value / 8,
             xbrl_factoid="cash_on_hand",  # newly definied (do we need to add it anywhere?)

From 6d41c5cd0326c6fcc7043be613e5800a890b45ee Mon Sep 17 00:00:00 2001
From: Christina Gosnell <cgosnell@catalyst.coop>
Date: Tue, 16 Jan 2024 11:00:12 -0500
Subject: [PATCH 05/17] attempt to associate tags with _correction factoids
 when all child calc components have same tags

---
 src/pudl/output/ferc1.py | 109 +++++++++++++++++++++++++++++----------
 1 file changed, 82 insertions(+), 27 deletions(-)

diff --git a/src/pudl/output/ferc1.py b/src/pudl/output/ferc1.py
index b5523440dd..6955f00317 100644
--- a/src/pudl/output/ferc1.py
+++ b/src/pudl/output/ferc1.py
@@ -1154,7 +1154,10 @@ class OffByFactoid(NamedTuple):
 
 
 @asset
-def _out_ferc1__explosion_tags(table_dimensions_ferc1) -> pd.DataFrame:
+def _out_ferc1__explosion_tags(
+    table_dimensions_ferc1: pd.DataFrame,
+    calculation_components_xbrl_ferc1: pd.DataFrame,
+) -> pd.DataFrame:
     """Grab the stored tables of tags and add inferred dimension."""
     rate_tags = _get_tags("xbrl_factoid_rate_base_tags.csv", table_dimensions_ferc1)
     rev_req_tags = _get_tags(
@@ -1180,9 +1183,10 @@ def _out_ferc1__explosion_tags(table_dimensions_ferc1) -> pd.DataFrame:
         plant_function_tags,
         utility_type_tags,
     ]
-    tags_all = (
+    tag_idx = list(NodeId._fields)
+    tags = (
         pd.concat(
-            [df.set_index(list(NodeId._fields)) for df in tag_dfs],
+            [df.set_index(tag_idx) for df in tag_dfs],
             join="outer",
             verify_integrity=True,
             ignore_index=False,
@@ -1191,22 +1195,10 @@ def _out_ferc1__explosion_tags(table_dimensions_ferc1) -> pd.DataFrame:
         .reset_index()
         .drop(columns=["notes"])
     )
-    # Add the correction records to the tags with the same tags as the parent
-    idx = list(NodeId._fields)
-    correction_index = (
-        table_dimensions_ferc1[
-            ~table_dimensions_ferc1.xbrl_factoid.str.endswith("_correction")
-        ]
-        .set_index(idx)
-        .index
-    )
-    corrections = tags_all.set_index(idx)
-    corrections = (
-        corrections.loc[corrections.index.intersection(correction_index)]
-        .reset_index()
-        .assign(xbrl_factoid=lambda x: x.xbrl_factoid + "_correction")
-    )
-    return pd.concat([tags_all, corrections])
+    # Add the correction records to the tags...
+    corrections = make_correction_tags(tags, calculation_components_xbrl_ferc1)
+    tags = pd.concat([tags, corrections])
+    return tags
 
 
 def _get_tags(file_name: str, table_dimensions_ferc1: pd.DataFrame) -> pd.DataFrame:
@@ -1251,7 +1243,7 @@ def _aggregatable_dimension_tags(
         )
         .set_index(idx)
     )
-    # don't include the corrections
+    # don't include the corrections because we will add those in later
     table_dimensions_ferc1 = table_dimensions_ferc1[
         ~table_dimensions_ferc1.xbrl_factoid.str.endswith("_correction")
     ].set_index(idx)
@@ -1267,6 +1259,64 @@ def _aggregatable_dimension_tags(
     return tags_df[tags_df[aggregatable_col] != "total"]
 
 
+def make_correction_tags(
+    tags_all: pd.DataFrame, calc_components: pd.DataFrame
+) -> pd.DataFrame:
+    """Make tags for correction records.
+
+    We need to check to see if any of the tags in each of the calculated
+    parent factoids are the same for all of their child components. So in this
+    function, we're going to merge on the tags to the children then groupby the
+    parents. For each tag, see if the childrens'tags contains only one unique value.
+    If so grab the tag to associate with the correction record of the parent. If not,
+    no tag will be associated with the record.
+    """
+    tag_idx = list(NodeId._fields)
+    calcs_w_tags = (
+        pd.merge(  # remove the correction records bc those are the ones we want to
+            calc_components[~calc_components.xbrl_factoid.str.contains("_correction")],
+            tags_all,
+            on=tag_idx,
+            how="left",
+            validate="m:1",
+        )
+    )
+    # use the same groupby to get the number of unique tags and the first one
+    # we will only use the first tag if the tags are unique
+    tag_cols = list(tags_all.drop(columns=tag_idx).columns)
+    tag_gb = calcs_w_tags.groupby([f"{c}_parent" for c in tag_idx], dropna=False)[
+        tag_cols
+    ]
+    tag_check = pd.merge(
+        tag_gb.nunique(
+            dropna=False
+        ),  # bc if null and non-null tag we want to know that
+        tag_gb.first(),
+        right_index=True,
+        left_index=True,
+        suffixes=("_n", ""),
+        validate="1:1",
+    )
+    # null out all of the tags that have non-unique tags for each parent
+    for col in tag_cols:
+        non_unique_mask = tag_check[f"{col}_n"] != 1
+        tag_check.loc[non_unique_mask, col] = pd.NA
+    # specifically for in_rate_base assign partial when it is a mix
+    tag_check.loc[tag_check["in_rate_base_n"] > 1, "in_rate_base"] = "partial"
+    # remove the fully null tags bc there's nothing new in there and
+    # drop all of the _n columns
+    tag_check = tag_check.dropna(how="all", subset=tag_cols)[tag_cols]
+    # remove the parent from the index name
+    tag_check.index.names = [
+        col.removesuffix("_parent") for col in tag_check.index.names
+    ]
+    correction_tags = tag_check.reset_index().assign(
+        xbrl_factoid=lambda x: x.xbrl_factoid + "_correction"
+    )
+    logger.info(f"Found {len(correction_tags)=}")
+    return correction_tags
+
+
 def exploded_table_asset_factory(
     root_table: str,
     table_names: list[str],
@@ -2794,6 +2844,7 @@ def nodes_to_df(calc_forest: nx.DiGraph, nodes: list[NodeId]) -> pd.DataFrame:
     return pd.concat([index, tags], axis="columns")
 
 
+@asset
 def out_ferc1__yearly_rate_base(
     exploded_balance_sheet_assets_ferc1: pd.DataFrame,
     exploded_balance_sheet_liabilities_ferc1: pd.DataFrame,
@@ -2822,16 +2873,16 @@ def out_ferc1__yearly_rate_base(
     xbrl_factoid_name = pudl.transform.ferc1.FERC1_TFR_CLASSES[
         "core_ferc1__yearly_operating_expenses_sched320"
     ]().params.xbrl_factoid_name
-    # First grab the cash on hand out of the operating expense table.
-    # then prep it for concating. Calculate cash on hand & add tags
+    # First grab the working capital out of the operating expense table.
+    # then prep it for concating. Calculate working capital & add tags
     cash_working_capital = (
         core_ferc1__yearly_operating_expenses_sched320[
             core_ferc1__yearly_operating_expenses_sched320[xbrl_factoid_name]
             == "operations_and_maintenance_expenses_electric"
         ]
         .assign(
-            dollar_value=lambda x: x.dollar_value / 8,
-            xbrl_factoid="cash_on_hand",  # newly definied (do we need to add it anywhere?)
+            dollar_value=lambda x: x.dollar_value.divide(8),
+            xbrl_factoid="cash_working_capital",  # newly definied (do we need to add it anywhere?)
             tags_rate_base_category="net_working_capital",
             tags_aggregatable_utility_type="electric",
             table_name="core_ferc1__yearly_operating_expenses_sched320",
@@ -2845,15 +2896,19 @@ def out_ferc1__yearly_rate_base(
         pd.concat(
             [
                 exploded_balance_sheet_assets_ferc1[
-                    exploded_balance_sheet_assets_ferc1.tags_in_rate_base == "yes"
+                    exploded_balance_sheet_assets_ferc1.tags_in_rate_base.isin(
+                        ["yes", "partial"]
+                    )
                 ],
                 exploded_balance_sheet_liabilities_ferc1[
-                    exploded_balance_sheet_liabilities_ferc1.tags_in_rate_base == "yes"
+                    exploded_balance_sheet_liabilities_ferc1.tags_in_rate_base.isin(
+                        ["yes", "partial"]
+                    )
                 ],
                 cash_working_capital,
             ]
         )
-        .drop(columns=["tags_in_rate_base"])
+        # .drop(columns=["tags_in_rate_base"])
         .sort_values(
             by=["report_year", "utility_id_ferc1", "table_name"], ascending=False
         )

From 80ccf5d8ceceb20747dd14ba6c559da9fb678215 Mon Sep 17 00:00:00 2001
From: Dazhong Xia <dazhong.xia@catalyst.coop>
Date: Wed, 17 Jan 2024 16:21:19 -0500
Subject: [PATCH 06/17] Add a simple XbrlCalculationForest test.

Just see if we can get an annotated forest at all right now. TODO: test for tag propagation behavior.

Co-authored-by: Christina Gosnell <cmgosnell@users.noreply.github.com>
---
 src/pudl/output/ferc1.py       | 16 +++---
 test/unit/output/ferc1_test.py | 99 ++++++++++++++++++----------------
 2 files changed, 62 insertions(+), 53 deletions(-)

diff --git a/src/pudl/output/ferc1.py b/src/pudl/output/ferc1.py
index 6955f00317..f8145c05c0 100644
--- a/src/pudl/output/ferc1.py
+++ b/src/pudl/output/ferc1.py
@@ -2088,6 +2088,7 @@ class XbrlCalculationForestFerc1(BaseModel):
     exploded_calcs: pd.DataFrame = pd.DataFrame()
     seeds: list[NodeId] = []
     tags: pd.DataFrame = pd.DataFrame()
+    # TODO: remove the group metric checks and see if things still build / tests still pass
     group_metric_checks: GroupMetricChecks = GroupMetricChecks()
     model_config = ConfigDict(
         arbitrary_types_allowed=True, ignored_types=(cached_property,)
@@ -2203,14 +2204,13 @@ def exploded_calcs_to_digraph(
         Then we compile a dictionary of node attributes, based on the individual
         calculation components in the exploded calcs dataframe.
         """
-        source_nodes = list(
-            exploded_calcs.loc[:, self.parent_cols]
-            .rename(columns=lambda x: x.removesuffix("_parent"))
-            .itertuples(name="NodeId", index=False)
-        )
-        target_nodes = list(
-            exploded_calcs.loc[:, self.calc_cols].itertuples(name="NodeId", index=False)
-        )
+        source_nodes = [
+            NodeId(*x)
+            for x in exploded_calcs.set_index(self.parent_cols).index.to_list()
+        ]
+        target_nodes = [
+            NodeId(*x) for x in exploded_calcs.set_index(self.calc_cols).index.to_list()
+        ]
         edgelist = pd.DataFrame({"source": source_nodes, "target": target_nodes})
         forest = nx.from_pandas_edgelist(edgelist, create_using=nx.DiGraph)
         return forest
diff --git a/test/unit/output/ferc1_test.py b/test/unit/output/ferc1_test.py
index b7c02312a0..f5947b8d31 100644
--- a/test/unit/output/ferc1_test.py
+++ b/test/unit/output/ferc1_test.py
@@ -18,59 +18,68 @@
 
 """
 
-import json
 import logging
 
 import pandas as pd
 
-# from pudl.output.ferc1 import NodeId, XbrlCalculationForestFerc1
+from pudl.output.ferc1 import NodeId, XbrlCalculationForestFerc1
 
 logger = logging.getLogger(__name__)
 
-EXPLODED_META_IDX = ["table_name", "xbrl_factoid"]
-TEST_CALC_1 = [
-    {"name": "reported_1", "weight": 1.0, "source_tables": ["table_1"]},
-    {"name": "reported_2", "weight": -1.0, "source_tables": ["table_1"]},
-]
 
-TEST_CALC_2 = [
-    {"name": "reported_1", "weight": 1.0, "source_tables": ["table_1", "table_2"]},
-    {"name": "reported_2", "weight": -1.0, "source_tables": ["table_1"]},
-]
-
-TEST_CALC_3 = [
-    {"name": "reported_1", "weight": 1.0, "source_tables": ["table_1"]},
-    {"name": "reported_3", "weight": 1.0, "source_tables": ["table_3"]},
-]
+# TODO: give this a better name once we know what behavior we're actually testing
+def test_annotated_forest():
+    tags = pd.DataFrame(
+        columns=[
+            "table_name",
+            "xbrl_factoid",
+            "utility_type",
+            "plant_status",
+            "plant_function",
+        ]
+    )
+    parent = NodeId(
+        table_name="table_1",
+        xbrl_factoid="reported_1",
+        utility_type="electric",
+        plant_status=pd.NA,
+        plant_function=pd.NA,
+    )
+    child1 = NodeId(
+        table_name="table_1",
+        xbrl_factoid="reported_1_1",
+        utility_type="electric",
+        plant_status=pd.NA,
+        plant_function=pd.NA,
+    )
+    child2 = NodeId(
+        table_name="table_1",
+        xbrl_factoid="reported_1_2",
+        utility_type="electric",
+        plant_status=pd.NA,
+        plant_function=pd.NA,
+    )
 
-TEST_EXPLODED_META: pd.DataFrame = (
-    pd.DataFrame(
-        columns=["table_name", "xbrl_factoid", "calculations", "xbrl_factoid_original"],
-        data=[
-            ("table_1", "reported_1", "[]", "reported_original_1"),
-            ("table_1", "reported_2", "[]", "reported_original_2"),
-            ("table_1", "calc_1", json.dumps(TEST_CALC_1), "calc_original_1"),
-            ("table_2", "calc_2", json.dumps(TEST_CALC_2), "calc_original_2"),
-            ("table_1", "calc_3", json.dumps(TEST_CALC_3), "calc_original_3"),
-        ],
+    edges = [(parent, child1), (parent, child2)]
+    records = []
+    for parent, child in edges:
+        record = {"weight": 1}
+        for field in NodeId._fields:
+            record[f"{field}_parent"] = parent.__getattribute__(field)
+            record[field] = child.__getattribute__(field)
+        records.append(record)
+    dtype_child = {col: pd.StringDtype() for col in NodeId._fields}
+    dtype_parent = {f"{col}_parent": pd.StringDtype() for col in NodeId._fields}
+    dtype_weight = {"weight": pd.Int64Dtype()}
+    exploded_calcs = pd.DataFrame.from_records(records).astype(
+        dtype_child | dtype_parent | dtype_weight
     )
-    .convert_dtypes()
-    .set_index(EXPLODED_META_IDX)
-)
+    exploded_meta = pd.DataFrame([parent, child1, child2]).astype(dtype_child)
 
-# LEAF_NODE_1 = XbrlCalculationForestFerc1(
-#    exploded_meta=TEST_EXPLODED_META,
-#    seeds=[NodeId("table_1", "reported_1")],
-# )
-# LEAF_NODE_2 = XbrlCalculationForestFerc1(
-#    exploded_meta=TEST_EXPLODED_META,
-#    seeds=[NodeId("table_1", "reported_2")],
-# )
-# CALC_TREE_1 = XbrlCalculationForestFerc1(
-#    exploded_meta=TEST_EXPLODED_META,
-#    seeds=[NodeId("table_1", "calc_1")],
-# )
-# CALC_TREE_2 = XbrlCalculationForestFerc1(
-#    exploded_meta=TEST_EXPLODED_META,
-#    seeds=[NodeId("table_2", "calc_2")],
-# )
+    simple_forest = XbrlCalculationForestFerc1(
+        exploded_meta=exploded_meta,
+        exploded_calcs=exploded_calcs,
+        seeds=[parent],
+        tags=tags,
+    )
+    assert len(simple_forest.annotated_forest.nodes) == 3

From 50615cbab89895e13010d47239e90acdf3213454 Mon Sep 17 00:00:00 2001
From: Dazhong Xia <dazhong.xia@catalyst.coop>
Date: Wed, 17 Jan 2024 16:48:52 -0500
Subject: [PATCH 07/17] WIP: write down some to-dos for test cases.

---
 test/unit/output/ferc1_test.py | 53 +++++++++++++++++++++++++---------
 1 file changed, 40 insertions(+), 13 deletions(-)

diff --git a/test/unit/output/ferc1_test.py b/test/unit/output/ferc1_test.py
index f5947b8d31..e1c427415f 100644
--- a/test/unit/output/ferc1_test.py
+++ b/test/unit/output/ferc1_test.py
@@ -20,6 +20,7 @@
 
 import logging
 
+import networkx as nx
 import pandas as pd
 
 from pudl.output.ferc1 import NodeId, XbrlCalculationForestFerc1
@@ -27,17 +28,11 @@
 logger = logging.getLogger(__name__)
 
 
-# TODO: give this a better name once we know what behavior we're actually testing
-def test_annotated_forest():
-    tags = pd.DataFrame(
-        columns=[
-            "table_name",
-            "xbrl_factoid",
-            "utility_type",
-            "plant_status",
-            "plant_function",
-        ]
-    )
+# TODO: combine these into a class because we have a lot of similar method names
+# TODO: make graph construction easier with helper functions
+
+
+def test_annotated_forest_propagates_leafward():
     parent = NodeId(
         table_name="table_1",
         xbrl_factoid="reported_1",
@@ -71,15 +66,47 @@ def test_annotated_forest():
     dtype_child = {col: pd.StringDtype() for col in NodeId._fields}
     dtype_parent = {f"{col}_parent": pd.StringDtype() for col in NodeId._fields}
     dtype_weight = {"weight": pd.Int64Dtype()}
+
     exploded_calcs = pd.DataFrame.from_records(records).astype(
         dtype_child | dtype_parent | dtype_weight
     )
     exploded_meta = pd.DataFrame([parent, child1, child2]).astype(dtype_child)
-
+    tags = pd.DataFrame([parent]).assign(in_rate_base="yes")
     simple_forest = XbrlCalculationForestFerc1(
         exploded_meta=exploded_meta,
         exploded_calcs=exploded_calcs,
         seeds=[parent],
         tags=tags,
     )
-    assert len(simple_forest.annotated_forest.nodes) == 3
+    annotated_forest = simple_forest.annotated_forest
+    assert len(annotated_forest.nodes) == 3
+    annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+    assert annotated_tags[parent]["in_rate_base"] == "yes"
+    assert (
+        annotated_tags[parent]["in_rate_base"] == annotated_tags[child1]["in_rate_base"]
+    )
+    assert (
+        annotated_tags[parent]["in_rate_base"] == annotated_tags[child2]["in_rate_base"]
+    )
+
+
+def test_annotated_forest_propagates_rootward():
+    pass
+
+
+def test_annotated_forest_propagates_corrections():
+    pass
+
+
+def test_annotate_forest_propagates_both_dirs_with_corrections():
+    pass
+
+
+def test_annotate_forest_does_not_propagate():
+    # if a parent has two disagreeing children
+    pass
+
+
+def test_annoted_forest_does_propagate_null_and_value():
+    # if a parent has some children with one value and some with nulls
+    pass

From 1ee6c7cae17be052260d9d06d07a2a8a7062b4b3 Mon Sep 17 00:00:00 2001
From: Dazhong Xia <dazhong.xia@catalyst.coop>
Date: Thu, 18 Jan 2024 17:22:02 -0500
Subject: [PATCH 08/17] Get leafward propagation working

---
 src/pudl/output/ferc1.py       |  25 ++++
 test/unit/output/ferc1_test.py | 237 ++++++++++++++++++++++++---------
 2 files changed, 197 insertions(+), 65 deletions(-)

diff --git a/src/pudl/output/ferc1.py b/src/pudl/output/ferc1.py
index f8145c05c0..f6f55893ae 100644
--- a/src/pudl/output/ferc1.py
+++ b/src/pudl/output/ferc1.py
@@ -2312,6 +2312,7 @@ def annotated_forest(self: Self) -> nx.DiGraph:
         annotated_forest = deepcopy(self.forest)
         nx.set_node_attributes(annotated_forest, self.node_attrs)
         nx.set_edge_attributes(annotated_forest, self.edge_attrs)
+        annotated_forest = self.propagate_tags(annotated_forest)
 
         logger.info("Checking whether any pruned nodes were also tagged.")
         self.check_lost_tags(lost_nodes=self.pruned)
@@ -2320,6 +2321,30 @@ def annotated_forest(self: Self) -> nx.DiGraph:
         self.check_conflicting_tags(annotated_forest)
         return annotated_forest
 
+    def propagate_tags(self: Self, annotated_forest: nx.DiGraph):
+        """Propagate tags.
+
+        Propagate tags leafwards, rootward &  to the _correction nodes.
+        """
+        existing_tags = nx.get_node_attributes(annotated_forest, "tags")
+        leafward_inherited_tags = ["in_rate_base"]
+
+        for node, parent_tags in existing_tags.items():
+            descendants = nx.descendants(annotated_forest, node)
+            descendant_tags = {
+                desc: {
+                    "tags": {
+                        tag_name: parent_tags[tag_name]
+                        for tag_name in leafward_inherited_tags
+                        if tag_name in parent_tags
+                    }
+                    | existing_tags.get(desc, {})
+                }
+                for desc in descendants
+            }
+            nx.set_node_attributes(annotated_forest, descendant_tags)
+        return annotated_forest
+
     def check_lost_tags(self: Self, lost_nodes: list[NodeId]) -> None:
         """Check whether any of the input lost nodes were also tagged nodes."""
         if lost_nodes:
diff --git a/test/unit/output/ferc1_test.py b/test/unit/output/ferc1_test.py
index e1c427415f..a7b9f594ab 100644
--- a/test/unit/output/ferc1_test.py
+++ b/test/unit/output/ferc1_test.py
@@ -19,9 +19,11 @@
 """
 
 import logging
+import unittest
 
 import networkx as nx
 import pandas as pd
+import pytest
 
 from pudl.output.ferc1 import NodeId, XbrlCalculationForestFerc1
 
@@ -32,62 +34,177 @@
 # TODO: make graph construction easier with helper functions
 
 
-def test_annotated_forest_propagates_leafward():
-    parent = NodeId(
-        table_name="table_1",
-        xbrl_factoid="reported_1",
-        utility_type="electric",
-        plant_status=pd.NA,
-        plant_function=pd.NA,
-    )
-    child1 = NodeId(
-        table_name="table_1",
-        xbrl_factoid="reported_1_1",
-        utility_type="electric",
-        plant_status=pd.NA,
-        plant_function=pd.NA,
-    )
-    child2 = NodeId(
-        table_name="table_1",
-        xbrl_factoid="reported_1_2",
-        utility_type="electric",
-        plant_status=pd.NA,
-        plant_function=pd.NA,
-    )
-
-    edges = [(parent, child1), (parent, child2)]
-    records = []
-    for parent, child in edges:
-        record = {"weight": 1}
-        for field in NodeId._fields:
-            record[f"{field}_parent"] = parent.__getattribute__(field)
-            record[field] = child.__getattribute__(field)
-        records.append(record)
-    dtype_child = {col: pd.StringDtype() for col in NodeId._fields}
-    dtype_parent = {f"{col}_parent": pd.StringDtype() for col in NodeId._fields}
-    dtype_weight = {"weight": pd.Int64Dtype()}
-
-    exploded_calcs = pd.DataFrame.from_records(records).astype(
-        dtype_child | dtype_parent | dtype_weight
-    )
-    exploded_meta = pd.DataFrame([parent, child1, child2]).astype(dtype_child)
-    tags = pd.DataFrame([parent]).assign(in_rate_base="yes")
-    simple_forest = XbrlCalculationForestFerc1(
-        exploded_meta=exploded_meta,
-        exploded_calcs=exploded_calcs,
-        seeds=[parent],
-        tags=tags,
-    )
-    annotated_forest = simple_forest.annotated_forest
-    assert len(annotated_forest.nodes) == 3
-    annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
-    assert annotated_tags[parent]["in_rate_base"] == "yes"
-    assert (
-        annotated_tags[parent]["in_rate_base"] == annotated_tags[child1]["in_rate_base"]
-    )
-    assert (
-        annotated_tags[parent]["in_rate_base"] == annotated_tags[child2]["in_rate_base"]
+class TestTagPropagation(unittest.TestCase):
+    def setUp(self):
+        self.parent = NodeId(
+            table_name="table_1",
+            xbrl_factoid="reported_1",
+            utility_type="electric",
+            plant_status=pd.NA,
+            plant_function=pd.NA,
+        )
+        self.child1 = NodeId(
+            table_name="table_1",
+            xbrl_factoid="reported_1_1",
+            utility_type="electric",
+            plant_status=pd.NA,
+            plant_function=pd.NA,
+        )
+        self.child2 = NodeId(
+            table_name="table_1",
+            xbrl_factoid="reported_1_2",
+            utility_type="electric",
+            plant_status=pd.NA,
+            plant_function=pd.NA,
+        )
+
+        dtype_node = {col: pd.StringDtype() for col in NodeId._fields}
+        self.exploded_meta = pd.DataFrame(
+            [self.parent, self.child1, self.child2]
+        ).astype(dtype_node)
+
+    def _exploded_calcs_from_edges(self, edges: list[tuple[NodeId, NodeId]]):
+        records = []
+        for parent, child in edges:
+            record = {"weight": 1}
+            for field in NodeId._fields:
+                record[f"{field}_parent"] = parent.__getattribute__(field)
+                record[field] = child.__getattribute__(field)
+            records.append(record)
+        dtype_parent = {f"{col}_parent": pd.StringDtype() for col in NodeId._fields}
+        dtype_child = {col: pd.StringDtype() for col in NodeId._fields}
+        dtype_weight = {"weight": pd.Int64Dtype()}
+
+        return pd.DataFrame.from_records(records).astype(
+            dtype_child | dtype_parent | dtype_weight
+        )
+
+    def test_leafward_prop_undecided_children(self):
+        edges = [(self.parent, self.child1), (self.parent, self.child2)]
+        tags = pd.DataFrame([self.parent, self.child1, self.child2]).assign(
+            in_rate_base=["yes", pd.NA, pd.NA]
+        )
+
+        simple_forest = XbrlCalculationForestFerc1(
+            exploded_meta=self.exploded_meta,
+            exploded_calcs=self._exploded_calcs_from_edges(edges),
+            seeds=[self.parent],
+            tags=tags,
+        )
+
+        annotated_forest = simple_forest.annotated_forest
+        assert len(annotated_forest.nodes) == 3
+        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        assert annotated_tags[self.parent]["in_rate_base"] == "yes"
+        assert annotated_tags[self.child1]["in_rate_base"] == "yes"
+        assert annotated_tags[self.child2]["in_rate_base"] == "yes"
+
+    def test_leafward_prop_disagreeing_child(self):
+        """Don't force the diagreeing child to follow the parent."""
+        edges = [(self.parent, self.child1), (self.parent, self.child2)]
+        tags = pd.DataFrame([self.parent, self.child1, self.child2]).assign(
+            in_rate_base=["yes", "no", pd.NA]
+        )
+
+        simple_forest = XbrlCalculationForestFerc1(
+            exploded_meta=self.exploded_meta,
+            exploded_calcs=self._exploded_calcs_from_edges(edges),
+            seeds=[self.parent],
+            tags=tags,
+        )
+
+        annotated_forest = simple_forest.annotated_forest
+        assert len(annotated_forest.nodes) == 3
+        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        assert annotated_tags[self.parent]["in_rate_base"] == "yes"
+        assert annotated_tags[self.child1]["in_rate_base"] == "no"
+        assert annotated_tags[self.child2]["in_rate_base"] == "yes"
+
+    def test_leafward_prop_preserve_non_propagating_tags(self):
+        """Don't force the diagreeing child to follow the parent."""
+        edges = [(self.parent, self.child1), (self.parent, self.child2)]
+        tags = pd.DataFrame([self.parent, self.child1, self.child2]).assign(
+            in_rate_base=["yes", "no", pd.NA],
+            in_root_boose=["yus", "nu", "purtiul"],
+        )
+
+        simple_forest = XbrlCalculationForestFerc1(
+            exploded_meta=self.exploded_meta,
+            exploded_calcs=self._exploded_calcs_from_edges(edges),
+            seeds=[self.parent],
+            tags=tags,
+        )
+
+        annotated_forest = simple_forest.annotated_forest
+        assert len(annotated_forest.nodes) == 3
+        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        assert annotated_tags[self.parent]["in_rate_base"] == "yes"
+        assert annotated_tags[self.child1]["in_rate_base"] == "no"
+        assert annotated_tags[self.child2]["in_rate_base"] == "yes"
+        assert annotated_tags[self.parent]["in_root_boose"] == "yus"
+        assert annotated_tags[self.child1]["in_root_boose"] == "nu"
+        assert annotated_tags[self.child2]["in_root_boose"] == "purtiul"
+
+    def test_rootward_prop_disagreeing_children(self):
+        """Parents should not pick sides between disagreeing children."""
+        edges = [(self.parent, self.child1), (self.parent, self.child2)]
+        tags = pd.DataFrame([self.parent, self.child1, self.child2]).assign(
+            in_rate_base=[pd.NA, "no", "yes"]
+        )
+
+        simple_forest = XbrlCalculationForestFerc1(
+            exploded_meta=self.exploded_meta,
+            exploded_calcs=self._exploded_calcs_from_edges(edges),
+            seeds=[self.parent],
+            tags=tags,
+        )
+
+        annotated_forest = simple_forest.annotated_forest
+        assert len(annotated_forest.nodes) == 3
+        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        assert annotated_tags[self.parent] == {}
+        assert annotated_tags[self.child1]["in_rate_base"] == "no"
+        assert annotated_tags[self.child2]["in_rate_base"] == "yes"
+
+    @pytest.mark.xfail(
+        reason="we haven't implemented this behavior correctly yet", strict=True
     )
+    def test_prop_no_tags(self):
+        """If no tags, don't propagate anything."""
+        edges = [(self.parent, self.child1), (self.parent, self.child2)]
+        tags = pd.DataFrame([self.parent, self.child1, self.child2]).assign(
+            in_rate_base=[pd.NA, pd.NA, pd.NA]
+        )
+
+        simple_forest = XbrlCalculationForestFerc1(
+            exploded_meta=self.exploded_meta,
+            exploded_calcs=self._exploded_calcs_from_edges(edges),
+            seeds=[self.parent],
+            tags=tags,
+        )
+
+        annotated_forest = simple_forest.annotated_forest
+        assert len(annotated_forest.nodes) == 3
+        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        assert annotated_tags[self.parent] == {}
+        assert annotated_tags[self.child1] == {}
+        assert annotated_tags[self.child2] == {}
+
+        tags = pd.DataFrame(columns=NodeId._fields).convert_dtypes()
+
+        simple_forest = XbrlCalculationForestFerc1(
+            exploded_meta=self.exploded_meta,
+            exploded_calcs=self._exploded_calcs_from_edges(edges),
+            seeds=[self.parent],
+            tags=tags,
+        )
+
+        annotated_forest = simple_forest.annotated_forest
+        assert len(annotated_forest.nodes) == 3
+        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        assert annotated_tags[self.parent] == {}
+        assert annotated_tags[self.child1] == {}
+        assert annotated_tags[self.child2] == {}
 
 
 def test_annotated_forest_propagates_rootward():
@@ -100,13 +217,3 @@ def test_annotated_forest_propagates_corrections():
 
 def test_annotate_forest_propagates_both_dirs_with_corrections():
     pass
-
-
-def test_annotate_forest_does_not_propagate():
-    # if a parent has two disagreeing children
-    pass
-
-
-def test_annoted_forest_does_propagate_null_and_value():
-    # if a parent has some children with one value and some with nulls
-    pass

From 9b19f8b1d5e6feaabc0cfc78e9e51a36363be88a Mon Sep 17 00:00:00 2001
From: Christina Gosnell <cgosnell@catalyst.coop>
Date: Thu, 25 Jan 2024 17:32:43 -0500
Subject: [PATCH 09/17] first pass of adding rootward tags one layer and an
 attempt at a recursive method

---
 src/pudl/output/ferc1.py       |  87 +++++++++++++++++++-
 test/unit/output/ferc1_test.py | 141 ++++++++++++++++++++++++++++++++-
 2 files changed, 223 insertions(+), 5 deletions(-)

diff --git a/src/pudl/output/ferc1.py b/src/pudl/output/ferc1.py
index 7c955ec219..36ab3ee632 100644
--- a/src/pudl/output/ferc1.py
+++ b/src/pudl/output/ferc1.py
@@ -2327,8 +2327,8 @@ def propagate_tags(self: Self, annotated_forest: nx.DiGraph):
         Propagate tags leafwards, rootward &  to the _correction nodes.
         """
         existing_tags = nx.get_node_attributes(annotated_forest, "tags")
+        ## Leafwards propagation
         leafward_inherited_tags = ["in_rate_base"]
-
         for node, parent_tags in existing_tags.items():
             descendants = nx.descendants(annotated_forest, node)
             descendant_tags = {
@@ -2343,6 +2343,63 @@ def propagate_tags(self: Self, annotated_forest: nx.DiGraph):
                 for desc in descendants
             }
             nx.set_node_attributes(annotated_forest, descendant_tags)
+
+        # Rootward propagation
+        existing_tags = nx.get_node_attributes(annotated_forest, "tags")
+        rootward_tags = {}
+        rootward_inherited_tags = ["in_rate_base"]
+        for node in existing_tags:
+            # what node is your successor node?
+            # does that sucessor node have children that all have the same tag?
+            # if so then apply that tag to the sucessor
+            # print(nx.ancestors(simple_forest.forest, node))
+
+            # we assume that no nodes have multiple parents
+            parents = list(annotated_forest.predecessors(node))
+            # if you have no parents then no need to check nothing
+            if len(parents) == 0:
+                continue
+            assert len(parents) == 1
+            parent = parents[0]
+            sibling_tags = {
+                sib_node: existing_tags.get(sib_node, {})
+                for sib_node in annotated_forest.successors(parent)
+                if not sib_node.xbrl_factoid.endswith("_correction")
+            }
+            for rootward_tag in rootward_inherited_tags:
+                sibling_tag_values = {
+                    # must return na bc we don't want to propagate unless all siblings
+                    # have same tag
+                    sibling_tag.get(rootward_tag, pd.NA)
+                    for sibling_tag in sibling_tags.values()
+                }
+                if len(sibling_tag_values) == 1:
+                    parent_tags = {
+                        parent: {
+                            "tags": {rootward_tag: sibling_tag_values.pop()}
+                            | existing_tags.get(parent, {})
+                        }
+                    }
+                rootward_tags = rootward_tags | parent_tags
+        nx.set_node_attributes(annotated_forest, rootward_tags)
+        # Correction Records
+        existing_tags = nx.get_node_attributes(annotated_forest, "tags")
+        correction_nodes = [
+            node
+            for node in annotated_forest
+            if node.xbrl_factoid.endswith("_correction")
+        ]
+        correction_tags = {}
+        for correction_node in correction_nodes:
+            # for every correction node, we assume that that nodes parent tags can apply
+            parents = list(annotated_forest.predecessors(correction_node))
+            # all correction records shoul have a parent and only one
+            assert len(parents) == 1
+            parent = parents[0]
+            correction_tags[correction_node] = {
+                "tags": existing_tags.get(parent, {})
+                | existing_tags.get(correction_node, {})
+            }
         return annotated_forest
 
     def check_lost_tags(self: Self, lost_nodes: list[NodeId]) -> None:
@@ -2869,6 +2926,34 @@ def nodes_to_df(calc_forest: nx.DiGraph, nodes: list[NodeId]) -> pd.DataFrame:
     return pd.concat([index, tags], axis="columns")
 
 
+def aggregate_child_tags(
+    annotated_forest, node, tag_name: Literal["in_rate_base"]
+) -> dict:
+    """Set the tags for nodes when all of its children have same tag."""
+    tag = pd.NA
+    # i'm a leaf so i stop looking
+    if not annotated_forest.successors(node):
+        tag = annotated_forest.get(node, {}).get(tag_name, pd.NA)
+    # if i have a value you don't need to keep looking at this nodes childern
+    elif annotated_forest.get(node, {}).get(tag_name, pd.NA) != pd.NA:
+        tag = annotated_forest[node][tag_name]
+    else:
+        child_tags = {}
+        for child_node in annotated_forest.successors(node):
+            child_tags.add(aggregate_child_tags(annotated_forest, child_node, tag_name))
+        # if all the children tags are the same and non-null
+        if (len(child_tags) == 1) and {t for t in child_tags if not pd.isna(t)}:
+            new_node_tag = child_tags.pop()
+            # actually assign the tag here but don't wipe out any other tags
+            existing_tags = nx.get_node_attributes(annotated_forest, "tags")
+            node_tags = {
+                node: {"tags": {tag_name: new_node_tag} | existing_tags.get(node, {})}
+            }
+            nx.set_node_attributes(annotated_forest, node_tags)
+            tag = new_node_tag
+    return tag
+
+
 @asset
 def out_ferc1__yearly_rate_base(
     exploded_balance_sheet_assets_ferc1: pd.DataFrame,
diff --git a/test/unit/output/ferc1_test.py b/test/unit/output/ferc1_test.py
index a7b9f594ab..e6e0b9ae09 100644
--- a/test/unit/output/ferc1_test.py
+++ b/test/unit/output/ferc1_test.py
@@ -43,6 +43,13 @@ def setUp(self):
             plant_status=pd.NA,
             plant_function=pd.NA,
         )
+        self.parent_correction = NodeId(
+            table_name="table_1",
+            xbrl_factoid="reported_1_correction",
+            utility_type="electric",
+            plant_status=pd.NA,
+            plant_function=pd.NA,
+        )
         self.child1 = NodeId(
             table_name="table_1",
             xbrl_factoid="reported_1_1",
@@ -57,10 +64,37 @@ def setUp(self):
             plant_status=pd.NA,
             plant_function=pd.NA,
         )
-
+        self.grand_child11 = NodeId(
+            table_name="table_1",
+            xbrl_factoid="reported_1_1_1",
+            utility_type="electric",
+            plant_status=pd.NA,
+            plant_function=pd.NA,
+        )
+        self.grand_child12 = NodeId(
+            table_name="table_1",
+            xbrl_factoid="reported_1_1_2",
+            utility_type="electric",
+            plant_status=pd.NA,
+            plant_function=pd.NA,
+        )
+        self.child1_correction = NodeId(
+            table_name="table_1",
+            xbrl_factoid="reported_1_1_correction",
+            utility_type="electric",
+            plant_status=pd.NA,
+            plant_function=pd.NA,
+        )
         dtype_node = {col: pd.StringDtype() for col in NodeId._fields}
         self.exploded_meta = pd.DataFrame(
-            [self.parent, self.child1, self.child2]
+            [
+                self.parent,
+                self.child1,
+                self.child2,
+                self.grand_child11,
+                self.grand_child12,
+                self.child1_correction,
+            ]
         ).astype(dtype_node)
 
     def _exploded_calcs_from_edges(self, edges: list[tuple[NodeId, NodeId]]):
@@ -206,9 +240,108 @@ def test_prop_no_tags(self):
         assert annotated_tags[self.child1] == {}
         assert annotated_tags[self.child2] == {}
 
+    def test_annotated_forest_propagates_rootward(self):
+        edges = [
+            (self.parent, self.child1),
+            (self.parent, self.child2),
+            (self.child1, self.grand_child11),
+            (self.child1, self.grand_child12),
+        ]
+        tags = pd.DataFrame([self.grand_child11, self.grand_child12]).assign(
+            in_rate_base=["yes", "yes"]
+        )
 
-def test_annotated_forest_propagates_rootward():
-    pass
+        simple_forest = XbrlCalculationForestFerc1(
+            exploded_meta=self.exploded_meta,
+            exploded_calcs=self._exploded_calcs_from_edges(edges),
+            seeds=[self.parent],
+            tags=tags,
+        )
+        annotated_forest = simple_forest.annotated_forest
+        assert len(annotated_forest.nodes) == 5
+        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        # TODO: WHY THO it doesn't show up
+        # assert annotated_tags[self.parent] == {}
+        assert annotated_tags.get(self.parent, {}) == {}
+        assert annotated_tags[self.child1]["in_rate_base"] == "yes"
+        assert annotated_tags.get(self.child2, {}) == {}
+        assert annotated_tags[self.grand_child11]["in_rate_base"] == "yes"
+        assert annotated_tags[self.grand_child12]["in_rate_base"] == "yes"
+
+    def test_annotated_forest_propagates_rootward_disagreeing_sibling(self):
+        edges = [
+            (self.parent, self.child1),
+            (self.parent, self.child2),
+            (self.child1, self.grand_child11),
+            (self.child1, self.grand_child12),
+        ]
+        tags = pd.DataFrame([self.grand_child11, self.grand_child12]).assign(
+            in_rate_base=["yes", "no"]
+        )
+
+        simple_forest = XbrlCalculationForestFerc1(
+            exploded_meta=self.exploded_meta,
+            exploded_calcs=self._exploded_calcs_from_edges(edges),
+            seeds=[self.parent],
+            tags=tags,
+        )
+        annotated_forest = simple_forest.annotated_forest
+        assert len(annotated_forest.nodes) == 5
+        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        assert annotated_tags.get(self.parent, {}) == {}
+        assert annotated_tags.get(self.child1, {}) == {}
+        assert annotated_tags.get(self.child2, {}) == {}
+        assert annotated_tags[self.grand_child11]["in_rate_base"] == "yes"
+        assert annotated_tags[self.grand_child12]["in_rate_base"] == "no"
+
+    def test_annotated_forest_propagates_rootward_correction(self):
+        edges = [
+            (self.child1, self.grand_child11),
+            (self.child1, self.child1_correction),
+        ]
+        tags = pd.DataFrame([self.child1]).assign(in_rate_base=["yes"])
+
+        simple_forest = XbrlCalculationForestFerc1(
+            exploded_meta=self.exploded_meta,
+            exploded_calcs=self._exploded_calcs_from_edges(edges),
+            seeds=[self.child1],
+            tags=tags,
+        )
+        annotated_forest = simple_forest.annotated_forest
+        assert len(annotated_forest.nodes) == 3
+        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        assert annotated_tags[self.child1]["in_rate_base"] == "yes"
+        assert annotated_tags[self.grand_child11]["in_rate_base"] == "yes"
+        assert annotated_tags[self.child1_correction]["in_rate_base"] == "yes"
+
+    @pytest.mark.xfail(
+        reason="we haven't implemented this behavior correctly yet", strict=True
+    )
+    def test_annotated_forest_propagates_rootward_two_layers(self):
+        edges = [
+            (self.parent, self.child1),
+            (self.parent, self.child2),
+            (self.child1, self.grand_child11),
+            (self.child1, self.grand_child12),
+        ]
+        pre_assigned_yes_nodes = [self.child2, self.grand_child11, self.grand_child12]
+        tags = pd.DataFrame(pre_assigned_yes_nodes).assign(
+            in_rate_base=["yes"] * len(pre_assigned_yes_nodes),
+        )
+
+        simple_forest = XbrlCalculationForestFerc1(
+            exploded_meta=self.exploded_meta,
+            exploded_calcs=self._exploded_calcs_from_edges(edges),
+            seeds=[self.parent],
+            tags=tags,
+        )
+        annotated_forest = simple_forest.annotated_forest
+        assert len(annotated_forest.nodes) == 5
+        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        for pre_yes_node in pre_assigned_yes_nodes:
+            assert annotated_tags[pre_yes_node]["in_rate_base"] == "yes"
+        for post_yes_node in [self.child1, self.parent]:
+            assert annotated_tags[post_yes_node]["in_rate_base"] == "yes"
 
 
 def test_annotated_forest_propagates_corrections():

From d1347c17103ef4ec753129391c7d296d73dd8ac2 Mon Sep 17 00:00:00 2001
From: Christina Gosnell <cgosnell@catalyst.coop>
Date: Thu, 25 Jan 2024 18:13:45 -0500
Subject: [PATCH 10/17] integrate the recursive tag propagation method

---
 src/pudl/output/ferc1.py       | 79 ++++++++++++++--------------------
 test/unit/output/ferc1_test.py | 36 ++++++++++++++--
 2 files changed, 66 insertions(+), 49 deletions(-)

diff --git a/src/pudl/output/ferc1.py b/src/pudl/output/ferc1.py
index 36ab3ee632..230b76c26e 100644
--- a/src/pudl/output/ferc1.py
+++ b/src/pudl/output/ferc1.py
@@ -2345,43 +2345,10 @@ def propagate_tags(self: Self, annotated_forest: nx.DiGraph):
             nx.set_node_attributes(annotated_forest, descendant_tags)
 
         # Rootward propagation
-        existing_tags = nx.get_node_attributes(annotated_forest, "tags")
-        rootward_tags = {}
-        rootward_inherited_tags = ["in_rate_base"]
-        for node in existing_tags:
-            # what node is your successor node?
-            # does that sucessor node have children that all have the same tag?
-            # if so then apply that tag to the sucessor
-            # print(nx.ancestors(simple_forest.forest, node))
-
-            # we assume that no nodes have multiple parents
-            parents = list(annotated_forest.predecessors(node))
-            # if you have no parents then no need to check nothing
-            if len(parents) == 0:
-                continue
-            assert len(parents) == 1
-            parent = parents[0]
-            sibling_tags = {
-                sib_node: existing_tags.get(sib_node, {})
-                for sib_node in annotated_forest.successors(parent)
-                if not sib_node.xbrl_factoid.endswith("_correction")
-            }
-            for rootward_tag in rootward_inherited_tags:
-                sibling_tag_values = {
-                    # must return na bc we don't want to propagate unless all siblings
-                    # have same tag
-                    sibling_tag.get(rootward_tag, pd.NA)
-                    for sibling_tag in sibling_tags.values()
-                }
-                if len(sibling_tag_values) == 1:
-                    parent_tags = {
-                        parent: {
-                            "tags": {rootward_tag: sibling_tag_values.pop()}
-                            | existing_tags.get(parent, {})
-                        }
-                    }
-                rootward_tags = rootward_tags | parent_tags
-        nx.set_node_attributes(annotated_forest, rootward_tags)
+        root_node = self.roots(annotated_forest)[0]
+        _ = recursively_propagate_tags_leafward(
+            annotated_forest, root_node, "in_rate_base"
+        )
         # Correction Records
         existing_tags = nx.get_node_attributes(annotated_forest, "tags")
         correction_nodes = [
@@ -2400,6 +2367,7 @@ def propagate_tags(self: Self, annotated_forest: nx.DiGraph):
                 "tags": existing_tags.get(parent, {})
                 | existing_tags.get(correction_node, {})
             }
+        nx.set_node_attributes(annotated_forest, correction_tags)
         return annotated_forest
 
     def check_lost_tags(self: Self, lost_nodes: list[NodeId]) -> None:
@@ -2926,24 +2894,43 @@ def nodes_to_df(calc_forest: nx.DiGraph, nodes: list[NodeId]) -> pd.DataFrame:
     return pd.concat([index, tags], axis="columns")
 
 
-def aggregate_child_tags(
+def recursively_propagate_tags_leafward(
     annotated_forest, node, tag_name: Literal["in_rate_base"]
-) -> dict:
-    """Set the tags for nodes when all of its children have same tag."""
+):
+    """Set the tags for nodes when all of its children have same tag.
+
+    This function returns the value of a tag.
+    """
+
+    def _get_tag(annotated_forest, node, tag_name):
+        return annotated_forest.nodes.get(node, {}).get("tags", {}).get(tag_name, pd.NA)
+
+    logger.info(f"propagaging tags leafward from {node}")
     tag = pd.NA
     # i'm a leaf so i stop looking
-    if not annotated_forest.successors(node):
-        tag = annotated_forest.get(node, {}).get(tag_name, pd.NA)
+    if not list(annotated_forest.successors(node)):
+        tag = _get_tag(annotated_forest, node, tag_name)
+        logger.info(f"    We found a leaf people. w/ {tag=}")
     # if i have a value you don't need to keep looking at this nodes childern
-    elif annotated_forest.get(node, {}).get(tag_name, pd.NA) != pd.NA:
-        tag = annotated_forest[node][tag_name]
+    elif not pd.isna(_get_tag(annotated_forest, node, tag_name)):
+        tag = _get_tag(annotated_forest, node, tag_name)
+        logger.info(f"    We found a node w/ tags. w/ {tag=}")
     else:
-        child_tags = {}
+        child_tags = set()
         for child_node in annotated_forest.successors(node):
-            child_tags.add(aggregate_child_tags(annotated_forest, child_node, tag_name))
+            if not child_node.xbrl_factoid.endswith("_correction"):
+                child_tags.add(
+                    recursively_propagate_tags_leafward(
+                        annotated_forest, child_node, tag_name
+                    )
+                )
+        logger.info(f"   found {child_tags=}")
         # if all the children tags are the same and non-null
         if (len(child_tags) == 1) and {t for t in child_tags if not pd.isna(t)}:
             new_node_tag = child_tags.pop()
+            logger.info(
+                f"    We found a node consitent children tags. w/ {new_node_tag=}"
+            )
             # actually assign the tag here but don't wipe out any other tags
             existing_tags = nx.get_node_attributes(annotated_forest, "tags")
             node_tags = {
diff --git a/test/unit/output/ferc1_test.py b/test/unit/output/ferc1_test.py
index e6e0b9ae09..35f0ac6829 100644
--- a/test/unit/output/ferc1_test.py
+++ b/test/unit/output/ferc1_test.py
@@ -314,9 +314,6 @@ def test_annotated_forest_propagates_rootward_correction(self):
         assert annotated_tags[self.grand_child11]["in_rate_base"] == "yes"
         assert annotated_tags[self.child1_correction]["in_rate_base"] == "yes"
 
-    @pytest.mark.xfail(
-        reason="we haven't implemented this behavior correctly yet", strict=True
-    )
     def test_annotated_forest_propagates_rootward_two_layers(self):
         edges = [
             (self.parent, self.child1),
@@ -343,6 +340,39 @@ def test_annotated_forest_propagates_rootward_two_layers(self):
         for post_yes_node in [self.child1, self.parent]:
             assert annotated_tags[post_yes_node]["in_rate_base"] == "yes"
 
+    def test_annotated_forest_propagates_rootward_two_layers_plus_corrections(self):
+        edges = [
+            (self.parent, self.child1),
+            (self.parent, self.child2),
+            (self.parent, self.parent_correction),
+            (self.child1, self.grand_child11),
+            (self.child1, self.grand_child12),
+            (self.child1, self.child1_correction),
+        ]
+        pre_assigned_yes_nodes = [self.child2, self.grand_child11, self.grand_child12]
+        tags = pd.DataFrame(pre_assigned_yes_nodes).assign(
+            in_rate_base=["yes"] * len(pre_assigned_yes_nodes),
+        )
+
+        simple_forest = XbrlCalculationForestFerc1(
+            exploded_meta=self.exploded_meta,
+            exploded_calcs=self._exploded_calcs_from_edges(edges),
+            seeds=[self.parent],
+            tags=tags,
+        )
+        annotated_forest = simple_forest.annotated_forest
+        assert len(annotated_forest.nodes) == 7
+        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        for pre_yes_node in pre_assigned_yes_nodes:
+            assert annotated_tags[pre_yes_node]["in_rate_base"] == "yes"
+        for post_yes_node in [
+            self.child1,
+            self.parent,
+            self.child1_correction,
+            self.parent_correction,
+        ]:
+            assert annotated_tags[post_yes_node]["in_rate_base"] == "yes"
+
 
 def test_annotated_forest_propagates_corrections():
     pass

From d1a42b4f71772e6a6c0702d6b09bf08fcb62398b Mon Sep 17 00:00:00 2001
From: Christina Gosnell <cgosnell@catalyst.coop>
Date: Fri, 26 Jan 2024 11:41:46 -0500
Subject: [PATCH 11/17] remove old correction tagging and standardize unit
 tests a bit

---
 src/pudl/output/ferc1.py       |  61 --------------
 test/unit/output/ferc1_test.py | 149 ++++++++-------------------------
 2 files changed, 36 insertions(+), 174 deletions(-)

diff --git a/src/pudl/output/ferc1.py b/src/pudl/output/ferc1.py
index 230b76c26e..fcb76a9faa 100644
--- a/src/pudl/output/ferc1.py
+++ b/src/pudl/output/ferc1.py
@@ -1195,9 +1195,6 @@ def _out_ferc1__explosion_tags(
         .reset_index()
         .drop(columns=["notes"])
     )
-    # Add the correction records to the tags...
-    corrections = make_correction_tags(tags, calculation_components_xbrl_ferc1)
-    tags = pd.concat([tags, corrections])
     return tags
 
 
@@ -1259,64 +1256,6 @@ def _aggregatable_dimension_tags(
     return tags_df[tags_df[aggregatable_col] != "total"]
 
 
-def make_correction_tags(
-    tags_all: pd.DataFrame, calc_components: pd.DataFrame
-) -> pd.DataFrame:
-    """Make tags for correction records.
-
-    We need to check to see if any of the tags in each of the calculated
-    parent factoids are the same for all of their child components. So in this
-    function, we're going to merge on the tags to the children then groupby the
-    parents. For each tag, see if the childrens'tags contains only one unique value.
-    If so grab the tag to associate with the correction record of the parent. If not,
-    no tag will be associated with the record.
-    """
-    tag_idx = list(NodeId._fields)
-    calcs_w_tags = (
-        pd.merge(  # remove the correction records bc those are the ones we want to
-            calc_components[~calc_components.xbrl_factoid.str.contains("_correction")],
-            tags_all,
-            on=tag_idx,
-            how="left",
-            validate="m:1",
-        )
-    )
-    # use the same groupby to get the number of unique tags and the first one
-    # we will only use the first tag if the tags are unique
-    tag_cols = list(tags_all.drop(columns=tag_idx).columns)
-    tag_gb = calcs_w_tags.groupby([f"{c}_parent" for c in tag_idx], dropna=False)[
-        tag_cols
-    ]
-    tag_check = pd.merge(
-        tag_gb.nunique(
-            dropna=False
-        ),  # bc if null and non-null tag we want to know that
-        tag_gb.first(),
-        right_index=True,
-        left_index=True,
-        suffixes=("_n", ""),
-        validate="1:1",
-    )
-    # null out all of the tags that have non-unique tags for each parent
-    for col in tag_cols:
-        non_unique_mask = tag_check[f"{col}_n"] != 1
-        tag_check.loc[non_unique_mask, col] = pd.NA
-    # specifically for in_rate_base assign partial when it is a mix
-    tag_check.loc[tag_check["in_rate_base_n"] > 1, "in_rate_base"] = "partial"
-    # remove the fully null tags bc there's nothing new in there and
-    # drop all of the _n columns
-    tag_check = tag_check.dropna(how="all", subset=tag_cols)[tag_cols]
-    # remove the parent from the index name
-    tag_check.index.names = [
-        col.removesuffix("_parent") for col in tag_check.index.names
-    ]
-    correction_tags = tag_check.reset_index().assign(
-        xbrl_factoid=lambda x: x.xbrl_factoid + "_correction"
-    )
-    logger.info(f"Found {len(correction_tags)=}")
-    return correction_tags
-
-
 def exploded_table_asset_factory(
     root_table: str,
     table_names: list[str],
diff --git a/test/unit/output/ferc1_test.py b/test/unit/output/ferc1_test.py
index 35f0ac6829..1d2b16ee19 100644
--- a/test/unit/output/ferc1_test.py
+++ b/test/unit/output/ferc1_test.py
@@ -25,6 +25,7 @@
 import pandas as pd
 import pytest
 
+from pudl.helpers import dedupe_n_flatten_list_of_lists
 from pudl.output.ferc1 import NodeId, XbrlCalculationForestFerc1
 
 logger = logging.getLogger(__name__)
@@ -113,22 +114,37 @@ def _exploded_calcs_from_edges(self, edges: list[tuple[NodeId, NodeId]]):
             dtype_child | dtype_parent | dtype_weight
         )
 
-    def test_leafward_prop_undecided_children(self):
-        edges = [(self.parent, self.child1), (self.parent, self.child2)]
-        tags = pd.DataFrame([self.parent, self.child1, self.child2]).assign(
-            in_rate_base=["yes", pd.NA, pd.NA]
-        )
-
+    def build_forest_and_annotated_tags(
+        self, edges: list[tuple[NodeId, NodeId]], tags: pd.DataFrame, seeds=None
+    ):
+        """Build a forest, test forest nodes and return annotated tags.
+
+        Args:
+            edges: list of tuples
+            tags: dataframe of tags
+            seeds: list of seed nodes. Default is None and will assume seed node is
+                ``parent``.
+        """
+        if not seeds:
+            seeds = [self.parent]
         simple_forest = XbrlCalculationForestFerc1(
             exploded_meta=self.exploded_meta,
             exploded_calcs=self._exploded_calcs_from_edges(edges),
-            seeds=[self.parent],
+            seeds=seeds,
             tags=tags,
         )
-
         annotated_forest = simple_forest.annotated_forest
-        assert len(annotated_forest.nodes) == 3
+        # ensure no nodes got dropped
+        assert len(annotated_forest.nodes) == len(dedupe_n_flatten_list_of_lists(edges))
         annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        return annotated_tags
+
+    def test_leafward_prop_undecided_children(self):
+        edges = [(self.parent, self.child1), (self.parent, self.child2)]
+        tags = pd.DataFrame([self.parent, self.child1, self.child2]).assign(
+            in_rate_base=["yes", pd.NA, pd.NA]
+        )
+        annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
         assert annotated_tags[self.parent]["in_rate_base"] == "yes"
         assert annotated_tags[self.child1]["in_rate_base"] == "yes"
         assert annotated_tags[self.child2]["in_rate_base"] == "yes"
@@ -139,17 +155,7 @@ def test_leafward_prop_disagreeing_child(self):
         tags = pd.DataFrame([self.parent, self.child1, self.child2]).assign(
             in_rate_base=["yes", "no", pd.NA]
         )
-
-        simple_forest = XbrlCalculationForestFerc1(
-            exploded_meta=self.exploded_meta,
-            exploded_calcs=self._exploded_calcs_from_edges(edges),
-            seeds=[self.parent],
-            tags=tags,
-        )
-
-        annotated_forest = simple_forest.annotated_forest
-        assert len(annotated_forest.nodes) == 3
-        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
         assert annotated_tags[self.parent]["in_rate_base"] == "yes"
         assert annotated_tags[self.child1]["in_rate_base"] == "no"
         assert annotated_tags[self.child2]["in_rate_base"] == "yes"
@@ -161,17 +167,7 @@ def test_leafward_prop_preserve_non_propagating_tags(self):
             in_rate_base=["yes", "no", pd.NA],
             in_root_boose=["yus", "nu", "purtiul"],
         )
-
-        simple_forest = XbrlCalculationForestFerc1(
-            exploded_meta=self.exploded_meta,
-            exploded_calcs=self._exploded_calcs_from_edges(edges),
-            seeds=[self.parent],
-            tags=tags,
-        )
-
-        annotated_forest = simple_forest.annotated_forest
-        assert len(annotated_forest.nodes) == 3
-        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
         assert annotated_tags[self.parent]["in_rate_base"] == "yes"
         assert annotated_tags[self.child1]["in_rate_base"] == "no"
         assert annotated_tags[self.child2]["in_rate_base"] == "yes"
@@ -185,17 +181,7 @@ def test_rootward_prop_disagreeing_children(self):
         tags = pd.DataFrame([self.parent, self.child1, self.child2]).assign(
             in_rate_base=[pd.NA, "no", "yes"]
         )
-
-        simple_forest = XbrlCalculationForestFerc1(
-            exploded_meta=self.exploded_meta,
-            exploded_calcs=self._exploded_calcs_from_edges(edges),
-            seeds=[self.parent],
-            tags=tags,
-        )
-
-        annotated_forest = simple_forest.annotated_forest
-        assert len(annotated_forest.nodes) == 3
-        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
         assert annotated_tags[self.parent] == {}
         assert annotated_tags[self.child1]["in_rate_base"] == "no"
         assert annotated_tags[self.child2]["in_rate_base"] == "yes"
@@ -209,33 +195,13 @@ def test_prop_no_tags(self):
         tags = pd.DataFrame([self.parent, self.child1, self.child2]).assign(
             in_rate_base=[pd.NA, pd.NA, pd.NA]
         )
-
-        simple_forest = XbrlCalculationForestFerc1(
-            exploded_meta=self.exploded_meta,
-            exploded_calcs=self._exploded_calcs_from_edges(edges),
-            seeds=[self.parent],
-            tags=tags,
-        )
-
-        annotated_forest = simple_forest.annotated_forest
-        assert len(annotated_forest.nodes) == 3
-        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
         assert annotated_tags[self.parent] == {}
         assert annotated_tags[self.child1] == {}
         assert annotated_tags[self.child2] == {}
 
         tags = pd.DataFrame(columns=NodeId._fields).convert_dtypes()
-
-        simple_forest = XbrlCalculationForestFerc1(
-            exploded_meta=self.exploded_meta,
-            exploded_calcs=self._exploded_calcs_from_edges(edges),
-            seeds=[self.parent],
-            tags=tags,
-        )
-
-        annotated_forest = simple_forest.annotated_forest
-        assert len(annotated_forest.nodes) == 3
-        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
         assert annotated_tags[self.parent] == {}
         assert annotated_tags[self.child1] == {}
         assert annotated_tags[self.child2] == {}
@@ -250,16 +216,7 @@ def test_annotated_forest_propagates_rootward(self):
         tags = pd.DataFrame([self.grand_child11, self.grand_child12]).assign(
             in_rate_base=["yes", "yes"]
         )
-
-        simple_forest = XbrlCalculationForestFerc1(
-            exploded_meta=self.exploded_meta,
-            exploded_calcs=self._exploded_calcs_from_edges(edges),
-            seeds=[self.parent],
-            tags=tags,
-        )
-        annotated_forest = simple_forest.annotated_forest
-        assert len(annotated_forest.nodes) == 5
-        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
         # TODO: WHY THO it doesn't show up
         # assert annotated_tags[self.parent] == {}
         assert annotated_tags.get(self.parent, {}) == {}
@@ -278,16 +235,7 @@ def test_annotated_forest_propagates_rootward_disagreeing_sibling(self):
         tags = pd.DataFrame([self.grand_child11, self.grand_child12]).assign(
             in_rate_base=["yes", "no"]
         )
-
-        simple_forest = XbrlCalculationForestFerc1(
-            exploded_meta=self.exploded_meta,
-            exploded_calcs=self._exploded_calcs_from_edges(edges),
-            seeds=[self.parent],
-            tags=tags,
-        )
-        annotated_forest = simple_forest.annotated_forest
-        assert len(annotated_forest.nodes) == 5
-        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
         assert annotated_tags.get(self.parent, {}) == {}
         assert annotated_tags.get(self.child1, {}) == {}
         assert annotated_tags.get(self.child2, {}) == {}
@@ -300,16 +248,9 @@ def test_annotated_forest_propagates_rootward_correction(self):
             (self.child1, self.child1_correction),
         ]
         tags = pd.DataFrame([self.child1]).assign(in_rate_base=["yes"])
-
-        simple_forest = XbrlCalculationForestFerc1(
-            exploded_meta=self.exploded_meta,
-            exploded_calcs=self._exploded_calcs_from_edges(edges),
-            seeds=[self.child1],
-            tags=tags,
+        annotated_tags = self.build_forest_and_annotated_tags(
+            edges, tags, seeds=[self.child1]
         )
-        annotated_forest = simple_forest.annotated_forest
-        assert len(annotated_forest.nodes) == 3
-        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
         assert annotated_tags[self.child1]["in_rate_base"] == "yes"
         assert annotated_tags[self.grand_child11]["in_rate_base"] == "yes"
         assert annotated_tags[self.child1_correction]["in_rate_base"] == "yes"
@@ -325,16 +266,7 @@ def test_annotated_forest_propagates_rootward_two_layers(self):
         tags = pd.DataFrame(pre_assigned_yes_nodes).assign(
             in_rate_base=["yes"] * len(pre_assigned_yes_nodes),
         )
-
-        simple_forest = XbrlCalculationForestFerc1(
-            exploded_meta=self.exploded_meta,
-            exploded_calcs=self._exploded_calcs_from_edges(edges),
-            seeds=[self.parent],
-            tags=tags,
-        )
-        annotated_forest = simple_forest.annotated_forest
-        assert len(annotated_forest.nodes) == 5
-        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
         for pre_yes_node in pre_assigned_yes_nodes:
             assert annotated_tags[pre_yes_node]["in_rate_base"] == "yes"
         for post_yes_node in [self.child1, self.parent]:
@@ -353,16 +285,7 @@ def test_annotated_forest_propagates_rootward_two_layers_plus_corrections(self):
         tags = pd.DataFrame(pre_assigned_yes_nodes).assign(
             in_rate_base=["yes"] * len(pre_assigned_yes_nodes),
         )
-
-        simple_forest = XbrlCalculationForestFerc1(
-            exploded_meta=self.exploded_meta,
-            exploded_calcs=self._exploded_calcs_from_edges(edges),
-            seeds=[self.parent],
-            tags=tags,
-        )
-        annotated_forest = simple_forest.annotated_forest
-        assert len(annotated_forest.nodes) == 7
-        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
         for pre_yes_node in pre_assigned_yes_nodes:
             assert annotated_tags[pre_yes_node]["in_rate_base"] == "yes"
         for post_yes_node in [

From 829757adb5e42c1cf0c3e19fb492433bb721cb6d Mon Sep 17 00:00:00 2001
From: Christina Gosnell <cgosnell@catalyst.coop>
Date: Fri, 26 Jan 2024 17:13:20 -0500
Subject: [PATCH 12/17] remove metadata from forest builder and cleanup unit
 tests

---
 src/pudl/output/ferc1.py       | 44 +++++++++----------------------
 test/unit/output/ferc1_test.py | 48 ++++++++--------------------------
 2 files changed, 23 insertions(+), 69 deletions(-)

diff --git a/src/pudl/output/ferc1.py b/src/pudl/output/ferc1.py
index fcb76a9faa..7bc331d770 100644
--- a/src/pudl/output/ferc1.py
+++ b/src/pudl/output/ferc1.py
@@ -1665,7 +1665,6 @@ def calculation_forest(self: Self) -> "XbrlCalculationForestFerc1":
         """Construct a calculation forest based on class attributes."""
         return XbrlCalculationForestFerc1(
             exploded_calcs=self.exploded_calcs,
-            exploded_meta=self.exploded_meta,
             seeds=self.seed_nodes,
             tags=self.tags,
             group_metric_checks=self.group_metric_checks,
@@ -2023,7 +2022,6 @@ class XbrlCalculationForestFerc1(BaseModel):
 
     # Not sure if dynamically basing this on NodeId is really a good idea here.
     calc_cols: list[str] = list(NodeId._fields)
-    exploded_meta: pd.DataFrame = pd.DataFrame()
     exploded_calcs: pd.DataFrame = pd.DataFrame()
     seeds: list[NodeId] = []
     tags: pd.DataFrame = pd.DataFrame()
@@ -2180,32 +2178,9 @@ def node_attrs(self: Self) -> dict[NodeId, dict[str, dict[str, str]]]:
             .reset_index()
             # Type conversion is necessary to get pd.NA in the index:
             .astype({col: pd.StringDtype() for col in self.calc_cols})
-            # We need a dictionary for *all* nodes, not just those with tags.
-            .merge(
-                self.exploded_meta.loc[:, self.calc_cols],
-                how="left",
-                on=self.calc_cols,
-                validate="one_to_many",
-                indicator=True,
-            )
-            # For nodes with no tags, we assign an empty dictionary:
             .assign(tags=lambda x: np.where(x["tags"].isna(), {}, x["tags"]))
         )
-        lefties = node_attrs[
-            (node_attrs._merge == "left_only")
-            & (node_attrs.table_name.isin(self.table_names))
-        ]
-        if not lefties.empty:
-            logger.warning(
-                f"Found {len(lefties)} tags that only exist in our manually compiled "
-                "tags when expected none. Ensure the compiled tags match the metadata."
-                f"Mismatched tags:\n{lefties}"
-            )
-        return (
-            node_attrs.drop(columns=["_merge"])
-            .set_index(self.calc_cols)
-            .to_dict(orient="index")
-        )
+        return node_attrs.set_index(self.calc_cols).to_dict(orient="index")
 
     @cached_property
     def edge_attrs(self: Self) -> dict[Any, Any]:
@@ -2425,7 +2400,7 @@ def seeded_digraph(self: Self) -> nx.DiGraph:
 
         We compile a list of all the :class:`NodeId` values that should be included in
         the pruned graph, and then use that list to select a subset of the exploded
-        metadata to pass to :meth:`exploded_meta_to_digraph`, so that all of the
+        metadata to pass to :meth:`exploded_calcs_to_digraph`, so that all of the
         associated metadata is also added to the pruned graph.
         """
         return self.prune_unrooted(self.full_digraph)
@@ -2553,11 +2528,16 @@ def forest_leaves(self: Self) -> list[NodeId]:
     def orphans(self: Self) -> list[NodeId]:
         """Identify all nodes that appear in metadata but not in the full digraph."""
         nodes = self.full_digraph.nodes
-        return [
-            NodeId(*n)
-            for n in self.exploded_meta.set_index(self.calc_cols).index
-            if n not in nodes
-        ]
+        orphans = []
+        for idx_cols in [self.calc_cols, self.parent_cols]:
+            orphans.extend(
+                [
+                    NodeId(*n)
+                    for n in self.exploded_calcs.set_index(idx_cols).index
+                    if n not in nodes
+                ]
+            )
+        return list(set(orphans))
 
     @cached_property
     def pruned(self: Self) -> list[NodeId]:
diff --git a/test/unit/output/ferc1_test.py b/test/unit/output/ferc1_test.py
index 1d2b16ee19..8a1dfdc6c1 100644
--- a/test/unit/output/ferc1_test.py
+++ b/test/unit/output/ferc1_test.py
@@ -23,7 +23,6 @@
 
 import networkx as nx
 import pandas as pd
-import pytest
 
 from pudl.helpers import dedupe_n_flatten_list_of_lists
 from pudl.output.ferc1 import NodeId, XbrlCalculationForestFerc1
@@ -86,17 +85,6 @@ def setUp(self):
             plant_status=pd.NA,
             plant_function=pd.NA,
         )
-        dtype_node = {col: pd.StringDtype() for col in NodeId._fields}
-        self.exploded_meta = pd.DataFrame(
-            [
-                self.parent,
-                self.child1,
-                self.child2,
-                self.grand_child11,
-                self.grand_child12,
-                self.child1_correction,
-            ]
-        ).astype(dtype_node)
 
     def _exploded_calcs_from_edges(self, edges: list[tuple[NodeId, NodeId]]):
         records = []
@@ -128,7 +116,6 @@ def build_forest_and_annotated_tags(
         if not seeds:
             seeds = [self.parent]
         simple_forest = XbrlCalculationForestFerc1(
-            exploded_meta=self.exploded_meta,
             exploded_calcs=self._exploded_calcs_from_edges(edges),
             seeds=seeds,
             tags=tags,
@@ -178,17 +165,14 @@ def test_leafward_prop_preserve_non_propagating_tags(self):
     def test_rootward_prop_disagreeing_children(self):
         """Parents should not pick sides between disagreeing children."""
         edges = [(self.parent, self.child1), (self.parent, self.child2)]
-        tags = pd.DataFrame([self.parent, self.child1, self.child2]).assign(
-            in_rate_base=[pd.NA, "no", "yes"]
+        tags = pd.DataFrame([self.child1, self.child2]).assign(
+            in_rate_base=["no", "yes"]
         )
         annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
-        assert annotated_tags[self.parent] == {}
+        assert not annotated_tags.get(self.parent)
         assert annotated_tags[self.child1]["in_rate_base"] == "no"
         assert annotated_tags[self.child2]["in_rate_base"] == "yes"
 
-    @pytest.mark.xfail(
-        reason="we haven't implemented this behavior correctly yet", strict=True
-    )
     def test_prop_no_tags(self):
         """If no tags, don't propagate anything."""
         edges = [(self.parent, self.child1), (self.parent, self.child2)]
@@ -202,9 +186,9 @@ def test_prop_no_tags(self):
 
         tags = pd.DataFrame(columns=NodeId._fields).convert_dtypes()
         annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
-        assert annotated_tags[self.parent] == {}
-        assert annotated_tags[self.child1] == {}
-        assert annotated_tags[self.child2] == {}
+        assert not annotated_tags.get(self.parent)
+        assert not annotated_tags.get(self.child1)
+        assert not annotated_tags.get(self.child2)
 
     def test_annotated_forest_propagates_rootward(self):
         edges = [
@@ -217,11 +201,9 @@ def test_annotated_forest_propagates_rootward(self):
             in_rate_base=["yes", "yes"]
         )
         annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
-        # TODO: WHY THO it doesn't show up
-        # assert annotated_tags[self.parent] == {}
-        assert annotated_tags.get(self.parent, {}) == {}
+        assert not annotated_tags.get(self.parent)
         assert annotated_tags[self.child1]["in_rate_base"] == "yes"
-        assert annotated_tags.get(self.child2, {}) == {}
+        assert not annotated_tags.get(self.child2)
         assert annotated_tags[self.grand_child11]["in_rate_base"] == "yes"
         assert annotated_tags[self.grand_child12]["in_rate_base"] == "yes"
 
@@ -236,9 +218,9 @@ def test_annotated_forest_propagates_rootward_disagreeing_sibling(self):
             in_rate_base=["yes", "no"]
         )
         annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
-        assert annotated_tags.get(self.parent, {}) == {}
-        assert annotated_tags.get(self.child1, {}) == {}
-        assert annotated_tags.get(self.child2, {}) == {}
+        assert not annotated_tags.get(self.parent)
+        assert not annotated_tags.get(self.child1)
+        assert not annotated_tags.get(self.child2)
         assert annotated_tags[self.grand_child11]["in_rate_base"] == "yes"
         assert annotated_tags[self.grand_child12]["in_rate_base"] == "no"
 
@@ -295,11 +277,3 @@ def test_annotated_forest_propagates_rootward_two_layers_plus_corrections(self):
             self.parent_correction,
         ]:
             assert annotated_tags[post_yes_node]["in_rate_base"] == "yes"
-
-
-def test_annotated_forest_propagates_corrections():
-    pass
-
-
-def test_annotate_forest_propagates_both_dirs_with_corrections():
-    pass

From 33fa1efe36e0b06f7ec42a3c5b8d72fde9dac20d Mon Sep 17 00:00:00 2001
From: Christina Gosnell <cgosnell@catalyst.coop>
Date: Tue, 30 Jan 2024 10:54:28 -0500
Subject: [PATCH 13/17] add "validation" checks and standardize null tag
 behavior

---
 src/pudl/output/ferc1.py       | 245 ++++++++++++++++++++++-----------
 test/unit/output/ferc1_test.py |  55 ++++++--
 2 files changed, 205 insertions(+), 95 deletions(-)

diff --git a/src/pudl/output/ferc1.py b/src/pudl/output/ferc1.py
index 7bc331d770..026ba2fde4 100644
--- a/src/pudl/output/ferc1.py
+++ b/src/pudl/output/ferc1.py
@@ -2161,7 +2161,10 @@ def node_attrs(self: Self) -> dict[NodeId, dict[str, dict[str, str]]]:
         # Reshape the tags to turn them into a dictionary of values per-node. This
         # will make it easier to add arbitrary sets of tags later on.
         tags_dict = (
-            self.tags.convert_dtypes().set_index(self.calc_cols).to_dict(orient="index")
+            self.tags.convert_dtypes()
+            .set_index(self.calc_cols)
+            .dropna(how="all")
+            .to_dict(orient="index")
         )
         # Drop None tags created by combining multiple tagging CSVs
         clean_tags_dict = {
@@ -2226,7 +2229,7 @@ def annotated_forest(self: Self) -> nx.DiGraph:
         annotated_forest = deepcopy(self.forest)
         nx.set_node_attributes(annotated_forest, self.node_attrs)
         nx.set_edge_attributes(annotated_forest, self.edge_attrs)
-        annotated_forest = self.propagate_tags(annotated_forest)
+        annotated_forest = self.propagate_node_attributes(annotated_forest)
 
         logger.info("Checking whether any pruned nodes were also tagged.")
         self.check_lost_tags(lost_nodes=self.pruned)
@@ -2235,53 +2238,18 @@ def annotated_forest(self: Self) -> nx.DiGraph:
         self.check_conflicting_tags(annotated_forest)
         return annotated_forest
 
-    def propagate_tags(self: Self, annotated_forest: nx.DiGraph):
+    def propagate_node_attributes(self: Self, annotated_forest: nx.DiGraph):
         """Propagate tags.
 
         Propagate tags leafwards, rootward &  to the _correction nodes.
         """
-        existing_tags = nx.get_node_attributes(annotated_forest, "tags")
         ## Leafwards propagation
-        leafward_inherited_tags = ["in_rate_base"]
-        for node, parent_tags in existing_tags.items():
-            descendants = nx.descendants(annotated_forest, node)
-            descendant_tags = {
-                desc: {
-                    "tags": {
-                        tag_name: parent_tags[tag_name]
-                        for tag_name in leafward_inherited_tags
-                        if tag_name in parent_tags
-                    }
-                    | existing_tags.get(desc, {})
-                }
-                for desc in descendants
-            }
-            nx.set_node_attributes(annotated_forest, descendant_tags)
-
+        annotated_forest = _propagate_tags_leafward(annotated_forest, ["in_rate_base"])
         # Rootward propagation
         root_node = self.roots(annotated_forest)[0]
-        _ = recursively_propagate_tags_leafward(
-            annotated_forest, root_node, "in_rate_base"
-        )
+        _ = _propagate_tag_rootward(annotated_forest, root_node, "in_rate_base")
         # Correction Records
-        existing_tags = nx.get_node_attributes(annotated_forest, "tags")
-        correction_nodes = [
-            node
-            for node in annotated_forest
-            if node.xbrl_factoid.endswith("_correction")
-        ]
-        correction_tags = {}
-        for correction_node in correction_nodes:
-            # for every correction node, we assume that that nodes parent tags can apply
-            parents = list(annotated_forest.predecessors(correction_node))
-            # all correction records shoul have a parent and only one
-            assert len(parents) == 1
-            parent = parents[0]
-            correction_tags[correction_node] = {
-                "tags": existing_tags.get(parent, {})
-                | existing_tags.get(correction_node, {})
-            }
-        nx.set_node_attributes(annotated_forest, correction_tags)
+        annotated_forest = _propagate_tags_to_corrections(annotated_forest)
         return annotated_forest
 
     def check_lost_tags(self: Self, lost_nodes: list[NodeId]) -> None:
@@ -2813,12 +2781,34 @@ def nodes_to_df(calc_forest: nx.DiGraph, nodes: list[NodeId]) -> pd.DataFrame:
     return pd.concat([index, tags], axis="columns")
 
 
-def recursively_propagate_tags_leafward(
-    annotated_forest, node, tag_name: Literal["in_rate_base"]
-):
-    """Set the tags for nodes when all of its children have same tag.
+def _propagate_tags_leafward(
+    annotated_forest: nx.DiGraph, leafward_inherited_tags: list[str]
+) -> nx.DiGraph:
+    existing_tags = nx.get_node_attributes(annotated_forest, "tags")
+    for node, parent_tags in existing_tags.items():
+        descendants = nx.descendants(annotated_forest, node)
+        descendant_tags = {
+            desc: {
+                "tags": {
+                    tag_name: parent_tags[tag_name]
+                    for tag_name in leafward_inherited_tags
+                    if tag_name in parent_tags
+                }
+                | existing_tags.get(desc, {})
+            }
+            for desc in descendants
+        }
+        nx.set_node_attributes(annotated_forest, descendant_tags)
+    return annotated_forest
+
+
+def _propagate_tag_rootward(
+    annotated_forest: nx.DiGraph, node, tag_name: Literal["in_rate_base"]
+) -> str:
+    """Set the tag for nodes when all of its children have same tag.
 
-    This function returns the value of a tag.
+    This function returns the value of a tag, but also sets node attributes
+    down the tree when all children of a node share the same tag.
     """
 
     def _get_tag(annotated_forest, node, tag_name):
@@ -2826,30 +2816,23 @@ def _get_tag(annotated_forest, node, tag_name):
 
     logger.info(f"propagaging tags leafward from {node}")
     tag = pd.NA
-    # i'm a leaf so i stop looking
-    if not list(annotated_forest.successors(node)):
-        tag = _get_tag(annotated_forest, node, tag_name)
-        logger.info(f"    We found a leaf people. w/ {tag=}")
-    # if i have a value you don't need to keep looking at this nodes childern
-    elif not pd.isna(_get_tag(annotated_forest, node, tag_name)):
+    # i'm a leaf so i stop looking or
+    # if i have a value you don't need to keep looking at this node's children
+    if not list(annotated_forest.successors(node)) or not pd.isna(
+        _get_tag(annotated_forest, node, tag_name)
+    ):
         tag = _get_tag(annotated_forest, node, tag_name)
-        logger.info(f"    We found a node w/ tags. w/ {tag=}")
+
     else:
         child_tags = set()
         for child_node in annotated_forest.successors(node):
             if not child_node.xbrl_factoid.endswith("_correction"):
                 child_tags.add(
-                    recursively_propagate_tags_leafward(
-                        annotated_forest, child_node, tag_name
-                    )
+                    _propagate_tag_rootward(annotated_forest, child_node, tag_name)
                 )
-        logger.info(f"   found {child_tags=}")
         # if all the children tags are the same and non-null
         if (len(child_tags) == 1) and {t for t in child_tags if not pd.isna(t)}:
             new_node_tag = child_tags.pop()
-            logger.info(
-                f"    We found a node consitent children tags. w/ {new_node_tag=}"
-            )
             # actually assign the tag here but don't wipe out any other tags
             existing_tags = nx.get_node_attributes(annotated_forest, "tags")
             node_tags = {
@@ -2857,14 +2840,117 @@ def _get_tag(annotated_forest, node, tag_name):
             }
             nx.set_node_attributes(annotated_forest, node_tags)
             tag = new_node_tag
+        # elif the children disagree then the node's tag shouldn't be set and
+        # the og null tag should be returned
     return tag
 
 
+def _propagate_tags_to_corrections(annotated_forest: nx.DiGraph) -> nx.DiGraph:
+    existing_tags = nx.get_node_attributes(annotated_forest, "tags")
+    correction_nodes = [
+        node for node in annotated_forest if node.xbrl_factoid.endswith("_correction")
+    ]
+    correction_tags = {}
+    for correction_node in correction_nodes:
+        # for every correction node, we assume that its parent's tags can apply
+        parents = list(annotated_forest.predecessors(correction_node))
+        # all correction records should have exactly one parent
+        assert len(parents) == 1
+        parent = parents[0]
+        correction_tags[correction_node] = {
+            "tags": existing_tags.get(parent, {})
+            | existing_tags.get(correction_node, {})
+        }
+    nx.set_node_attributes(annotated_forest, correction_tags)
+    return annotated_forest
+
+
+def check_tag_propagation_compared_to_compiled_tags(
+    df: pd.DataFrame,
+    propogated_tag: Literal["in_rate_base"],
+    _out_ferc1__explosion_tags: pd.DataFrame,
+):
+    """Check if tags got propagated.
+
+    Args:
+        df: table to check. This should be either the
+            :func:`out_ferc1__yearly_rate_base`, ``exploded_balance_sheet_assets_ferc1``
+            or ``exploded_balance_sheet_liabilities_ferc1``. The
+            ``exploded_income_statement_ferc1`` table does not currently have propagated
+            tags.
+        propogated_tag: name of tag. Currently ``in_rate_base`` is the only propagated tag.
+        _out_ferc1__explosion_tags: mannually compiled tags. This table includes tags from
+            many of the explosion tables so we will filter it before checking if the tag was
+            propagated.
+
+    Raises:
+        AssertionError: If fewer ``xbrl_factoids`` in ``df`` have a propagated tag
+            than are manually tagged in ``_out_ferc1__explosion_tags``.
+        AssertionError: If fewer correction ``xbrl_factoids`` in ``df`` have a
+            propagated tag than are manually tagged in ``_out_ferc1__explosion_tags``.
+    """
+    # the tag df has all tags - not just those in a specific explosion
+    # so we need to drop the tags for any nodes that are not in df
+    node_idx = list(NodeId._fields)
+    df_filtered = df.filter(node_idx).drop_duplicates()
+    df_tags = _out_ferc1__explosion_tags.merge(
+        df_filtered, on=list(df_filtered.columns), how="right"
+    )
+    mannually_tagged = df_tags[df_tags[propogated_tag].notnull()].xbrl_factoid.unique()
+    detailed_tagged = df[df[f"tags_{propogated_tag}"].notnull()].xbrl_factoid.unique()
+    if len(detailed_tagged) < len(mannually_tagged):
+        raise AssertionError(
+            f"Found more {len(mannually_tagged)} mannually compiled tagged xbrl_factoids"
+            " than tags in propagated detailed data."
+        )
+    mannually_tagged_corrections = df_tags[
+        df_tags[propogated_tag].notnull()
+        & df_tags.xbrl_factoid.str.endswith("_correction")
+    ].xbrl_factoid.unique()
+    detailed_tagged_corrections = df[
+        df[f"tags_{propogated_tag}"].notnull()
+        & df.xbrl_factoid.str.endswith("_correction")
+    ].xbrl_factoid.unique()
+    if len(detailed_tagged_corrections) < len(mannually_tagged_corrections):
+        raise AssertionError(
+            f"Found more {len(mannually_tagged)} mannually compiled tagged "
+            "xbrl_factoids than tags in propagated detailed data."
+        )
+
+
+def check_for_correction_xbrl_factoids_with_tag(
+    df: pd.DataFrame, propogated_tag: Literal["in_rate_base"]
+):
+    """Check if any correction records have tags.
+
+    Args:
+        df: table to check. This should be either the
+            :func:`out_ferc1__yearly_rate_base`, ``exploded_balance_sheet_assets_ferc1``
+            or ``exploded_balance_sheet_liabilities_ferc1``. The
+            ``exploded_income_statement_ferc1`` table does not currently have propagated
+            tags.
+        propogated_tag: name of tag. Currently ``in_rate_base`` is the only propagated tag.
+
+    Raises:
+        AssertionError: If there are zero correction ``xbrl_factoids`` in ``df`` with tags.
+    """
+    detailed_tagged_corrections = df[
+        df[f"tags_{propogated_tag}"].notnull()
+        & df.xbrl_factoid.str.endswith("_correction")
+    ].xbrl_factoid.unique()
+    if len(detailed_tagged_corrections) == 0:
+        raise AssertionError(
+            "We expect there to be more than zero correction recrods with tags, but "
+            f"found {len(detailed_tagged_corrections)}."
+        )
+
+
 @asset
 def out_ferc1__yearly_rate_base(
     exploded_balance_sheet_assets_ferc1: pd.DataFrame,
     exploded_balance_sheet_liabilities_ferc1: pd.DataFrame,
     core_ferc1__yearly_operating_expenses_sched320: pd.DataFrame,
+    _out_ferc1__explosion_tags: pd.DataFrame,
 ) -> pd.DataFrame:
     """Make a table of granular utility rate-base data.
 
@@ -2908,25 +2994,24 @@ def out_ferc1__yearly_rate_base(
         .rename(columns={"dollar_value": "ending_balance"})
     )
     # then select only the leafy exploded records that are in rate base and concat
-    in_rate_base = (
-        pd.concat(
-            [
-                exploded_balance_sheet_assets_ferc1[
-                    exploded_balance_sheet_assets_ferc1.tags_in_rate_base.isin(
-                        ["yes", "partial"]
-                    )
-                ],
-                exploded_balance_sheet_liabilities_ferc1[
-                    exploded_balance_sheet_liabilities_ferc1.tags_in_rate_base.isin(
-                        ["yes", "partial"]
-                    )
-                ],
-                cash_working_capital,
-            ]
-        )
-        # .drop(columns=["tags_in_rate_base"])
-        .sort_values(
-            by=["report_year", "utility_id_ferc1", "table_name"], ascending=False
-        )
+    in_rate_base = pd.concat(
+        [
+            exploded_balance_sheet_assets_ferc1[
+                exploded_balance_sheet_assets_ferc1.tags_in_rate_base.isin(
+                    ["yes", "partial"]
+                )
+            ],
+            exploded_balance_sheet_liabilities_ferc1[
+                exploded_balance_sheet_liabilities_ferc1.tags_in_rate_base.isin(
+                    ["yes", "partial"]
+                )
+            ].assign(ending_balance=lambda x: -x.ending_balance),
+            cash_working_capital,
+        ]
+    ).sort_values(by=["report_year", "utility_id_ferc1", "table_name"], ascending=False)
+    # note: we need the `tags_in_rate_base` column for these checks
+    check_tag_propagation_compared_to_compiled_tags(
+        in_rate_base, "in_rate_base", _out_ferc1__explosion_tags
     )
+    check_for_correction_xbrl_factoids_with_tag(in_rate_base, "in_rate_base")
     return in_rate_base
diff --git a/test/unit/output/ferc1_test.py b/test/unit/output/ferc1_test.py
index 8a1dfdc6c1..e36eb959dd 100644
--- a/test/unit/output/ferc1_test.py
+++ b/test/unit/output/ferc1_test.py
@@ -133,8 +133,11 @@ def test_leafward_prop_undecided_children(self):
         )
         annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
         assert annotated_tags[self.parent]["in_rate_base"] == "yes"
-        assert annotated_tags[self.child1]["in_rate_base"] == "yes"
-        assert annotated_tags[self.child2]["in_rate_base"] == "yes"
+        for child_node in [self.child1, self.child2]:
+            assert (
+                annotated_tags[child_node]["in_rate_base"]
+                == annotated_tags[self.parent]["in_rate_base"]
+            )
 
     def test_leafward_prop_disagreeing_child(self):
         """Don't force the diagreeing child to follow the parent."""
@@ -145,7 +148,10 @@ def test_leafward_prop_disagreeing_child(self):
         annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
         assert annotated_tags[self.parent]["in_rate_base"] == "yes"
         assert annotated_tags[self.child1]["in_rate_base"] == "no"
-        assert annotated_tags[self.child2]["in_rate_base"] == "yes"
+        assert (
+            annotated_tags[self.child2]["in_rate_base"]
+            == annotated_tags[self.parent]["in_rate_base"]
+        )
 
     def test_leafward_prop_preserve_non_propagating_tags(self):
         """Don't force the diagreeing child to follow the parent."""
@@ -157,7 +163,10 @@ def test_leafward_prop_preserve_non_propagating_tags(self):
         annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
         assert annotated_tags[self.parent]["in_rate_base"] == "yes"
         assert annotated_tags[self.child1]["in_rate_base"] == "no"
-        assert annotated_tags[self.child2]["in_rate_base"] == "yes"
+        assert (
+            annotated_tags[self.child2]["in_rate_base"]
+            == annotated_tags[self.parent]["in_rate_base"]
+        )
         assert annotated_tags[self.parent]["in_root_boose"] == "yus"
         assert annotated_tags[self.child1]["in_root_boose"] == "nu"
         assert annotated_tags[self.child2]["in_root_boose"] == "purtiul"
@@ -174,21 +183,34 @@ def test_rootward_prop_disagreeing_children(self):
         assert annotated_tags[self.child2]["in_rate_base"] == "yes"
 
     def test_prop_no_tags(self):
-        """If no tags, don't propagate anything."""
+        """If no tags, don't propagate anything.
+
+        This also tests whether a fully null tag input behaves the same as an
+        empty df. It also checks whether we get the expected behavior when
+        the propagated tags are all null but there is another non-propagating
+        tag.
+        """
         edges = [(self.parent, self.child1), (self.parent, self.child2)]
-        tags = pd.DataFrame([self.parent, self.child1, self.child2]).assign(
-            in_rate_base=[pd.NA, pd.NA, pd.NA]
-        )
+        null_tag_edges = [self.parent, self.child1, self.child2]
+        tags = pd.DataFrame(null_tag_edges).assign(in_rate_base=[pd.NA, pd.NA, pd.NA])
         annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
-        assert annotated_tags[self.parent] == {}
-        assert annotated_tags[self.child1] == {}
-        assert annotated_tags[self.child2] == {}
+        for node in null_tag_edges:
+            assert not annotated_tags.get(node)
 
         tags = pd.DataFrame(columns=NodeId._fields).convert_dtypes()
         annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
-        assert not annotated_tags.get(self.parent)
-        assert not annotated_tags.get(self.child1)
-        assert not annotated_tags.get(self.child2)
+        for node in null_tag_edges:
+            assert not annotated_tags.get(node)
+
+        tags = pd.DataFrame([self.parent, self.child1, self.child2]).assign(
+            in_rate_base=[pd.NA, pd.NA, pd.NA],
+            a_non_propped_tag=["hi", "hello", "what_am_i_doing_here_even"],
+        )
+        annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
+        for node in null_tag_edges:
+            assert not annotated_tags[node].get("in_rate_base")
+            # do we still have a non-null value for the non-propped tag
+            assert annotated_tags[node].get("a_non_propped_tag")
 
     def test_annotated_forest_propagates_rootward(self):
         edges = [
@@ -235,7 +257,10 @@ def test_annotated_forest_propagates_rootward_correction(self):
         )
         assert annotated_tags[self.child1]["in_rate_base"] == "yes"
         assert annotated_tags[self.grand_child11]["in_rate_base"] == "yes"
-        assert annotated_tags[self.child1_correction]["in_rate_base"] == "yes"
+        assert (
+            annotated_tags[self.child1_correction]["in_rate_base"]
+            == annotated_tags[self.child1]["in_rate_base"]
+        )
 
     def test_annotated_forest_propagates_rootward_two_layers(self):
         edges = [

From 0f3b6540eb59111528127b9fc7564832e0168694 Mon Sep 17 00:00:00 2001
From: Christina Gosnell <cgosnell@catalyst.coop>
Date: Tue, 30 Jan 2024 11:47:38 -0500
Subject: [PATCH 14/17] light cleaning

---
 src/pudl/output/ferc1.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/pudl/output/ferc1.py b/src/pudl/output/ferc1.py
index 026ba2fde4..ecd6bfae37 100644
--- a/src/pudl/output/ferc1.py
+++ b/src/pudl/output/ferc1.py
@@ -1154,10 +1154,7 @@ class OffByFactoid(NamedTuple):
 
 
 @asset
-def _out_ferc1__explosion_tags(
-    table_dimensions_ferc1: pd.DataFrame,
-    calculation_components_xbrl_ferc1: pd.DataFrame,
-) -> pd.DataFrame:
+def _out_ferc1__explosion_tags(table_dimensions_ferc1: pd.DataFrame) -> pd.DataFrame:
     """Grab the stored tables of tags and add inferred dimension."""
     rate_tags = _get_tags("xbrl_factoid_rate_base_tags.csv", table_dimensions_ferc1)
     rev_req_tags = _get_tags(
@@ -2025,7 +2022,6 @@ class XbrlCalculationForestFerc1(BaseModel):
     exploded_calcs: pd.DataFrame = pd.DataFrame()
     seeds: list[NodeId] = []
     tags: pd.DataFrame = pd.DataFrame()
-    # TODO: remove the group metric checks and see if things still build / tests still pass
     group_metric_checks: GroupMetricChecks = GroupMetricChecks()
     model_config = ConfigDict(
         arbitrary_types_allowed=True, ignored_types=(cached_property,)

From 3e5c2cdd11a47447e6d9063941d70614d786d721 Mon Sep 17 00:00:00 2001
From: Christina Gosnell <cgosnell@catalyst.coop>
Date: Wed, 31 Jan 2024 07:39:54 -0700
Subject: [PATCH 15/17] root boose docs!

Co-authored-by: Dazhong Xia <dazhong.xia@catalyst.coop>
---
 test/unit/output/ferc1_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/unit/output/ferc1_test.py b/test/unit/output/ferc1_test.py
index e36eb959dd..cf78429c22 100644
--- a/test/unit/output/ferc1_test.py
+++ b/test/unit/output/ferc1_test.py
@@ -154,7 +154,7 @@ def test_leafward_prop_disagreeing_child(self):
         )
 
     def test_leafward_prop_preserve_non_propagating_tags(self):
-        """Don't force the diagreeing child to follow the parent."""
+        """Only propagate tags that actually get inherited - i.e., not `in_root_boose`."""
         edges = [(self.parent, self.child1), (self.parent, self.child2)]
         tags = pd.DataFrame([self.parent, self.child1, self.child2]).assign(
             in_rate_base=["yes", "no", pd.NA],

From b8758ddfaf65dbcbed913c4a693bbe3cf6d297de Mon Sep 17 00:00:00 2001
From: Christina Gosnell <cgosnell@catalyst.coop>
Date: Wed, 31 Jan 2024 12:06:37 -0500
Subject: [PATCH 16/17] respond to dazhong's comments

---
 src/pudl/output/ferc1.py       | 97 +++++++++++++++++-----------------
 test/unit/output/ferc1_test.py | 28 +++++-----
 2 files changed, 64 insertions(+), 61 deletions(-)

diff --git a/src/pudl/output/ferc1.py b/src/pudl/output/ferc1.py
index ecd6bfae37..96054eb85f 100644
--- a/src/pudl/output/ferc1.py
+++ b/src/pudl/output/ferc1.py
@@ -2242,8 +2242,7 @@ def propagate_node_attributes(self: Self, annotated_forest: nx.DiGraph):
         ## Leafwards propagation
         annotated_forest = _propagate_tags_leafward(annotated_forest, ["in_rate_base"])
         # Rootward propagation
-        root_node = self.roots(annotated_forest)[0]
-        _ = _propagate_tag_rootward(annotated_forest, root_node, "in_rate_base")
+        annotated_forest = _propagate_tag_rootward(annotated_forest, "in_rate_base")
         # Correction Records
         annotated_forest = _propagate_tags_to_corrections(annotated_forest)
         return annotated_forest
@@ -2780,6 +2779,10 @@ def nodes_to_df(calc_forest: nx.DiGraph, nodes: list[NodeId]) -> pd.DataFrame:
 def _propagate_tags_leafward(
     annotated_forest: nx.DiGraph, leafward_inherited_tags: list[str]
 ) -> nx.DiGraph:
+    """Push a parent's tags down to its descendants.
+
+    Only push the `leafward_inherited_tags` - others will be left alone.
+    """
     existing_tags = nx.get_node_attributes(annotated_forest, "tags")
     for node, parent_tags in existing_tags.items():
         descendants = nx.descendants(annotated_forest, node)
@@ -2799,7 +2802,7 @@ def _propagate_tags_leafward(
 
 
 def _propagate_tag_rootward(
-    annotated_forest: nx.DiGraph, node, tag_name: Literal["in_rate_base"]
+    annotated_forest: nx.DiGraph, tag_name: Literal["in_rate_base"]
 ) -> str:
     """Set the tag for nodes when all of its children have same tag.
 
@@ -2808,37 +2811,35 @@ def _propagate_tag_rootward(
     """
 
     def _get_tag(annotated_forest, node, tag_name):
-        return annotated_forest.nodes.get(node, {}).get("tags", {}).get(tag_name, pd.NA)
-
-    logger.info(f"propagaging tags leafward from {node}")
-    tag = pd.NA
-    # i'm a leaf so i stop looking or
-    # if i have a value you don't need to keep looking at this node's childern
-    if not list(annotated_forest.successors(node)) or not pd.isna(
-        _get_tag(annotated_forest, node, tag_name)
-    ):
-        tag = _get_tag(annotated_forest, node, tag_name)
-
-    else:
-        child_tags = set()
-        for child_node in annotated_forest.successors(node):
-            if not child_node.xbrl_factoid.endswith("_correction"):
-                child_tags.add(
-                    _propagate_tag_rootward(annotated_forest, child_node, tag_name)
-                )
-        # if all the children tags are the same and non-null
-        if (len(child_tags) == 1) and {t for t in child_tags if not pd.isna(t)}:
-            new_node_tag = child_tags.pop()
-            # actually assign the tag here but don't wipe out any other tags
-            existing_tags = nx.get_node_attributes(annotated_forest, "tags")
-            node_tags = {
-                node: {"tags": {tag_name: new_node_tag} | existing_tags.get(node, {})}
+        return annotated_forest.nodes.get(node, {}).get("tags", {}).get(tag_name)
+
+    generations = list(nx.topological_generations(annotated_forest))
+    for gen in reversed(generations):
+        untagged_nodes = {
+            node_id
+            for node_id in gen
+            if _get_tag(annotated_forest, node_id, tag_name) is None
+        }
+        for parent_node in untagged_nodes:
+            child_tags = {
+                _get_tag(annotated_forest, c, tag_name)
+                for c in annotated_forest.successors(parent_node)
+                if not c.xbrl_factoid.endswith("_correction")
             }
-            nx.set_node_attributes(annotated_forest, node_tags)
-            tag = new_node_tag
-        # elif the children disagree then the node's tag shouldn't be set and
-        # the og null tag should be returned
-    return tag
+            non_null_tags = child_tags - {None}
+            # sometimes, all children can share same tag but it's null.
+            if len(child_tags) == 1 and non_null_tags:
+                # actually assign the tag here but don't wipe out any other tags
+                new_node_tag = non_null_tags.pop()
+                existing_tags = nx.get_node_attributes(annotated_forest, "tags")
+                node_tags = {
+                    parent_node: {
+                        "tags": {tag_name: new_node_tag}
+                        | existing_tags.get(parent_node, {})
+                    }
+                }
+                nx.set_node_attributes(annotated_forest, node_tags)
+    return annotated_forest
 
 
 def _propagate_tags_to_corrections(annotated_forest: nx.DiGraph) -> nx.DiGraph:
@@ -2863,7 +2864,7 @@ def _propagate_tags_to_corrections(annotated_forest: nx.DiGraph) -> nx.DiGraph:
 
 def check_tag_propagation_compared_to_compiled_tags(
     df: pd.DataFrame,
-    propogated_tag: Literal["in_rate_base"],
+    propagated_tag: Literal["in_rate_base"],
     _out_ferc1__explosion_tags: pd.DataFrame,
 ):
     """Check if tags got propagated.
@@ -2874,7 +2875,7 @@ def check_tag_propagation_compared_to_compiled_tags(
             or ``exploded_balance_sheet_liabilities_ferc1``. The
             ``exploded_income_statement_ferc1`` table does not currently have propagated
             tags.
-        propogated_tag: name of tag. Currently ``in_rate_base`` is the only propagated tag.
+        propagated_tag: name of tag. Currently ``in_rate_base`` is the only propagated tag.
         _out_ferc1__explosion_tags: mannually compiled tags. This table includes tags from
             many of the explosion tables so we will filter it before checking if the tag was
             propagated.
@@ -2892,30 +2893,30 @@ def check_tag_propagation_compared_to_compiled_tags(
     df_tags = _out_ferc1__explosion_tags.merge(
         df_filtered, on=list(df_filtered.columns), how="right"
     )
-    mannually_tagged = df_tags[df_tags[propogated_tag].notnull()].xbrl_factoid.unique()
-    detailed_tagged = df[df[f"tags_{propogated_tag}"].notnull()].xbrl_factoid.unique()
-    if len(detailed_tagged) < len(mannually_tagged):
+    manually_tagged = df_tags[df_tags[propagated_tag].notnull()].xbrl_factoid.unique()
+    detailed_tagged = df[df[f"tags_{propagated_tag}"].notnull()].xbrl_factoid.unique()
+    if len(detailed_tagged) < len(manually_tagged):
         raise AssertionError(
-            f"Found more {len(mannually_tagged)} mannually compiled tagged xbrl_factoids"
+            f"Found more {len(manually_tagged)} mannually compiled tagged xbrl_factoids"
             " than tags in propagated detailed data."
         )
-    mannually_tagged_corrections = df_tags[
-        df_tags[propogated_tag].notnull()
+    manually_tagged_corrections = df_tags[
+        df_tags[propagated_tag].notnull()
         & df_tags.xbrl_factoid.str.endswith("_correction")
     ].xbrl_factoid.unique()
     detailed_tagged_corrections = df[
-        df[f"tags_{propogated_tag}"].notnull()
+        df[f"tags_{propagated_tag}"].notnull()
         & df.xbrl_factoid.str.endswith("_correction")
     ].xbrl_factoid.unique()
-    if len(detailed_tagged_corrections) < len(mannually_tagged_corrections):
+    if len(detailed_tagged_corrections) < len(manually_tagged_corrections):
         raise AssertionError(
-            f"Found more {len(mannually_tagged)} mannually compiled tagged "
-            "xbrl_factoids than tags in propagated detailed data."
+            f"Found more {len(manually_tagged_corrections)} mannually compiled "
+            "tagged xbrl_factoids than tags in propagated detailed data."
         )
 
 
 def check_for_correction_xbrl_factoids_with_tag(
-    df: pd.DataFrame, propogated_tag: Literal["in_rate_base"]
+    df: pd.DataFrame, propagated_tag: Literal["in_rate_base"]
 ):
     """Check if any correction records have tags.
 
@@ -2925,13 +2926,13 @@ def check_for_correction_xbrl_factoids_with_tag(
             or ``exploded_balance_sheet_liabilities_ferc1``. The
             ``exploded_income_statement_ferc1`` table does not currently have propagated
             tags.
-        propogated_tag: name of tag. Currently ``in_rate_base`` is the only propagated tag.
+        propagated_tag: name of tag. Currently ``in_rate_base`` is the only propagated tag.
 
     Raises:
         AssertionError: If there are zero correction ``xbrl_factoids`` in ``df`` with tags.
     """
     detailed_tagged_corrections = df[
-        df[f"tags_{propogated_tag}"].notnull()
+        df[f"tags_{propagated_tag}"].notnull()
         & df.xbrl_factoid.str.endswith("_correction")
     ].xbrl_factoid.unique()
     if len(detailed_tagged_corrections) == 0:
diff --git a/test/unit/output/ferc1_test.py b/test/unit/output/ferc1_test.py
index cf78429c22..8a6ea43a8b 100644
--- a/test/unit/output/ferc1_test.py
+++ b/test/unit/output/ferc1_test.py
@@ -1,8 +1,6 @@
 """Tests for the FERC Form 1 output functions.
 
-These need to be recreated to work with the new XbrlCalculationForest implementation.
-
-Stuff to test:
+Stuff we could test:
 - construction of basic tree from input metadata
 - do nodes not part of any calculation get orphaned?
 - do nodes not in the seeded digraph get pruned?
@@ -12,10 +10,12 @@
 - pruning of passthrough nodes & associated corrections
 - propagation of weights
 - conflicting weights
-- propagation of tags
 - conflicting tags
 - validation of calculations using only leaf-nodes to reproduce root node values
 
+Stuff we are testing:
+- propagation of tags
+
 """
 
 import logging
@@ -30,10 +30,6 @@
 logger = logging.getLogger(__name__)
 
 
-# TODO: combine these into a class because we have a lot of similar method names
-# TODO: make graph construction easier with helper functions
-
-
 class TestTagPropagation(unittest.TestCase):
     def setUp(self):
         self.parent = NodeId(
@@ -158,7 +154,7 @@ def test_leafward_prop_preserve_non_propagating_tags(self):
         edges = [(self.parent, self.child1), (self.parent, self.child2)]
         tags = pd.DataFrame([self.parent, self.child1, self.child2]).assign(
             in_rate_base=["yes", "no", pd.NA],
-            in_root_boose=["yus", "nu", "purtiul"],
+            in_root_boose=["yus", "nu", pd.NA],
         )
         annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
         assert annotated_tags[self.parent]["in_rate_base"] == "yes"
@@ -169,7 +165,7 @@ def test_leafward_prop_preserve_non_propagating_tags(self):
         )
         assert annotated_tags[self.parent]["in_root_boose"] == "yus"
         assert annotated_tags[self.child1]["in_root_boose"] == "nu"
-        assert annotated_tags[self.child2]["in_root_boose"] == "purtiul"
+        assert not annotated_tags[self.child2].get("in_root_boose")
 
     def test_rootward_prop_disagreeing_children(self):
         """Parents should not pick sides between disagreeing children."""
@@ -208,11 +204,16 @@ def test_prop_no_tags(self):
         )
         annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
         for node in null_tag_edges:
-            assert not annotated_tags[node].get("in_rate_base")
+            assert "in_rate_base" not in annotated_tags[node]
             # do we still have a non-null value for the non-propped tag
             assert annotated_tags[node].get("a_non_propped_tag")
 
     def test_annotated_forest_propagates_rootward(self):
+        """If two grandchildren have the same tag, their parent does inherit the tag.
+
+        But, the rootward propagation only happens when all of a node's children have
+        the same tag.
+        """
         edges = [
             (self.parent, self.child1),
             (self.parent, self.child2),
@@ -223,13 +224,14 @@ def test_annotated_forest_propagates_rootward(self):
             in_rate_base=["yes", "yes"]
         )
         annotated_tags = self.build_forest_and_annotated_tags(edges, tags)
-        assert not annotated_tags.get(self.parent)
+        assert self.parent not in annotated_tags
         assert annotated_tags[self.child1]["in_rate_base"] == "yes"
-        assert not annotated_tags.get(self.child2)
+        assert self.child2 not in annotated_tags
         assert annotated_tags[self.grand_child11]["in_rate_base"] == "yes"
         assert annotated_tags[self.grand_child12]["in_rate_base"] == "yes"
 
     def test_annotated_forest_propagates_rootward_disagreeing_sibling(self):
+        """If two siblings disagree, their parent does not inherit either of their tag values."""
         edges = [
             (self.parent, self.child1),
             (self.parent, self.child2),

From da8df115e5a8d04815c80dd87ade65e73e003aa6 Mon Sep 17 00:00:00 2001
From: Christina Gosnell <cgosnell@catalyst.coop>
Date: Fri, 2 Feb 2024 10:27:07 -0500
Subject: [PATCH 17/17] add a test about pruned nodes and add the NodeId(*n)
 into the orphans

---
 src/pudl/output/ferc1.py       |  10 ++-
 test/unit/output/ferc1_test.py | 137 +++++++++++++++++++++++----------
 2 files changed, 103 insertions(+), 44 deletions(-)

diff --git a/src/pudl/output/ferc1.py b/src/pudl/output/ferc1.py
index 2eab3ed65f..c9452b9411 100644
--- a/src/pudl/output/ferc1.py
+++ b/src/pudl/output/ferc1.py
@@ -2524,7 +2524,11 @@ def forest_leaves(self: Self) -> list[NodeId]:
 
     @cached_property
     def orphans(self: Self) -> list[NodeId]:
-        """Identify all nodes that appear in metadata but not in the full digraph."""
+        """Identify all nodes that appear in the exploded_calcs but not in the full digraph.
+
+        Because we removed the metadata and are now building the tree entirely based on
+        the exploded_calcs, this should now never produce any orphans and is a bit redundant.
+        """
         nodes = self.full_digraph.nodes
         orphans = []
         for idx_cols in [self.calc_cols, self.parent_cols]:
@@ -2532,7 +2536,7 @@ def orphans(self: Self) -> list[NodeId]:
                 [
                     NodeId(*n)
                     for n in self.exploded_calcs.set_index(idx_cols).index
-                    if n not in nodes
+                    if NodeId(*n) not in nodes
                 ]
             )
         return list(set(orphans))
@@ -2838,7 +2842,7 @@ def _propagate_tags_leafward(
 
 def _propagate_tag_rootward(
     annotated_forest: nx.DiGraph, tag_name: Literal["in_rate_base"]
-) -> str:
+) -> nx.DiGraph:
     """Set the tag for nodes when all of its children have same tag.
 
     This function returns the value of a tag, but also sets node attributes
diff --git a/test/unit/output/ferc1_test.py b/test/unit/output/ferc1_test.py
index 16e04a68f1..d86faf1613 100644
--- a/test/unit/output/ferc1_test.py
+++ b/test/unit/output/ferc1_test.py
@@ -35,7 +35,102 @@
 logger = logging.getLogger(__name__)
 
 
-class TestTagPropagation(unittest.TestCase):
+class TestForestSetup(unittest.TestCase):
+    def setUp(self):
+        # this is where you add nodes you want to use
+        pass
+
+    def _exploded_calcs_from_edges(self, edges: list[tuple[NodeId, NodeId]]):
+        records = []
+        for parent, child in edges:
+            record = {"weight": 1}
+            for field in NodeId._fields:
+                record[f"{field}_parent"] = parent.__getattribute__(field)
+                record[field] = child.__getattribute__(field)
+            records.append(record)
+        dtype_parent = {f"{col}_parent": pd.StringDtype() for col in NodeId._fields}
+        dtype_child = {col: pd.StringDtype() for col in NodeId._fields}
+        dtype_weight = {"weight": pd.Int64Dtype()}
+
+        return pd.DataFrame.from_records(records).astype(
+            dtype_child | dtype_parent | dtype_weight
+        )
+
+    def build_forest(
+        self, edges: list[tuple[NodeId, NodeId]], tags: pd.DataFrame, seeds=None
+    ):
+        if not seeds:
+            seeds = [self.parent]
+        forest = XbrlCalculationForestFerc1(
+            exploded_calcs=self._exploded_calcs_from_edges(edges),
+            seeds=seeds,
+            tags=tags,
+        )
+        return forest
+
+    def build_forest_and_annotated_tags(
+        self, edges: list[tuple[NodeId, NodeId]], tags: pd.DataFrame, seeds=None
+    ):
+        """Build a forest, test forest nodes and return annotated tags.
+
+        Args:
+            edges: list of tuples
+            tags: dataframe of tags
+            seeds: list of seed nodes. Default is None and will assume seed node is
+                ``parent``.
+        """
+        simple_forest = self.build_forest(edges, tags, seeds)
+        annotated_forest = simple_forest.annotated_forest
+        # ensure no nodes got dropped
+        assert len(annotated_forest.nodes) == len(dedupe_n_flatten_list_of_lists(edges))
+        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
+        return annotated_tags
+
+
+class TestPrunnedNode(TestForestSetup):
+    def setUp(self):
+        self.root = NodeId(
+            table_name="table_1",
+            xbrl_factoid="reported_1",
+            utility_type="electric",
+            plant_status=pd.NA,
+            plant_function=pd.NA,
+        )
+        self.root_child = NodeId(
+            table_name="table_1",
+            xbrl_factoid="reported_11",
+            utility_type="electric",
+            plant_status=pd.NA,
+            plant_function=pd.NA,
+        )
+        self.root_other = NodeId(
+            table_name="table_1",
+            xbrl_factoid="reported_2",
+            utility_type="electric",
+            plant_status=pd.NA,
+            plant_function=pd.NA,
+        )
+        self.root_other_child = NodeId(
+            table_name="table_1",
+            xbrl_factoid="reported_21",
+            utility_type="electric",
+            plant_status=pd.NA,
+            plant_function=pd.NA,
+        )
+
+    def test_pruned_nodes(self):
+        edges = [(self.root, self.root_child), (self.root_other, self.root_other_child)]
+        tags = pd.DataFrame(columns=list(NodeId._fields)).convert_dtypes()
+        forest = XbrlCalculationForestFerc1(
+            exploded_calcs=self._exploded_calcs_from_edges(edges),
+            seeds=[self.root],
+            tags=tags,
+        )
+        pruned = forest.pruned
+        assert set(pruned) == {self.root_other, self.root_other_child}
+
+
+class TestTagPropagation(TestForestSetup):
     def setUp(self):
         self.parent = NodeId(
             table_name="table_1",
@@ -87,46 +182,6 @@ def setUp(self):
             plant_function=pd.NA,
         )
 
-    def _exploded_calcs_from_edges(self, edges: list[tuple[NodeId, NodeId]]):
-        records = []
-        for parent, child in edges:
-            record = {"weight": 1}
-            for field in NodeId._fields:
-                record[f"{field}_parent"] = parent.__getattribute__(field)
-                record[field] = child.__getattribute__(field)
-            records.append(record)
-        dtype_parent = {f"{col}_parent": pd.StringDtype() for col in NodeId._fields}
-        dtype_child = {col: pd.StringDtype() for col in NodeId._fields}
-        dtype_weight = {"weight": pd.Int64Dtype()}
-
-        return pd.DataFrame.from_records(records).astype(
-            dtype_child | dtype_parent | dtype_weight
-        )
-
-    def build_forest_and_annotated_tags(
-        self, edges: list[tuple[NodeId, NodeId]], tags: pd.DataFrame, seeds=None
-    ):
-        """Build a forest, test forest nodes and return annotated tags.
-
-        Args:
-            edges: list of tuples
-            tags: dataframe of tags
-            seeds: list of seed nodes. Default is None and will assume seed node is
-                ``parent``.
-        """
-        if not seeds:
-            seeds = [self.parent]
-        simple_forest = XbrlCalculationForestFerc1(
-            exploded_calcs=self._exploded_calcs_from_edges(edges),
-            seeds=seeds,
-            tags=tags,
-        )
-        annotated_forest = simple_forest.annotated_forest
-        # ensure no nodes got dropped
-        assert len(annotated_forest.nodes) == len(dedupe_n_flatten_list_of_lists(edges))
-        annotated_tags = nx.get_node_attributes(annotated_forest, "tags")
-        return annotated_tags
-
     def test_leafward_prop_undecided_children(self):
         edges = [(self.parent, self.child1), (self.parent, self.child2)]
         tags = pd.DataFrame([self.parent, self.child1, self.child2]).assign(