
ensure all the corrections get tags and add the beginning of a rate base asset #3214

Merged

27 commits merged on Feb 2, 2024

Changes from 18 commits

Commits (27)
3a9548c
ensure all the corrections get tags and add the begining of a rate ba…
cmgosnell Jan 4, 2024
86f8f63
Merge branch 'dev' into explode-rate-base
cmgosnell Jan 4, 2024
1169f4f
Merge branch 'main' into explode-rate-base
cmgosnell Jan 10, 2024
f33aa82
Add in cash on hand as an additional factoid into rate base table
cmgosnell Jan 10, 2024
ea8301e
add documentation for rate base table
cmgosnell Jan 10, 2024
24cf1cf
remove _correction record from the expense.
cmgosnell Jan 10, 2024
9a93f63
Merge branch 'main' into explode-rate-base
cmgosnell Jan 10, 2024
6d41c5c
attempt to associate tags with _correction factoids when all child ca…
cmgosnell Jan 16, 2024
80ccf5d
Add a simple XbrlCalculationForest test.
jdangerx Jan 17, 2024
50615cb
WIP: write down some to-dos for test cases.
jdangerx Jan 17, 2024
1ee6c7c
Get leafward propagation working
jdangerx Jan 18, 2024
8bc4a96
Merge branch 'main' into explode-rate-base
cmgosnell Jan 25, 2024
9b19f8b
first pass of adding leafward tags one layer and an attempt at a recu…
cmgosnell Jan 25, 2024
d1347c1
integrate the recursive tag propagation method
cmgosnell Jan 25, 2024
a23d87a
Merge branch 'main' into explode-rate-base
cmgosnell Jan 25, 2024
d1a42b4
remove old correction tagging and standardize unit tests a bit
cmgosnell Jan 26, 2024
829757a
remove metadata from forest builder and cleanup unit tests
cmgosnell Jan 26, 2024
d5c2b69
Merge branch 'main' into explode-rate-base
cmgosnell Jan 26, 2024
e975331
Merge branch 'main' into explode-rate-base
cmgosnell Jan 29, 2024
33fa1ef
add "validation" checks and standardize null tag behavior`
cmgosnell Jan 30, 2024
8341299
Merge branch 'main' into explode-rate-base
cmgosnell Jan 30, 2024
0f3b654
light cleaning
cmgosnell Jan 30, 2024
3e5c2cd
root boose docs!
cmgosnell Jan 31, 2024
b8758dd
respond to dazhong's comments
cmgosnell Jan 31, 2024
d93d46c
Merge branch 'main' into explode-rate-base
cmgosnell Jan 31, 2024
17a5fe4
Merge branch 'main' into explode-rate-base
cmgosnell Feb 2, 2024
da8df11
add a test about pruned nodes and add the NodeId(*n) into the orphans
cmgosnell Feb 2, 2024
246 changes: 201 additions & 45 deletions src/pudl/output/ferc1.py
@@ -1154,7 +1154,10 @@ class OffByFactoid(NamedTuple):


@asset
def _out_ferc1__explosion_tags(table_dimensions_ferc1) -> pd.DataFrame:
def _out_ferc1__explosion_tags(
table_dimensions_ferc1: pd.DataFrame,
calculation_components_xbrl_ferc1: pd.DataFrame,
) -> pd.DataFrame:
"""Grab the stored tables of tags and add inferred dimension."""
rate_tags = _get_tags("xbrl_factoid_rate_base_tags.csv", table_dimensions_ferc1)
rev_req_tags = _get_tags(
@@ -1180,9 +1183,10 @@ def _out_ferc1__explosion_tags(table_dimensions_ferc1) -> pd.DataFrame:
plant_function_tags,
utility_type_tags,
]
tags_all = (
tag_idx = list(NodeId._fields)
tags = (
pd.concat(
[df.set_index(list(NodeId._fields)) for df in tag_dfs],
[df.set_index(tag_idx) for df in tag_dfs],
join="outer",
verify_integrity=True,
ignore_index=False,
@@ -1191,7 +1195,7 @@ def _out_ferc1__explosion_tags(table_dimensions_ferc1) -> pd.DataFrame:
.reset_index()
.drop(columns=["notes"])
)
return tags_all
return tags
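
For context on the pattern above, here is a minimal sketch (not part of this diff; toy dataframes and a two-column index stand in for the real tag CSVs and NodeId._fields) of concatenating per-category tag tables on a shared index, where verify_integrity raises if two inputs tag the same node:

import pandas as pd

idx = ["table_name", "xbrl_factoid"]  # stand-in for list(NodeId._fields)
rate_tags_df = pd.DataFrame(
    {"table_name": ["t1"], "xbrl_factoid": ["plant_a"], "in_rate_base": ["yes"]}
)
rev_req_tags_df = pd.DataFrame(
    {"table_name": ["t1"], "xbrl_factoid": ["rev_b"], "in_revenue_requirement": ["yes"]}
)
tags = pd.concat(
    [df.set_index(idx) for df in [rate_tags_df, rev_req_tags_df]],
    join="outer",
    verify_integrity=True,
).reset_index()
# each row keeps its own tag column; the other table's tag column is NaN for that row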


def _get_tags(file_name: str, table_dimensions_ferc1: pd.DataFrame) -> pd.DataFrame:
@@ -1236,7 +1240,10 @@ def _aggregatable_dimension_tags(
)
.set_index(idx)
)
table_dimensions_ferc1 = table_dimensions_ferc1.set_index(idx)
# don't include the corrections because we will add those in later
table_dimensions_ferc1 = table_dimensions_ferc1[
~table_dimensions_ferc1.xbrl_factoid.str.endswith("_correction")
].set_index(idx)
tags_df = pd.concat(
[
tags_df,
@@ -1658,7 +1665,6 @@ def calculation_forest(self: Self) -> "XbrlCalculationForestFerc1":
"""Construct a calculation forest based on class attributes."""
return XbrlCalculationForestFerc1(
exploded_calcs=self.exploded_calcs,
exploded_meta=self.exploded_meta,
seeds=self.seed_nodes,
tags=self.tags,
group_metric_checks=self.group_metric_checks,
@@ -2016,10 +2022,10 @@ class XbrlCalculationForestFerc1(BaseModel):

# Not sure if dynamically basing this on NodeId is really a good idea here.
calc_cols: list[str] = list(NodeId._fields)
exploded_meta: pd.DataFrame = pd.DataFrame()
exploded_calcs: pd.DataFrame = pd.DataFrame()
seeds: list[NodeId] = []
tags: pd.DataFrame = pd.DataFrame()
# TODO: remove the group metric checks and see if things still build / tests still pass
group_metric_checks: GroupMetricChecks = GroupMetricChecks()
model_config = ConfigDict(
arbitrary_types_allowed=True, ignored_types=(cached_property,)
@@ -2135,14 +2141,13 @@ def exploded_calcs_to_digraph(
Then we compile a dictionary of node attributes, based on the individual
calculation components in the exploded calcs dataframe.
"""
source_nodes = list(
exploded_calcs.loc[:, self.parent_cols]
.rename(columns=lambda x: x.removesuffix("_parent"))
.itertuples(name="NodeId", index=False)
)
target_nodes = list(
exploded_calcs.loc[:, self.calc_cols].itertuples(name="NodeId", index=False)
)
source_nodes = [
NodeId(*x)
for x in exploded_calcs.set_index(self.parent_cols).index.to_list()
]
target_nodes = [
NodeId(*x) for x in exploded_calcs.set_index(self.calc_cols).index.to_list()
]
edgelist = pd.DataFrame({"source": source_nodes, "target": target_nodes})
forest = nx.from_pandas_edgelist(edgelist, create_using=nx.DiGraph)
return forest
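
A rough, self-contained illustration (not part of the diff) of the NodeId-based edge construction above, assuming a hypothetical two-field NodeId instead of the real PUDL one:

from collections import namedtuple

import networkx as nx
import pandas as pd

NodeId = namedtuple("NodeId", ["table_name", "xbrl_factoid"])
parent_cols = ["table_name_parent", "xbrl_factoid_parent"]
calc_cols = ["table_name", "xbrl_factoid"]
exploded_calcs = pd.DataFrame(
    {
        "table_name_parent": ["t1", "t1"],
        "xbrl_factoid_parent": ["total", "total"],
        "table_name": ["t1", "t1"],
        "xbrl_factoid": ["part_a", "part_b"],
    }
)
source_nodes = [NodeId(*x) for x in exploded_calcs.set_index(parent_cols).index.to_list()]
target_nodes = [NodeId(*x) for x in exploded_calcs.set_index(calc_cols).index.to_list()]
edgelist = pd.DataFrame({"source": source_nodes, "target": target_nodes})
forest = nx.from_pandas_edgelist(edgelist, create_using=nx.DiGraph)
# "total" is now the parent of both part_a and part_b
assert sorted(n.xbrl_factoid for n in forest.successors(NodeId("t1", "total"))) == ["part_a", "part_b"]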
@@ -2173,32 +2178,9 @@ def node_attrs(self: Self) -> dict[NodeId, dict[str, dict[str, str]]]:
.reset_index()
# Type conversion is necessary to get pd.NA in the index:
.astype({col: pd.StringDtype() for col in self.calc_cols})
# We need a dictionary for *all* nodes, not just those with tags.
.merge(
self.exploded_meta.loc[:, self.calc_cols],
how="left",
on=self.calc_cols,
validate="one_to_many",
indicator=True,
)
# For nodes with no tags, we assign an empty dictionary:
.assign(tags=lambda x: np.where(x["tags"].isna(), {}, x["tags"]))
)
lefties = node_attrs[
(node_attrs._merge == "left_only")
& (node_attrs.table_name.isin(self.table_names))
]
if not lefties.empty:
logger.warning(
f"Found {len(lefties)} tags that only exist in our manually compiled "
"tags when expected none. Ensure the compiled tags match the metadata."
f"Mismatched tags:\n{lefties}"
)
return (
node_attrs.drop(columns=["_merge"])
.set_index(self.calc_cols)
.to_dict(orient="index")
)
return node_attrs.set_index(self.calc_cols).to_dict(orient="index")

@cached_property
def edge_attrs(self: Self) -> dict[Any, Any]:
@@ -2244,6 +2226,7 @@ def annotated_forest(self: Self) -> nx.DiGraph:
annotated_forest = deepcopy(self.forest)
nx.set_node_attributes(annotated_forest, self.node_attrs)
nx.set_edge_attributes(annotated_forest, self.edge_attrs)
annotated_forest = self.propagate_tags(annotated_forest)

logger.info("Checking whether any pruned nodes were also tagged.")
self.check_lost_tags(lost_nodes=self.pruned)
@@ -2252,6 +2235,55 @@ def annotated_forest(self: Self) -> nx.DiGraph:
self.check_conflicting_tags(annotated_forest)
return annotated_forest

def propagate_tags(self: Self, annotated_forest: nx.DiGraph):
"""Propagate tags.

Propagate tags leafwards, rootward & to the _correction nodes.
"""
existing_tags = nx.get_node_attributes(annotated_forest, "tags")
## Leafwards propagation
leafward_inherited_tags = ["in_rate_base"]
for node, parent_tags in existing_tags.items():
descendants = nx.descendants(annotated_forest, node)
descendant_tags = {
desc: {
"tags": {
tag_name: parent_tags[tag_name]
for tag_name in leafward_inherited_tags
if tag_name in parent_tags
}
| existing_tags.get(desc, {})
}
for desc in descendants
}
nx.set_node_attributes(annotated_forest, descendant_tags)

# Rootward propagation
root_node = self.roots(annotated_forest)[0]
_ = recursively_propagate_tags_leafward(
annotated_forest, root_node, "in_rate_base"
)
# Correction Records
existing_tags = nx.get_node_attributes(annotated_forest, "tags")
correction_nodes = [
node
for node in annotated_forest
if node.xbrl_factoid.endswith("_correction")
]
correction_tags = {}
for correction_node in correction_nodes:
# for every correction node, we assume that its parent's tags can apply
parents = list(annotated_forest.predecessors(correction_node))
# all correction records should have exactly one parent
assert len(parents) == 1
parent = parents[0]
correction_tags[correction_node] = {
"tags": existing_tags.get(parent, {})
| existing_tags.get(correction_node, {})
}
nx.set_node_attributes(annotated_forest, correction_tags)
return annotated_forest
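
A minimal sketch (not from the diff; a hypothetical two-field NodeId) of how the correction-record pass lets a _correction node inherit its single parent's tags:

from collections import namedtuple

import networkx as nx

NodeId = namedtuple("NodeId", ["table_name", "xbrl_factoid"])
g = nx.DiGraph()
parent = NodeId("t1", "plant_in_service")
correction = NodeId("t1", "plant_in_service_correction")
g.add_edge(parent, correction)
nx.set_node_attributes(g, {parent: {"tags": {"in_rate_base": "yes"}}})

# the correction's tags become its parent's tags overlaid with any of its own
existing_tags = nx.get_node_attributes(g, "tags")
assert list(g.predecessors(correction)) == [parent]
nx.set_node_attributes(
    g,
    {correction: {"tags": existing_tags.get(parent, {}) | existing_tags.get(correction, {})}},
)
assert g.nodes[correction]["tags"] == {"in_rate_base": "yes"}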

def check_lost_tags(self: Self, lost_nodes: list[NodeId]) -> None:
"""Check whether any of the input lost nodes were also tagged nodes."""
if lost_nodes:
@@ -2368,7 +2400,7 @@ def seeded_digraph(self: Self) -> nx.DiGraph:

We compile a list of all the :class:`NodeId` values that should be included in
the pruned graph, and then use that list to select a subset of the exploded
metadata to pass to :meth:`exploded_meta_to_digraph`, so that all of the
metadata to pass to :meth:`exploded_calcs_to_digraph`, so that all of the
associated metadata is also added to the pruned graph.
"""
return self.prune_unrooted(self.full_digraph)
@@ -2496,11 +2528,16 @@ def forest_leaves(self: Self) -> list[NodeId]:
def orphans(self: Self) -> list[NodeId]:
"""Identify all nodes that appear in metadata but not in the full digraph."""
nodes = self.full_digraph.nodes
return [
NodeId(*n)
for n in self.exploded_meta.set_index(self.calc_cols).index
if n not in nodes
]
orphans = []
for idx_cols in [self.calc_cols, self.parent_cols]:
orphans.extend(
[
NodeId(*n)
for n in self.exploded_calcs.set_index(idx_cols).index
if n not in nodes
]
)
return list(set(orphans))

@cached_property
def pruned(self: Self) -> list[NodeId]:
@@ -2774,3 +2811,122 @@ def nodes_to_df(calc_forest: nx.DiGraph, nodes: list[NodeId]) -> pd.DataFrame:
except AttributeError:
tags = pd.DataFrame()
return pd.concat([index, tags], axis="columns")


def recursively_propagate_tags_leafward(
annotated_forest, node, tag_name: Literal["in_rate_base"]
):
"""Set the tags for nodes when all of its children have same tag.

This function returns the value of a tag.
"""

def _get_tag(annotated_forest, node, tag_name):
return annotated_forest.nodes.get(node, {}).get("tags", {}).get(tag_name, pd.NA)
Member Author commented:

this was just a lil helper function to get the tag or a null because, as you can see, it is a lil complicated given the layered-ness and the possibility that the node doesn't exist or that the tag doesn't exist, etc. I suppose it could also be:

annotated_forest.nodes.get(node, {"tags": {tag_name: pd.NA}})["tags"][tag_name]


logger.info(f"propagaging tags leafward from {node}")
cmgosnell marked this conversation as resolved.
Show resolved Hide resolved
tag = pd.NA
# I'm a leaf, so stop looking here
if not list(annotated_forest.successors(node)):
tag = _get_tag(annotated_forest, node, tag_name)
logger.info(f" We found a leaf people. w/ {tag=}")
cmgosnell marked this conversation as resolved.
Show resolved Hide resolved
# if I already have a value, we don't need to keep looking at this node's children
elif not pd.isna(_get_tag(annotated_forest, node, tag_name)):
tag = _get_tag(annotated_forest, node, tag_name)
logger.info(f" We found a node w/ tags. w/ {tag=}")
else:
child_tags = set()
for child_node in annotated_forest.successors(node):
if not child_node.xbrl_factoid.endswith("_correction"):
child_tags.add(
recursively_propagate_tags_leafward(
annotated_forest, child_node, tag_name
)
)
logger.info(f" found {child_tags=}")
# if all the children's tags are the same and non-null
if (len(child_tags) == 1) and {t for t in child_tags if not pd.isna(t)}:
new_node_tag = child_tags.pop()
logger.info(
f" We found a node consitent children tags. w/ {new_node_tag=}"
)
# actually assign the tag here but don't wipe out any other tags
existing_tags = nx.get_node_attributes(annotated_forest, "tags")
node_tags = {
node: {"tags": {tag_name: new_node_tag} | existing_tags.get(node, {})}
}
nx.set_node_attributes(annotated_forest, node_tags)
tag = new_node_tag
return tag
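
A small usage sketch (toy inputs, not from the diff): when every non-correction child of an untagged node carries the same non-null tag value, the node picks it up:

from collections import namedtuple

import networkx as nx

NodeId = namedtuple("NodeId", ["table_name", "xbrl_factoid"])
g = nx.DiGraph()
root = NodeId("t1", "total_assets")
children = [NodeId("t1", "asset_a"), NodeId("t1", "asset_b")]
g.add_edges_from((root, child) for child in children)
nx.set_node_attributes(
    g, {child: {"tags": {"in_rate_base": "yes"}} for child in children}
)
# both leaves agree, so the root inherits in_rate_base="yes"
assert recursively_propagate_tags_leafward(g, root, "in_rate_base") == "yes"
assert g.nodes[root]["tags"]["in_rate_base"] == "yes"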


@asset
def out_ferc1__yearly_rate_base(
exploded_balance_sheet_assets_ferc1: pd.DataFrame,
exploded_balance_sheet_liabilities_ferc1: pd.DataFrame,
core_ferc1__yearly_operating_expenses_sched320: pd.DataFrame,
) -> pd.DataFrame:
"""Make a table of granular utility rate-base data.

This table contains granular data describing what utilities can
include in their rate bases. This information comes from two core
inputs: ``exploded_balance_sheet_assets_ferc1`` and
``exploded_balance_sheet_liabilities_ferc1``. These tables include granular
data from the nested calculations that are built into the accounting tables.
See :class:`Exploder` for more details.

This rate base table also contains one specific addition from
:ref:`core_ferc1__yearly_operating_expenses_sched320`. In standard ratemaking
processes, utilities are allowed to include working capital - sometimes referred
to as cash on hand or cash reserves. A standard ratemaking convention is to consider
the available rate-baseable working capital to be one eighth of the average
operations and maintenance expense. This function grabs that expense and
concatenates it with the rest of the assets and liabilities from the granular
exploded data.

"""
# get the factoid name to grab the right part of the table
xbrl_factoid_name = pudl.transform.ferc1.FERC1_TFR_CLASSES[
"core_ferc1__yearly_operating_expenses_sched320"
]().params.xbrl_factoid_name
# First grab the working capital out of the operating expense table.
# then prep it for concatenation: calculate working capital & add tags
cash_working_capital = (
core_ferc1__yearly_operating_expenses_sched320[
core_ferc1__yearly_operating_expenses_sched320[xbrl_factoid_name]
== "operations_and_maintenance_expenses_electric"
]
.assign(
dollar_value=lambda x: x.dollar_value.divide(8),
xbrl_factoid="cash_working_capital", # newly definied (do we need to add it anywhere?)
tags_rate_base_category="net_working_capital",
tags_aggregatable_utility_type="electric",
table_name="core_ferc1__yearly_operating_expenses_sched320",
)
.drop(columns=[xbrl_factoid_name])
# the assets/liabilities tables both use ending_balance as their main dollar value column
.rename(columns={"dollar_value": "ending_balance"})
)
# then select only the leafy exploded records that are in rate base and concat
in_rate_base = (
pd.concat(
[
exploded_balance_sheet_assets_ferc1[
exploded_balance_sheet_assets_ferc1.tags_in_rate_base.isin(
["yes", "partial"]
)
],
exploded_balance_sheet_liabilities_ferc1[
exploded_balance_sheet_liabilities_ferc1.tags_in_rate_base.isin(
["yes", "partial"]
)
],
cash_working_capital,
]
)
# .drop(columns=["tags_in_rate_base"])
.sort_values(
by=["report_year", "utility_id_ferc1", "table_name"], ascending=False
)
)
return in_rate_base
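
As a back-of-the-envelope check of the one-eighth working capital convention described in the docstring (illustrative numbers only):

annual_om_expense = 80_000_000.0  # hypothetical annual electric O&M expense, in dollars
cash_working_capital = annual_om_expense / 8  # one eighth of O&M
assert cash_working_capital == 10_000_000.0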
10 changes: 0 additions & 10 deletions src/pudl/package_data/ferc1/xbrl_factoid_plant_status_tags.csv
@@ -26,14 +26,4 @@ core_ferc1__yearly_utility_plant_summary_sched200,depreciation_and_amortization_
core_ferc1__yearly_utility_plant_summary_sched200,abandonment_of_leases,total
core_ferc1__yearly_utility_plant_summary_sched200,amortization_of_plant_acquisition_adjustment,total
core_ferc1__yearly_utility_plant_summary_sched200,utility_plant_in_service_classified_and_property_under_capital_leases,in_service
core_ferc1__yearly_utility_plant_summary_sched200,utility_plant_in_service_plant_purchased_or_sold_correction,in_service
core_ferc1__yearly_utility_plant_summary_sched200,utility_plant_in_service_experimental_plant_unclassified_correction,in_service
core_ferc1__yearly_utility_plant_summary_sched200,utility_plant_in_service_classified_and_unclassified_correction,in_service
core_ferc1__yearly_utility_plant_summary_sched200,utility_plant_and_construction_work_in_progress_correction,construction_work_in_progress
core_ferc1__yearly_utility_plant_summary_sched200,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility_correction,total
core_ferc1__yearly_utility_plant_summary_sched200,utility_plant_net_correction,total
core_ferc1__yearly_utility_plant_summary_sched200,depreciation_utility_plant_in_service_correction,in_service
core_ferc1__yearly_utility_plant_summary_sched200,depreciation_amortization_and_depletion_utility_plant_leased_to_others_correction,leased
core_ferc1__yearly_utility_plant_summary_sched200,depreciation_and_amortization_utility_plant_held_for_future_use_correction,future
core_ferc1__yearly_utility_plant_summary_sched200,utility_plant_in_service_classified_and_property_under_capital_leases_correction,in_service
core_ferc1__yearly_utility_plant_summary_sched200,abandonment_of_leases,leased