Merge pull request #1430 from marc-vdm/ca_range_score_fix
additional contribution analysis fixes
marc-vdm authored Jan 16, 2025
2 parents 1e02314 + ed7d705 commit eeb1685
Showing 7 changed files with 214 additions and 91 deletions.
42 changes: 24 additions & 18 deletions activity_browser/bwutils/multilca.py
@@ -3,20 +3,20 @@
from typing import Iterable, Optional, Union
from logging import getLogger

import bw2analyzer as ba
import bw2calc as bc
import numpy as np
import pandas as pd
from PySide2.QtWidgets import QApplication, QMessageBox

from activity_browser.mod import bw2data as bd
from activity_browser.mod.bw2analyzer import ABContributionAnalysis

from .commontasks import wrap_text
from .errors import ReferenceFlowValueError
from .metadata import AB_metadata

log = getLogger(__name__)
ca = ba.ContributionAnalysis()
ca = ABContributionAnalysis()


class MLCA(object):
@@ -394,20 +394,24 @@ def __init__(self, mlca):
),
}

def normalize(self, contribution_array: np.ndarray) -> np.ndarray:
"""Normalise the contribution array.
def normalize(self, contribution_array: np.ndarray, total_range:bool=True) -> np.ndarray:
"""Normalize the contribution array based on range or score
Parameters
----------
contribution_array : A 2-dimensional contribution array
total_range : A bool, True for normalization based on range, False for score
Returns
-------
2-dimensional array of same shape, with scores normalized.
"""
scores = abs(contribution_array.sum(axis=1, keepdims=True))
return contribution_array / scores
if total_range: # total is based on the range
total = abs(abs(contribution_array).sum(axis=1, keepdims=True))
else: # total is based on the score
total = abs(contribution_array.sum(axis=1, keepdims=True))
return contribution_array / total
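
To illustrate what the new `total_range` flag changes, here is a minimal standalone sketch of the two normalization modes (plain NumPy, not the Activity Browser class itself):

```python
import numpy as np

def normalize(contribution_array: np.ndarray, total_range: bool = True) -> np.ndarray:
    """Normalize each row by its range (sum of absolute values) or its score (plain sum)."""
    if total_range:  # total is the range of the row
        total = np.abs(contribution_array).sum(axis=1, keepdims=True)
    else:  # total is the (absolute) score of the row
        total = np.abs(contribution_array.sum(axis=1, keepdims=True))
    return contribution_array / total

contributions = np.array([[10.0, -2.0]])
print(normalize(contributions, total_range=True))   # fractions of the range (12): [[ 0.83 -0.17]]
print(normalize(contributions, total_range=False))  # fractions of the score (8):  [[ 1.25 -0.25]]
```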

def _build_dict(
self,
@@ -437,12 +441,13 @@ def _build_dict
for fu_or_method, col in FU_M_index.items():
contribution_col = contributions[col, :]
if total_range: # total is based on the range
total = np.abs(contribution_col).sum()
normalize_to = np.abs(contribution_col).sum()
else: # total is based on the score
total = contribution_col.sum()
normalize_to = contribution_col.sum()
score = contribution_col.sum()

top_contribution = ca.sort_array(
contribution_col, limit=limit, limit_type=limit_type, total=total
contribution_col, limit=limit, limit_type=limit_type, total=normalize_to
)

# split and calculate remaining rest sections for positive and negative part
@@ -458,7 +463,7 @@
cont_per = OrderedDict()
cont_per.update(
{
("Total", ""): total,
("Score", ""): score,
("Rest (+)", ""): pos_rest,
("Rest (-)", ""): neg_rest,
}
@@ -602,20 +607,21 @@ def get_labelled_contribution_dict(
# If the cont_dict has tuples for keys, coerce df.columns into MultiIndex
if all(isinstance(k, tuple) for k in cont_dict.keys()):
df.columns = pd.MultiIndex.from_tuples(df.columns)
special_keys = [("Total", ""), ("Rest (+)", ""), ("Rest (-)", "")]

special_keys = [("Score", ""), ("Rest (+)", ""), ("Rest (-)", "")]
# replace all 0 values with NaN and drop all rows with only NaNs
df = df.replace(0, np.nan)

# sort on absolute mean of a row
df_bot = deepcopy(df.loc[df.index.difference(special_keys)].dropna(how="all"))

func = lambda row: np.nanmean(np.abs(row))
# sort on mean square of a row
df_bot = deepcopy(df.iloc[3:, :])
func = lambda row: np.nanmean(np.square(row))
if len(df_bot) > 1: # but only sort if there is something to sort
df_bot["_sort_me_"] = (df_bot.select_dtypes(include=np.number)).apply(func, axis=1)
df_bot.sort_values(by="_sort_me_", ascending=False, inplace=True)
del df_bot["_sort_me_"]

df = pd.concat([df.iloc[:3, :], df_bot], axis=0)
df.dropna(how="all", inplace=True)

if not mask:
joined = self.join_df_with_metadata(
@@ -638,7 +644,7 @@ def adjust_table_unit(df: pd.DataFrame, method: Optional[tuple]) -> pd.DataFrame
"""Given a dataframe, adjust the unit of the table to either match the given method, or not exist."""
if "unit" not in df.columns:
return df
keys = df.index[~df["index"].isin({"Total", "Rest (+)", "Rest (-)"})]
keys = df.index[~df["index"].isin({"Score", "Rest (+)", "Rest (-)"})]
unit = bd.Method(method).metadata.get("unit") if method else "unit"
df.loc[keys, "unit"] = unit
return df
@@ -850,7 +856,7 @@ def top_elementary_flow_contributions(

# Normalise if required
if normalize:
contributions = self.normalize(contributions)
contributions = self.normalize(contributions, total_range)

top_cont_dict = self._build_dict(
contributions, index, rev_index, limit, limit_type, total_range
@@ -906,7 +912,7 @@ def top_process_contributions(

# Normalise if required
if normalize:
contributions = self.normalize(contributions)
contributions = self.normalize(contributions, total_range)

top_cont_dict = self._build_dict(
contributions, index, rev_index, limit, limit_type, total_range
61 changes: 37 additions & 24 deletions activity_browser/docs/wiki/LCA-Results.md
@@ -98,26 +98,30 @@ The total impact is still 1.6.
In this section we generalize a little for the different contribution approaches:
we call the _from_ part of the contributions (the EFs, activities or FT above) _entities_.

There are several ways Activity Browser manipulates your results by default.
- The results are **sorted** so that the rows with the largest (absolute) average values are shown first.
There are several ways Activity Browser manipulates your results by default:
- All reference flows are compared to each other.
- The contributions are **sorted** so that the most important contributions are shown first.
- The sorting is done on the _mean square_ (ignoring zero values) of each row of contributing entities; see the sketch after this list.
- A `cut-off` of 5% is applied; this only shows results that contribute at least 5% to the total range of results,
all other entities are grouped into the `Rest (+)` or `Rest (-)` groups.
- The contributions are _normalized_ to the impact of that reference flow, meaning they are shown as a percentage,
counting up to 100% for every item you compare.

These actions are taken to show you the most relevant results.
all other entities are grouped into the `Rest (+)` and `Rest (-)` groups for positive and negative
contributions respectively.
- The contributions are _normalized_ to the LCA scores,
meaning contributions are shown as a percentage contribution of the score, counting up to 100%.
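
A reduced sketch of that sorting rule, assuming zeros have already been replaced with NaN (as the `multilca.py` diff above does) so they are ignored by the mean:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame(
    [[9.0, np.nan], [2.0, 2.0], [-5.0, 4.0]],
    index=["entity a", "entity b", "entity c"],        # hypothetical contributing entities
    columns=["reference flow 1", "reference flow 2"],  # hypothetical reference flows
)

mean_square = df.apply(lambda row: np.nanmean(np.square(row)), axis=1)  # a: 81.0, b: 4.0, c: 20.5
sorted_df = df.loc[mean_square.sort_values(ascending=False).index]      # rows ordered a, c, b
```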

These defaults exist to show you the most relevant results in most cases, but you may often want to make this more
specific for your analysis.
You can manually manipulate the contribution results in the menu shown below, which we will explain bit by bit
in the next sections.
![contributions cutoff](./assets/contribution_manipulation.png)

#### Cut-off
You can manually change the `Cut-off type` of the results to one of two modes: `Relative` or `Top #`.
The `Relative` mode shows contributions _from_ entities of _x_% or higher.
The `Top #` mode shows contributions from the _x_ entities that contribute the most (as absolute).
- The `Relative` mode shows contributions _from_ entities of _x_% or higher.
- The `Top #` mode shows contributions from the _x_ entities that contribute the most (as absolute).

You can adjust the `Cut-off level` to change how many results you see.

All results that don't make the cut-off will be grouped into the `Rest (+)` and `Rest (-)` groups.
All contributions that are below the cut-off will be grouped into the `Rest (+)` and `Rest (-)` groups.
The Rest groups are only present when there are positive or negative numbers remaining for the respective rest groups.
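
To make this concrete, a hypothetical sketch (plain NumPy, not the Activity Browser implementation) of a 5% relative cut-off against the range:

```python
import numpy as np

contributions = np.array([6.0, 3.0, 0.4, -1.5, -0.1])  # hypothetical entity contributions
total_range = np.abs(contributions).sum()               # 11.0

keep = np.abs(contributions) / total_range >= 0.05      # entities at or above the 5% cut-off
rest = contributions[~keep]                              # everything below the cut-off

shown = contributions[keep]        # [ 6.   3.  -1.5] stays visible
rest_pos = rest[rest > 0].sum()    # Rest (+): 0.4
rest_neg = rest[rest < 0].sum()    # Rest (-): -0.1
```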

#### Compare
@@ -131,33 +135,42 @@ The compare mode defines what is shown in the figure.

#### Aggregation
The `Aggregate by` menu can be used to _group_ results based on field names.
As an example, EF contributions can be grouped on the name,
for example to group all flows with the same name.
As another example, process contributions can be grouped based on their reference product name.
This is useful to group contributors together so you have fewer (and larger) contributors.
As an example, EF contributions can be grouped on the name field
(which would, for example, group all EFs with the name _carbon dioxide_ together).
As another example, process contributions can be grouped based on their reference product name
(which would, for example, group all processes with the product name _electricity, high voltage_ together).
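
Conceptually this kind of aggregation is a group-and-sum on the chosen field; a hypothetical pandas sketch (made-up flow names, not Activity Browser code):

```python
import pandas as pd

contributions = pd.DataFrame({
    "name": ["Carbon dioxide", "Carbon dioxide", "Methane"],  # hypothetical EF names
    "compartment": ["air, urban", "air, rural", "air, urban"],
    "contribution": [4.0, 2.5, 1.0],
})

# "Aggregate by" name: all flows sharing a name are summed into a single contributor
aggregated = contributions.groupby("name", as_index=False)["contribution"].sum()
# -> "Carbon dioxide": 6.5, "Methane": 1.0
```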

#### Plot and Table
By default, Activity Browser shows a plot and a table.
You can disable one of them if you want to focus on one of them.
You can disable one of them if you want to focus on the other.

#### Relative and Absolute
You can choose between `Relative` and `Absolute` results.
The `Relative` results will sum to 100% (the total score), while the `Absolute` results will sum to the impact score.
The `Relative` results will sum to 100% (the total `Range` or `Score`),
while the `Absolute` results will sum to the impact score.
For `Relative`, you can choose what you use as the 100% reference: the `Range` or the `Score`.

#### Range and Score
If the Cut-off type is `Relative`, you can choose between `Range` and `Score`.
This determines what you use as the _total_ to which the relative contributions are counted.
For `Range`, this is the full _range_ of results. For example, if all your negative results together have a score of -2
and all your positive results together have a score of 10, the _range_ is 12 (-2 * -1 + 10).
For `Score`, this is the total score (sum) of the results. For example, if all your negative results together have a
score of -2 and all your positive results together have a score of 10, the _score_ is 8 (-2 + 10).
The `Range` or `Score` setting is only used when your results contain both positive and negative results.
The `Range`/`Score` determines what you use as the _total_ to which the contributions are counted.
- For `Range`, this is the full _range_ of results
- For example, if all your negative results together have a score of -2 and all your positive results together have a
score of 10, the _range_ is 12 (-2 * -1 + 10).
- An entity with a contribution of 4 would have a relative contribution of 4/12 = 33.3...%.
- For `Score`, this is the total score (sum) of the results
- For example, if all your negative results together have a score of -2 and all your positive results together have a
score of 10, the _score_ is 8 (-2 + 10).
- An entity with a contribution of 4 would have a relative contribution of 4/8 = 50%.

The `Range` or `Score` setting is only relevant when your results contain both positive and negative contributions.
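
The same arithmetic as the worked example above, written out in a few lines of Python (hypothetical numbers):

```python
negatives, positives = -2.0, 10.0  # hypothetical totals of the negative and positive contributions
entity = 4.0                       # contribution of a single entity

range_total = abs(negatives) + positives  # 12.0
score_total = negatives + positives       # 8.0

print(entity / range_total)  # ~0.333 -> 33.3 % relative to the Range
print(entity / score_total)  # 0.5    ->  50 %  relative to the Score
```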

### Positive and negative numbers in contribution results
It can happen in LCA that you get both positive and negative numbers in your contribution results.
Some of these reasons could be negative characterization factors, flows with negative numbers or using substitution flows.
Some reasons for this could be negative characterization factors, flows with negative numbers, or the use of
substitution flows.

When there are both positive and negative numbers in the result, Activity Browser will show a marker to indicate
where the total score is, and show positive and negative contributions to the impact separately.
where the total _score_ is, and show positive and negative contributions to the impact separately.

Below is a simple example (with unrealistic values) to demonstrate this:
