From 5f500039f8645952bb3efe5b32b73986fd210411 Mon Sep 17 00:00:00 2001 From: jnnr <32454596+jnnr@users.noreply.github.com> Date: Mon, 27 Sep 2021 17:40:22 +0200 Subject: [PATCH 01/19] Add script and rule for scalars preparation --- Snakefile | 9 ++++++++ scripts/prepare_scalars.py | 43 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 scripts/prepare_scalars.py diff --git a/Snakefile b/Snakefile index 36ef43b9..31ac48bb 100644 --- a/Snakefile +++ b/Snakefile @@ -57,6 +57,15 @@ rule prepare_conv_pp: shell: "python scripts/prepare_conv_pp.py {input.opsd} {input.gpkg} {input.b3_regions} {input.scalar_template} {output}" +rule prepares_scalars: + input: + raw_scalars="raw/scalars.csv", + script="scripts/prepare_scalars.py" + output: + "results/_resources/scalars.csv" + shell: + "python {input.script} {input.raw_scalars} {output}" + rule build_datapackage: input: "scenarios/{scenario}.yml" diff --git a/scripts/prepare_scalars.py b/scripts/prepare_scalars.py new file mode 100644 index 00000000..c5071276 --- /dev/null +++ b/scripts/prepare_scalars.py @@ -0,0 +1,43 @@ +# coding: utf-8 +r""" +Inputs +------- +in_path1 : str + path of input file with raw scalar data as .csv +out_path : str + path of output file with prepared scalar data as .csv + +Outputs +--------- +pandas.DataFrame + with scalar data prepared for parametrization + +Description +------------- +The script performs the following steps to prepare scalar data for parametrization: + +* Calculate annualized investment cost from overnight cost, lifetime and wacc. +""" +import sys + +from oemof_b3.tools.data_processing import load_b3_scalars + + +def prepare_annuity(df): + return df + + +if __name__ == "__main__": + in_path = sys.argv[1] # path to raw scalar data + out_path = sys.argv[2] # path to destination + + prepare_funcs = [ + prepare_annuity, + ] + + df = load_b3_scalars(in_path) + + for func in prepare_funcs: + df = func(df) + + df.to_csv(out_path, index=False) From 083c2d7fd0726d49e4dad2c9b8897238b6295226 Mon Sep 17 00:00:00 2001 From: jnnr <32454596+jnnr@users.noreply.github.com> Date: Wed, 29 Sep 2021 17:19:20 +0200 Subject: [PATCH 02/19] Sketch scalars preparation --- scripts/prepare_scalars.py | 67 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/scripts/prepare_scalars.py b/scripts/prepare_scalars.py index c5071276..a379d829 100644 --- a/scripts/prepare_scalars.py +++ b/scripts/prepare_scalars.py @@ -20,11 +20,74 @@ """ import sys +from oemof.tools.economics import annuity + +import oemof_b3.tools.data_processing as dp from oemof_b3.tools.data_processing import load_b3_scalars +def unstack_var_name(df): + # TODO: to dataprocessing + _df = df.copy() + + _df = _df.set_index( + ["scenario", "name", "region", "carrier", "tech", "type", "var_name"] + ) + + _df = _df.unstack("var_name") + + return _df + + +def stack_var_name(df): + # TODO: to dataprocessing + + _df = df.copy() + + _df = _df.stack("var_name") + + return _df + + +def filter_unstack(df, var_name): + # TODO: to dataprocessing + + _df = df.copy() + + _df = dp.filter_df(_df, "var_name", var_name) + + _df = unstack_var_name(_df) + + _df = _df.loc[:, "var_value"] + + return _df + + def prepare_annuity(df): - return df + _df = df.copy() + + def calculate_annuized_capacity_cost(on_cost, on1_cost): + + annuized_capacity_cost = annuity(on_cost, 2, 0.05) + + annuized_capacity_cost.columns = ["capacity_cost"] + + annuized_capacity_cost.columns.name = "var_name" + + return annuized_capacity_cost + + # filter and unstack + on_cost = filter_unstack(_df, "overnight_cost") + + on_cost = filter_unstack(_df, "overnight_cost") + + # func + capacity_cost = calculate_annuized_capacity_cost(on_cost, on_cost) + + # stack and append + capacity_cost = stack_var_name(capacity_cost) + + return _df if __name__ == "__main__": @@ -38,6 +101,6 @@ def prepare_annuity(df): df = load_b3_scalars(in_path) for func in prepare_funcs: - df = func(df) + df.append(func(df)) df.to_csv(out_path, index=False) From f99bc859847d002ea70a8567be8838ccbedd5950 Mon Sep 17 00:00:00 2001 From: jnnr <32454596+jnnr@users.noreply.github.com> Date: Wed, 13 Oct 2021 17:07:21 +0200 Subject: [PATCH 03/19] Introduce class to handle scalar data --- scripts/prepare_scalars.py | 76 +++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 29 deletions(-) diff --git a/scripts/prepare_scalars.py b/scripts/prepare_scalars.py index a379d829..62c82fad 100644 --- a/scripts/prepare_scalars.py +++ b/scripts/prepare_scalars.py @@ -18,12 +18,17 @@ * Calculate annualized investment cost from overnight cost, lifetime and wacc. """ +import pandas as pd import sys from oemof.tools.economics import annuity import oemof_b3.tools.data_processing as dp -from oemof_b3.tools.data_processing import load_b3_scalars +from oemof_b3.tools.data_processing import ( + load_b3_scalars, + format_header, + HEADER_B3_SCAL, +) def unstack_var_name(df): @@ -39,23 +44,35 @@ def unstack_var_name(df): return _df -def stack_var_name(df): +def stack_var_name(df, var_name): # TODO: to dataprocessing - _df = df.copy() + _df.columns = [var_name] + + _df.columns.name = "var_name" + _df = _df.stack("var_name") + _df.name = "var_value" + + _df = pd.DataFrame(_df).reset_index() + + _df = format_header(_df, HEADER_B3_SCAL, "id_scal") + return _df -def filter_unstack(df, var_name): +def unstack_filter(df, var_name): # TODO: to dataprocessing _df = df.copy() _df = dp.filter_df(_df, "var_name", var_name) + if _df.empty: + raise ValueError(f"No entries for {var_name} in df.") + _df = unstack_var_name(_df) _df = _df.loc[:, "var_value"] @@ -63,44 +80,45 @@ def filter_unstack(df, var_name): return _df -def prepare_annuity(df): - _df = df.copy() +class ScalarCalculator: + def __init__(self, scalars): + self.scalars = scalars - def calculate_annuized_capacity_cost(on_cost, on1_cost): + def unstack_filter(self, var_name): + return unstack_filter(self.scalars, var_name) - annuized_capacity_cost = annuity(on_cost, 2, 0.05) + def append(self, var_name, data): - annuized_capacity_cost.columns = ["capacity_cost"] + _df = data.copy() - annuized_capacity_cost.columns.name = "var_name" + if isinstance(_df, pd.Series): + _df.name = "var_name" - return annuized_capacity_cost + _df = pd.DataFrame(_df) - # filter and unstack - on_cost = filter_unstack(_df, "overnight_cost") + _df = stack_var_name(_df, var_name) - on_cost = filter_unstack(_df, "overnight_cost") - - # func - capacity_cost = calculate_annuized_capacity_cost(on_cost, on_cost) - - # stack and append - capacity_cost = stack_var_name(capacity_cost) - - return _df + self.scalars = self.scalars.append(_df) if __name__ == "__main__": in_path = sys.argv[1] # path to raw scalar data out_path = sys.argv[2] # path to destination - prepare_funcs = [ - prepare_annuity, - ] - df = load_b3_scalars(in_path) - for func in prepare_funcs: - df.append(func(df)) + sc = ScalarCalculator(df) + + invest_data = sc.unstack_filter(["overnight_cost", "lifetime"]) + + wacc = sc.unstack_filter("wacc").iloc[0] + + invest_data["wacc"] = wacc + + annuised_investment_cost = invest_data.apply( + lambda x: annuity(x["overnight_cost"], x["lifetime"], x["wacc"]), 1 + ) + + sc.append("annunity", annuised_investment_cost) - df.to_csv(out_path, index=False) + sc.scalars.to_csv(out_path, index=False) From 1193c0c7cf4748a912004718f41190cd7b105fb1 Mon Sep 17 00:00:00 2001 From: jnnr <32454596+jnnr@users.noreply.github.com> Date: Wed, 13 Oct 2021 17:13:04 +0200 Subject: [PATCH 04/19] Make sure wacc is float --- scripts/prepare_scalars.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/prepare_scalars.py b/scripts/prepare_scalars.py index 62c82fad..e70372a5 100644 --- a/scripts/prepare_scalars.py +++ b/scripts/prepare_scalars.py @@ -111,7 +111,9 @@ def append(self, var_name, data): invest_data = sc.unstack_filter(["overnight_cost", "lifetime"]) - wacc = sc.unstack_filter("wacc").iloc[0] + wacc = sc.unstack_filter("wacc").iloc[0, 0] + + assert isinstance(wacc, float) invest_data["wacc"] = wacc @@ -119,6 +121,6 @@ def append(self, var_name, data): lambda x: annuity(x["overnight_cost"], x["lifetime"], x["wacc"]), 1 ) - sc.append("annunity", annuised_investment_cost) + sc.append("annuity", annuised_investment_cost) sc.scalars.to_csv(out_path, index=False) From 632ebde1fb9d0c1feb4982f2c2a8850564f827e6 Mon Sep 17 00:00:00 2001 From: jnnr <32454596+jnnr@users.noreply.github.com> Date: Wed, 13 Oct 2021 17:13:26 +0200 Subject: [PATCH 05/19] Fix typo --- Snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Snakefile b/Snakefile index 31ac48bb..8a21ff57 100644 --- a/Snakefile +++ b/Snakefile @@ -57,7 +57,7 @@ rule prepare_conv_pp: shell: "python scripts/prepare_conv_pp.py {input.opsd} {input.gpkg} {input.b3_regions} {input.scalar_template} {output}" -rule prepares_scalars: +rule prepare_scalars: input: raw_scalars="raw/scalars.csv", script="scripts/prepare_scalars.py" From 1dedcec7d5c91e9a291ea3b6c5433332e5d88f97 Mon Sep 17 00:00:00 2001 From: jnnr <32454596+jnnr@users.noreply.github.com> Date: Wed, 13 Oct 2021 17:17:47 +0200 Subject: [PATCH 06/19] Rename method --- scripts/prepare_scalars.py | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/scripts/prepare_scalars.py b/scripts/prepare_scalars.py index e70372a5..034d3f73 100644 --- a/scripts/prepare_scalars.py +++ b/scripts/prepare_scalars.py @@ -63,29 +63,22 @@ def stack_var_name(df, var_name): return _df -def unstack_filter(df, var_name): - # TODO: to dataprocessing - - _df = df.copy() - - _df = dp.filter_df(_df, "var_name", var_name) - - if _df.empty: - raise ValueError(f"No entries for {var_name} in df.") +class ScalarProcessor: + def __init__(self, scalars): + self.scalars = scalars - _df = unstack_var_name(_df) + def get_unstacked_var(self, var_name): - _df = _df.loc[:, "var_value"] + _df = dp.filter_df(self.scalars, "var_name", var_name) - return _df + if _df.empty: + raise ValueError(f"No entries for {var_name} in df.") + _df = unstack_var_name(_df) -class ScalarCalculator: - def __init__(self, scalars): - self.scalars = scalars + _df = _df.loc[:, "var_value"] - def unstack_filter(self, var_name): - return unstack_filter(self.scalars, var_name) + return _df def append(self, var_name, data): @@ -107,11 +100,11 @@ def append(self, var_name, data): df = load_b3_scalars(in_path) - sc = ScalarCalculator(df) + sc = ScalarProcessor(df) - invest_data = sc.unstack_filter(["overnight_cost", "lifetime"]) + invest_data = sc.get_unstacked_var(["overnight_cost", "lifetime"]) - wacc = sc.unstack_filter("wacc").iloc[0, 0] + wacc = sc.get_unstacked_var("wacc").iloc[0, 0] assert isinstance(wacc, float) From 1487c505fa21455bb0a8376d90e585da3fd9e387 Mon Sep 17 00:00:00 2001 From: jnnr <32454596+jnnr@users.noreply.github.com> Date: Wed, 13 Oct 2021 17:44:01 +0200 Subject: [PATCH 07/19] Elaborate stack function --- scripts/prepare_scalars.py | 58 +++++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/scripts/prepare_scalars.py b/scripts/prepare_scalars.py index 034d3f73..27954c34 100644 --- a/scripts/prepare_scalars.py +++ b/scripts/prepare_scalars.py @@ -31,36 +31,64 @@ ) +def is_correct_header(df): + return True + + def unstack_var_name(df): - # TODO: to dataprocessing + r""" + Given a DataFrame in oemof_b3 scalars format, this function will unstack + the variables. The returned DataFrame will have one column for each var_name. + + Parameters + ---------- + df : pd.DataFrame + Stacked scalar data. + Returns + ------- + unstacked : pd.DataFrame + Unstacked scalar data. + """ + assert is_correct_header(df) + _df = df.copy() _df = _df.set_index( ["scenario", "name", "region", "carrier", "tech", "type", "var_name"] ) - _df = _df.unstack("var_name") + unstacked = _df.unstack("var_name") - return _df + return unstacked -def stack_var_name(df, var_name): - # TODO: to dataprocessing - _df = df.copy() +def stack_var_name(df): + r""" + Given a DataFrame, this function will stack the variables. - _df.columns = [var_name] + Parameters + ---------- + df : pd.DataFrame + DataFrame with one column per variable - _df.columns.name = "var_name" + Returns + ------- + stacked : pd.DataFrame + DataFrame with a column "var_name" and "var_value" + """ + assert isinstance(df, pd.DataFrame) - _df = _df.stack("var_name") + _df = df.copy() + + _df.columns.name = "var_name" - _df.name = "var_value" + stacked = _df.stack("var_name") - _df = pd.DataFrame(_df).reset_index() + stacked.name = "var_value" - _df = format_header(_df, HEADER_B3_SCAL, "id_scal") + stacked = pd.DataFrame(_df).reset_index() - return _df + return stacked class ScalarProcessor: @@ -89,7 +117,9 @@ def append(self, var_name, data): _df = pd.DataFrame(_df) - _df = stack_var_name(_df, var_name) + _df = stack_var_name(_df) + + _df = format_header(_df, HEADER_B3_SCAL, "id_scal") self.scalars = self.scalars.append(_df) From a5c86219c82d05e0239719f33b83c94bb990ffaa Mon Sep 17 00:00:00 2001 From: jnnr <32454596+jnnr@users.noreply.github.com> Date: Wed, 13 Oct 2021 17:45:42 +0200 Subject: [PATCH 08/19] Fix setting var_name --- scripts/prepare_scalars.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/prepare_scalars.py b/scripts/prepare_scalars.py index 27954c34..02f2ee66 100644 --- a/scripts/prepare_scalars.py +++ b/scripts/prepare_scalars.py @@ -86,7 +86,7 @@ def stack_var_name(df): stacked.name = "var_value" - stacked = pd.DataFrame(_df).reset_index() + stacked = pd.DataFrame(stacked).reset_index() return stacked @@ -113,7 +113,7 @@ def append(self, var_name, data): _df = data.copy() if isinstance(_df, pd.Series): - _df.name = "var_name" + _df.name = var_name _df = pd.DataFrame(_df) From 77bd8a6757805583d5975b576163967e60e56c06 Mon Sep 17 00:00:00 2001 From: jnnr <32454596+jnnr@users.noreply.github.com> Date: Wed, 13 Oct 2021 17:48:57 +0200 Subject: [PATCH 09/19] Check header and reformat if necessary --- scripts/prepare_scalars.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/scripts/prepare_scalars.py b/scripts/prepare_scalars.py index 02f2ee66..95a55960 100644 --- a/scripts/prepare_scalars.py +++ b/scripts/prepare_scalars.py @@ -31,10 +31,6 @@ ) -def is_correct_header(df): - return True - - def unstack_var_name(df): r""" Given a DataFrame in oemof_b3 scalars format, this function will unstack @@ -49,10 +45,10 @@ def unstack_var_name(df): unstacked : pd.DataFrame Unstacked scalar data. """ - assert is_correct_header(df) - _df = df.copy() + _df = format_header(_df, HEADER_B3_SCAL, "id_scal") + _df = _df.set_index( ["scenario", "name", "region", "carrier", "tech", "type", "var_name"] ) From 369d233a9c7b207de74fda73b8a941699d91db10 Mon Sep 17 00:00:00 2001 From: jnnr <32454596+jnnr@users.noreply.github.com> Date: Wed, 13 Oct 2021 17:51:38 +0200 Subject: [PATCH 10/19] Move processing functions to oemof_b3.tools --- oemof_b3/tools/data_processing.py | 89 ++++++++++++++++++++++++++++ scripts/prepare_scalars.py | 98 +------------------------------ 2 files changed, 90 insertions(+), 97 deletions(-) diff --git a/oemof_b3/tools/data_processing.py b/oemof_b3/tools/data_processing.py index 5f396bbd..51fad04c 100644 --- a/oemof_b3/tools/data_processing.py +++ b/oemof_b3/tools/data_processing.py @@ -431,3 +431,92 @@ def unstack_timeseries(df): df_unstacked.index.name = _df["timeindex_start"].index.name return df_unstacked + + +def unstack_var_name(df): + r""" + Given a DataFrame in oemof_b3 scalars format, this function will unstack + the variables. The returned DataFrame will have one column for each var_name. + + Parameters + ---------- + df : pd.DataFrame + Stacked scalar data. + Returns + ------- + unstacked : pd.DataFrame + Unstacked scalar data. + """ + _df = df.copy() + + _df = format_header(_df, HEADER_B3_SCAL, "id_scal") + + _df = _df.set_index( + ["scenario", "name", "region", "carrier", "tech", "type", "var_name"] + ) + + unstacked = _df.unstack("var_name") + + return unstacked + + +def stack_var_name(df): + r""" + Given a DataFrame, this function will stack the variables. + + Parameters + ---------- + df : pd.DataFrame + DataFrame with one column per variable + + Returns + ------- + stacked : pd.DataFrame + DataFrame with a column "var_name" and "var_value" + """ + assert isinstance(df, pd.DataFrame) + + _df = df.copy() + + _df.columns.name = "var_name" + + stacked = _df.stack("var_name") + + stacked.name = "var_value" + + stacked = pd.DataFrame(stacked).reset_index() + + return stacked + + +class ScalarProcessor: + def __init__(self, scalars): + self.scalars = scalars + + def get_unstacked_var(self, var_name): + + _df = filter_df(self.scalars, "var_name", var_name) + + if _df.empty: + raise ValueError(f"No entries for {var_name} in df.") + + _df = unstack_var_name(_df) + + _df = _df.loc[:, "var_value"] + + return _df + + def append(self, var_name, data): + + _df = data.copy() + + if isinstance(_df, pd.Series): + _df.name = var_name + + _df = pd.DataFrame(_df) + + _df = stack_var_name(_df) + + _df = format_header(_df, HEADER_B3_SCAL, "id_scal") + + self.scalars = self.scalars.append(_df) diff --git a/scripts/prepare_scalars.py b/scripts/prepare_scalars.py index 95a55960..939b828f 100644 --- a/scripts/prepare_scalars.py +++ b/scripts/prepare_scalars.py @@ -18,107 +18,11 @@ * Calculate annualized investment cost from overnight cost, lifetime and wacc. """ -import pandas as pd import sys from oemof.tools.economics import annuity -import oemof_b3.tools.data_processing as dp -from oemof_b3.tools.data_processing import ( - load_b3_scalars, - format_header, - HEADER_B3_SCAL, -) - - -def unstack_var_name(df): - r""" - Given a DataFrame in oemof_b3 scalars format, this function will unstack - the variables. The returned DataFrame will have one column for each var_name. - - Parameters - ---------- - df : pd.DataFrame - Stacked scalar data. - Returns - ------- - unstacked : pd.DataFrame - Unstacked scalar data. - """ - _df = df.copy() - - _df = format_header(_df, HEADER_B3_SCAL, "id_scal") - - _df = _df.set_index( - ["scenario", "name", "region", "carrier", "tech", "type", "var_name"] - ) - - unstacked = _df.unstack("var_name") - - return unstacked - - -def stack_var_name(df): - r""" - Given a DataFrame, this function will stack the variables. - - Parameters - ---------- - df : pd.DataFrame - DataFrame with one column per variable - - Returns - ------- - stacked : pd.DataFrame - DataFrame with a column "var_name" and "var_value" - """ - assert isinstance(df, pd.DataFrame) - - _df = df.copy() - - _df.columns.name = "var_name" - - stacked = _df.stack("var_name") - - stacked.name = "var_value" - - stacked = pd.DataFrame(stacked).reset_index() - - return stacked - - -class ScalarProcessor: - def __init__(self, scalars): - self.scalars = scalars - - def get_unstacked_var(self, var_name): - - _df = dp.filter_df(self.scalars, "var_name", var_name) - - if _df.empty: - raise ValueError(f"No entries for {var_name} in df.") - - _df = unstack_var_name(_df) - - _df = _df.loc[:, "var_value"] - - return _df - - def append(self, var_name, data): - - _df = data.copy() - - if isinstance(_df, pd.Series): - _df.name = var_name - - _df = pd.DataFrame(_df) - - _df = stack_var_name(_df) - - _df = format_header(_df, HEADER_B3_SCAL, "id_scal") - - self.scalars = self.scalars.append(_df) - +from oemof_b3.tools.data_processing import ScalarProcessor, load_b3_scalars if __name__ == "__main__": in_path = sys.argv[1] # path to raw scalar data From a919240e89ab3cff89b0d5b1a0e57ec3161819a6 Mon Sep 17 00:00:00 2001 From: jnnr <32454596+jnnr@users.noreply.github.com> Date: Wed, 13 Oct 2021 17:56:38 +0200 Subject: [PATCH 11/19] Add some docstrings --- oemof_b3/tools/data_processing.py | 37 +++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/oemof_b3/tools/data_processing.py b/oemof_b3/tools/data_processing.py index 51fad04c..38a6c88c 100644 --- a/oemof_b3/tools/data_processing.py +++ b/oemof_b3/tools/data_processing.py @@ -490,11 +490,27 @@ def stack_var_name(df): class ScalarProcessor: + r""" + This class allows to filter and unstack scalar data in a way that makes processing simpler. + """ + def __init__(self, scalars): self.scalars = scalars def get_unstacked_var(self, var_name): - + r""" + Filters the scalars for the given var_name and returns the data in unstacked form. + + Parameters + ---------- + var_name : str + Name of the variable + + Returns + ------- + result : pd.DataFrame + Data in unstacked form. + """ _df = filter_df(self.scalars, "var_name", var_name) if _df.empty: @@ -502,12 +518,25 @@ def get_unstacked_var(self, var_name): _df = unstack_var_name(_df) - _df = _df.loc[:, "var_value"] + result = _df.loc[:, "var_value"] - return _df + return result def append(self, var_name, data): - + r""" + Accepts a Series or DataFrame in unstacked form and appends it to the scalars. + + Parameters + ---------- + var_name : str + Name of the data to append + data : pd.Series or pd.DataFrame + Data to append + + Returns + ------- + None + """ _df = data.copy() if isinstance(_df, pd.Series): From b3e56070f8176b5b68ba389f6ba6af9d08398f19 Mon Sep 17 00:00:00 2001 From: jnnr <32454596+jnnr@users.noreply.github.com> Date: Wed, 20 Oct 2021 12:20:35 +0200 Subject: [PATCH 12/19] Fill NaNs with base scenario values --- scripts/prepare_scalars.py | 40 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/scripts/prepare_scalars.py b/scripts/prepare_scalars.py index 939b828f..bba960fe 100644 --- a/scripts/prepare_scalars.py +++ b/scripts/prepare_scalars.py @@ -20,10 +20,47 @@ """ import sys +import pandas as pd + from oemof.tools.economics import annuity from oemof_b3.tools.data_processing import ScalarProcessor, load_b3_scalars + +def fill_na(df): + key = "scenario" + + value = "None" + + _df = df.copy() + + # save index and columns before resetting index + id_names = _df.index.names + + columns = _df.columns + + _df.reset_index(inplace=True) + + # separate data where NaNs should be filled and base + df_fill_na = _df.loc[_df[key] != value] + + base = _df.loc[_df[key] == value] + + # merge data on the columns of the data to update + df_merged = df_fill_na.drop(columns, 1).merge(base.drop(key, 1), "left") + + # update dataframe NaNs + df_fill_na.update(df_merged) + + # combine the filled data with the base data + df_fill_na = pd.concat([df_fill_na, base]) + + # set index as before + df_fill_na = df_fill_na.set_index(id_names) + + return df_fill_na + + if __name__ == "__main__": in_path = sys.argv[1] # path to raw scalar data out_path = sys.argv[2] # path to destination @@ -34,6 +71,9 @@ invest_data = sc.get_unstacked_var(["overnight_cost", "lifetime"]) + # if some value is None in some scenario key, use the values from Base scenario to fill NaNs + invest_data = fill_na(invest_data) + wacc = sc.get_unstacked_var("wacc").iloc[0, 0] assert isinstance(wacc, float) From 549bdd701b6267cd6a9c2af46289ad839211e968 Mon Sep 17 00:00:00 2001 From: jnnr <32454596+jnnr@users.noreply.github.com> Date: Thu, 4 Nov 2021 17:24:03 +0100 Subject: [PATCH 13/19] Apply suggestions from code review Co-authored-by: Sabine Haas --- scripts/prepare_scalars.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/prepare_scalars.py b/scripts/prepare_scalars.py index bba960fe..d0f41f39 100644 --- a/scripts/prepare_scalars.py +++ b/scripts/prepare_scalars.py @@ -3,9 +3,9 @@ Inputs ------- in_path1 : str - path of input file with raw scalar data as .csv + path incl. file name of input file with raw scalar data as .csv out_path : str - path of output file with prepared scalar data as .csv + path incl. file name of output file with prepared scalar data as .csv Outputs --------- From 2bf58d9f7ffc2065756fd71933eb7544ca5d5fda Mon Sep 17 00:00:00 2001 From: jnnr <32454596+jnnr@users.noreply.github.com> Date: Thu, 4 Nov 2021 17:35:37 +0100 Subject: [PATCH 14/19] Wrap annuisation in function and annuise storage capacity cost --- scripts/prepare_scalars.py | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/scripts/prepare_scalars.py b/scripts/prepare_scalars.py index bba960fe..4668a33f 100644 --- a/scripts/prepare_scalars.py +++ b/scripts/prepare_scalars.py @@ -61,29 +61,36 @@ def fill_na(df): return df_fill_na -if __name__ == "__main__": - in_path = sys.argv[1] # path to raw scalar data - out_path = sys.argv[2] # path to destination +def annuise_investment_cost(sc): - df = load_b3_scalars(in_path) + for var_name_cost in ["capacity_cost_overnight", "storage_capacity_cost_overnight"]: - sc = ScalarProcessor(df) + invest_data = sc.get_unstacked_var([var_name_cost, "lifetime"]) + + # if some value is None in some scenario key, use the values from Base scenario to fill NaNs + invest_data = fill_na(invest_data) + + wacc = sc.get_unstacked_var("wacc").iloc[0, 0] - invest_data = sc.get_unstacked_var(["overnight_cost", "lifetime"]) + assert isinstance(wacc, float) - # if some value is None in some scenario key, use the values from Base scenario to fill NaNs - invest_data = fill_na(invest_data) + invest_data["wacc"] = wacc - wacc = sc.get_unstacked_var("wacc").iloc[0, 0] + annuised_investment_cost = invest_data.apply( + lambda x: annuity(x[var_name_cost], x["lifetime"], x["wacc"]), 1 + ) - assert isinstance(wacc, float) + sc.append("capacity_cost", annuised_investment_cost) - invest_data["wacc"] = wacc - annuised_investment_cost = invest_data.apply( - lambda x: annuity(x["overnight_cost"], x["lifetime"], x["wacc"]), 1 - ) +if __name__ == "__main__": + in_path = sys.argv[1] # path to raw scalar data + out_path = sys.argv[2] # path to destination + + df = load_b3_scalars(in_path) + + sc = ScalarProcessor(df) - sc.append("annuity", annuised_investment_cost) + annuise_investment_cost(sc) sc.scalars.to_csv(out_path, index=False) From 5af0a6dc3cca5c8048853129ba8e6230469e72d7 Mon Sep 17 00:00:00 2001 From: jnnr <32454596+jnnr@users.noreply.github.com> Date: Thu, 4 Nov 2021 18:06:16 +0100 Subject: [PATCH 15/19] Save annuised cost under correct var name --- scripts/prepare_scalars.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/prepare_scalars.py b/scripts/prepare_scalars.py index ea3992ab..ca6c6ef3 100644 --- a/scripts/prepare_scalars.py +++ b/scripts/prepare_scalars.py @@ -80,7 +80,7 @@ def annuise_investment_cost(sc): lambda x: annuity(x[var_name_cost], x["lifetime"], x["wacc"]), 1 ) - sc.append("capacity_cost", annuised_investment_cost) + sc.append(var_name_cost.replace("_overnight", ""), annuised_investment_cost) if __name__ == "__main__": From b2d728b2738013f4c067f5ab056bf59924e10cb0 Mon Sep 17 00:00:00 2001 From: jnnr <32454596+jnnr@users.noreply.github.com> Date: Tue, 16 Nov 2021 10:59:04 +0100 Subject: [PATCH 16/19] Allow to filter and drop scalars --- oemof_b3/tools/data_processing.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/oemof_b3/tools/data_processing.py b/oemof_b3/tools/data_processing.py index 0eb103ef..7cf1e898 100644 --- a/oemof_b3/tools/data_processing.py +++ b/oemof_b3/tools/data_processing.py @@ -150,7 +150,7 @@ def save_df(df, path): print(f"User info: The DataFrame has been saved to: {path}.") -def filter_df(df, column_name, values): +def filter_df(df, column_name, values, inverse=False): """ This function filters a DataFrame. @@ -162,6 +162,9 @@ def filter_df(df, column_name, values): The column's name to filter. values : str/numeric/list String, number or list of strings or numbers to filter by. + inverse : Boolean + If True, the entries for `column_name` and `values` are dropped + and the rest of the DataFrame be retained. Returns ------- @@ -171,10 +174,15 @@ def filter_df(df, column_name, values): _df = df.copy() if isinstance(values, list): - df_filtered = _df.loc[df[column_name].isin(values)] + where = _df[column_name].isin(values) else: - df_filtered = _df.loc[df[column_name] == values] + where = _df[column_name] == values + + if inverse: + where = ~where + + df_filtered = _df.loc[where] return df_filtered @@ -529,6 +537,10 @@ def get_unstacked_var(self, var_name): return result + def drop(self, var_name): + + self.scalars = filter_df(self.scalars, "var_name", var_name, inverse=True) + def append(self, var_name, data): r""" Accepts a Series or DataFrame in unstacked form and appends it to the scalars. From 24fb6db6e75d0831f234687da428de0ac4aa4d79 Mon Sep 17 00:00:00 2001 From: jnnr <32454596+jnnr@users.noreply.github.com> Date: Tue, 16 Nov 2021 11:00:11 +0100 Subject: [PATCH 17/19] Clean up prepared scalars by dropping obsolete data, sorting and resetting index --- scripts/prepare_scalars.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/scripts/prepare_scalars.py b/scripts/prepare_scalars.py index ca6c6ef3..92caffef 100644 --- a/scripts/prepare_scalars.py +++ b/scripts/prepare_scalars.py @@ -82,6 +82,17 @@ def annuise_investment_cost(sc): sc.append(var_name_cost.replace("_overnight", ""), annuised_investment_cost) + sc.drop( + [ + "wacc", + "lifetime", + "capacity_cost_overnight", + "storage_capacity_cost_overnight", + "fixom_cost", + "storage_fixom_cost", + ] + ) + if __name__ == "__main__": in_path = sys.argv[1] # path to raw scalar data @@ -93,4 +104,8 @@ def annuise_investment_cost(sc): annuise_investment_cost(sc) - sc.scalars.to_csv(out_path, index=False) + sc.scalars = sc.scalars.sort_values(by=["carrier", "tech", "var_name", "scenario"]) + + sc.scalars.reset_index(inplace=True, drop=True) + + sc.scalars.to_csv(out_path) From e8e1d338d32da062ff3a7f662e2e78f9d28dc71a Mon Sep 17 00:00:00 2001 From: jnnr <32454596+jnnr@users.noreply.github.com> Date: Tue, 16 Nov 2021 11:01:30 +0100 Subject: [PATCH 18/19] Add rule --- Snakefile | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Snakefile b/Snakefile index 2d3f88e6..c39596a0 100644 --- a/Snakefile +++ b/Snakefile @@ -71,6 +71,15 @@ rule prepare_feedin: shell: "python {input.script} {input.wind_feedin} {input.pv_feedin} {input.ror_feedin} {output}" +rule prepare_scalars: + input: + raw_scalars="raw/base-scenario.csv", + script="scripts/prepare_scalars.py", + output: + "results/_resources/base-scenario.csv" + shell: + "python {input.script} {input.raw_scalars} {output}" + rule build_datapackage: input: "scenarios/{scenario}.yml" From c6f73399120c6439f511a846991e08c820262702 Mon Sep 17 00:00:00 2001 From: jnnr <32454596+jnnr@users.noreply.github.com> Date: Tue, 16 Nov 2021 11:04:42 +0100 Subject: [PATCH 19/19] Set index name --- scripts/prepare_scalars.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/prepare_scalars.py b/scripts/prepare_scalars.py index 92caffef..555f5d0a 100644 --- a/scripts/prepare_scalars.py +++ b/scripts/prepare_scalars.py @@ -108,4 +108,6 @@ def annuise_investment_cost(sc): sc.scalars.reset_index(inplace=True, drop=True) + sc.scalars.index.name = "id_scal" + sc.scalars.to_csv(out_path)