From d64e17a1ab1022d2b96d01ba261cb382bdaf73a8 Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Wed, 3 Apr 2024 10:47:05 -0700 Subject: [PATCH] fix config options --- workflow/scripts/osemosys_global/TS_data.py | 1089 +++++++---------- workflow/scripts/osemosys_global/emissions.py | 439 +++---- 2 files changed, 689 insertions(+), 839 deletions(-) diff --git a/workflow/scripts/osemosys_global/TS_data.py b/workflow/scripts/osemosys_global/TS_data.py index 5502aab4..cd82a1d3 100644 --- a/workflow/scripts/osemosys_global/TS_data.py +++ b/workflow/scripts/osemosys_global/TS_data.py @@ -8,36 +8,41 @@ import pandas as pd import itertools -import seaborn as sns; sns.set() +import seaborn as sns + +sns.set() import urllib import os + # from osemosys_global.configuration import ConfigFile, ConfigPaths from configuration import ConfigFile, ConfigPaths from osemosys_global.utils import apply_timeshift from utils import apply_dtypes from constants import SET_DTYPES import time + # from OPG_configuration import ConfigFile, ConfigPaths -import logging -logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO) +import logging + +logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO) # ### Input data files and user input # CONFIGURATION PARAMETERS config_paths = ConfigPaths() -config = ConfigFile('config') +config = ConfigFile("config") input_dir = config_paths.input_dir input_data_dir = config_paths.input_data_dir output_dir = config_paths.output_dir output_data_dir = config_paths.output_data_dir custom_nodes_dir = config_paths.custom_nodes_dir -geographic_scope = config.get('geographic_scope') -seasons = config.get('seasons') -daytype = config.get('daytype') -dayparts = config.get('dayparts') -reserve_margin = config.get('reserve_margin') +geographic_scope = config.get("geographic_scope") +seasons = config.get("seasons") +daytype = config.get("daytype") +dayparts = config.get("dayparts") +reserve_margin = config.get("reserve_margin") # Check for custom nodes directory try: @@ -46,163 +51,133 @@ pass region_name = config.region_name -custom_nodes = config.get('nodes_to_add') +custom_nodes = config.get("nodes_to_add") # Checks whether PLEXOS-World 2015 data needs to be retrieved from the PLEXOS-World Harvard Dataverse. 
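+# NOTE: the try/except below is effectively a cache check. The bare
+# `Open = open(...)` call only probes for a local copy of the demand CSV;
+# if it raises IOError, the file is first downloaded from the Harvard
+# Dataverse and then read.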
try: # Open = open(r'data/All_Demand_UTC_2015.csv') - Open = open(os.path.join(input_data_dir, - 'All_Demand_UTC_2015.csv') - ) + Open = open(os.path.join(input_data_dir, "All_Demand_UTC_2015.csv")) # demand_df = pd.read_csv(r'data/All_Demand_UTC_2015.csv' , encoding='latin-1') - demand_df = pd.read_csv(os.path.join(input_data_dir, - 'All_Demand_UTC_2015.csv'), - encoding='latin-1') + demand_df = pd.read_csv( + os.path.join(input_data_dir, "All_Demand_UTC_2015.csv"), encoding="latin-1" + ) except IOError: - urllib.request.urlretrieve ('https://dataverse.harvard.edu/api/access/datafile/3985039?format=original&gbrecs=true', - os.path.join(input_data_dir, - 'All_Demand_UTC_2015.csv') - ) + urllib.request.urlretrieve( + "https://dataverse.harvard.edu/api/access/datafile/3985039?format=original&gbrecs=true", + os.path.join(input_data_dir, "All_Demand_UTC_2015.csv"), + ) - demand_df = pd.read_csv(os.path.join(input_data_dir, - 'All_Demand_UTC_2015.csv'), - encoding='latin-1') + demand_df = pd.read_csv( + os.path.join(input_data_dir, "All_Demand_UTC_2015.csv"), encoding="latin-1" + ) -seasons_raw = config.get('seasons') +seasons_raw = config.get("seasons") seasonsData = [] for s, months in seasons_raw.items(): for month in months: - seasonsData.append([month, s]) -seasons_df = pd.DataFrame(seasonsData, - columns = ['month', 'season']) -seasons_df = seasons_df.sort_values(by = ['month']).reset_index(drop = True) + seasonsData.append([month, s]) +seasons_df = pd.DataFrame(seasonsData, columns=["month", "season"]) +seasons_df = seasons_df.sort_values(by=["month"]).reset_index(drop=True) -dayparts_raw = config.get('dayparts') +dayparts_raw = config.get("dayparts") daypartData = [] for dp, hr in dayparts_raw.items(): daypartData.append([dp, hr[0], hr[1]]) -dayparts_df = pd.DataFrame(daypartData, - columns = ['daypart', 'start_hour', 'end_hour']) -timeshift = config.get('timeshift') -dayparts_df['start_hour'] = dayparts_df['start_hour'].map(lambda x: apply_timeshift(x, timeshift)) -dayparts_df['end_hour'] = dayparts_df['end_hour'].map(lambda x: apply_timeshift(x, timeshift)) - -daytype_included = config.get('daytype') -model_start_year = config.get('startYear') -model_end_year = config.get('endYear') -years = list(range(model_start_year, model_end_year+1)) +dayparts_df = pd.DataFrame(daypartData, columns=["daypart", "start_hour", "end_hour"]) +timeshift = config.get("timeshift") +dayparts_df["start_hour"] = dayparts_df["start_hour"].map( + lambda x: apply_timeshift(x, timeshift) +) +dayparts_df["end_hour"] = dayparts_df["end_hour"].map( + lambda x: apply_timeshift(x, timeshift) +) + +daytype_included = config.get("daytype") +model_start_year = config.get("startYear") +model_end_year = config.get("endYear") +years = list(range(model_start_year, model_end_year + 1)) # Read renewable profile files -csp_df = pd.read_csv(os.path.join(input_data_dir, - 'CSP 2015.csv'), - encoding='latin-1') +csp_df = pd.read_csv(os.path.join(input_data_dir, "CSP 2015.csv"), encoding="latin-1") if custom_nodes: - csp_df_custom = pd.read_csv(os.path.join(custom_nodes_dir, - 'RE_profiles_CSP.csv'), - encoding='latin-1') - csp_df_custom.drop(['Datetime'], - axis=1, - inplace=True) + csp_df_custom = pd.read_csv( + os.path.join(custom_nodes_dir, "RE_profiles_CSP.csv"), encoding="latin-1" + ) + csp_df_custom.drop(["Datetime"], axis=1, inplace=True) csp_df = pd.concat([csp_df, csp_df_custom], axis=1) -csp_df.name = 'CSP' +csp_df.name = "CSP" -spv_df = pd.read_csv(os.path.join(input_data_dir, - 'SolarPV 2015.csv'), - 
encoding='latin-1') +spv_df = pd.read_csv( + os.path.join(input_data_dir, "SolarPV 2015.csv"), encoding="latin-1" +) if custom_nodes: - spv_df_custom = pd.read_csv(os.path.join(custom_nodes_dir, - 'RE_profiles_SPV.csv'), - encoding='latin-1') - spv_df_custom.drop(['Datetime'], - axis=1, - inplace=True) + spv_df_custom = pd.read_csv( + os.path.join(custom_nodes_dir, "RE_profiles_SPV.csv"), encoding="latin-1" + ) + spv_df_custom.drop(["Datetime"], axis=1, inplace=True) spv_df = pd.concat([spv_df, spv_df_custom], axis=1) -spv_df.name = 'SPV' +spv_df.name = "SPV" -nodes = ['-'.join(x.split('-')[1:]) - for x in spv_df.columns - if x - not in ['Datetime']] -regions = [x - for x in spv_df.columns - if x - not in ['Datetime']] +nodes = ["-".join(x.split("-")[1:]) for x in spv_df.columns if x not in ["Datetime"]] +regions = [x for x in spv_df.columns if x not in ["Datetime"]] -node_region_dict = dict(zip(nodes, - regions)) +node_region_dict = dict(zip(nodes, regions)) -hyd_df = pd.read_csv(os.path.join(input_data_dir, - 'Hydro_Monthly_Profiles (15 year average).csv'), - encoding='latin-1') +hyd_df = pd.read_csv( + os.path.join(input_data_dir, "Hydro_Monthly_Profiles (15 year average).csv"), + encoding="latin-1", +) if custom_nodes: - hyd_df_custom = pd.read_csv(os.path.join(custom_nodes_dir, - 'RE_profiles_HYD.csv'), - encoding='latin-1') + hyd_df_custom = pd.read_csv( + os.path.join(custom_nodes_dir, "RE_profiles_HYD.csv"), encoding="latin-1" + ) hyd_df = pd.concat([hyd_df, hyd_df_custom]) -hyd_df = hyd_df.loc[hyd_df['NAME'].str.endswith('Capacity Scaler')] -hyd_df['NAME'] = (hyd_df['NAME'] - .str.split('_') - .str[0]) +hyd_df = hyd_df.loc[hyd_df["NAME"].str.endswith("Capacity Scaler")] +hyd_df["NAME"] = hyd_df["NAME"].str.split("_").str[0] # Drop Brazil transmission nodes J1, J2, J3 -brazil_j_nodes = ['BRA-J1', 'BRA-J2', 'BRA-J3'] -hyd_df = hyd_df.loc[~hyd_df['NAME'].isin(brazil_j_nodes)] -hyd_df = hyd_df.set_index('NAME').T.reset_index() -hyd_df.rename(columns={'index': 'MONTH'}, - inplace=True) -hyd_df['MONTH'] = (hyd_df['MONTH'] - .str.replace('M', '') - .astype(int)) - -hyd_df_processed = pd.DataFrame(columns=['Datetime']) -hyd_df_processed['Datetime'] = spv_df['Datetime'] -hyd_df_processed['MONTH'] = (hyd_df_processed['Datetime'] - .str.split('/') - .str[1] - .astype(int)) -hyd_df_processed = pd.merge(hyd_df_processed, - hyd_df, - how='left', - on='MONTH') -hyd_df_processed.drop(columns='MONTH', - inplace=True) -hyd_df_processed.rename(columns=node_region_dict, - inplace=True) -hyd_df_processed.name = 'HYD' - -won_df = pd.read_csv(os.path.join(input_data_dir, - 'Won 2015.csv'), - encoding='latin-1') +brazil_j_nodes = ["BRA-J1", "BRA-J2", "BRA-J3"] +hyd_df = hyd_df.loc[~hyd_df["NAME"].isin(brazil_j_nodes)] +hyd_df = hyd_df.set_index("NAME").T.reset_index() +hyd_df.rename(columns={"index": "MONTH"}, inplace=True) +hyd_df["MONTH"] = hyd_df["MONTH"].str.replace("M", "").astype(int) + +hyd_df_processed = pd.DataFrame(columns=["Datetime"]) +hyd_df_processed["Datetime"] = spv_df["Datetime"] +hyd_df_processed["MONTH"] = ( + hyd_df_processed["Datetime"].str.split("/").str[1].astype(int) +) +hyd_df_processed = pd.merge(hyd_df_processed, hyd_df, how="left", on="MONTH") +hyd_df_processed.drop(columns="MONTH", inplace=True) +hyd_df_processed.rename(columns=node_region_dict, inplace=True) +hyd_df_processed.name = "HYD" + +won_df = pd.read_csv(os.path.join(input_data_dir, "Won 2015.csv"), encoding="latin-1") if custom_nodes: - won_df_custom = pd.read_csv(os.path.join(custom_nodes_dir, - 
'RE_profiles_WON.csv'), - encoding='latin-1') - won_df_custom.drop(['Datetime'], - axis=1, - inplace=True) + won_df_custom = pd.read_csv( + os.path.join(custom_nodes_dir, "RE_profiles_WON.csv"), encoding="latin-1" + ) + won_df_custom.drop(["Datetime"], axis=1, inplace=True) won_df = pd.concat([won_df, won_df_custom], axis=1) -won_df.name = 'WON' +won_df.name = "WON" -wof_df = pd.read_csv(os.path.join(input_data_dir, - 'Woff 2015.csv'), - encoding='latin-1') +wof_df = pd.read_csv(os.path.join(input_data_dir, "Woff 2015.csv"), encoding="latin-1") if custom_nodes: - wof_df_custom = pd.read_csv(os.path.join(custom_nodes_dir, - 'RE_profiles_WOF.csv'), - encoding='latin-1') - wof_df_custom.drop(['Datetime'], - axis=1, - inplace=True) + wof_df_custom = pd.read_csv( + os.path.join(custom_nodes_dir, "RE_profiles_WOF.csv"), encoding="latin-1" + ) + wof_df_custom.drop(["Datetime"], axis=1, inplace=True) wof_df = pd.concat([wof_df, wof_df_custom], axis=1) -wof_df.name = 'WOF' +wof_df.name = "WOF" # ### Create 'output' directory if it doesn't exist import os + if not os.path.exists(output_data_dir): os.makedirs(output_data_dir) @@ -210,319 +185,271 @@ # ### Create columns for year, month, day, hour, and day type if custom_nodes: - demand_nodes = [x for x in demand_df.columns if x != 'Datetime'] + custom_nodes + demand_nodes = [x for x in demand_df.columns if x != "Datetime"] + custom_nodes else: - demand_nodes = [x for x in demand_df.columns if x != 'Datetime'] + demand_nodes = [x for x in demand_df.columns if x != "Datetime"] # Convert datetime to year, month, day, and hour -demand_df['Datetime'] = pd.to_datetime(demand_df['Datetime']) -demand_df['Year'] = demand_df['Datetime'].dt.strftime('%Y').astype(int) -demand_df['Month'] = demand_df['Datetime'].dt.strftime('%m').astype(int) -demand_df['Day'] = demand_df['Datetime'].dt.strftime('%d').astype(int) -demand_df['Hour'] = demand_df['Datetime'].dt.strftime('%H').astype(int) +demand_df["Datetime"] = pd.to_datetime(demand_df["Datetime"]) +demand_df["Year"] = demand_df["Datetime"].dt.strftime("%Y").astype(int) +demand_df["Month"] = demand_df["Datetime"].dt.strftime("%m").astype(int) +demand_df["Day"] = demand_df["Datetime"].dt.strftime("%d").astype(int) +demand_df["Hour"] = demand_df["Datetime"].dt.strftime("%H").astype(int) if custom_nodes: - custom_sp_demand_profile = pd.read_csv(os.path.join(input_data_dir, - "custom_nodes", - "specified_demand_profile.csv")) - demand_df = pd.merge(demand_df, - custom_sp_demand_profile, - how='left', - on=['Month','Day','Hour']) + custom_sp_demand_profile = pd.read_csv( + os.path.join(input_data_dir, "custom_nodes", "specified_demand_profile.csv") + ) + demand_df = pd.merge( + demand_df, custom_sp_demand_profile, how="left", on=["Month", "Day", "Hour"] + ) # Create column for weekday/weekend -demand_df['Day-of-week'] = demand_df['Datetime'].dt.dayofweek -demand_df.loc[demand_df['Day-of-week'] < 5, 'Day-of-week'] = 'WD' -demand_df.loc[demand_df['Day-of-week'] != 'WD', 'Day-of-week'] = 'WE' +demand_df["Day-of-week"] = demand_df["Datetime"].dt.dayofweek +demand_df.loc[demand_df["Day-of-week"] < 5, "Day-of-week"] = "WD" +demand_df.loc[demand_df["Day-of-week"] != "WD", "Day-of-week"] = "WE" # ### Create dictionaries for 'seasons' and 'dayparts' -seasons_dict = dict(zip(list(seasons_df['month']), - list(seasons_df['season']) - ) - ) +seasons_dict = dict(zip(list(seasons_df["month"]), list(seasons_df["season"]))) -dayparts_dict = {i: [j, k] - for i, j, k - in zip(list(dayparts_df['daypart']), - list(dayparts_df['start_hour']), 
- list(dayparts_df['end_hour']) - ) - } +dayparts_dict = { + i: [j, k] + for i, j, k in zip( + list(dayparts_df["daypart"]), + list(dayparts_df["start_hour"]), + list(dayparts_df["end_hour"]), + ) +} # ### Create columns with 'seasons' and 'dayparts' +demand_df["Season"] = demand_df["Month"] +demand_df["Season"].replace(seasons_dict, inplace=True) -demand_df['Season'] = demand_df['Month'] -demand_df['Season'].replace(seasons_dict, inplace=True) - -demand_df['Hour'] = demand_df['Hour'].map(lambda x: apply_timeshift(int(x), timeshift)) +demand_df["Hour"] = demand_df["Hour"].map(lambda x: apply_timeshift(int(x), timeshift)) for daypart in dayparts_dict: - if dayparts_dict[daypart][0] > dayparts_dict[daypart][1]: # loops over 24hrs - demand_df.loc[(demand_df['Hour'] >= dayparts_dict[daypart][0]) | - (demand_df['Hour'] < dayparts_dict[daypart][1]), - 'Daypart'] = daypart + if dayparts_dict[daypart][0] > dayparts_dict[daypart][1]: # loops over 24hrs + demand_df.loc[ + (demand_df["Hour"] >= dayparts_dict[daypart][0]) + | (demand_df["Hour"] < dayparts_dict[daypart][1]), + "Daypart", + ] = daypart else: - demand_df.loc[(demand_df['Hour'] >= dayparts_dict[daypart][0]) & - (demand_df['Hour'] < dayparts_dict[daypart][1]), - 'Daypart'] = daypart + demand_df.loc[ + (demand_df["Hour"] >= dayparts_dict[daypart][0]) + & (demand_df["Hour"] < dayparts_dict[daypart][1]), + "Daypart", + ] = daypart # ### Create column for timeslice with and without day-type if daytype_included: - demand_df['TIMESLICE'] = (demand_df['Season'] + - demand_df['Day-of-week'] + - demand_df['Daypart']) + demand_df["TIMESLICE"] = ( + demand_df["Season"] + demand_df["Day-of-week"] + demand_df["Daypart"] + ) else: - demand_df['TIMESLICE'] = (demand_df['Season'] + - demand_df['Daypart']) + demand_df["TIMESLICE"] = demand_df["Season"] + demand_df["Daypart"] # ### Calculate YearSplit -yearsplit = (demand_df['TIMESLICE']. - value_counts(normalize = True). - to_frame('VALUE'). - round(4). - reset_index(). 
- rename({'index':'TIMESLICE'}, axis = 1)) - -yearsplit_final = pd.DataFrame(list(itertools.product(yearsplit['TIMESLICE'].unique(), - years) - ), - columns = ['TIMESLICE', 'YEAR'] - ) -yearsplit_final = yearsplit_final.join(yearsplit.set_index('TIMESLICE'), - on = 'TIMESLICE') +yearsplit = ( + demand_df["TIMESLICE"] + .value_counts(normalize=True) + .to_frame("VALUE") + .round(4) + .reset_index() + .rename({"index": "TIMESLICE"}, axis=1) +) + +yearsplit_final = pd.DataFrame( + list(itertools.product(yearsplit["TIMESLICE"].unique(), years)), + columns=["TIMESLICE", "YEAR"], +) +yearsplit_final = yearsplit_final.join(yearsplit.set_index("TIMESLICE"), on="TIMESLICE") yearsplit_final = apply_dtypes(yearsplit_final, "Year Split") -yearsplit_final.to_csv(os.path.join(output_data_dir, - 'YearSplit.csv'), - index=None) +yearsplit_final.to_csv(os.path.join(output_data_dir, "YearSplit.csv"), index=None) # Calculate SpecifiedAnnualDemand and SpecifiedDemandProfile # ### Calculate SpecifiedAnnualDemand and SpecifiedDemandProfile -sp_demand_df = demand_df[[x - for x in demand_df.columns - if x in demand_nodes or - x == 'TIMESLICE']] -sp_demand_df = pd.melt(sp_demand_df, - id_vars = 'TIMESLICE', - value_vars = demand_nodes, - var_name = 'node', - value_name = 'demand') +sp_demand_df = demand_df[ + [x for x in demand_df.columns if x in demand_nodes or x == "TIMESLICE"] +] +sp_demand_df = pd.melt( + sp_demand_df, + id_vars="TIMESLICE", + value_vars=demand_nodes, + var_name="node", + value_name="demand", +) -sp_demand_df = sp_demand_df.groupby(['TIMESLICE', 'node'], - as_index = False).agg(sum) +sp_demand_df = sp_demand_df.groupby(["TIMESLICE", "node"], as_index=False).agg(sum) # Calculate SpecifiedAnnualDemand -total_demand_df = sp_demand_df.groupby('node', - as_index = False).agg(sum) +total_demand_df = sp_demand_df.groupby("node", as_index=False).agg(sum) -total_demand_df.rename({'demand':'total_demand'}, - axis = 1, - inplace = True) +total_demand_df.rename({"demand": "total_demand"}, axis=1, inplace=True) -sp_demand_df = sp_demand_df.join(total_demand_df.set_index('node'), - on = 'node') +sp_demand_df = sp_demand_df.join(total_demand_df.set_index("node"), on="node") # Calculate SpecifiedDemandProfile -sp_demand_df['VALUE'] = (sp_demand_df['demand'] / - sp_demand_df['total_demand']) +sp_demand_df["VALUE"] = sp_demand_df["demand"] / sp_demand_df["total_demand"] - -# Filter out country aggregate values for countries with multiple nodes -country_with_nodes = list((sp_demand_df.loc[sp_demand_df['node'].str.len() > 6, - 'node']. - str[:-3]. - unique())) -sp_demand_df = sp_demand_df.loc[~(sp_demand_df['node']. - isin(country_with_nodes))] +# Filter out country aggregate values for countries with multiple nodes +country_with_nodes = list( + (sp_demand_df.loc[sp_demand_df["node"].str.len() > 6, "node"].str[:-3].unique()) +) + +sp_demand_df = sp_demand_df.loc[~(sp_demand_df["node"].isin(country_with_nodes))] -# Rename COMMODITY based on naming convention. +# Rename COMMODITY based on naming convention. # Add 'XX' for countries without multiple nodes -sp_demand_df.loc[sp_demand_df['node'].str.len() == 5, - 'FUEL'] = ('ELC' + - sp_demand_df['node'] + - '02') - -sp_demand_df.loc[sp_demand_df['node'].str.len() == 6, - 'FUEL'] = ('ELC' + - sp_demand_df['node']. - str.split('-'). - str[1:]. - str.join("") + - 'XX02') - -sp_demand_df.loc[sp_demand_df['node'].str.len() > 6, - 'FUEL'] = ('ELC' + - sp_demand_df['node']. - str.split('-'). - str[1:]. 
- str.join("") + - '02') +sp_demand_df.loc[sp_demand_df["node"].str.len() == 5, "FUEL"] = ( + "ELC" + sp_demand_df["node"] + "02" +) + +sp_demand_df.loc[sp_demand_df["node"].str.len() == 6, "FUEL"] = ( + "ELC" + sp_demand_df["node"].str.split("-").str[1:].str.join("") + "XX02" +) + +sp_demand_df.loc[sp_demand_df["node"].str.len() > 6, "FUEL"] = ( + "ELC" + sp_demand_df["node"].str.split("-").str[1:].str.join("") + "02" +) # Create master table for SpecifiedDemandProfile -sp_demand_df_final = pd.DataFrame(list(itertools.product(sp_demand_df['TIMESLICE'].unique(), - sp_demand_df['FUEL'].unique(), - years) - ), - columns = ['TIMESLICE', 'FUEL', 'YEAR'] - ) -sp_demand_df_final = sp_demand_df_final.join(sp_demand_df.set_index(['TIMESLICE', 'FUEL']), - on = ['TIMESLICE', 'FUEL']) +sp_demand_df_final = pd.DataFrame( + list( + itertools.product( + sp_demand_df["TIMESLICE"].unique(), sp_demand_df["FUEL"].unique(), years + ) + ), + columns=["TIMESLICE", "FUEL", "YEAR"], +) +sp_demand_df_final = sp_demand_df_final.join( + sp_demand_df.set_index(["TIMESLICE", "FUEL"]), on=["TIMESLICE", "FUEL"] +) # Add 'REGION' column and fill 'GLOBAL' throughout -sp_demand_df_final['REGION'] = 'GLOBAL' - -total_demand_df_final = (sp_demand_df_final. - groupby(['REGION', - 'FUEL', - 'YEAR'], - as_index = False)['total_demand']. - agg('mean'). - rename({'total_demand':'VALUE'}, - axis = 1) - ) +sp_demand_df_final["REGION"] = "GLOBAL" + +total_demand_df_final = ( + sp_demand_df_final.groupby(["REGION", "FUEL", "YEAR"], as_index=False)[ + "total_demand" + ] + .agg("mean") + .rename({"total_demand": "VALUE"}, axis=1) +) # Convert SpecifiedAnnualDemand to required units -total_demand_df_final['VALUE'] = total_demand_df_final['VALUE'].mul(3.6*1e-6) +total_demand_df_final["VALUE"] = total_demand_df_final["VALUE"].mul(3.6 * 1e-6) -# Generate SpecifiedDemandProfile.csv file -sp_demand_df_final['VALUE'] = sp_demand_df_final['VALUE'].round(2) -sp_demand_df_final = sp_demand_df_final[['REGION', - 'FUEL', - 'TIMESLICE', - 'YEAR', - 'VALUE']] +# Generate SpecifiedDemandProfile.csv file +sp_demand_df_final["VALUE"] = sp_demand_df_final["VALUE"].round(2) +sp_demand_df_final = sp_demand_df_final[ + ["REGION", "FUEL", "TIMESLICE", "YEAR", "VALUE"] +] # sp_demand_df_final = apply_dtypes(sp_demand_df_final, "SpecifiedDemandProfile") -sp_demand_df_final.drop_duplicates(subset=['REGION','TIMESLICE','FUEL','YEAR'], - keep='last', - inplace=True) -sp_demand_df_final.to_csv(os.path.join(output_data_dir,'SpecifiedDemandProfile.csv'), index=None) +sp_demand_df_final.drop_duplicates( + subset=["REGION", "TIMESLICE", "FUEL", "YEAR"], keep="last", inplace=True +) +sp_demand_df_final.to_csv( + os.path.join(output_data_dir, "SpecifiedDemandProfile.csv"), index=None +) # CapacityFactor -datetime_ts_df = demand_df[['Datetime', 'TIMESLICE']] -capfac_all_df = pd.DataFrame(columns = ['REGION', - 'TECHNOLOGY', - 'TIMESLICE', - 'YEAR', - 'VALUE']) +datetime_ts_df = demand_df[["Datetime", "TIMESLICE"]] +capfac_all_df = pd.DataFrame( + columns=["REGION", "TECHNOLOGY", "TIMESLICE", "YEAR", "VALUE"] +) + def capacity_factor(df): - df['Datetime'] = pd.to_datetime(df['Datetime']) - capfac_df = (df. - set_index('Datetime'). - join(datetime_ts_df. 
- set_index('Datetime'), - on = 'Datetime') - ) - capfac_nodes = [x for - x in - capfac_df.columns - if x - not in ['Datetime', 'TIMESLICE']] - capfac_df = capfac_df.reset_index().drop('Datetime', - axis = 1) - capfac_df = pd.melt(capfac_df, - id_vars = 'TIMESLICE', - value_vars = capfac_nodes, - var_name = 'node', - value_name = 'VALUE') - capfac_df = (capfac_df. - groupby(['TIMESLICE', 'node'], - as_index = False). - agg('mean') - ) - capfac_df['VALUE'] = (capfac_df['VALUE']. - div(100). - round(4) - ) - - ## Filter out country aggregate values for countries with multiple nodes - capfac_df = capfac_df.loc[~(capfac_df['node']. - isin(country_with_nodes))] - - # Rename COMMODITY based on naming convention. + df["Datetime"] = pd.to_datetime(df["Datetime"]) + capfac_df = df.set_index("Datetime").join( + datetime_ts_df.set_index("Datetime"), on="Datetime" + ) + capfac_nodes = [x for x in capfac_df.columns if x not in ["Datetime", "TIMESLICE"]] + capfac_df = capfac_df.reset_index().drop("Datetime", axis=1) + capfac_df = pd.melt( + capfac_df, + id_vars="TIMESLICE", + value_vars=capfac_nodes, + var_name="node", + value_name="VALUE", + ) + capfac_df = capfac_df.groupby(["TIMESLICE", "node"], as_index=False).agg("mean") + capfac_df["VALUE"] = capfac_df["VALUE"].div(100).round(4) + + ## Filter out country aggregate values for countries with multiple nodes + capfac_df = capfac_df.loc[~(capfac_df["node"].isin(country_with_nodes))] + + # Rename COMMODITY based on naming convention. # Add 'XX' for countries without multiple nodes - capfac_df.loc[capfac_df['node'].str.len() <= 6, - 'TECHNOLOGY'] = ('PWR' + - df.name + - capfac_df['node']. - str.split('-'). - str[1:]. - str.join("") + - 'XX01') - - capfac_df.loc[capfac_df['node'].str.len() > 6, - 'TECHNOLOGY'] = ('PWR' + - df.name + - capfac_df['node']. - str.split('-'). - str[1:]. - str.join("") + - '01') - + capfac_df.loc[capfac_df["node"].str.len() <= 6, "TECHNOLOGY"] = ( + "PWR" + df.name + capfac_df["node"].str.split("-").str[1:].str.join("") + "XX01" + ) + + capfac_df.loc[capfac_df["node"].str.len() > 6, "TECHNOLOGY"] = ( + "PWR" + df.name + capfac_df["node"].str.split("-").str[1:].str.join("") + "01" + ) + # Create master table for CapacityFactor - capfac_df_final = pd.DataFrame(list(itertools.product(capfac_df['TIMESLICE'].unique(), - capfac_df['TECHNOLOGY'].unique(), - years) - ), - columns = ['TIMESLICE', 'TECHNOLOGY', 'YEAR'] - ) - capfac_df_final = (capfac_df_final. - join(capfac_df. 
-                            set_index(['TIMESLICE', 'TECHNOLOGY']),
-                            on = ['TIMESLICE', 'TECHNOLOGY'])
-                       )
-    
+    capfac_df_final = pd.DataFrame(
+        list(
+            itertools.product(
+                capfac_df["TIMESLICE"].unique(), capfac_df["TECHNOLOGY"].unique(), years
+            )
+        ),
+        columns=["TIMESLICE", "TECHNOLOGY", "YEAR"],
+    )
+    capfac_df_final = capfac_df_final.join(
+        capfac_df.set_index(["TIMESLICE", "TECHNOLOGY"]), on=["TIMESLICE", "TECHNOLOGY"]
+    )
+
     # Add 'REGION' column and fill 'GLOBAL' throughout
-    capfac_df_final['REGION'] = 'GLOBAL'
-    
-    capfac_df_final = capfac_df_final[['REGION',
-                                       'TECHNOLOGY',
-                                       'TIMESLICE',
-                                       'YEAR',
-                                       'VALUE']]
-    
+    capfac_df_final["REGION"] = "GLOBAL"
+
+    capfac_df_final = capfac_df_final[
+        ["REGION", "TECHNOLOGY", "TIMESLICE", "YEAR", "VALUE"]
+    ]
+
     return capfac_df_final
 
 
 for each in [hyd_df_processed, csp_df, spv_df, won_df, wof_df]:
-    capfac_all_df = capfac_all_df.append(capacity_factor(each),
-                                         ignore_index = True)
-    
+    capfac_all_df = capfac_all_df.append(capacity_factor(each), ignore_index=True)
+
 # capfac_all_df = apply_dtypes(capfac_all_df, "CapacityFactor")
-capfac_all_df.drop_duplicates(subset=['REGION','TECHNOLOGY','TIMESLICE','YEAR'],
-                              keep='last',
-                              inplace=True)
-capfac_all_df.to_csv(os.path.join(output_data_dir,
-                                  'CapacityFactor.csv'),
-                     index=None)
+capfac_all_df.drop_duplicates(
+    subset=["REGION", "TECHNOLOGY", "TIMESLICE", "YEAR"], keep="last", inplace=True
+)
+capfac_all_df.to_csv(os.path.join(output_data_dir, "CapacityFactor.csv"), index=None)
 
-# Create csv for TIMESLICE 
+# Create csv for TIMESLICE
 
 # ## Create csv for TIMESLICE
-time_slice_list = list(demand_df['TIMESLICE'].unique())
-time_slice_df = pd.DataFrame(time_slice_list, columns = ['VALUE']).astype(SET_DTYPES["TIMESLICE"])
-time_slice_df.to_csv(os.path.join(output_data_dir,
-                                  'TIMESLICE.csv'),
-                     index=None)
+time_slice_list = list(demand_df["TIMESLICE"].unique())
+time_slice_df = pd.DataFrame(time_slice_list, columns=["VALUE"]).astype(
+    SET_DTYPES["TIMESLICE"]
+)
+time_slice_df.to_csv(os.path.join(output_data_dir, "TIMESLICE.csv"), index=None)
 
-'''
+"""
 def add_storage(region_name,
                 years,
                 output_data_dir,
@@ -532,120 +459,94 @@ def add_storage(region_name,
                 daytype,
                 dayparts):
-    '''
+    """
 
-demand_nodes = list(set(list(sp_demand_df_final['FUEL'].str[3:8])))
+demand_nodes = list(set(list(sp_demand_df_final["FUEL"].str[3:8])))
 
 # Create SET STORAGE
-storage_set = [('BAT' + x +'01') for x in demand_nodes
-               if x[:3] in geographic_scope]
-df_storage_set = pd.DataFrame(storage_set,
-                              columns=['VALUE'])
-df_storage_set.to_csv(os.path.join(output_data_dir,
-                                   'STORAGE.csv'),
-                      index=None)
+storage_set = [("BAT" + x + "01") for x in demand_nodes if x[:3] in geographic_scope]
+df_storage_set = pd.DataFrame(storage_set, columns=["VALUE"])
+df_storage_set.to_csv(os.path.join(output_data_dir, "STORAGE.csv"), index=None)
 
 # Add storage technologies to SET TECHNOLOGY
-storage_techs = [('PWRBAT' + x +'01') for x in demand_nodes
-                 if x[:3] in geographic_scope]
-df_storage_techs = pd.DataFrame(storage_techs,
-                                columns=['VALUE'])
+storage_techs = [
+    ("PWRBAT" + x + "01") for x in demand_nodes if x[:3] in geographic_scope
+]
+df_storage_techs = pd.DataFrame(storage_techs, columns=["VALUE"])
 
 wait_time = 0
-while not os.path.exists(os.path.join(output_data_dir, 'TECHNOLOGY.csv')):
+while not os.path.exists(os.path.join(output_data_dir, "TECHNOLOGY.csv")):
     time.sleep(5)
     wait_time += 1
-    if wait_time > 20 : break
+    if wait_time > 20:
+        break
 
-set_techonology = pd.read_csv(os.path.join(output_data_dir, 
-                                        'TECHNOLOGY.csv'))
-set_technology = pd.concat([set_techonology, df_storage_techs])
-set_technology.to_csv(os.path.join(output_data_dir,
-                                   'TECHNOLOGY.csv'),
-                      index=None)
+set_technology = pd.read_csv(os.path.join(output_data_dir, "TECHNOLOGY.csv"))
+set_technology = pd.concat([set_technology, df_storage_techs])
+set_technology.to_csv(os.path.join(output_data_dir, "TECHNOLOGY.csv"), index=None)
 
 time.sleep(10)
 
 # Add InputActivityRatio and OutputActivityRatio
 # InputActivityRatio
-df_storage_iar = pd.DataFrame(list(itertools.product([region_name],
-                                                     storage_techs,
-                                                     years,
-                                                     [1])),
-                              columns=['REGION',
-                                       'TECHNOLOGY',
-                                       'YEAR',
-                                       'MODE_OF_OPERATION']
-                              )
-df_storage_iar['VALUE'] = 1
-df_storage_iar['FUEL'] = 'ELC' + df_storage_iar['TECHNOLOGY'].str[6:11] + '01'
-df_storage_iar = df_storage_iar[['REGION',
-                                 'TECHNOLOGY',
-                                 'FUEL',
-                                 'MODE_OF_OPERATION',
-                                 'YEAR',
-                                 'VALUE']]
+df_storage_iar = pd.DataFrame(
+    list(itertools.product([region_name], storage_techs, years, [1])),
+    columns=["REGION", "TECHNOLOGY", "YEAR", "MODE_OF_OPERATION"],
+)
+df_storage_iar["VALUE"] = 1
+df_storage_iar["FUEL"] = "ELC" + df_storage_iar["TECHNOLOGY"].str[6:11] + "01"
+df_storage_iar = df_storage_iar[
+    ["REGION", "TECHNOLOGY", "FUEL", "MODE_OF_OPERATION", "YEAR", "VALUE"]
+]
 
 wait_time = 0
-while not os.path.exists(os.path.join(output_data_dir, 'InputActivityRatio.csv')):
+while not os.path.exists(os.path.join(output_data_dir, "InputActivityRatio.csv")):
     time.sleep(5)
     wait_time += 1
-    if wait_time > 20 : break
-df_iar = pd.read_csv(os.path.join(output_data_dir, 
-                                  'InputActivityRatio.csv'))
+    if wait_time > 20:
+        break
+df_iar = pd.read_csv(os.path.join(output_data_dir, "InputActivityRatio.csv"))
 df_iar = pd.concat([df_iar, df_storage_iar])
-df_iar.to_csv(os.path.join(output_data_dir,
-                           'InputActivityRatio.csv'),
-              index=None)
+df_iar.to_csv(os.path.join(output_data_dir, "InputActivityRatio.csv"), index=None)
 time.sleep(20)
 
 # OutputActivityRatio
-df_storage_oar = pd.DataFrame(list(itertools.product([region_name],
-                                                     storage_techs,
-                                                     years,
-                                                     [2])),
-                              columns=['REGION',
-                                       'TECHNOLOGY',
-                                       'YEAR',
-                                       'MODE_OF_OPERATION']
-                              )
-df_storage_oar['VALUE'] = 1
-df_storage_oar['FUEL'] = 'ELC' + df_storage_oar['TECHNOLOGY'].str[6:11] + '01'
-df_storage_oar = df_storage_oar[['REGION',
-                                 'TECHNOLOGY',
-                                 'FUEL',
-                                 'MODE_OF_OPERATION',
-                                 'YEAR',
-                                 'VALUE']]
+df_storage_oar = pd.DataFrame(
+    list(itertools.product([region_name], storage_techs, years, [2])),
+    columns=["REGION", "TECHNOLOGY", "YEAR", "MODE_OF_OPERATION"],
)
+df_storage_oar["VALUE"] = 1
+df_storage_oar["FUEL"] = "ELC" + df_storage_oar["TECHNOLOGY"].str[6:11] + "01"
+df_storage_oar = df_storage_oar[
+    ["REGION", "TECHNOLOGY", "FUEL", "MODE_OF_OPERATION", "YEAR", "VALUE"]
+]
 
 wait_time = 0
-while not os.path.exists(os.path.join(output_data_dir, 'OutputActivityRatio.csv')):
+while not os.path.exists(os.path.join(output_data_dir, "OutputActivityRatio.csv")):
    time.sleep(5)
    wait_time += 1
-    if wait_time > 20 : break
-df_oar = pd.read_csv(os.path.join(output_data_dir, 
-                                  'OutputActivityRatio.csv'))
+    if wait_time > 20:
+        break
+df_oar = pd.read_csv(os.path.join(output_data_dir, "OutputActivityRatio.csv"))
 df_oar = pd.concat([df_oar, df_storage_oar])
-df_oar.to_csv(os.path.join(output_data_dir,
-                           'OutputActivityRatio.csv'),
-              index=None)
+df_oar.to_csv(os.path.join(output_data_dir, "OutputActivityRatio.csv"), index=None)
 time.sleep(20)
 
 # Create TechnologyToStorage and TechnologyFromStorage
-df_tech_storage = pd.DataFrame(columns=['REGION',
-                                        'TECHNOLOGY',
-                                        'STORAGE',
-                                        'MODE_OF_OPERATION'])
+df_tech_storage = pd.DataFrame(
+    columns=["REGION", "TECHNOLOGY", "STORAGE", 
"MODE_OF_OPERATION"] +) for each_node in [x for x in demand_nodes if x[:3] in geographic_scope]: - df_ts_temp = pd.DataFrame(list(itertools.product([region_name], - ['PWRBAT' + each_node +'01'], - ['BAT' + each_node +'01'], - [1,2]) - ), - columns=['REGION', - 'TECHNOLOGY', - 'STORAGE', - 'MODE_OF_OPERATION'] - ) + df_ts_temp = pd.DataFrame( + list( + itertools.product( + [region_name], + ["PWRBAT" + each_node + "01"], + ["BAT" + each_node + "01"], + [1, 2], + ) + ), + columns=["REGION", "TECHNOLOGY", "STORAGE", "MODE_OF_OPERATION"], + ) df_tech_storage = pd.concat([df_tech_storage, df_ts_temp]) df_ttos = df_tech_storage.copy() @@ -654,213 +555,161 @@ def add_storage(region_name, # TechnologyToStorage -df_ttos.loc[df_ttos['MODE_OF_OPERATION'] == 1, - 'VALUE'] = 1.0 -df_ttos.loc[df_ttos['MODE_OF_OPERATION'] == 2, - 'VALUE'] = 0.0 -df_ttos['VALUE'] = df_ttos['VALUE'].astype(float) -df_ttos.to_csv(os.path.join(output_data_dir, - 'TechnologyToStorage.csv'), - index=None) +df_ttos.loc[df_ttos["MODE_OF_OPERATION"] == 1, "VALUE"] = 1.0 +df_ttos.loc[df_ttos["MODE_OF_OPERATION"] == 2, "VALUE"] = 0.0 +df_ttos["VALUE"] = df_ttos["VALUE"].astype(float) +df_ttos.to_csv(os.path.join(output_data_dir, "TechnologyToStorage.csv"), index=None) # TechnologyFromStorage -df_tfroms.loc[df_tfroms['MODE_OF_OPERATION'] == 1, - 'VALUE'] = 0.0 -df_tfroms.loc[df_tfroms['MODE_OF_OPERATION'] == 2, - 'VALUE'] = 1.0 -df_tfroms['VALUE'] = df_tfroms['VALUE'].astype(float) -df_tfroms.to_csv(os.path.join(output_data_dir, - 'TechnologyFromStorage.csv'), - index=None) - +df_tfroms.loc[df_tfroms["MODE_OF_OPERATION"] == 1, "VALUE"] = 0.0 +df_tfroms.loc[df_tfroms["MODE_OF_OPERATION"] == 2, "VALUE"] = 1.0 +df_tfroms["VALUE"] = df_tfroms["VALUE"].astype(float) +df_tfroms.to_csv(os.path.join(output_data_dir, "TechnologyFromStorage.csv"), index=None) + # Create Conversionls, Conversionld, and Conversionlh # Conversionls -df_ls = pd.DataFrame(list(itertools.product(time_slice_list, - list(range(1,len(seasons)+1)) - ) - ), - columns=['TIMESLICE', - 'SEASON'] - ) -df_ls.loc[df_ls['TIMESLICE'].str[1:2].astype(int) == df_ls['SEASON'], - 'VALUE'] = 1 +df_ls = pd.DataFrame( + list(itertools.product(time_slice_list, list(range(1, len(seasons) + 1)))), + columns=["TIMESLICE", "SEASON"], +) +df_ls.loc[df_ls["TIMESLICE"].str[1:2].astype(int) == df_ls["SEASON"], "VALUE"] = 1 df_ls.fillna(0, inplace=True) -df_ls.to_csv(os.path.join(output_data_dir, - 'Conversionls.csv'), - index=None) +df_ls.to_csv(os.path.join(output_data_dir, "Conversionls.csv"), index=None) -df_season_set = pd.DataFrame(list(range(1,len(seasons)+1)), - columns=['VALUE']) -df_season_set.to_csv(os.path.join(output_data_dir, - 'SEASON.csv'), - index=None) +df_season_set = pd.DataFrame(list(range(1, len(seasons) + 1)), columns=["VALUE"]) +df_season_set.to_csv(os.path.join(output_data_dir, "SEASON.csv"), index=None) # Conversionld -df_ld = pd.DataFrame(list(itertools.product(time_slice_list, - [1] - ) - ), - columns=['TIMESLICE', - 'DAYTYPE'] - ) -df_ld['VALUE'] = 1 +df_ld = pd.DataFrame( + list(itertools.product(time_slice_list, [1])), columns=["TIMESLICE", "DAYTYPE"] +) +df_ld["VALUE"] = 1 df_ld.fillna(0, inplace=True) -df_ld.to_csv(os.path.join(output_data_dir, - 'Conversionld.csv'), - index=None) -df_daytype_set = pd.DataFrame([1], - columns=['VALUE']) -df_daytype_set.to_csv(os.path.join(output_data_dir, - 'DAYTYPE.csv'), - index=None) +df_ld.to_csv(os.path.join(output_data_dir, "Conversionld.csv"), index=None) +df_daytype_set = pd.DataFrame([1], columns=["VALUE"]) 
+df_daytype_set.to_csv(os.path.join(output_data_dir, "DAYTYPE.csv"), index=None) # Conversionlh -df_lh = pd.DataFrame(list(itertools.product(time_slice_list, - list(range(1,len(dayparts)+1)) - ) - ), - columns=['TIMESLICE', - 'DAILYTIMEBRACKET'] - ) -df_lh.loc[df_lh['TIMESLICE'].str[3:].astype(int) == df_lh['DAILYTIMEBRACKET'], - 'VALUE'] = 1 +df_lh = pd.DataFrame( + list(itertools.product(time_slice_list, list(range(1, len(dayparts) + 1)))), + columns=["TIMESLICE", "DAILYTIMEBRACKET"], +) +df_lh.loc[ + df_lh["TIMESLICE"].str[3:].astype(int) == df_lh["DAILYTIMEBRACKET"], "VALUE" +] = 1 df_lh.fillna(0, inplace=True) -df_lh.to_csv(os.path.join(output_data_dir, - 'Conversionlh.csv'), - index=None) -df_dayparts_set = pd.DataFrame(list(range(1,len(dayparts)+1)), - columns=['VALUE']) -df_dayparts_set.to_csv(os.path.join(output_data_dir, - 'DAILYTIMEBRACKET.csv'), - index=None) +df_lh.to_csv(os.path.join(output_data_dir, "Conversionlh.csv"), index=None) +df_dayparts_set = pd.DataFrame(list(range(1, len(dayparts) + 1)), columns=["VALUE"]) +df_dayparts_set.to_csv( + os.path.join(output_data_dir, "DAILYTIMEBRACKET.csv"), index=None +) # Daysplit daysplit = {} for dp, hr in dayparts_raw.items(): - daysplit[int(dp[1:])] = (hr[1] - hr[0])/8760 - -df_daysplit = pd.DataFrame(itertools.product(list(range(1,len(dayparts)+1)), - years), - columns=['DAILYTIMEBRACKET', - 'YEAR']) -df_daysplit['VALUE'] = df_daysplit['DAILYTIMEBRACKET'].map(daysplit) -df_daysplit = df_daysplit[['DAILYTIMEBRACKET', - 'YEAR', - 'VALUE']] -df_daysplit['VALUE'] = df_daysplit['VALUE'].round(4) -df_daysplit.to_csv(os.path.join(output_data_dir, - 'DaySplit.csv'), - index=None) + daysplit[int(dp[1:])] = (hr[1] - hr[0]) / 8760 + +df_daysplit = pd.DataFrame( + itertools.product(list(range(1, len(dayparts) + 1)), years), + columns=["DAILYTIMEBRACKET", "YEAR"], +) +df_daysplit["VALUE"] = df_daysplit["DAILYTIMEBRACKET"].map(daysplit) +df_daysplit = df_daysplit[["DAILYTIMEBRACKET", "YEAR", "VALUE"]] +df_daysplit["VALUE"] = df_daysplit["VALUE"].round(4) +df_daysplit.to_csv(os.path.join(output_data_dir, "DaySplit.csv"), index=None) # CapitalCostStorage -storage_set = [('BAT' + x +'01') for x in demand_nodes - if x[:3] in geographic_scope] -df_cap_cost_storage = pd.DataFrame(list(itertools.product(storage_set, - years) - ), - columns=['STORAGE', - 'YEAR'] - ) -df_cap_cost_storage['STORAGE_TYPE'] = df_cap_cost_storage['STORAGE'].str[:3] -storage_costs = pd.read_csv(os.path.join(input_data_dir, - 'storage_costs.csv')) - -storage_costs_df = pd.DataFrame(list(itertools.product(storage_costs['STORAGE_TYPE'].unique(), - list(range(storage_costs['YEAR'].min(), - storage_costs['YEAR'].max()+1))) - ), - columns=['STORAGE_TYPE', - 'YEAR'] - ) -storage_costs_df = storage_costs_df.merge(storage_costs, - how='left', - on=['STORAGE_TYPE', 'YEAR']) +storage_set = [("BAT" + x + "01") for x in demand_nodes if x[:3] in geographic_scope] +df_cap_cost_storage = pd.DataFrame( + list(itertools.product(storage_set, years)), columns=["STORAGE", "YEAR"] +) +df_cap_cost_storage["STORAGE_TYPE"] = df_cap_cost_storage["STORAGE"].str[:3] +storage_costs = pd.read_csv(os.path.join(input_data_dir, "storage_costs.csv")) + +storage_costs_df = pd.DataFrame( + list( + itertools.product( + storage_costs["STORAGE_TYPE"].unique(), + list(range(storage_costs["YEAR"].min(), storage_costs["YEAR"].max() + 1)), + ) + ), + columns=["STORAGE_TYPE", "YEAR"], +) +storage_costs_df = storage_costs_df.merge( + storage_costs, how="left", on=["STORAGE_TYPE", "YEAR"] +) storage_costs_df = 
storage_costs_df.interpolate() -df_cap_cost_storage = df_cap_cost_storage.merge(storage_costs_df, - how='left', - on=['STORAGE_TYPE', 'YEAR']) -df_cap_cost_storage['VALUE'] = df_cap_cost_storage['VALUE'].mul(1e6/3600) -df_cap_cost_storage['REGION'] = region_name -df_cap_cost_storage = df_cap_cost_storage[['REGION', - 'STORAGE', - 'YEAR', - 'VALUE']] -df_cap_cost_storage.to_csv(os.path.join(output_data_dir, - 'CapitalCostStorage.csv'), - index=None) +df_cap_cost_storage = df_cap_cost_storage.merge( + storage_costs_df, how="left", on=["STORAGE_TYPE", "YEAR"] +) +df_cap_cost_storage["VALUE"] = df_cap_cost_storage["VALUE"].mul(1e6 / 3600) +df_cap_cost_storage["REGION"] = region_name +df_cap_cost_storage = df_cap_cost_storage[["REGION", "STORAGE", "YEAR", "VALUE"]] +df_cap_cost_storage.to_csv( + os.path.join(output_data_dir, "CapitalCostStorage.csv"), index=None +) # CapacityToActivityUnit for Storage # ReserveMargin - -df_rm = pd.DataFrame(years, - columns=['YEAR']) -for rm, rm_params in reserve_margin.items(): - df_rm.loc[df_rm['YEAR'].between(rm_params[1], rm_params[2]), - 'VALUE'] = (1 + rm_params[0]/100) - -df_rm = df_rm.interpolate() -df_rm['REGION'] = region_name -df_rm = df_rm[['REGION', - 'YEAR', - 'VALUE']] -df_rm.to_csv(os.path.join(output_data_dir, - 'ReserveMargin.csv'), - index=None) + +if reserve_margin: + df_rm = pd.DataFrame(years, columns=["YEAR"]) + for rm, rm_params in reserve_margin.items(): + df_rm.loc[df_rm["YEAR"].between(rm_params[1], rm_params[2]), "VALUE"] = ( + 1 + rm_params[0] / 100 + ) + + df_rm = df_rm.interpolate() + df_rm["REGION"] = region_name + df_rm = df_rm[["REGION", "YEAR", "VALUE"]] +else: + df_rm = pd.DataFrame(columns=["REGION", "YEAR", "VALUE"]) +df_rm.to_csv(os.path.join(output_data_dir, "ReserveMargin.csv"), index=None) # ReserveMarginTagTechnology -df_rmtt = pd.read_csv(os.path.join(output_data_dir, - 'TECHNOLOGY.csv')) -reserve_margin_techs = ['COA', - 'COG', - 'OCG', - 'CCG', - 'PET', - 'URN', - 'OIL', - 'OTH', - 'BIO', - 'HYD', - 'GEO', - 'SPV', - 'WON' - ] -rm_techs = [x for x in df_rmtt['VALUE'].unique() - if x.startswith('PWR') - if x[3:6] in reserve_margin_techs] -df_rmtt = pd.DataFrame(list(itertools.product([region_name], - rm_techs, - years, - [1])), - columns=['REGION', - 'TECHNOLOGY', - 'YEAR', - 'VALUE'] - ) -df_rmtt.to_csv(os.path.join(output_data_dir, - 'ReserveMarginTagTechnology.csv'), - index=None) +df_rmtt = pd.read_csv(os.path.join(output_data_dir, "TECHNOLOGY.csv")) +reserve_margin_techs = [ + "COA", + "COG", + "OCG", + "CCG", + "PET", + "URN", + "OIL", + "OTH", + "BIO", + "HYD", + "GEO", + "SPV", + "WON", +] +rm_techs = [ + x + for x in df_rmtt["VALUE"].unique() + if x.startswith("PWR") + if x[3:6] in reserve_margin_techs +] +df_rmtt = pd.DataFrame( + list(itertools.product([region_name], rm_techs, years, [1])), + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], +) +df_rmtt.to_csv( + os.path.join(output_data_dir, "ReserveMarginTagTechnology.csv"), index=None +) # ReserveMarginTagFuel -df_rmtf = pd.read_csv(os.path.join(output_data_dir, - 'FUEL.csv')) -rm_fuels = [x for x in df_rmtf['VALUE'].unique() - if x.startswith('ELC') - if x.endswith('01')] -df_rmtf = pd.DataFrame(list(itertools.product([region_name], - rm_fuels, - years, - [1])), - columns=['REGION', - 'FUEL', - 'YEAR', - 'VALUE'] - ) -df_rmtf.to_csv(os.path.join(output_data_dir, - 'ReserveMarginTagFuel.csv'), - index=None) -logging.info('Time Slicing Completed') +df_rmtf = pd.read_csv(os.path.join(output_data_dir, "FUEL.csv")) +rm_fuels = [ + x for x in 
df_rmtf["VALUE"].unique() if x.startswith("ELC") if x.endswith("01") +] +df_rmtf = pd.DataFrame( + list(itertools.product([region_name], rm_fuels, years, [1])), + columns=["REGION", "FUEL", "YEAR", "VALUE"], +) +df_rmtf.to_csv(os.path.join(output_data_dir, "ReserveMarginTagFuel.csv"), index=None) +logging.info("Time Slicing Completed") diff --git a/workflow/scripts/osemosys_global/emissions.py b/workflow/scripts/osemosys_global/emissions.py index a8bc4196..60bbf05c 100644 --- a/workflow/scripts/osemosys_global/emissions.py +++ b/workflow/scripts/osemosys_global/emissions.py @@ -7,142 +7,169 @@ import itertools -# Logging formatting -logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO) +# Logging formatting +logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO) -# Constant for tech to fuel emission type mapping +# Constant for tech to fuel emission type mapping _TECH_TO_FUEL = { #'BIO':'Bagasse', - 'WAS':'Municipal Solid Waste', - 'COA':'Lignite Coal', - 'COG':'Lignite Coal', - 'OCG':'Natural Gas', - 'CCG':'Natural Gas', - 'GAS':'Natural Gas', - 'PET':'Crude Oil', - 'OIL':'Crude Oil', - 'OTH':'Natural Gas', - 'CCS':'Lignite Coal', + "WAS": "Municipal Solid Waste", + "COA": "Lignite Coal", + "COG": "Lignite Coal", + "OCG": "Natural Gas", + "CCG": "Natural Gas", + "GAS": "Natural Gas", + "PET": "Crude Oil", + "OIL": "Crude Oil", + "OTH": "Natural Gas", + "CCS": "Lignite Coal", } -# Emission name -_EMISSION = 'CO2' +# Emission name +_EMISSION = "CO2" + def main(): - """Assigns CO2 equivalent values to each technology over all its modes - of operation. For technologies that do not have emissions, it assigns a - value of zero. A global emission penalty value is applied and the - emission type (CO2) is written to the emission set. + """Assigns CO2 equivalent values to each technology over all its modes + of operation. For technologies that do not have emissions, it assigns a + value of zero. A global emission penalty value is applied and the + emission type (CO2) is written to the emission set. """ # CONFIGURATION PARAMETERS config_paths = ConfigPaths() - config = ConfigFile('config') - + config = ConfigFile("config") + output_data_dir = config_paths.output_data_dir - emission_penalty = config.get('emission_penalty') # M$/MT - emission_limit = config.get('emission_limit') # MT of CO2-eq. - start_year = config.get('startYear') - end_year = config.get('endYear') + emission_penalty = config.get("emission_penalty") # M$/MT + emission_limit = config.get("emission_limit") # MT of CO2-eq. 
+    start_year = config.get("startYear")
+    end_year = config.get("endYear")
     region = config.region_name
-    
+
     # ASSIGN EMISSION ACTIVITY RATIOS
     df_ear = get_ear(_EMISSION)
-    df_ear.to_csv(Path(output_data_dir, 'EmissionActivityRatio.csv'),
-                  index=False)
-    logging.info('Successfully generated emission activity ratio')
-    
-    # ASSIGN EMISSION 
-    
-    #df_emission = pd.DataFrame([_EMISSION], columns=['VALUE'])
-    df_emission = df_ear[['EMISSION']].drop_duplicates()
-    df_emission.rename(columns={'EMISSION': 'VALUE'},
-                       inplace=True)
-    df_emission.to_csv(Path(output_data_dir, 'EMISSION.csv'),
-                       index=False)
-    logging.info('Successfully generated emission set')
-    
+    df_ear.to_csv(Path(output_data_dir, "EmissionActivityRatio.csv"), index=False)
+    logging.info("Successfully generated emission activity ratio")
+
+    # ASSIGN EMISSION
+
+    # df_emission = pd.DataFrame([_EMISSION], columns=['VALUE'])
+    df_emission = df_ear[["EMISSION"]].drop_duplicates()
+    df_emission.rename(columns={"EMISSION": "VALUE"}, inplace=True)
+    df_emission.to_csv(Path(output_data_dir, "EMISSION.csv"), index=False)
+    logging.info("Successfully generated emission set")
+
     # Create of list of EMISSIONS
-    emissions = list(df_emission['VALUE'])
-    
-    # ASSIGN EMISSION PENALTY 
-    
-    df_emission_penalty = get_emission_penalty(emissions, emission_penalty)
-    df_emission_penalty.to_csv(Path(output_data_dir, 'EmissionsPenalty.csv'),
-                               index=False)
-    logging.info('Successfully generated emission penalty')
-    
+    emissions = list(df_emission["VALUE"])
+
+    # ASSIGN EMISSION PENALTY
+
+    if emission_penalty:
+        df_emission_penalty = get_emission_penalty(emissions, emission_penalty)
+    else:
+        df_emission_penalty = pd.DataFrame(
+            columns=["REGION", "EMISSION", "YEAR", "VALUE"]
+        )
+    df_emission_penalty.to_csv(
+        Path(output_data_dir, "EmissionsPenalty.csv"), index=False
+    )
+    logging.info("Successfully generated emission penalty")
+
     # ADD EMISSION LIMITS
-    df_emission_limits = add_emission_limits(emissions,
-                                             emission_limit)
-    df_emission_limits.to_csv(Path(output_data_dir, 'AnnualEmissionLimit.csv'),
-                              index=False)
-    logging.info('Successfully generated annual emissions limit')
+    if emission_limit:
+        df_emission_limits = add_emission_limits(emissions, emission_limit)
+    else:
+        df_emission_limits = pd.DataFrame(
+            columns=["REGION", "EMISSION", "YEAR", "VALUE"]
+        )
+    df_emission_limits.to_csv(
+        Path(output_data_dir, "AnnualEmissionLimit.csv"), index=False
+    )
+    logging.info("Successfully generated annual emissions limit")
+
 
 def get_co2_emission_factors():
-    """Gets co2 emission factors for diferent fuels. 
-
-    Reads in a file containing co2, ch4, n2o emission factors and global 
-    warming potentials for various fuel types. This function performs unit 
-    conversions to convert everyting to MegaTonnes per PetaJoule and collapses
-    the different emission types to a single co2 equivalent value for each 
-    fuel. 
+    """Gets co2 emission factors for different fuels.
+
+    Reads in a file containing co2, ch4, n2o emission factors and global
+    warming potentials for various fuel types. This function performs unit
+    conversions to convert everything to MegaTonnes per PetaJoule and collapses
+    the different emission types to a single co2 equivalent value for each
+    fuel. 
Returns: - Dictionary holding fuel type as key and co2 factor as value in units - of MT/PJ + Dictionary holding fuel type as key and co2 factor as value in units + of MT/PJ - Example: + Example: co2_factors = get_co2_emission_factors() - co2_factors['Natural Gas'] + co2_factors['Natural Gas'] -> 0.0503 """ - # Source for all emission factors comes from: + # Source for all emission factors comes from: # https://www.epa.gov/sites/default/files/2018-03/documents/emission-factors_mar_2018_0.pdf # Configuration parameters config_paths = ConfigPaths() - # Read in emission factors + # Read in emission factors input_data_dir = config_paths.input_data_dir - df_raw = pd.read_csv(Path(input_data_dir,'emission_factors.csv')) - df_raw = df_raw.drop([0]).reset_index(drop=True) # drop units row + df_raw = pd.read_csv(Path(input_data_dir, "emission_factors.csv")) + df_raw = df_raw.drop([0]).reset_index(drop=True) # drop units row - # Convert co2 factors from kg/mmbtu to MT/PJ + # Convert co2 factors from kg/mmbtu to MT/PJ # kg/mmbtu * 1mmbtu/1.05GJ * 1000000GJ / PJ * 1T/1000kg * 1MT/1000000T # Multiply by global warming potential to get co2_eq - co2 = df_raw['co2_factor'].astype(float) * (1/1055) * df_raw['co2_gwp'].astype(float) + co2 = ( + df_raw["co2_factor"].astype(float) + * (1 / 1055) + * df_raw["co2_gwp"].astype(float) + ) - # Convert ch4 and n2o factors from g/mmbtu to MT/PJ + # Convert ch4 and n2o factors from g/mmbtu to MT/PJ # kg/mmbtu * 1mmbtu/1.05GJ * 1000000GJ / PJ * 1T/1000000g * 1MT/1000000T # Multiply by global warming potential to get co2_eq - ch4 = df_raw['ch4_factor'].astype(float) * (1/1055000) * df_raw['ch4_gwp'].astype(float) - n2o = df_raw['n2o_factor'].astype(float) * (1/1055000) * df_raw['n2o_gwp'].astype(float) + ch4 = ( + df_raw["ch4_factor"].astype(float) + * (1 / 1055000) + * df_raw["ch4_gwp"].astype(float) + ) + n2o = ( + df_raw["n2o_factor"].astype(float) + * (1 / 1055000) + * df_raw["n2o_gwp"].astype(float) + ) # Find total CO2 equivalent - data = {'co2':co2, 'ch4':ch4, 'n2o':n2o} - df = pd.DataFrame(data).set_axis(df_raw['FUEL TYPE']) - df['co2_eq'] = round(df.sum(axis=1),4).astype(float) + data = {"co2": co2, "ch4": ch4, "n2o": n2o} + df = pd.DataFrame(data).set_axis(df_raw["FUEL TYPE"]) + df["co2_eq"] = round(df.sum(axis=1), 4).astype(float) + + return df.set_index(df.index).to_dict()["co2_eq"] - return df.set_index(df.index).to_dict()['co2_eq'] def get_ear(emission): """Creates emission activity ratio dataframe. - + This function reads in an existing input activity ratio parameter file and removes the fuel and year columns. This leaves a dataframe with info - on when all technologies are allowed to operate over the model horizon. + on when all technologies are allowed to operate over the model horizon. A column is added in to hold the emission type and emission activity ratio - based. + based. - Args: + Args: emission: string describing the emission type (ie. 'CO2') Returns: - df: Dataframe describing emission activity ratio. Dataframe headers + df: Dataframe describing emission activity ratio. 
Dataframe headers are REGION, TECHNOLOGY, EMISSION, MODE_OF_OPERATION, YEAR, VALUE """ @@ -150,188 +177,161 @@ def get_ear(emission): config_paths = ConfigPaths() output_data_dir = config_paths.output_data_dir - # GET EMISSION FACTORS + # GET EMISSION FACTORS co2_factors = get_co2_emission_factors() # GET INFO FROM INPUT ACTIVITY RATIO - df_oar = pd.read_csv(Path(output_data_dir, 'OutputActivityRatio.csv')) - df = df_oar.drop(['FUEL', 'VALUE'], axis=1) - #df = df[(df['TECHNOLOGY'].str.startswith('MIN')) | + df_oar = pd.read_csv(Path(output_data_dir, "OutputActivityRatio.csv")) + df = df_oar.drop(["FUEL", "VALUE"], axis=1) + # df = df[(df['TECHNOLOGY'].str.startswith('MIN')) | # (df['TECHNOLOGY'].str.startswith('PWRCCS'))] - df = df[(df['TECHNOLOGY'].str.startswith('PWR')) & - ~(df['TECHNOLOGY'].str.startswith('PWRTRN'))] - + df = df[ + (df["TECHNOLOGY"].str.startswith("PWR")) + & ~(df["TECHNOLOGY"].str.startswith("PWRTRN")) + ] + # ADD MAPPING OF TECHNOLOGY TO EMISSION ACTIVITY RATIO - df['TECH_CODE'] = df['TECHNOLOGY'].str[3:6] - df['COUNTRY'] = df['TECHNOLOGY'].str[6:9] - df['FUEL_NAME'] = df['TECH_CODE'].map(_TECH_TO_FUEL) - df['VALUE'] = df['FUEL_NAME'].map(co2_factors) - ''' + df["TECH_CODE"] = df["TECHNOLOGY"].str[3:6] + df["COUNTRY"] = df["TECHNOLOGY"].str[6:9] + df["FUEL_NAME"] = df["TECH_CODE"].map(_TECH_TO_FUEL) + df["VALUE"] = df["FUEL_NAME"].map(co2_factors) + """ ccs_co2_factor = df.loc[df['TECH_CODE'].str.startswith('COA'), 'VALUE'].mean() ccs_co2_factor = round(ccs_co2_factor*(-3), 4) df.loc[df['TECH_CODE'].str.startswith('CCS'), 'VALUE'] = ccs_co2_factor - ''' - #techs = pd.Series(df['TECHNOLOGY'].str[3:6]) - #fuels = techs.map(_TECH_TO_FUEL) - #df['VALUE'] = fuels.map(co2_factors) - + """ + # techs = pd.Series(df['TECHNOLOGY'].str[3:6]) + # fuels = techs.map(_TECH_TO_FUEL) + # df['VALUE'] = fuels.map(co2_factors) + # Multiply by InputActivityRatio - df_iar = pd.read_csv(Path(output_data_dir, - 'InputActivityRatio.csv')) - df_iar.rename(columns={'VALUE': 'IAR'}, - inplace=True) - df = pd.merge(df, df_iar, - how='left', - on=['REGION', 'TECHNOLOGY', 'MODE_OF_OPERATION', 'YEAR']) - df['VALUE'] = df['VALUE'].fillna(0) - df['VALUE'] = df['VALUE'] * df['IAR'] + df_iar = pd.read_csv(Path(output_data_dir, "InputActivityRatio.csv")) + df_iar.rename(columns={"VALUE": "IAR"}, inplace=True) + df = pd.merge( + df, df_iar, how="left", on=["REGION", "TECHNOLOGY", "MODE_OF_OPERATION", "YEAR"] + ) + df["VALUE"] = df["VALUE"].fillna(0) + df["VALUE"] = df["VALUE"] * df["IAR"] df.drop_duplicates(inplace=True) - df['VALUE'] = df['VALUE'].round(4) - + df["VALUE"] = df["VALUE"].round(4) + # ADD IN EMISSION COLUMN - df['EMISSION'] = emission + df['COUNTRY'] - - df.loc[df['TECH_CODE'].str.startswith('CCS'), - 'VALUE'] = df.loc[df['TECH_CODE'].str.startswith('COA'), - 'VALUE'].mean() * 0.1 - df['VALUE'] = df['VALUE'].round(4) + df["EMISSION"] = emission + df["COUNTRY"] + + df.loc[df["TECH_CODE"].str.startswith("CCS"), "VALUE"] = ( + df.loc[df["TECH_CODE"].str.startswith("COA"), "VALUE"].mean() * 0.1 + ) + df["VALUE"] = df["VALUE"].round(4) # Final EmissionActivityRatio dataframe - df = df[[ - 'REGION', - 'TECHNOLOGY', - 'EMISSION', - 'MODE_OF_OPERATION', - 'YEAR', - 'VALUE']] - + df = df[["REGION", "TECHNOLOGY", "EMISSION", "MODE_OF_OPERATION", "YEAR", "VALUE"]] + return df -def get_emission_penalty(emissions, emission_penalty): - """Creates emission penalty dataframe. + +def get_emission_penalty(emissions, emission_penalty): + """Creates emission penalty dataframe. 
The emission penalty is applied at a global geographical level. All regions - and subregions have the same penalty. + and subregions have the same penalty. - Args: + Args: emission: string describing the emission type (ie. 'CO2') penalty: emission penalty in M$/MT - Returns: - df: Dataframe describing emission penalty. Dataframe headers are REGION, + Returns: + df: Dataframe describing emission penalty. Dataframe headers are REGION, EMISSION, YEAR, VALUE """ # CONFIGURATION PARAMETERS - config = ConfigFile('config') - start_year = config.get('startYear') - end_year = config.get('endYear') - years = list(range(start_year, end_year+1)) + config = ConfigFile("config") + start_year = config.get("startYear") + end_year = config.get("endYear") + years = list(range(start_year, end_year + 1)) region = config.region_name # GENERATE DATA - + # Create dataframe template to calculate SpecifiedAnnualDemand - df = pd.DataFrame(list(itertools.product(emissions, - years) - ), - columns = ['EMISSION', - 'YEAR'] - ) - - if not emission_penalty is None: - for ep_params in emission_penalty: - df.loc[(df['YEAR'].between(ep_params[2], - ep_params[3])) & - (df['EMISSION'].isin([ep_params[0] + - ep_params[1]])), - 'VALUE'] = ep_params[4] - - df = df.pivot(index=['YEAR'], - columns=['EMISSION'], - values='VALUE').reset_index() - #df = df.interpolate() - + df = pd.DataFrame( + list(itertools.product(emissions, years)), columns=["EMISSION", "YEAR"] + ) + + for ep_params in emission_penalty: + df.loc[ + (df["YEAR"].between(ep_params[2], ep_params[3])) + & (df["EMISSION"].isin([ep_params[0] + ep_params[1]])), + "VALUE", + ] = ep_params[4] + + df = df.pivot(index=["YEAR"], columns=["EMISSION"], values="VALUE").reset_index() + # df = df.interpolate() + # Drop all columns with only NaN - df.dropna(axis=1, - how='all', - inplace=True) - + df.dropna(axis=1, how="all", inplace=True) + # Melt to get 'COUNTRY' column and remove all rows with NaN - df = pd.melt(df, - id_vars=['YEAR'], - value_vars=[x for x in df.columns - if x not in ['YEAR']], - var_name='EMISSION', - value_name='VALUE') - df.dropna(axis=0, - inplace=True) - df['REGION'] = region - df = df[['REGION', - 'EMISSION', - 'YEAR', - 'VALUE']] - + df = pd.melt( + df, + id_vars=["YEAR"], + value_vars=[x for x in df.columns if x not in ["YEAR"]], + var_name="EMISSION", + value_name="VALUE", + ) + df.dropna(axis=0, inplace=True) + df["REGION"] = region + df = df[["REGION", "EMISSION", "YEAR", "VALUE"]] + return df -def add_emission_limits(emissions, - emission_limit): - +def add_emission_limits(emissions, emission_limit): + # CONFIGURATION PARAMETERS - config = ConfigFile('config') - start_year = config.get('startYear') - end_year = config.get('endYear') - years = list(range(start_year, end_year+1)) + config = ConfigFile("config") + start_year = config.get("startYear") + end_year = config.get("endYear") + years = list(range(start_year, end_year + 1)) region = config.region_name - + # GENERATE DATA - + # Create dataframe template to calculate SpecifiedAnnualDemand - df = pd.DataFrame(list(itertools.product(emissions, - years) - ), - columns = ['EMISSION', - 'YEAR'] - ) - - if not emission_limit is None: - for el_params in emission_limit: - df.loc[(df['YEAR'] == el_params[2]) & - (df['EMISSION'].isin([el_params[0] + - el_params[1]])), - 'VALUE'] = el_params[3] - - df = df.pivot(index=['YEAR'], - columns=['EMISSION'], - values='VALUE').reset_index() - + df = pd.DataFrame( + list(itertools.product(emissions, years)), columns=["EMISSION", "YEAR"] + ) + + for el_params in 
emission_limit: + df.loc[ + (df["YEAR"] == el_params[2]) + & (df["EMISSION"].isin([el_params[0] + el_params[1]])), + "VALUE", + ] = el_params[3] + + df = df.pivot(index=["YEAR"], columns=["EMISSION"], values="VALUE").reset_index() + # Drop all columns with only NaN - df.dropna(axis=1, - how='all', - inplace=True) + df.dropna(axis=1, how="all", inplace=True) df = df.interpolate() # Melt to get 'COUNTRY' column and remove all rows with NaN - df = pd.melt(df, - id_vars=['YEAR'], - value_vars=[x for x in df.columns - if x not in ['YEAR']], - var_name='EMISSION', - value_name='VALUE') - - df.dropna(axis=0, - inplace=True) - df['REGION'] = region - df = df[['REGION', - 'EMISSION', - 'YEAR', - 'VALUE']] - ''' + df = pd.melt( + df, + id_vars=["YEAR"], + value_vars=[x for x in df.columns if x not in ["YEAR"]], + var_name="EMISSION", + value_name="VALUE", + ) + + df.dropna(axis=0, inplace=True) + df["REGION"] = region + df = df[["REGION", "EMISSION", "YEAR", "VALUE"]] + """ el_years = {} years = list(range(start_year, @@ -367,8 +367,9 @@ def add_emission_limits(emissions, 'EMISSION', 'YEAR', 'VALUE']) - ''' + """ return df -if __name__ == '__main__': + +if __name__ == "__main__": main()