Feature minimal workflow #60

Merged
8 changes: 8 additions & 0 deletions README.md
@@ -106,6 +106,14 @@ Tests of models with continental and national resolution run automatically when

Exchanging `regional` with `national` or `continental` allows you to run tests on the respective resolution explicitly.

## Run minimal test

As a developer, you may want to run the entire workflow often to spot errors early. For that, you can use a minimal test configuration that takes less time to run.

snakemake --use-conda --configfile="config/minimal.yaml"

Make sure to run this in a clean working directory; do not reuse the working directory of your normal configuration.
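If you want a quick sanity check of what the minimal configuration covers before launching the run, you can inspect it with a short Python snippet. This is only a sketch: it assumes PyYAML is available, that it is run from the repository root, and it uses the keys defined in `config/minimal.yaml` below.

```python
import yaml

# Load the minimal test configuration and print its spatial and temporal scope.
with open("config/minimal.yaml") as f:
    config = yaml.safe_load(f)

print("Countries:", config["scope"]["countries"])   # ['Ireland', 'United Kingdom']
print("Bounding box:", config["scope"]["bounds"])   # x/y min/max in degrees
print("Weather and load year:", config["year"])     # 2016
```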

## License

euro-calliope has been developed and is maintained by Tim Tröndle, IASS Potsdam. The code in this repository is MIT licensed.
4 changes: 1 addition & 3 deletions Snakefile
@@ -93,9 +93,7 @@ rule hydro_capacities:
input:
script = script_dir + "hydro.py",
locations = rules.units.output[0],
plants = rules.preprocess_hydro_stations.output[0],
phs_storage_capacities = config["data-sources"]["national-phs-storage-capacities"]
params: scale_phs = config["parameters"]["scale-phs-according-to-geth-et-al"]
plants = rules.preprocess_hydro_stations.output[0]
output: "build/data/{resolution}/hydro-capacities-mw.csv"
conda: "envs/geo.yaml"
script: "scripts/hydro.py"
77 changes: 77 additions & 0 deletions config/minimal.yaml
@@ -0,0 +1,77 @@
# A simple config to allow for quick test runs of the workflow.
data-sources:
biofuel-potentials: data/biofuels/potentials/{feedstock}.csv
biofuel-costs: data/biofuels/costs/{feedstock}.csv
eez: data/World_EEZ_v10_20180221/eez_v10.shp
irena-generation: data/irena/hydro-generation-europe.csv
national-phs-storage-capacities: data/pumped-hydro/storage-capacities-gwh.csv
capacity-factors: https://zenodo.org/record/3899687/files/{filename}?download=1
gadm: https://biogeo.ucdavis.edu/data/gadm3.6/gpkg/gadm36_{country_code}_gpkg.zip
hydro-basins: https://www.dropbox.com/sh/hmpwobbz9qixxpe/AADeU9iCgMd3ZO1KgrFmfWu6a/HydroBASINS/standard/eu/hybas_eu_lev07_v1c.zip?dl=1
hydro-stations: https://zenodo.org/record/4289229/files/energy-modelling-toolkit/hydro-power-database-v7.zip?download=1
load: https://data.open-power-system-data.org/time_series/2019-06-05/time_series_60min_stacked.csv
nuts: https://ec.europa.eu/eurostat/cache/GISCO/geodatafiles/NUTS_2013_01M_SH.zip
potentials: https://zenodo.org/record/3533038/files/possibility-for-electricity-autarky.zip
root-directory: . # point to the root directory if the working directory is not the root directory
scaling-factors: # values are tuned for models with a few hours resolution and one year duration
power: 0.00001 # from MW(h) to 100 GW(h)
area: 0.0001 # from km2 to 10,000 km2
monetary: 0.000000001 # from EUR to 1 billion EUR
capacity-factors:
min: 0.001 # consider smaller values to be 0; this helps numerics in the LP
max: 10 # for hydro reservoirs, leading to a numerical range of 1e5 (hourly resolution)
average: # average estimation used to transform annual fixed to variable costs
pv: 0.139 # median of average 2016 open-field factors for ~2700 points in Europe
onshore: 0.3021 # median of average 2016 factors for ~2700 points in Europe
offshore: 0.4223 # median of average 2016 factors for ~2800 points in Europe
ror: 0.536781 # median of average 2016 factors for 1889 hydro stations in Europe
trim-ninja-timeseries: True # trims renewables.ninja timeseries to the year in question
year: 2016
crs: "EPSG:4326"
parameters:
maximum-installable-power-density: # this is not the yield, but the density of installed power
pv-on-tilted-roofs: 160 # (MW/km^2) from [@Gagnon:2016][@Klauser:2016], i.e. 16% efficiency
pv-on-flat-areas: 80 # (MW/km^2) from [@Gagnon:2016][@Klauser:2016][@Wirth:2017]
onshore-wind: 8 # (MW/km^2) from [@EuropeanEnvironmentAgency:2009]
offshore-wind: 15 # (MW/km^2) from [@EuropeanEnvironmentAgency:2009]
roof-share: # from [@Trondle:2019]
E: 0.1660
N: 0.1817
S: 0.1821
W: 0.1681
flat: 0.3020
jrc-biofuel:
scenario: "medium"
potential-year: "2020"
cost-year: "2030"
biofuel-efficiency: 0.45
scale-phs-according-to-geth-et-al: True
sea-connections:
continental:
national: # Source: https://www.entsoe.eu/data/map/
- [GBR, IRL]
regional: # Source: https://www.entsoe.eu/data/map/
- [GBR.4_1, IRL.17_1] # Wales and Meath
scope:
countries:
- "Ireland"
- "United Kingdom"
bounds:
x_min: -10.62 # in degrees east
x_max: 1.8 # in degrees east
y_min: 49.8 # in degrees north
y_max: 60.9 # in degrees north
shapes: # This config must be consistent with data from https://doi.org/10.5281/zenodo.3244985.
continental:
Ireland: nuts0
United Kingdom: nuts0
national:
Ireland: nuts0
United Kingdom: nuts0
regional:
# The link between NUTS and administrative units unfortunately is not obvious.
# It's not documented anywhere -- at least I could not find any information.
# Hence, I am using GADM here instead.
# Validation source: https://en.wikipedia.org/wiki/List_of_administrative_divisions_by_country
Ireland: gadm1 # match 26
United Kingdom: gadm1 # match 4
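To make the `scaling-factors` block above concrete: the factors simply rescale the model's units so that numerical values stay in a well-conditioned range for the LP solver. A small illustration with invented input values (a sketch only; euro-calliope applies these factors internally when building the model):

```python
# Illustration of the unit rescaling implied by the scaling-factors block above.
# The input values are invented for demonstration purposes.
scaling = {"power": 0.00001, "area": 0.0001, "monetary": 0.000000001}

capacity_mw = 2_500            # 2.5 GW of installed capacity
area_km2 = 12_000              # eligible area in km2
cost_eur = 3_000_000_000       # 3 billion EUR of investment cost

print(capacity_mw * scaling["power"])     # 0.025 -> in units of 100 GW
print(area_km2 * scaling["area"])         # 1.2   -> in units of 10,000 km2
print(cost_eur * scaling["monetary"])     # 3.0   -> in units of 1 billion EUR
```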
2 changes: 1 addition & 1 deletion envs/test.yaml
@@ -3,7 +3,7 @@ channels:
- conda-forge
- gurobi
dependencies:
- python=3.8
- python=3.7
- numpy=1.16.2
- pandas=0.25.1
- gurobi=8.1.1
26 changes: 16 additions & 10 deletions rules/hydro.smk
@@ -59,26 +59,32 @@ rule stations_database:
"""


rule fix_basins:
message: "Fix invalid basins."
rule preprocess_basins:
message: "Preprocess basins."
input:
script = script_dir + "hydro/fix_basins.py",
script = script_dir + "hydro/preprocess_basins.py",
basins = rules.basins_database.output[0]
params:
x_min = config["scope"]["bounds"]["x_min"],
x_max = config["scope"]["bounds"]["x_max"],
y_min = config["scope"]["bounds"]["y_min"],
y_max = config["scope"]["bounds"]["y_max"]
output: "build/data/hybas_eu_lev07_v1c.gpkg"
conda: "../envs/hydro.yaml"
script: "../scripts/hydro/fix_basins.py"
script: "../scripts/hydro/preprocess_basins.py"


rule preprocess_hydro_stations:
# Some locations of stations are imprecise and in the sea. Slightly move them.
# Some other stations seem incorrect. Remove.
# Add missing pumped hydro stations in Romania.
message: "Preprocess hydro stations."
input:
script = script_dir + "hydro/preprocess_hydro_stations.py",
stations = rules.stations_database.output[0],
basins = rules.fix_basins.output[0]
params: buffer_size = 1 / 60 # move stations up to 1 arcminute < 1 km
basins = rules.preprocess_basins.output[0],
phs_storage_capacities = config["data-sources"]["national-phs-storage-capacities"]
params:
buffer_size = 1 / 60, # move stations up to 1 arcminute < 1 km
countries = config["scope"]["countries"],
scale_phs = config["parameters"]["scale-phs-according-to-geth-et-al"]
output: "build/data/jrc-hydro-power-plant-database-preprocessed.csv"
conda: "../envs/hydro.yaml"
script: "../scripts/hydro/preprocess_hydro_stations.py"
@@ -89,7 +95,7 @@ rule inflow_m3:
input:
script = script_dir + "hydro/inflow_m3.py",
stations = rules.preprocess_hydro_stations.output[0],
basins = rules.fix_basins.output[0],
basins = rules.preprocess_basins.output[0],
runoff = rules.download_runoff_data.output[0]
params: year = config["year"]
output: "build/data/hydro-electricity-with-water-inflow.nc"
68 changes: 3 additions & 65 deletions scripts/hydro.py
@@ -1,27 +1,17 @@
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import pycountry

WGS_84 = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"


def main(path_to_plants, path_to_locations, path_to_phs_storage_capacities, scale_phs, path_to_output):
def main(path_to_plants, path_to_locations, path_to_output):
locations = gpd.read_file(path_to_locations).to_crs(WGS_84).set_index("id")
plants = pd.read_csv(path_to_plants, index_col="id")

hror = capacities_per_location(plants[plants.type == "HROR"].copy(), locations, tech_type="hror",
fill_storage_capacity=False)
hror = capacities_per_location(plants[plants.type == "HROR"].copy(), locations, tech_type="hror")
hdam = capacities_per_location(plants[plants.type == "HDAM"].copy(), locations, tech_type="hdam")
hphs = capacities_per_location(plants[plants.type == "HPHS"].copy(), locations, tech_type="hphs")
if scale_phs:
hphs["storage_capacity_hphs_MWh"] = scale_phs_storage_capacities(
hphs=hphs,
locations=locations,
national_storage_capacities=read_national_phs_storage_capacities(
path_to_phs_storage_capacities, locations
)
)

pd.concat([hror, hdam, hphs], axis="columns").to_csv(
path_to_output,
@@ -30,9 +20,7 @@ def main(path_to_plants, path_to_locations, path_to_phs_storage_capacities, scal
)


def capacities_per_location(plants, locations, tech_type, fill_storage_capacity=True):
if fill_storage_capacity:
plants = fill_missing_storage_capacity_values(plants.copy())
def capacities_per_location(plants, locations, tech_type):
plant_centroids = gpd.GeoDataFrame(
crs=WGS_84,
geometry=list(map(Point, zip(plants.lon, plants.lat))),
@@ -47,59 +35,9 @@ def capacities_per_location(plants, locations, tech_type, fill_storage_capacity=
.rename(columns={"storage_capacity_MWh": f"storage_capacity_{tech_type}_MWh"}))


def fill_missing_storage_capacity_values(plants):
# ASSUME country specific median E/P ratio for missing values, global median where no country specific available
e_to_p = plants.storage_capacity_MWh / plants.installed_capacity_MW
based_on_global = e_to_p.median() * plants.installed_capacity_MW
based_on_country_specific = plants.merge(
e_to_p.groupby(plants.country_code).median().rename("country_specific"),
left_on="country_code",
right_index=True,
how="left"
).loc[:, "country_specific"] * plants.installed_capacity_MW
plants["storage_capacity_MWh"] = (plants["storage_capacity_MWh"].where(pd.notnull, other=based_on_country_specific)
.where(pd.notnull, other=based_on_global))
assert not plants["storage_capacity_MWh"].isnull().any()
return plants


def read_national_phs_storage_capacities(path_to_data, locations):
data = pd.read_csv(path_to_data, index_col=0)
data.index = [pycountry.countries.lookup(iso2).alpha_3 for iso2 in data.index]
data["storage-capacity-mwh"] = data["storage-capacity-gwh"] * 1000
if (len(locations.index) == 1) and (locations.index[0] == "EUR"): # special case for continental level
data = pd.DataFrame(index=["EUR"], data=data.sum(axis=0).to_dict())
return data.reindex(locations.country_code.unique(), fill_value=0)


def scale_phs_storage_capacities(hphs, locations, national_storage_capacities):
"""Scale PHS storage capacities to match (Geth et al., 2015).

Storage capacities of pumped hydro within the JRC database may seem too high.
Thus, they are scaled here if requested by the user (`scale-phs-according-to-geth-et-al: true` in config),
so that the national numbers of (Geth et al., 2015) are fulfilled.

Geth, F., Brijs, T., Kathan, J., Driesen, J., Belmans, R., 2015. An overview of large-scale
stationary electricity storage plants in Europe: Current status and new developments. Renewable
and Sustainable Energy Reviews 52, 1212–1227. https://doi.org/10.1016/j.rser.2015.07.145
"""
locations = pd.concat([
locations,
hphs.groupby(locations.country_code)
.storage_capacity_hphs_MWh
.transform(lambda x: x / x.sum())
.rename("storage_capacity_share")
], axis=1)
locations = locations.merge(national_storage_capacities, right_index=True, left_on="country_code")
new_storage_capacities = locations["storage_capacity_share"] * locations["storage-capacity-mwh"]
return new_storage_capacities.rename("storage_capacity_hphs_MWh").fillna(0)


if __name__ == "__main__":
main(
path_to_plants=snakemake.input.plants,
path_to_locations=snakemake.input.locations,
path_to_phs_storage_capacities=snakemake.input.phs_storage_capacities,
scale_phs=snakemake.params.scale_phs,
path_to_output=snakemake.output[0]
)
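For reference, the scaling removed from `hydro.py` above distributes the national PHS storage totals of Geth et al. (2015) across locations in proportion to each location's share of its country's storage capacity in the plant database. A condensed, self-contained illustration of that proportional split with invented numbers (the step itself now lives in `preprocess_hydro_stations.py`, per this PR):

```python
import pandas as pd

# Proportional PHS rescaling, condensed from the code removed above. All values are invented.
locations = pd.DataFrame(
    {"country_code": ["IRL", "GBR", "GBR"],
     "storage_capacity_hphs_MWh": [1000.0, 2000.0, 6000.0]},
    index=["IRL.17_1", "GBR.1_1", "GBR.4_1"],
)
national_totals_mwh = pd.Series({"IRL": 1800.0, "GBR": 26000.0})  # e.g. national totals a la Geth et al. (2015)

# each location's share of its country's storage capacity in the plant database
share = (locations.groupby("country_code")["storage_capacity_hphs_MWh"]
                  .transform(lambda x: x / x.sum()))
scaled = share * locations["country_code"].map(national_totals_mwh)
print(scaled)  # IRL.17_1: 1800.0, GBR.1_1: 6500.0, GBR.4_1: 19500.0
```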
23 changes: 15 additions & 8 deletions scripts/hydro/fix_basins.py → scripts/hydro/preprocess_basins.py
@@ -3,26 +3,33 @@
DRIVER = "GPKG"


def fix_basins(path_to_basins, path_to_output):
"""Fix the basins shapes which are invalid.

Following the advice given here:
https://github.com/Toblerity/Shapely/issues/344
"""
basins = gpd.read_file(path_to_basins)
def preprocess_basins(path_to_basins, bbox, path_to_output):
"Filter and fix basin shapes."
basins = gpd.read_file(path_to_basins, bbox=bbox)
basins.geometry = basins.geometry.map(_buffer_if_necessary)
basins.to_file(path_to_output, driver=DRIVER)


Review thread on _buffer_if_necessary:
Member: once incorporated, move to eurocalliopelib and add in the same assertion as I did in solar_and_wind_potentials
Member Author: Actually, this function is only used once within this repo and it is difficult to test, so I don't see an added value of moving it.
Member: Once solar and wind potentials is a submodule, I can see a shared util library being useful. Then it makes sense for functions like this to move to the 'lib', since it can be used in both modules...?
Member Author: Yes, but maybe do it then? I am not 100% sure this'll work. I've played around with importing libraries of other workflows but didn't manage to make it work.

def _buffer_if_necessary(shape):
"""Fix the basins shapes which are invalid.

Following the advice given here:
https://github.com/Toblerity/Shapely/issues/344
"""
if not shape.is_valid:
shape = shape.buffer(0.0)
assert shape.is_valid
return shape


if __name__ == "__main__":
fix_basins(
preprocess_basins(
path_to_basins=snakemake.input.basins,
bbox=(
snakemake.params.x_min,
snakemake.params.y_min,
snakemake.params.x_max,
snakemake.params.y_max
),
path_to_output=snakemake.output[0]
)
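The `buffer(0.0)` repair used in `_buffer_if_necessary` follows the advice from the Shapely issue linked in the docstring. A tiny standalone demonstration of the trick on a self-intersecting "bowtie" ring (a sketch only; the assertion in the function above guards against cases where the trick does not yield a valid shape):

```python
from shapely.geometry import Polygon

# A "bowtie" ring crosses itself at (1, 1) and is therefore invalid.
bowtie = Polygon([(0, 0), (2, 2), (2, 0), (0, 2)])
print(bowtie.is_valid)   # False

# Buffering by zero returns a valid geometry that can be written to file.
fixed = bowtie.buffer(0.0)
print(fixed.is_valid)    # True
```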