Compatibility of data processing for Ukraine (#1146)

* data: retrieve gdp and pop raw data for UA,MD * Updated retrieval code. * add_electricity script for UA and MD using updated, endogenised GDP and PPP data. * Outsourced building of gdp and ppp. Only called when UA and/or MD are in country list. * Accepted suggestion by @fneum in retrieve rule. * Cleaned determine_availability_matrix_MD_UA.py, removed redundant code * Updated build_gdp_pop_non_nuts3 script to use the mean to distribute GDP p.c. Added release notes. Bug fixes. * Bug fixes --> 'ppp' to 'pop' * Bug fix: only distribute load to buses with substation. * Updated to download GDP and population raster via PyPSA-Eur zenodo databundle. Removal of dedicated retrieve function. Updated release notes. * Updated release notes. * correct input file paths * update zenodo url for databundle release v0.3.0 * doc: update release_notes * minor adjustments: benchmark, increase mem_mb, remove plots, handling of optional inputs --------- Co-authored-by: bobbyxng <[email protected]> Co-authored-by: Bobby Xiong <[email protected]> Co-authored-by: Fabian Neumann <[email protected]>
PyPSA · Jul 19, 2024 · ba55971 · ba55971
1 parent 71efc8f
commit ba55971
Show file tree

Hide file tree

Showing 8 changed files with 214 additions and 166 deletions.
diff --git a/data/GDP_PPP_30arcsec_v3_mapped_default.csv b/data/GDP_PPP_30arcsec_v3_mapped_default.csv
diff --git a/doc/release_notes.rst b/doc/release_notes.rst
@@ -31,6 +31,12 @@ Upcoming Release
 
 * Bugfix: Correctly read in threshold capacity below which to remove components from previous planning horizons in :mod:`add_brownfield`.
 
+* For countries not contained in the NUTS3-specific datasets (i.e. MD and UA), the mapping of GDP per capita and population per bus region used to spatially distribute electricity demand is now endogenised in a new rule :mod:`build_gdp_pop_non_nuts3`. https://github.com/PyPSA/pypsa-eur/pull/1146
+
+* The databundle has been updated to release v0.3.0, which includes raw GDP and population data for countries outside the NUTS system (UA, MD). https://github.com/PyPSA/pypsa-eur/pull/1146 
+
+* Updated filtering in :mod:`determine_availability_matrix_MD_UA` to improve speed. https://github.com/PyPSA/pypsa-eur/pull/1146
+
 * Bugfix: Impose minimum value of zero for district heating progress between current and future market share in :mod:`build_district_heat_share`.
 
 PyPSA-Eur 0.11.0 (25th May 2024)

diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk
@@ -375,6 +375,37 @@ def input_conventional(w):
     }
 
 
+# Optional input, only requested when Ukraine (UA) or Moldova (MD) is in the countries list
+def input_gdp_pop_non_nuts3(w):
+    countries = set(config_provider("countries")(w))
+    if {"UA", "MD"}.intersection(countries):
+        return {"gdp_pop_non_nuts3": resources("gdp_pop_non_nuts3.geojson")}
+    return {}
+
+
+rule build_gdp_pop_non_nuts3:
+    params:
+        countries=config_provider("countries"),
+    input:
+        base_network=resources("networks/base.nc"),
+        regions=resources("regions_onshore.geojson"),
+        gdp_non_nuts3="data/bundle/GDP_per_capita_PPP_1990_2015_v2.nc",
+        pop_non_nuts3="data/bundle/ppp_2013_1km_Aggregated.tif",
+    output:
+        resources("gdp_pop_non_nuts3.geojson"),
+    log:
+        logs("build_gdp_pop_non_nuts3.log"),
+    benchmark:
+        benchmarks("build_gdp_pop_non_nuts3")
+    threads: 1
+    resources:
+        mem_mb=8000,
+    conda:
+        "../envs/environment.yaml"
+    script:
+        "../scripts/build_gdp_pop_non_nuts3.py"
+
+
 rule add_electricity:
     params:
         length_factor=config_provider("lines", "length_factor"),
@@ -390,6 +421,7 @@ rule add_electricity:
     input:
         unpack(input_profile_tech),
         unpack(input_conventional),
+        unpack(input_gdp_pop_non_nuts3),
         base_network=resources("networks/base.nc"),
         line_rating=lambda w: (
             resources("networks/line_rating.nc")
@@ -411,7 +443,6 @@ rule add_electricity:
         ),
         load=resources("electricity_demand.csv"),
         nuts3_shapes=resources("nuts3_shapes.geojson"),
-        ua_md_gdp="data/GDP_PPP_30arcsec_v3_mapped_default.csv",
     output:
         resources("networks/elec.nc"),
     log:

diff --git a/rules/retrieve.smk b/rules/retrieve.smk
@@ -29,6 +29,8 @@ if config["enable"]["retrieve"] and config["enable"].get("retrieve_databundle",
         "h2_salt_caverns_GWh_per_sqkm.geojson",
         "natura/natura.tiff",
         "gebco/GEBCO_2014_2D.nc",
+        "GDP_per_capita_PPP_1990_2015_v2.nc",
+        "ppp_2013_1km_Aggregated.tif",
     ]
 
     rule retrieve_databundle:
@@ -163,7 +165,7 @@ if config["enable"]["retrieve"]:
     rule retrieve_ship_raster:
         input:
             storage(
-                "https://zenodo.org/records/10973944/files/shipdensity_global.zip",
+                "https://zenodo.org/records/12760663/files/shipdensity_global.zip",
                 keep_local=True,
             ),
         output:

diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py
@@ -294,19 +294,19 @@ def shapes_to_shapes(orig, dest):
     return transfer
 
 
-def attach_load(n, regions, load, nuts3_shapes, ua_md_gdp, countries, scaling=1.0):
+def attach_load(
+    n, regions, load, nuts3_shapes, gdp_pop_non_nuts3, countries, scaling=1.0
+):
     substation_lv_i = n.buses.index[n.buses["substation_lv"]]
-    regions = gpd.read_file(regions).set_index("name").reindex(substation_lv_i)
+    gdf_regions = gpd.read_file(regions).set_index("name").reindex(substation_lv_i)
     opsd_load = pd.read_csv(load, index_col=0, parse_dates=True).filter(items=countries)
 
-    ua_md_gdp = pd.read_csv(ua_md_gdp, dtype={"name": "str"}).set_index("name")
-
     logger.info(f"Load data scaled by factor {scaling}.")
     opsd_load *= scaling
 
     nuts3 = gpd.read_file(nuts3_shapes).set_index("index")
 
-    def upsample(cntry, group):
+    def upsample(cntry, group, gdp_pop_non_nuts3):
         load = opsd_load[cntry]
 
         if len(group) == 1:
@@ -325,7 +325,15 @@ def upsample(cntry, group):
         factors = normed(0.6 * normed(gdp_n) + 0.4 * normed(pop_n))
         if cntry in ["UA", "MD"]:
             # overwrite factor because nuts3 provides no data for UA+MD
-            factors = normed(ua_md_gdp.loc[group.index, "GDP_PPP"].squeeze())
+            gdp_pop_non_nuts3 = gpd.read_file(gdp_pop_non_nuts3).set_index("Bus")
+            gdp_pop_non_nuts3 = gdp_pop_non_nuts3.loc[
+                (gdp_pop_non_nuts3.country == cntry)
+                & (gdp_pop_non_nuts3.index.isin(substation_lv_i))
+            ]
+            factors = normed(
+                0.6 * normed(gdp_pop_non_nuts3["gdp"])
+                + 0.4 * normed(gdp_pop_non_nuts3["pop"])
+            )
         return pd.DataFrame(
             factors.values * load.values[:, np.newaxis],
             index=load.index,
@@ -334,8 +342,8 @@ def upsample(cntry, group):
 
     load = pd.concat(
         [
-            upsample(cntry, group)
-            for cntry, group in regions.geometry.groupby(regions.country)
+            upsample(cntry, group, gdp_pop_non_nuts3)
+            for cntry, group in gdf_regions.geometry.groupby(gdf_regions.country)
         ],
         axis=1,
     )
@@ -821,7 +829,7 @@ def attach_line_rating(
         snakemake.input.regions,
         snakemake.input.load,
         snakemake.input.nuts3_shapes,
-        snakemake.input.ua_md_gdp,
+        snakemake.input.get("gdp_pop_non_nuts3"),
         params.countries,
         params.scaling_factor,
     )