diff --git a/config/default.yaml b/config/default.yaml index 3f2ffb2..3ccdb50 100644 --- a/config/default.yaml +++ b/config/default.yaml @@ -6,7 +6,7 @@ data-sources: lau: http://ec.europa.eu/eurostat/cache/GISCO/geodatafiles/COMM-01M-2013-SH.zip degurba: http://ec.europa.eu/eurostat/cache/GISCO/geodatafiles/DGURBA_2014_SH.zip land_cover: http://due.esrin.esa.int/files/Globcover2009_V2.3_Global_.zip - protected_areas: https://www.protectedplanet.net/downloads/WDPA_Feb2019?type=shapefile + protected-areas: http://d1gam3xoknrgr2.cloudfront.net/current/WDPA_{version}_Public_shp.zip cgiar_tile: http://srtm.csi.cgiar.org/wp-content/uploads/files/srtm_5x5/TIFF/ gmted_tile: https://edcintl.cr.usgs.gov/downloads/sciweb1/shared/topo/downloads/GMTED/Global_tiles_GMTED/075darcsec/mea/ gadm: https://biogeo.ucdavis.edu/data/gadm3.6/gpkg/gadm36_{country_code}_gpkg.zip @@ -243,6 +243,10 @@ parameters: max-building-share: 0.01 # Above, a pixel cannot be used for energy farms. Equals roughly 900m2 max-urban-green-share: 0.01 # Above, a pixel cannot be used for energy farms. Equals roughly 900m2. Removes e.g. Berlin's Tempelhofer Feld. nuts-year: 2010 # Choice of NUTS data to use from [2006, 2010, 2013, 2016, 2021] + protected-areas: + version: May2021 # Pointer to latest WDPA data. + shapes-to-include: [polygons, points] + scenarios: technical-potential: use-protected-areas: True diff --git a/config/schema.yaml b/config/schema.yaml index 27126bf..760c8c4 100644 --- a/config/schema.yaml +++ b/config/schema.yaml @@ -1,6 +1,12 @@ $schema: "http://json-schema.org/draft-07/schema#" description: Default configuration schema properties: + data-sources: + type: object + properties: + protected-areas: + type: string + format: ^(https?|http?):\/\/.+{version}.* parameters: type: object properties: @@ -8,6 +14,18 @@ properties: type: number enum: [2006, 2010, 2013, 2016, 2021] description: Indicates the reference NUTS year + wdpa: + type: object + properties: + version: + type: string + description: Indicates the latest Protected Planet WDPA year. Only the two latest datasets are available for download at any given time. + shapes-to-include: + type: array + description: WDPA includes exact polygons of protected areas and points with a rough indication of protected area around that point. This configuration option allows points and or polygons from WDPA to be processed in the workflow. + items: + type: string + enum: [polygons, points] crs: type: string enum: ["EPSG:4326"] diff --git a/lib/renewablepotentialslib/shape_utils.py b/lib/renewablepotentialslib/shape_utils.py index 0b61975..b48ee8d 100644 --- a/lib/renewablepotentialslib/shape_utils.py +++ b/lib/renewablepotentialslib/shape_utils.py @@ -165,28 +165,30 @@ def update_features(gdf, src): return gdf -def drop_countries(gdf, scope_config): +def drop_countries(gdf, scope_config, print_dropped=True): countries = [pycountry.countries.lookup(i).alpha_3 for i in scope_config["countries"]] _not_in = set(gdf.country_code).difference(countries) - print(f"Removing {_not_in} as they are outside of study area.") + if print_dropped: + print(f"Removing {_not_in} as they are outside of study area.") return gdf[gdf.country_code.isin(countries)] -def drop_geoms_completely_outside_study_area(gdf, scope_config): +def drop_geoms_completely_outside_study_area(gdf, scope_config, print_dropped=True): _study_area = study_area(scope_config) completely_in = gdf.intersects(_study_area) for row_index, row in gdf[~completely_in].iterrows(): - print( - "Removing {} ({}, country={}) as they are outside of study area." - .format(*row[["name", "level", "country_code"]]) - ) + if print_dropped: + print( + "Removing {} ({}, country={}) as they are outside of study area." + .format(*row[["name", "level", "country_code"]]) + ) gdf = gdf[completely_in] return gdf -def drop_parts_of_geoms_completely_outside_study_area(gdf, scope_config): +def drop_parts_of_geoms_completely_outside_study_area(gdf, scope_config, print_dropped=True): gdf = gdf.copy() _study_area = study_area(scope_config) all_geoms = gdf.explode() @@ -199,10 +201,11 @@ def drop_parts_of_geoms_completely_outside_study_area(gdf, scope_config): return gdf for row_index, row in gdf.loc[geoms_to_update.any(level=0)].iterrows(): - print( - "Removing parts of {} ({}, country={}) as they are outside of study area." - .format(*row[["name", "level", "country_code"]]) - ) + if print_dropped: + print( + "Removing parts of {} ({}, country={}) as they are outside of study area." + .format(*row[["name", "level", "country_code"]]) + ) # Unlike groupby, dissolve can only operate on columns, not multiindex levels new_geoms = ( @@ -253,3 +256,26 @@ def study_area(scope_config): _study_area = buffer_if_necessary(_study_area) return _study_area + + +def estimate_polygons_from_points(points, reported_area_col_name): + """Estimates the shape of protected areas for which only centroids are known.""" + + def __test_area_size(points): + area_size_calculated = points.area.sum() / 1e6 + area_size_reported = points[reported_area_col_name].sum() + assert np.isclose(area_size_calculated, area_size_reported, rtol=1e-2) + + original_crs = points.crs + # convert points to circles + points_in_metres = points.to_crs("epsg:3035") + points_in_metres.geometry = points_in_metres.buffer( + _radius_meter(points_in_metres[reported_area_col_name]) + ) + __test_area_size(points_in_metres) + return points_in_metres.to_crs(original_crs) + + +def _radius_meter(area_squarekilometer): + area_squaremeter = area_squarekilometer * 1e6 + return np.sqrt(area_squaremeter / np.pi) diff --git a/lib/tests/test_shape_utils.py b/lib/tests/test_shape_utils.py index 28ce171..0da72da 100644 --- a/lib/tests/test_shape_utils.py +++ b/lib/tests/test_shape_utils.py @@ -1,7 +1,9 @@ import geopandas as gpd +import pandas as pd import pytest import shapely.geometry import pycountry +import numpy as np from renewablepotentialslib.shape_utils import ( study_area, @@ -9,9 +11,13 @@ drop_countries, drop_geoms_completely_outside_study_area, drop_parts_of_geoms_completely_outside_study_area, - update_features + update_features, + estimate_polygons_from_points, + _radius_meter ) +from renewablepotentialslib import WGS84, EPSG_3035 + # Loading the 60M file for its smaller size (1M used in actual workflow) URL_NUTS = "https://ec.europa.eu/eurostat/cache/GISCO/distribution/v2/nuts/geojson/NUTS_RG_60M_{}_4326.geojson" @@ -140,3 +146,46 @@ def test_drop_parts_of_geoms(self, capsys, europe_gdf): europe_gdf[europe_gdf.name == 'Highlands and Islands'].area > gdf[gdf.name == 'Highlands and Islands'].area ).all() + +class TestPolygonsFromPoints: + + @pytest.fixture + def points(self): + latitudes = { + 'athens': 37.983, 'davos': 46.802, 'stockholm': 59.329, 'wurzburg': 49.791 + } + longitudes = { + 'athens': 23.727, 'davos': 9.836, 'stockholm': 18.068, 'wurzburg': 9.953 + } + areas = {'athens': 1, 'davos': 5, 'stockholm': 100, 'wurzburg': 500} + point_gdf = gpd.GeoDataFrame( + # x = longitude, y = latitude + geometry=gpd.points_from_xy(longitudes.values(), latitudes.values()), + index=latitudes.keys(), + crs = WGS84 + ) + point_gdf["REP_AREA"] = pd.Series(areas) + return point_gdf + + @pytest.fixture + def estimated_polygons(self, points): + return estimate_polygons_from_points(points, "REP_AREA") + + + @pytest.mark.parametrize( + ("area", "radius"), + [(np.pi, 1000), (1, 564.19), ([np.pi, 1], [1000, 564.19])] + ) + def test_radius_meter(self, area, radius): + calculated_radius = _radius_meter(area) + assert np.isclose(calculated_radius, radius) + + def test_all_points_are_now_polygons(self, estimated_polygons): + assert all(isinstance(geom, shapely.geometry.Polygon) for geom in estimated_polygons.geometry) + + def test_all_polygons_have_expected_area(self, points, estimated_polygons): + assert np.allclose(estimated_polygons.to_crs(EPSG_3035).area / 1e6, points["REP_AREA"], rtol=1e-2) + + def test_estimate_polygons_crs(self, points, estimated_polygons): + assert estimated_polygons.crs == points.crs + diff --git a/rules/data-preprocessing.smk b/rules/data-preprocessing.smk index e58e33d..8d02b65 100644 --- a/rules/data-preprocessing.smk +++ b/rules/data-preprocessing.smk @@ -79,7 +79,7 @@ rule raw_lau_units_unzipped: rule administrative_borders_lau: message: "Normalise LAU administrative borders." input: - src = script_dir + "lau.py", + script = script_dir + "lau.py", shapes = rules.raw_lau_units_unzipped.output.shapes, attributes = rules.raw_lau_units_unzipped.output.attributes, output: @@ -92,7 +92,7 @@ rule administrative_borders_lau: rule administrative_borders: message: "Normalise all administrative borders." input: - src = script_dir + "administrative_borders.py", + script = script_dir + "administrative_borders.py", nuts_geojson = rules.raw_nuts_units.output[0], gadm_gpkg = rules.all_gadm_administrative_borders.output[0], lau_gpkg = rules.administrative_borders_lau.output[0] @@ -124,18 +124,29 @@ rule raw_land_cover: rule raw_protected_areas_zipped: message: "Download protected areas data as zip." output: protected("data/automatic/raw-wdpa.zip") - params: url = config["data-sources"]["protected_areas"] + params: url = config["data-sources"]["protected-areas"].format(version=config["parameters"]["protected-areas"]["version"]) shell: "curl -sLo {output} -H 'Referer: {params.url}' {params.url}" -rule raw_protected_areas: - message: "Extract protected areas data as zip." +rule raw_protected_area_shapes: + message: "Extract protected areas data as separate zip files." input: rules.raw_protected_areas_zipped.output - output: - polygons = "build/raw-wdpa-feb2019/WDPA_Feb2019-shapefile-polygons.shp", - polygon_data = "build/raw-wdpa-feb2019/WDPA_Feb2019-shapefile-polygons.dbf", - points = "build/raw-wdpa-feb2019/WDPA_Feb2019-shapefile-points.shp" - shell: "unzip -o {input} -d build/raw-wdpa-feb2019" + output: temp(directory("build/raw-wdpa")) + shell: "unzip {input} *.zip -d {output}" + + +rule protected_areas: + message: "Extract protected areas data as zip." + input: + script = script_dir + "protected_areas.py", + shape_dir = rules.raw_protected_area_shapes.output[0] + params: + shapefile_prefix = "WDPA_{}_Public_shp".format(config["parameters"]["protected-areas"]["version"]), + shapes_to_include = config["parameters"]["protected-areas"]["shapes-to-include"], + scope_config = config["scope"] + output: "build/protected-areas.geojson" + conda: "../envs/default.yaml" + script: "../scripts/protected_areas.py" rule raw_srtm_elevation_tile_zipped: @@ -268,41 +279,19 @@ rule slope_in_europe: """ -rule protected_areas_points_to_circles: - message: "Estimate shape of protected areas available as points only." - input: - script_dir + "estimate_protected_shapes.py", - protected_areas= rules.raw_protected_areas.output.points - params: - scope = config["scope"] - output: - temp("build/protected-areas-points-as-circles.geojson") - conda: "../envs/default.yaml" - script: "../scripts/estimate_protected_shapes.py" - - rule protected_areas_in_europe: - message: "Rasterise protected areas data and clip to Europe." + message: "Rasterise protected areas data." input: - polygons = rules.raw_protected_areas.output.polygons, - points = rules.protected_areas_points_to_circles.output, - land_cover = rules.land_cover_in_europe.output + all_shapes = rules.protected_areas.output[0], + land_cover = rules.land_cover_in_europe.output[0] output: "build/protected-areas-europe.tif" benchmark: "build/rasterisation-benchmark.txt" - params: - bounds = "{x_min},{y_min},{x_max},{y_max}".format(**config["scope"]["bounds"]) conda: "../envs/default.yaml" shell: - # The filter is in accordance to the way UNEP-WCMC calculates statistics: - # https://www.protectedplanet.net/c/calculating-protected-area-coverage """ - fio cat --rs --bbox {params.bounds} {input.polygons} {input.points} | \ - fio filter "f.properties.STATUS in ['Designated', 'Inscribed', 'Established'] and \ - f.properties.DESIG_ENG != 'UNESCO-MAB Biosphere Reserve'" | \ - fio collect --record-buffered | \ - rio rasterize --like {input.land_cover} \ + rio rasterize {input.all_shapes} --like {input.land_cover} \ --default-value 255 --all_touched -f "GTiff" --co dtype=uint8 -o {output} """ diff --git a/rules/potential.smk b/rules/potential.smk index 337b0f1..b40b8fb 100644 --- a/rules/potential.smk +++ b/rules/potential.smk @@ -13,7 +13,7 @@ rule category_of_technical_eligibility: message: "Determine upper bound surface eligibility for renewables based on land cover, slope, bathymetry, and settlements." input: - src = script_dir + "technical_eligibility.py", + script = script_dir + "technical_eligibility.py", land_cover = rules.land_cover_in_europe.output[0], slope = rules.slope_in_europe.output[0], bathymetry = rules.bathymetry_in_europe.output[0], @@ -33,7 +33,7 @@ rule category_of_technical_eligibility: rule total_size_swiss_building_footprints_according_to_settlement_data: message: "Sum the size of building footprints from settlement data." input: - src = script_dir + "swiss_building_footprints.py", + script = script_dir + "swiss_building_footprints.py", building_footprints = rules.settlements.output.buildings, eligibility = "build/technically-eligible-land.tif", countries = rules.administrative_borders.output[0] diff --git a/scripts/estimate_protected_shapes.py b/scripts/estimate_protected_shapes.py deleted file mode 100644 index 16f5f99..0000000 --- a/scripts/estimate_protected_shapes.py +++ /dev/null @@ -1,55 +0,0 @@ -"""This module estimates the shape of protected areas, for which only centroids are known. - -This procedure is applied by the provider of the database, UNEP-WCMC, as well. See: -https://www.protectedplanet.net/c/calculating-protected-area-coverage -or the manual of the database for further information. -""" -import math - -import geopandas as gpd -import pycountry - -from renewablepotentialslib import EPSG_3035_PROJ4 - - -def estimate_shapes(path_to_input, scope_config, path_to_output): - """Estimates the shap of protected areas for which only centroids are known.""" - points = gpd.read_file(path_to_input) - points_in_scope = filter_points(points, scope_config) - original_crs = points_in_scope.crs - # convert points to circles - points_in_scope = points_in_scope.to_crs(EPSG_3035_PROJ4) - points_in_scope.geometry = [rec[1].geometry.buffer(radius_meter(rec[1]["REP_AREA"])) - for rec in points_in_scope.iterrows()] - test_area_size(points_in_scope) - points_in_scope.to_crs(original_crs).to_file(path_to_output, driver="GeoJSON") - - -def filter_points(points, scope_config): - x_min, x_max, y_min, y_max = [scope_config["bounds"][z] - for z in ["x_min", "x_max", "y_min", "y_max"]] - countries = [pycountry.countries.lookup(country).alpha_3 - for country in scope_config["countries"]] - return points.cx[x_min:x_max, y_min:y_max].loc[ - (points.ISO3.isin(countries)) & - (points.REP_AREA > 0) - ].copy() - - -def radius_meter(area_squarekilometer): - area_squaremeter = area_squarekilometer * 1e6 - return math.sqrt(area_squaremeter / math.pi) - - -def test_area_size(points): - area_size_calculated = points.area.sum() / 1e6 - area_size_reported = points.REP_AREA.sum() - assert abs(area_size_calculated - area_size_reported) < (area_size_reported / 100) - - -if __name__ == "__main__": - estimate_shapes( - path_to_input=snakemake.input.protected_areas, - scope_config=snakemake.params.scope, - path_to_output=snakemake.output[0] - ) diff --git a/scripts/protected_areas.py b/scripts/protected_areas.py new file mode 100644 index 0000000..b608a01 --- /dev/null +++ b/scripts/protected_areas.py @@ -0,0 +1,66 @@ +import glob +import os +import geopandas as gpd +import pandas as pd + +from renewablepotentialslib.shape_utils import ( + to_multi_polygon, + drop_countries, + estimate_polygons_from_points, + study_area +) + +OUTPUT_DRIVER = "GeoJSON" + + +def wdpa(path_to_shape_dir, path_to_output, shapefile_prefix, shapes_to_include, scope): + all_shapes = pd.concat([ + _get_shapes(path_to_shape_dir, shapefile_prefix, geodata_type, scope).geometry + for geodata_type in shapes_to_include + ]) + + all_shapes.to_file(path_to_output, driver=OUTPUT_DRIVER) + + +def _get_shapes(path_to_shape_dir, shapefile_prefix, geodata_type, scope): + shapefile_zips = glob.glob(os.path.join(path_to_shape_dir, "*.zip")) + shapefile_name = f"zip://{{}}!{shapefile_prefix}-{geodata_type}.shp" + bbox = study_area(scope) + shapes = pd.concat([ + gpd.read_file(shapefile_name.format(zipfile), bbox=bbox) + for zipfile in shapefile_zips + ]) + + print(f"Number of WDPA shapes pre-filter = {len(shapes)}") + shapes = _filter_shapes(shapes, scope) + print(f"Number of WDPA shapes post-filter = {len(shapes)}") + + if geodata_type == "points": + shapes = estimate_polygons_from_points(shapes, "REP_AREA") + print(f"Processed points to polys") + shapes.geometry = shapes.geometry.map(to_multi_polygon) + + return shapes + + +def _filter_shapes(shapes, scope): + shapes = shapes[ + (shapes.REP_AREA.astype(float) > 0) & + # The filter is in accordance to the way UNEP-WCMC calculates statistics: + # https://www.protectedplanet.net/c/calculating-protected-area-coverage + (shapes.STATUS.isin(['Designated', 'Inscribed', 'Established'])) & + (shapes.DESIG_ENG != 'UNESCO-MAB Biosphere Reserve') + ] + shapes = drop_countries(shapes.rename(columns={"ISO3": "country_code"}), scope, print_dropped=False) + + return shapes + + +if __name__ == "__main__": + wdpa( + path_to_shape_dir=snakemake.input.shape_dir, + shapefile_prefix=snakemake.params.shapefile_prefix, + shapes_to_include=snakemake.params.shapes_to_include, + scope=snakemake.params.scope_config, + path_to_output=snakemake.output[0] + )