diff --git a/Snakefile b/Snakefile index 5028396..9d32c47 100644 --- a/Snakefile +++ b/Snakefile @@ -66,7 +66,7 @@ rule test: "build/technically-eligible-land.tif", "build/technically-eligible-area-km2.tif", "build/technically-eligible-electricity-yield-pv-prio-twh.tif", - "build/administrative-borders-nuts.gpkg", + "build/administrative-borders.gpkg", "data/automatic/sonnendach/total-rooftop-area-km2.txt", "data/automatic/sonnendach/total-yield-twh.txt" output: "build/logs/test-report.html" diff --git a/config/default.yaml b/config/default.yaml index f7845b0..b4be1d3 100644 --- a/config/default.yaml +++ b/config/default.yaml @@ -37,11 +37,19 @@ scope: - "Norway" - "Serbia" - "Switzerland" + - "Iceland" bounds: - x_min: -15.8 # in degrees east + x_min: -30 # in degrees east x_max: 37 # in degrees east y_min: 30 # in degrees north y_max: 75 # in degrees north + exclusion_zones: + atlantic_islands: + x_min: -30 # in degrees east + x_max: -10 # in degrees east + y_min: 30 # in degrees north + y_max: 41 # in degrees north + layers: continental: Austria: nuts0 @@ -78,6 +86,7 @@ layers: Norway: nuts0 Serbia: gadm0 Switzerland: nuts0 + Iceland: nuts0 national: Austria: nuts0 Belgium: nuts0 @@ -113,6 +122,7 @@ layers: Norway: nuts0 Serbia: gadm0 Switzerland: nuts0 + Iceland: nuts0 regional: # The link between NUTS and administrative units unfortunately is not obvious. # It's not documented anywhere -- at least I could not find any information. @@ -152,6 +162,7 @@ layers: Norway: gadm1 # match 19 Serbia: gadm1 # gadm1 25 regions; wiki 5 regions Switzerland: gadm1 # match 26 + Iceland: gadm1 municipal: Austria: lau2 Belgium: lau2 @@ -187,6 +198,7 @@ layers: Norway: lau2 Serbia: lau2 Switzerland: lau2 + Iceland: lau2 parameters: maximum-installable-power-density: # this is not the yield, but the density of installed power pv-on-tilted-roofs: 160 # [W/m^2] from (Gagnon:2016, Klauser:2016), i.e. 16% efficiency diff --git a/config/euler/cluster-config.yaml b/config/euler/cluster-config.yaml index ebcda87..38c4215 100644 --- a/config/euler/cluster-config.yaml +++ b/config/euler/cluster-config.yaml @@ -18,8 +18,6 @@ elevation_in_europe: cores: 4 eez_eligibility: cores: 4 -shared_coast: - cores: 4 sensitivities: cores: 4 capacityfactor_timeseries: @@ -29,5 +27,5 @@ time_average_capacityfactor_map: memory: 32000 national_boundaries: runtime: 60 -administrative_borders_lau: - runtime: 60 +administrative_borders: + runtime: 240 diff --git a/config/schema.yaml b/config/schema.yaml index 348a9b7..54cd4f8 100644 --- a/config/schema.yaml +++ b/config/schema.yaml @@ -7,5 +7,122 @@ properties: nuts-year: type: number enum: [2006, 2010, 2013, 2016, 2021] - default: 2016 - description: Indicates the reference NUTS year \ No newline at end of file + description: Indicates the reference NUTS year + crs: + type: string + enum: ["EPSG:4326"] + description: Coordinate reference system to which all datasets are reprojected during preprocessing + scope: + type: object + properties: + countries: + type: array + items: + type: string + enum: + - "Austria" + - "Belgium" + - "Bulgaria" + - "Croatia" + - "Cyprus" + - "Czech Republic" + - "Denmark" + - "Estonia" + - "Finland" + - "France" + - "Germany" + - "Greece" + - "Hungary" + - "Ireland" + - "Italy" + - "Latvia" + - "Lithuania" + - "Luxembourg" + - "Netherlands" + - "Poland" + - "Portugal" + - "Romania" + - "Slovakia" + - "Slovenia" + - "Spain" + - "Sweden" + - "United Kingdom" + - "Albania" + - "Bosnia and Herzegovina" + - "North Macedonia" + - "Montenegro" + - "Norway" + - "Serbia" + - "Switzerland" + - "Iceland" + description: Countries to include in the model + bounds: + type: object + properties: + x_min: + type: number + minimum: -180 + maximum: 180 + description: Minimum longitude, in degrees east + x_max: + type: number + minimum: -180 + maximum: 180 + description: Maximum longitude, in degrees east + y_min: + type: number + minimum: -90 + maximum: 90 + description: Minimum Latitude, in degrees north + y_max: + type: number + minimum: -90 + maximum: 90 + description: Maximum Latitude, in degrees north + description: Total extent of system under study. Defaults to all of Europe + exclusion_zones: + patternProperties: + ^.*$: + properties: + x_min: + type: number + minimum: -180 + maximum: 180 + description: Minimum longitude, in degrees east + x_max: + type: number + minimum: -180 + maximum: 180 + description: Maximum longitude, in degrees east + y_min: + type: number + minimum: -90 + maximum: 90 + description: Minimum Latitude, in degrees north + y_max: + type: number + minimum: -90 + maximum: 90 + description: Maximum Latitude, in degrees north + description: Any number of bounding boxes defining exclusion zones, where spatial features within the total bounds are to be ignored. + layers: + scope: object + patternProperties: + ^.*$: + type: object + properties: + ^.*$: # ideally this would be 'oneof' the list of countries above (can this be done in a schema??) + type: string + enum: + - gadm0 + - gadm1 + - gadm2 + - gadm3 + - gadm4 + - gadm5 + - lau2 + - nuts0 + - nuts1 + - nuts2 + - nuts3 + diff --git a/envs/default.yaml b/envs/default.yaml index 1906f9f..65c83dc 100644 --- a/envs/default.yaml +++ b/envs/default.yaml @@ -13,7 +13,7 @@ dependencies: - fiona=1.8.4 - rasterio=1.0.25 - rasterstats=0.13.0 - - geopandas=0.4.1 + - geopandas=0.8.2 - xarray=0.12.1 - netcdf4=1.5.1.2 - pyyaml=5.1 diff --git a/rules/data-preprocessing.smk b/rules/data-preprocessing.smk index 956fb65..9d8d375 100644 --- a/rules/data-preprocessing.smk +++ b/rules/data-preprocessing.smk @@ -2,7 +2,7 @@ import pycountry URL_LOAD = "https://data.open-power-system-data.org/time_series/2018-06-30/time_series_60min_stacked.csv" -URL_NUTS = "https://ec.europa.eu/eurostat/cache/GISCO/distribution/v2/nuts/shp/NUTS_RG_01M_{}_4326.shp.zip" +URL_NUTS = "https://ec.europa.eu/eurostat/cache/GISCO/distribution/v2/nuts/geojson/NUTS_RG_01M_{}_4326.geojson" URL_LAU = "http://ec.europa.eu/eurostat/cache/GISCO/geodatafiles/COMM-01M-2013-SH.zip" URL_DEGURBA = "http://ec.europa.eu/eurostat/cache/GISCO/geodatafiles/DGURBA_2014_SH.zip" URL_LAND_COVER = "http://due.esrin.esa.int/files/Globcover2009_V2.3_Global_.zip" @@ -28,7 +28,7 @@ GMTED_Y = ["50N", "70N"] GMTED_X = ["030W", "000E", "030E"] localrules: raw_gadm_administrative_borders_zipped, raw_protected_areas_zipped, - raw_nuts_units_zipped, raw_lau_units_zipped, raw_land_cover_zipped, + raw_lau_units_zipped, raw_land_cover_zipped, raw_land_cover, raw_protected_areas, raw_srtm_elevation_tile_zipped, raw_gmted_elevation_tile, raw_bathymetry_zipped, raw_bathymetry, raw_gadm_administrative_borders @@ -42,55 +42,33 @@ rule raw_gadm_administrative_borders_zipped: rule raw_gadm_administrative_borders: message: "Unzip administrative borders of {wildcards.country_code} as zip." input: "data/automatic/raw-gadm/{country_code}.zip" - output: temp("data/automatic/raw-gadm/gadm36_{country_code}.gpkg") - shell: "unzip -o {input} -d data/automatic/raw-gadm" + output: temp("build/raw-gadm/gadm36_{country_code}.gpkg") + shell: "unzip -o {input} -d build/raw-gadm" -rule administrative_borders_gadm: - message: "Merge administrative borders of all countries up to layer {params.max_layer_depth}." +rule all_gadm_administrative_borders: + message: "Merge gadm administrative borders of all countries." input: - "src/gadm.py", - ["data/automatic/raw-gadm/gadm36_{}.gpkg".format(country_code) + ["build/raw-gadm/gadm36_{}.gpkg".format(country_code) for country_code in [pycountry.countries.lookup(country).alpha_3 for country in config['scope']['countries']] ] - params: max_layer_depth = 3 - output: "build/administrative-borders-gadm.gpkg" - conda: "../envs/default.yaml" - shell: - PYTHON + " {input} {params.max_layer_depth} {output} {CONFIG_FILE}" + output: temp("build/raw-gadm/gadm36.gpkg") + params: crs = config["crs"] + conda: '../envs/default.yaml' + shell: "ogrmerge.py -o {output} -f gpkg -src_layer_field_content "{{LAYER_NAME}}" -t_srs {params.crs} -single {input}" -rule raw_nuts_units_zipped: - message: "Download units as zip." +rule raw_nuts_units: + message: "Download NUTS units as GeoJSON." output: - protected("data/automatic/raw-nuts{}-units.zip".format(config["parameters"]["nuts-year"])) + protected("data/automatic/raw-nuts{}-units.geojson".format(config["parameters"]["nuts-year"])) params: url = URL_NUTS.format(config["parameters"]["nuts-year"]) shell: "curl -sLo {output} '{params.url}'" -rule administrative_borders_nuts: - message: "Normalise NUTS administrative borders." - input: - src = "src/nuts.py", - zip = rules.raw_nuts_units_zipped.output - output: - "build/administrative-borders-nuts.gpkg" - shadow: "full" - params: - year = config['parameters']['nuts-year'] - conda: "../envs/default.yaml" - shell: - """ - unzip {input.zip} -d ./build/NUTS_RG_01M_{params.year}_4326/ - {PYTHON} {input.src} to_multipolygon \ - ./build/NUTS_RG_01M_{params.year}_4326 ./build/raw-nuts.gpkg - {PYTHON} {input.src} normalise ./build/raw-nuts.gpkg {output} {CONFIG_FILE} - """ - - rule raw_lau_units_zipped: message: "Download LAU units as zip." output: @@ -105,7 +83,7 @@ rule administrative_borders_lau: src = "src/lau.py", zip = rules.raw_lau_units_zipped.output output: - "build/administrative-borders-lau.geojson" + temp("build/raw-lau.gpkg") shadow: "full" conda: "../envs/default.yaml" shell: @@ -113,11 +91,23 @@ rule administrative_borders_lau: unzip {input.zip} -d ./build {PYTHON} {input.src} merge ./build/COMM_01M_2013_SH/data/COMM_RG_01M_2013.shp \ ./build/COMM_01M_2013_SH/data/COMM_AT_2013.dbf ./build/raw-lau.gpkg - {PYTHON} {input.src} identify ./build/raw-lau.gpkg ./build/raw-lau-identified.gpkg - {PYTHON} {input.src} normalise ./build/raw-lau-identified.gpkg {output} {CONFIG_FILE} """ +rule administrative_borders: + message: "Normalise all administrative borders." + input: + src = "src/administrative_borders.py", + nuts_geojson = rules.raw_nuts_units.output, + gadm_gpkg = rules.all_gadm_administrative_borders.output, + lau_gpkg = rules.administrative_borders_lau.output + output: + "build/administrative-borders.gpkg" + shadow: "full" + conda: "../envs/default.yaml" + shell: "{PYTHON} {input.src} {input.nuts_geojson} {input.gadm_gpkg} {input.lau_gpkg} {output} {CONFIG_FILE}" + + rule raw_land_cover_zipped: message: "Download land cover data as zip." output: protected("data/automatic/raw-globcover2009.zip") diff --git a/rules/potential.smk b/rules/potential.smk index e0dda97..6b9d29b 100644 --- a/rules/potential.smk +++ b/rules/potential.smk @@ -30,7 +30,7 @@ rule total_size_swiss_building_footprints_according_to_settlement_data: src = "src/swiss_building_footprints.py", building_footprints = rules.settlements.output.buildings, eligibility = "build/technically-eligible-land.tif", - countries = rules.administrative_borders_nuts.output[0] + countries = rules.administrative_borders.output output: "build/building-footprints-according-to-settlement-data-km2.txt" conda: "../envs/default.yaml" @@ -124,9 +124,7 @@ rule units: message: "Form units of layer {wildcards.layer} by remixing NUTS, LAU, and GADM." input: "src/units.py", - rules.administrative_borders_nuts.output, - rules.administrative_borders_lau.output, - rules.administrative_borders_gadm.output + rules.administrative_borders.output, output: "build/{layer}/units.geojson" conda: "../envs/default.yaml" diff --git a/src/administrative_borders.py b/src/administrative_borders.py new file mode 100644 index 0000000..3b9dacf --- /dev/null +++ b/src/administrative_borders.py @@ -0,0 +1,188 @@ +import click +import geopandas as gpd +import shapely.geometry +import shapely.errors +import pycountry + +from src.conversion import eu_country_code_to_iso3 +from src.utils import Config, buffer_if_necessary + +OUTPUT_DRIVER = 'GPKG' +SCHEMA = { + "properties": { + "country_code": "str", # id of the country to which the unit belongs + "id": "str", # a unique id of this unit + "name": "str", # the name of the unit, not necessarily unique + "type": "str", # the type of the unit + "proper": "bool" # flag indicating proper administrative unit (not the case for water bodies e.g.) + }, + "geometry": "MultiPolygon" +} + + +@click.command() +@click.argument("path_to_nuts") +@click.argument("path_to_gadm") +@click.argument("path_to_lau") +@click.argument("path_to_output") +@click.argument("config", type=Config()) +def normalise_admin_borders(path_to_nuts, path_to_gadm, path_to_lau, path_to_output, config): + """Normalises raw administrative boundary data and places it in one, layered geodatapackage.""" + + #lau + for _src, _path in { + 'nuts': path_to_nuts, 'gadm': path_to_gadm, 'lau': path_to_lau + }.items(): + gdf = gpd.read_file(_path) + gdf = gdf.to_crs(config["crs"]) + gdf.geometry = gdf.geometry.map(buffer_if_necessary).map(_to_multi_polygon) + gdf = _update_features(gdf, _src) + gdf = _drop_countries(gdf, config) + gdf = _drop_geoms_completely_outside_study_area(gdf, config) + gdf = _drop_parts_of_geoms_completely_outside_study_area(gdf, config) + + assert gdf.id.duplicated().sum() == 0 + + allowed_cols = list(SCHEMA["properties"].keys()) + ['geometry'] + + for lvl in gdf.level.unique(): + gdf.loc[gdf.level == lvl, allowed_cols].to_file( + path_to_output, schema=SCHEMA, layer=lvl, driver=OUTPUT_DRIVER + ) + + +def _update_features(gdf, src): + if src == 'nuts': + gdf["CNTR_CODE"] = gdf.CNTR_CODE.apply(eu_country_code_to_iso3) + gdf = gdf.rename(columns={"NUTS_NAME": "name", "CNTR_CODE": "country_code"}) + gdf["type"] = gdf.LEVL_CODE.map({0: "country"}) + gdf["proper"] = True + gdf = gdf.drop(columns=["FID"]) + + # Country IDs should have three letters instead of two + gdf.loc[gdf.LEVL_CODE == 0, "id"] = gdf.loc[gdf.LEVL_CODE == 0, "country_code"] + # Many country names are broken or missing + gdf.loc[gdf.LEVL_CODE == 0, "name"] = gdf.loc[gdf.LEVL_CODE == 0, "id"].apply( + lambda x: pycountry.countries.lookup(x).name + ) + + gdf["level"] = 'nuts' + gdf.LEVL_CODE.astype(str) + + elif src == 'gadm': + gdf["level"] = gdf.source_ds_lyr.str.rsplit('_', 1, expand=True)[1].astype(int) + for lvl in gdf.level.unique(): + lvl_mask = gdf.level == lvl + gdf.loc[lvl_mask, "country_code"] = gdf.loc[lvl_mask, "GID_0"] + gdf.loc[lvl_mask, "id"] = gdf.loc[lvl_mask, f"GID_{lvl}"] + gdf.loc[lvl_mask, "name"] = gdf.loc[lvl_mask, f"NAME_{lvl}"] + if lvl > 0: + gdf.loc[lvl_mask, "type"] = gdf.loc[lvl_mask, f"ENGTYPE_{lvl}"] + else: + gdf.loc[lvl_mask, "type"] = "country" + gdf["proper"] = True + gdf["level"] = 'gadm' + gdf.level.astype(str) + + elif src == 'lau': + gdf["CNTR_CODE"] = gdf.COMM_ID.str[:2].apply(eu_country_code_to_iso3) + gdf = gdf.rename( + columns={"COMM_ID": "id", "NAME_LATN": "name", "CNTR_CODE": "country_code"} + ) + gdf["type"] = "commune" + gdf["proper"] = gdf["TRUE_COMM_"] == "T" + gdf["level"] = "lau2" + + return gdf + + +def _drop_countries(gdf, config): + countries = [pycountry.countries.lookup(i).alpha_3 for i in config["scope"]["countries"]] + _not_in = set(gdf.country_code).difference(countries) + print(f"Removing {_not_in} as they are outside of study area.") + + return gdf[gdf.country_code.isin(countries)] + + +def _drop_geoms_completely_outside_study_area(gdf, config): + study_area = _study_area(config) + completely_in = gdf.intersects(study_area) + for row_index, row in gdf[~completely_in].iterrows(): + print( + "Removing {} ({}, country={}) as they are outside of study area." + .format(*row[["name", "level", "country_code"]]) + ) + gdf = gdf[completely_in] + + return gdf + + +def _drop_parts_of_geoms_completely_outside_study_area(gdf, config): + gdf = gdf.copy() + study_area = _study_area(config) + all_geoms = gdf.explode() + geoms_within_study_area = all_geoms.within(study_area) + geoms_partially_out = ~geoms_within_study_area.all(level=0) + + # work only with geoms which have some polygons within the study area and some out + geoms_to_update = geoms_within_study_area.mul(geoms_partially_out, level=0) + if gdf.loc[geoms_to_update.any(level=0)].empty: + return gdf + + for row_index, row in gdf.loc[geoms_to_update.any(level=0)].iterrows(): + print( + "Removing parts of {} ({}, country={}) as they are outside of study area." + .format(*row[["name", "level", "country_code"]]) + ) + # Unlike groupby, dissolve can only operate on columns, not multiindex levels + + new_geoms = ( + all_geoms[geoms_to_update] + .geometry + .map(buffer_if_necessary) + .groupby(level=0) + .apply(lambda x: x.unary_union) + .map(_to_multi_polygon) + ) + gdf.loc[new_geoms.index, 'geometry'] = new_geoms + + return gdf + + +def _to_multi_polygon(geometry): + if isinstance(geometry, dict): + geometry = shapely.geometry.shape(geometry) + if isinstance(geometry, shapely.geometry.polygon.Polygon): + return shapely.geometry.MultiPolygon(polygons=[geometry]) + else: + return geometry + + +def _study_area(config): + """ + Create a bounding box for the study area, and cut out holes for all defined + exclusion zones. For plotting purposes, exclusion zones and the bounding box are + defined in opposite orientations, see https://github.com/geopandas/geopandas/issues/951 + """ + holes = [ + ( + (exclusion_zone["x_max"], exclusion_zone["y_min"]), + (exclusion_zone["x_max"], exclusion_zone["y_max"]), + (exclusion_zone["x_min"], exclusion_zone["y_max"]), + (exclusion_zone["x_min"], exclusion_zone["y_min"]) + ) + for exclusion_zone in config["scope"].get("exclusion_zones", {}).values() + ] + + study_area = shapely.geometry.Polygon( + ((config["scope"]["bounds"]["x_min"], config["scope"]["bounds"]["y_min"]), + (config["scope"]["bounds"]["x_min"], config["scope"]["bounds"]["y_max"]), + (config["scope"]["bounds"]["x_max"], config["scope"]["bounds"]["y_max"]), + (config["scope"]["bounds"]["x_max"], config["scope"]["bounds"]["y_min"])), + holes=holes + ) + study_area = buffer_if_necessary(study_area) + + return study_area + + +if __name__ == "__main__": + normalise_admin_borders() diff --git a/src/gadm.py b/src/gadm.py deleted file mode 100644 index 8f6a412..0000000 --- a/src/gadm.py +++ /dev/null @@ -1,139 +0,0 @@ -"""Module to merge and preprocess GADM administrative borders.""" -from itertools import chain - -import click -import fiona -import fiona.transform -import geopandas as gpd -import shapely.geometry -from shapely.prepared import prep - -from src.utils import Config - -LAYER_NAME = "gadm{layer_id}" -SCHEMA = { - "properties": { - "country_code": "str", # id of the country to which the unit belongs - "id": "str", # a unique id of this unit - "name": "str", # the name of the unit, not necessarily unqique - "type": "str", # the type of the unit - "proper": "int" # flag indicating proper administrative unit (not the case for water bodies e.g.) - # FIXME should be bool, but GeoPandas doesn't support bool: https://github.com/geopandas/geopandas/issues/437 - # will be fixed here: https://github.com/geopandas/geopandas/pull/855 - }, - "geometry": "MultiPolygon" -} - - -@click.command() -@click.argument("path_to_countries", nargs=-1, metavar="COUNTRIES...") -@click.argument("max_layer_depths", type=click.INT) -@click.argument("path_to_output") -@click.argument("config", type=Config()) -def retrieve_administrative_borders(path_to_countries, max_layer_depths, path_to_output, config): - study_area = shapely.geometry.box( - minx=config["scope"]["bounds"]["x_min"], - maxx=config["scope"]["bounds"]["x_max"], - miny=config["scope"]["bounds"]["y_min"], - maxy=config["scope"]["bounds"]["y_max"] - ) - study_area = prep(study_area) # improves performance - with fiona.open(path_to_countries[0], "r", layer=0) as first_country: - src_crs = first_country.crs - src_driver = first_country.driver - for layer_id in range(max_layer_depths + 1): - print(f"Merging layer {layer_id}...") - with fiona.open(path_to_output, - "w", - crs=config["crs"], - schema=SCHEMA, - driver=src_driver, - layer=LAYER_NAME.format(layer_id=layer_id)) as merged_file: - merged_file.writerecords( - [_reproject(feature, src_crs, config["crs"]) for feature in chain( - *[_country_features(path_to_country, layer_id, study_area) - for path_to_country in path_to_countries]) - ] - ) - _test_id_uniqueness(path_to_output) - - -def _country_features(path_to_file, layer_id, study_area): - max_layer_id = int(sorted(fiona.listlayers(path_to_file))[-1][-1]) - layer_id = min(layer_id, max_layer_id) - layer_name = fiona.listlayers(path_to_file)[0][:-1] + str(layer_id) - with fiona.open(path_to_file, "r", layer=layer_name) as country_file: - for feature in filter(_in_study_area(study_area), country_file): - new_feature = {} - new_feature["properties"] = {} - new_feature["properties"]["country_code"] = feature["properties"]["GID_0"] - new_feature["properties"]["id"] = feature["properties"][f"GID_{layer_id}"] - new_feature["properties"]["name"] = feature["properties"][f"NAME_{layer_id}"] - new_feature["properties"]["type"] = ( - feature["properties"][f"ENGTYPE_{layer_id}"] if layer_id > 0 - else "country" - ) - new_feature["properties"]["proper"] = True - new_feature["geometry"] = _all_parts_in_study_area(feature, study_area) - yield new_feature - - -def _in_study_area(study_area): - def _in_study_area(feature): - unit = shapely.geometry.shape(feature["geometry"]) - if study_area.contains(unit) or study_area.intersects(unit): - return True - else: - print("Removing {} as it is outside of study area.".format(_feature_name(feature))) - return False - return _in_study_area - - -def _all_parts_in_study_area(feature, study_area): - unit = _to_multi_polygon(feature["geometry"]) - if not study_area.contains(unit): - print("Removing parts of {} outside of study area.".format(_feature_name(feature))) - new_unit = shapely.geometry.MultiPolygon([polygon for polygon in unit.geoms - if study_area.contains(polygon)]) - unit = new_unit - return shapely.geometry.mapping(unit) - - -def _feature_name(feature): - # brute force way of finding name - # the problem is that the name of the feature depends on the layer - for property_name in ["NAME_7", "NAME_6", "NAME_5", "NAME_4", "NAME_3", "NAME_2", "NAME_1", - "NAME_0"]: - try: - name = feature["properties"][property_name] - break - except KeyError: - pass # nothing to do here - return name - - -def _reproject(feature, src_crs, dst_crs): - feature["geometry"] = fiona.transform.transform_geom( - src_crs=src_crs, - dst_crs=dst_crs, - geom=feature["geometry"] - ) - return feature - - -def _to_multi_polygon(geometry): - if isinstance(geometry, dict): - geometry = shapely.geometry.shape(geometry) - if isinstance(geometry, shapely.geometry.polygon.Polygon): - return shapely.geometry.MultiPolygon(polygons=[geometry]) - else: - return geometry - - -def _test_id_uniqueness(path_to_file): - for layer_name in fiona.listlayers(path_to_file): - assert not gpd.read_file(path_to_file, layer=layer_name).id.duplicated().any() - - -if __name__ == "__main__": - retrieve_administrative_borders() diff --git a/src/lau.py b/src/lau.py index fe3f995..dc10c40 100644 --- a/src/lau.py +++ b/src/lau.py @@ -1,15 +1,9 @@ """Preprocessing of raw LAU2 data to bring it into normalised form.""" import click -import fiona -import fiona.transform -import shapely.geometry import geopandas as gpd import pandas as pd -import pycountry -from gadm import SCHEMA, _test_id_uniqueness -from nuts import _to_multi_polygon, _study_area -from conversion import eu_country_code_to_iso3 +from administrative_borders import _to_multi_polygon from utils import Config OUTPUT_DRIVER = "GeoJSON" @@ -32,23 +26,19 @@ def merge(path_to_shapes, path_to_attributes, path_to_output): attributes = gpd.read_file(path_to_attributes) attributes = pd.DataFrame(attributes) # to be able to remove the geo information del attributes["geometry"] - shapes.merge(attributes, on="COMM_ID", how="left").to_file(path_to_output, driver=OUTPUT_DRIVER) + all_shapes = shapes.merge(attributes, on="COMM_ID", how="left") + all_shapes_no_kosovo = _remove_kosovo(all_shapes) + all_shapes_no_kosovo.to_file(path_to_output, driver=OUTPUT_DRIVER) -@lau.command() -@click.argument("path_to_shapes") -@click.argument("path_to_output") -def identify(path_to_shapes, path_to_output): +def _remove_kosovo(shapes): """Identify and remove municipalities in Kosovo. Those municipalities must be removed as we do not have load data and pycountry cannot handle them at the moment (as of 2018, Kosovo does not have a standardised country code). """ - gpd.read_file(path_to_shapes).set_index("COMM_ID").drop(KOSOVO_MUNICIPALITIES).reset_index().to_file( - path_to_output, - driver=OUTPUT_DRIVER - ) + return shapes.set_index("COMM_ID").drop(KOSOVO_MUNICIPALITIES).reset_index() @lau.command() @@ -67,61 +57,5 @@ def degurba(path_to_lau, path_to_degurba, path_to_output): })[["id", "urbanisation_class"]].to_csv(path_to_output, header=True, index=False) -@lau.command() -@click.argument("path_to_lau") -@click.argument("path_to_output") -@click.argument("config", type=Config()) -def normalise(path_to_lau, path_to_output, config): - """Normalises raw LAU2 data.""" - with fiona.open(path_to_lau, "r") as lau_file, fiona.open(path_to_output, - "w", - crs=config["crs"], - schema=SCHEMA, - driver=OUTPUT_DRIVER) as result_file: - result_file.writerecords(_layer_features(lau_file, config)) - _test_id_uniqueness(path_to_output) - - -def _layer_features(lau_file, config): - for feature in filter(lambda feature: _in_study_area(config, feature), lau_file): - new_feature = {} - new_feature["properties"] = {} - new_feature["properties"]["country_code"] = eu_country_code_to_iso3(feature["properties"]["COMM_ID"][:2]) - new_feature["properties"]["id"] = feature["properties"]["COMM_ID"] - new_feature["properties"]["name"] = feature["properties"]["NAME_LATN"] - new_feature["properties"]["type"] = "commune" - new_feature["properties"]["proper"] = True if feature["properties"]["TRUE_COMM_"] == "T" else False - new_feature["geometry"] = _all_parts_in_study_area_and_crs(feature, lau_file.crs, config) - yield new_feature - - -def _all_parts_in_study_area_and_crs(feature, src_crs, config): - study_area = _study_area(config) - unit = _to_multi_polygon(feature["geometry"]) - if not study_area.contains(unit): - print("Removing parts of {} outside of study area.".format(feature["properties"]["COMM_ID"])) - new_unit = shapely.geometry.MultiPolygon([polygon for polygon in unit.geoms - if study_area.contains(polygon)]) - unit = new_unit - geometry = shapely.geometry.mapping(unit) - return fiona.transform.transform_geom( - src_crs=src_crs, - dst_crs=config["crs"], - geom=geometry - ) - - -def _in_study_area(config, feature): - study_area = _study_area(config) - countries = [pycountry.countries.lookup(country) for country in config["scope"]["countries"]] - unit = shapely.geometry.shape(feature["geometry"]) - country = pycountry.countries.lookup(eu_country_code_to_iso3(feature["properties"]["COMM_ID"][:2])) - if (country in countries) and (study_area.contains(unit) or study_area.intersects(unit)): - return True - else: - print("Removing {} as it is outside of study area.".format(feature["properties"]["COMM_ID"])) - return False - - if __name__ == "__main__": lau() diff --git a/src/nuts.py b/src/nuts.py deleted file mode 100644 index fdd35d4..0000000 --- a/src/nuts.py +++ /dev/null @@ -1,137 +0,0 @@ -"""Preprocessing of raw NUTS data to bring it into normalised form.""" -import click -import fiona -import fiona.transform -import shapely.geometry -import geopandas as gpd -import pandas as pd -import pycountry - -from gadm import SCHEMA, _to_multi_polygon, _test_id_uniqueness -from conversion import eu_country_code_to_iso3 -from utils import Config, buffer_if_necessary - -OUTPUT_DRIVER = "GPKG" -LAYER_NAME = "nuts{layer_id}" - - -@click.group() -def nuts(): - pass - - -@nuts.command() -@click.argument("path_to_shapes") -@click.argument("path_to_output") -def to_multipolygon(path_to_shapes, path_to_output): - """Map NUTS shapes to multipolygon.""" - shapes = gpd.read_file(path_to_shapes) - shapes.geometry = shapes.geometry.map(buffer_if_necessary).map(_to_multi_polygon) - shapes.drop('FID', axis=1).to_file( - path_to_output, driver=OUTPUT_DRIVER - ) - - -@nuts.command() -@click.argument("path_to_nuts") -@click.argument("path_to_output") -@click.argument("config", type=Config()) -def normalise(path_to_nuts, path_to_output, config): - """Normalises raw NUTS data. - - Raw data contains all NUTS layers in one layer of one shapefile. The output - of this function corresponds to the form the data is used in this analysis, - where each geographical layer is stored in one layer of a GeoPackage. - """ - with fiona.open(path_to_nuts, "r") as nuts_file: - for layer_id in range(4): - print("Building layer {}...".format(layer_id)) - _write_layer(nuts_file, config, path_to_output, layer_id) - _test_id_uniqueness(path_to_output) - - -def _write_layer(nuts_file, config, path_to_output, layer_id): - with fiona.open(path_to_output, - "w", - crs=config["crs"], - schema=SCHEMA, - driver=OUTPUT_DRIVER, - layer=LAYER_NAME.format(layer_id=layer_id)) as result_file: - result_file.writerecords(_layer_features(nuts_file, config, layer_id)) - - -def _layer_features(nuts_file, config, layer_id): - for feature in filter(_in_layer_and_in_study_area(layer_id, config), nuts_file): - new_feature = {} - new_feature["properties"] = {} - new_feature["properties"]["country_code"] = eu_country_code_to_iso3(feature["properties"]["NUTS_ID"][:2]) - new_feature["properties"]["id"] = feature["properties"]["NUTS_ID"] - new_feature["properties"]["name"] = feature["properties"]["NUTS_NAME"] - new_feature["properties"]["type"] = "country" if layer_id == 0 else None - new_feature["properties"]["proper"] = True - new_feature["geometry"] = _all_parts_in_study_area_and_crs(feature, nuts_file.crs, config) - if layer_id == 0: - new_feature = _fix_country_feature(new_feature) - yield new_feature - - -def _fix_country_feature(feature): - # * IDs should have three letters instead of two - # * many country names are broken or missing - feature["properties"]["id"] = eu_country_code_to_iso3(feature["properties"]["id"]) - feature["properties"]["name"] = pycountry.countries.lookup(feature["properties"]["id"]).name - return feature - - -def _all_parts_in_study_area_and_crs(feature, src_crs, config): - study_area = _study_area(config) - unit = _to_multi_polygon(feature["geometry"]) - if not study_area.contains(unit): - print("Removing parts of {} outside of study area.".format(feature["properties"]["NUTS_ID"])) - new_unit = shapely.geometry.MultiPolygon([polygon for polygon in unit.geoms - if study_area.contains(polygon)]) - unit = new_unit - geometry = shapely.geometry.mapping(unit) - return fiona.transform.transform_geom( - src_crs=src_crs, - dst_crs=config["crs"], - geom=geometry - ) - - -def _in_layer_and_in_study_area(layer_id, config): - def _in_layer_and_in_study_area(feature): - return _in_layer(layer_id, feature) and _in_study_area(config, feature) - return _in_layer_and_in_study_area - - -def _in_layer(layer_id, feature): - if feature["properties"]["LEVL_CODE"] == layer_id: - return True - else: - return False - - -def _study_area(config): - return shapely.geometry.box( - minx=config["scope"]["bounds"]["x_min"], - maxx=config["scope"]["bounds"]["x_max"], - miny=config["scope"]["bounds"]["y_min"], - maxy=config["scope"]["bounds"]["y_max"] - ) - - -def _in_study_area(config, feature): - study_area = _study_area(config) - countries = [pycountry.countries.lookup(country) for country in config["scope"]["countries"]] - unit = shapely.geometry.shape(feature["geometry"]) - country = pycountry.countries.lookup(eu_country_code_to_iso3(feature["properties"]["NUTS_ID"][:2])) - if (country in countries) and (study_area.contains(unit) or study_area.intersects(unit)): - return True - else: - print("Removing {} as it is outside of study area.".format(feature["properties"]["NUTS_ID"])) - return False - - -if __name__ == "__main__": - nuts() diff --git a/src/units.py b/src/units.py index 372ac78..d46c077 100644 --- a/src/units.py +++ b/src/units.py @@ -1,8 +1,6 @@ -"""Remixes NUTS, LAU, and GADM data to form the units of the analysis.""" import click import pandas as pd import geopandas as gpd -import fiona import pycountry from utils import Config @@ -11,15 +9,13 @@ @click.command() -@click.argument("path_to_nuts") -@click.argument("path_to_lau2") -@click.argument("path_to_gadm") +@click.argument("path_to_borders") @click.argument("path_to_output") @click.argument("layer_name") @click.argument("config", type=Config()) -def remix_units(path_to_nuts, path_to_lau2, path_to_gadm, path_to_output, layer_name, config): +def remix_units(path_to_borders, path_to_output, layer_name, config): """Remixes NUTS, LAU, and GADM data to form the units of the analysis.""" - source_layers = _read_source_layers(path_to_nuts, path_to_lau2, path_to_gadm) + source_layers = _read_source_layers(path_to_borders, config["layers"][layer_name]) _validate_source_layers(source_layers) _validate_layer_config(config, layer_name) layer = _build_layer(config["layers"][layer_name], source_layers) @@ -29,16 +25,11 @@ def remix_units(path_to_nuts, path_to_lau2, path_to_gadm, path_to_output, layer_ _write_layer(layer, path_to_output) -def _read_source_layers(path_to_nuts, path_to_lau2, path_to_gadm): +def _read_source_layers(path_to_borders, layers): source_layers = { - layer_name: gpd.read_file(path_to_nuts, layer=layer_name) - for layer_name in fiona.listlayers(path_to_nuts) + layer_name: gpd.read_file(path_to_borders, layer=layer_name) + for layer_name in set(layers.values()) } - source_layers["lau2"] = gpd.read_file(path_to_lau2) - source_layers.update({ - layer_name: gpd.read_file(path_to_gadm, layer=layer_name) - for layer_name in fiona.listlayers(path_to_gadm) - }) return source_layers diff --git a/src/utils.py b/src/utils.py index 9e98fc5..9b6465d 100644 --- a/src/utils.py +++ b/src/utils.py @@ -98,7 +98,11 @@ def buffer_if_necessary(shape): Following the advice given here: https://github.com/Toblerity/Shapely/issues/344 """ - if not shape.is_valid: - shape = shape.buffer(0.0) - assert shape.is_valid - return shape \ No newline at end of file + if shape.is_valid: + return shape + + new_shape = shape.buffer(0.0) + assert new_shape.is_valid + assert np.isclose(new_shape.area, shape.area, rtol=1e-5) + + return new_shape diff --git a/tests/test_administrative_borders.py b/tests/test_administrative_borders.py index 0904ffb..78d1dbb 100644 --- a/tests/test_administrative_borders.py +++ b/tests/test_administrative_borders.py @@ -1,16 +1,161 @@ +import os import yaml +from pathlib import Path import geopandas as gpd import pytest +import shapely.geometry +import pycountry +import fiona -# Loading the 20M file for its smaller size (1M used in actual workflow) -URL_NUTS = "https://ec.europa.eu/eurostat/cache/GISCO/distribution/v2/nuts/shp/NUTS_RG_20M_{}_4326.shp.zip" +from src.administrative_borders import _study_area, _to_multi_polygon, _drop_countries, _drop_geoms_completely_outside_study_area, _drop_parts_of_geoms_completely_outside_study_area, _update_features + +# Loading the 60M file for its smaller size (1M used in actual workflow) +URL_NUTS = "https://ec.europa.eu/eurostat/cache/GISCO/distribution/v2/nuts/geojson/NUTS_RG_60M_{}_4326.geojson" +ROOT_DIR = Path(os.path.abspath(__file__)).parent.parent with open('config/schema.yaml', 'r') as src: - config_schema = yaml.safe_load(src) + CONFIG_SCHEMA = yaml.safe_load(src) + +with open('config/default.yaml', 'r') as src: + CONFIG_DEFAULT = yaml.safe_load(src) -@pytest.mark.parametrize("year", config_schema["properties"]["parameters"]["properties"]["nuts-year"]["enum"]) + +@pytest.mark.parametrize("year", CONFIG_SCHEMA["properties"]["parameters"]["properties"]["nuts-year"]["enum"]) def test_nuts_years(year): gdf = gpd.read_file(URL_NUTS.format(year)) - cols = ['FID', 'NUTS_ID', 'LEVL_CODE', 'NUTS_NAME'] + cols = ['id', 'FID', 'LEVL_CODE', 'NUTS_NAME'] assert all(i in gdf.columns for i in cols) + +class TestStudyArea(): + def config(self, exclusions=0): + _config = { + "scope": { + 'bounds': { + 'x_min': 0, + 'x_max': 1, + 'y_min': 50, + 'y_max': 51, + } + } + } + if exclusions > 0: + _config["scope"]["exclusion_zones"] = { + 'foo': { + 'x_min': 0.5, + 'x_max': 0.6, + 'y_min': 50.2, + 'y_max': 50.5, + } + } + if exclusions == 2: + _config["scope"]["exclusion_zones"]["bar"] = { + 'x_min': 0, + 'x_max': 0.1, + 'y_min': 50, + 'y_max': 50.1, + } + return _config + + def test_bounding_box(self): + area = _study_area(self.config()) + assert area.bounds == (0.0, 50.0, 1.0, 51.0) + + + @pytest.mark.parametrize( + "exclusions,not_in,is_in", + [(1, (0.55, 50.3), (0.7, 50.7)), (2, (0.05, 50.05), (0.05, 50.3))] + ) + def test_exclusion_zone(self, exclusions, not_in, is_in): + no_exclusions = _study_area(self.config()) + with_exclusions = _study_area(self.config(exclusions)) + + assert no_exclusions.area > with_exclusions.area + assert with_exclusions.bounds == no_exclusions.bounds + + assert not shapely.geometry.Point(not_in).within(with_exclusions) + assert shapely.geometry.Point(is_in).within(with_exclusions) + + +class TestGeomManipulation(): + CONFIG = { + "scope": { + 'bounds': { # IRE and GBR, not including shetland and some of cornwall + 'x_min': -10, + 'x_max': 1, + 'y_min': 51.11, + 'y_max': 60.1, + } + } + } + NOT_DROPPED = ['IRL', 'North West (England)'] + FULL_DROPPED = [ + "Removing France (nuts0, country=FRA) as they are outside of study area", + ] + PARTIAL_DROPPED = [ + "Removing parts of Highlands and Islands (nuts2, country=GBR)", + "Removing parts of United Kingdom (nuts0, country=GBR)", + ] + + @pytest.fixture("class") + def europe_gdf(self): + gdf = gpd.read_file( + URL_NUTS.format(CONFIG_DEFAULT["parameters"]["nuts-year"]) + ) + return _update_features(gdf, "nuts") + + def test_multipolygon(self, europe_gdf): + assert not all([isinstance(i, shapely.geometry.Polygon) for i in europe_gdf.geometry]) + + polys = europe_gdf.geometry.map(_to_multi_polygon) + + assert all([isinstance(i, shapely.geometry.MultiPolygon) for i in polys]) + assert all(polys.area == europe_gdf.area) + + def test_drop_countries(self, europe_gdf): + config = { + "scope": {"countries": ["Germany", "France", "Greece"]} + } + + assert set(europe_gdf.country_code) != set( + [pycountry.countries.lookup(i).alpha_3 for i in config["scope"]["countries"]] + ) + + gdf = _drop_countries(europe_gdf, config) + + assert set(gdf.country_code) == set( + [pycountry.countries.lookup(i).alpha_3 for i in config["scope"]["countries"]] + ) + + def test_drop_geoms(self, capsys, europe_gdf): + gdf = _drop_geoms_completely_outside_study_area(europe_gdf, self.CONFIG) + out, err = capsys.readouterr() + + for i in self.NOT_DROPPED + self.PARTIAL_DROPPED: + assert i not in out + + for i in self.FULL_DROPPED: + assert i in out + assert sorted(gdf.country_code.unique()) == sorted(['IRL', 'GBR']) + + def test_drop_parts_of_geoms(self, capsys, europe_gdf): + gdf = _drop_parts_of_geoms_completely_outside_study_area(europe_gdf, self.CONFIG) + out, err = capsys.readouterr() + for i in self.NOT_DROPPED + self.FULL_DROPPED: + assert i not in out + for i in self.PARTIAL_DROPPED: + assert i in out + assert ( + europe_gdf[europe_gdf.name == 'Highlands and Islands'].area + > gdf[gdf.name == 'Highlands and Islands'].area + ).all() + +class TestFinalBorders(): + PATH_TO_BORDERS = os.path.join(ROOT_DIR, "build", "administrative-borders.gpkg") + + def test_administrative_border_layers(self): + layers = fiona.listlayers(self.PATH_TO_BORDERS) + default_layers = set( + v for countries in CONFIG_DEFAULT['layers'].values() for k, v in countries.items() + ) + assert len(default_layers.difference(layers)) == 0 diff --git a/tests/test_sonnendach_reference.py b/tests/test_sonnendach_reference.py index ec78a08..e45aa01 100644 --- a/tests/test_sonnendach_reference.py +++ b/tests/test_sonnendach_reference.py @@ -14,7 +14,7 @@ PATH_TO_CATEGORIES = ROOT_DIR / "build" / "technically-eligible-land.tif" PATH_TO_AREAS = ROOT_DIR / "build" / "technically-eligible-area-km2.tif" PATH_TO_ENERGY_YIELD = ROOT_DIR / "build" / "technically-eligible-electricity-yield-pv-prio-twh.tif" -PATH_TO_NUTS = ROOT_DIR / "build" / "administrative-borders-nuts.gpkg" +PATH_TO_NUTS = ROOT_DIR / "build" / "administrative-borders.gpkg" PATH_TO_SONNENDACH_AREA_ESTIMATE = ROOT_DIR / "data" / "automatic" / "sonnendach" /\ "total-rooftop-area-km2.txt" PATH_TO_SONNENDACH_YIELD_ESTIMATE = ROOT_DIR / "data" / "automatic" / "sonnendach" /\