From aafcf9b13e036b0bd7ce2c865a67887771a173b8 Mon Sep 17 00:00:00 2001 From: vikineema Date: Tue, 21 Nov 2023 13:23:53 +0300 Subject: [PATCH] filter large polygons before grouping by wofs_ls regions --- deafrica_waterbodies/cli/generate_polygons.py | 29 +++++++++---------- tests/test_generate_polygons_cli.py | 2 +- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/deafrica_waterbodies/cli/generate_polygons.py b/deafrica_waterbodies/cli/generate_polygons.py index cbc426d..64c4b4c 100644 --- a/deafrica_waterbodies/cli/generate_polygons.py +++ b/deafrica_waterbodies/cli/generate_polygons.py @@ -102,12 +102,6 @@ show_default=True, help="Maximum area in m2 of the waterbody polygons to be included.", ) -@click.option( - "--length-threshold-km", - default=150, - show_default=True, - help="Length threshold in kilometers by which to filter out large polygons.", -) @click.option( "--output-directory", type=str, @@ -125,10 +119,16 @@ help="File name for the final output", ) @click.option( - "--split-by-wofs-ls-regions/--no-split-by-wofs-ls-regions", + "--group-by-wofs-ls-regions/--not-group-by-wofs-ls-regions", default=True, help="Group waterbody polygons by wofs_ls regions.", ) +@click.option( + "--length-threshold-km", + default=150, + show_default=True, + help="Length threshold in kilometers by which to filter out large polygons before grouping polygons by wofs_ls region.", +) def generate_polygons( verbose, aoi_vector_file, @@ -142,11 +142,11 @@ def generate_polygons( overwrite, min_polygon_size, max_polygon_size, - length_threshold_km, output_directory, timeseries_directory, file_name_prefix, - split_by_wofs_ls_regions, + group_by_wofs_ls_regions, + length_threshold_km, ): """ Generate water body polygons from WOfS All Time Summary data @@ -196,7 +196,7 @@ def generate_polygons( fs.mkdirs(final_outputs_dir, exist_ok=True) _log.info(f"Created directory {final_outputs_dir}") - if split_by_wofs_ls_regions: + if group_by_wofs_ls_regions: if not check_dir_exists(polygons_split_by_region_dir): fs.mkdirs(polygons_split_by_region_dir, exist_ok=True) _log.info(f"Created directory {polygons_split_by_region_dir}") @@ -333,10 +333,6 @@ def generate_polygons( waterbodies_gdf = add_polygon_properties(polygons=waterbodies_gdf) - waterbodies_gdf = filter_by_length( - polygons_gdf=waterbodies_gdf, length_threshold_km=length_threshold_km - ) - waterbodies_gdf = add_timeseries_attribute( polygons=waterbodies_gdf, timeseries_directory=timeseries_directory, @@ -355,7 +351,10 @@ def generate_polygons( waterbodies_gdf_4326.to_parquet(os.path.join(final_outputs_dir, f"{file_name_prefix}.parquet")) - if split_by_wofs_ls_regions: + if group_by_wofs_ls_regions: + waterbodies_gdf_4326 = filter_by_length(polygons_gdf=waterbodies_gdf_4326, + length_threshold_km=length_threshold_km) + split_by_region_fps = split_polygons_by_region( # noqa F841 polygons_gdf=waterbodies_gdf_4326, output_directory=polygons_split_by_region_dir, diff --git a/tests/test_generate_polygons_cli.py b/tests/test_generate_polygons_cli.py index 42ad3e9..6606152 100644 --- a/tests/test_generate_polygons_cli.py +++ b/tests/test_generate_polygons_cli.py @@ -47,7 +47,7 @@ def test_generate_polygons(runner, capsys: pytest.CaptureFixture): f"--max-polygon-size={max_polygon_size}", f"--length-threshold-km={length_threshold_km}", "--overwrite", - "--no-split-by-wofs-ls-regions", + "--not-group-by-wofs-ls-regions", f"--timeseries-directory={timeseries_directory}", f"--file-name-prefix={file_name_prefix}", f"--output-directory={output_directory}",