From 94498e92e56c2c1947db891881e9424f072dd6b1 Mon Sep 17 00:00:00 2001
From: hboisgon <helene.boisgontier@deltares.nl>
Date: Fri, 18 Oct 2024 14:00:00 +0800
Subject: [PATCH] update setup_allocation_areas workflow

---
 examples/update_model_water_demand.ipynb |  38 +++-
 examples/wflow_update_water_demand.yml   |   4 +-
 hydromt_wflow/wflow.py                   |  79 +++++---
 hydromt_wflow/workflows/demand.py        | 232 ++++++++++-------------
 tests/test_model_methods.py              |  11 +-
 5 files changed, 190 insertions(+), 174 deletions(-)

diff --git a/examples/update_model_water_demand.ipynb b/examples/update_model_water_demand.ipynb
index 5b379dee..9f829609 100644
--- a/examples/update_model_water_demand.ipynb
+++ b/examples/update_model_water_demand.ipynb
@@ -111,7 +111,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -121,7 +121,7 @@
     "\n",
     "mod = WflowModel(\n",
     "    root=\"wflow_piave_water_demand\",\n",
-    "    mode=\"r\",\n",
+    "    mode=\"r+\",\n",
     "    data_libs=[\n",
     "        \"artifact_data\",\n",
     "        \"https://github.com/Deltares/hydromt_wflow/releases/download/v0.5.0/wflow_artifacts.yml\"\n",
@@ -371,7 +371,9 @@
    "source": [
     "#### Water allocation regions\n",
     "\n",
-    "To define regions where water can be shared and allocated, a merge between catchment and administrative boundaries is computed. These result in regions where wflow can allocate available water. No water allocation is supported between these regions. To give an impression on how these regions look like, see the following figures for an example."
+    "To define regions where water can be shared and allocated, a merge between catchment and water areas or administrative boundaries is computed. These result in regions where wflow can allocate available water. No water allocation is supported between these regions. To give an impression on how these regions look like, see the following figures for an example.\n",
+    "\n",
+    "Note: Water areas or regions are generally defined by sub-river-basins within a Country. In order to mimick reality, it is advisable to avoid cross-Country-border abstractions. Whenever information is available, it is strongly recommended to align the water regions with the actual areas managed by water management authorities, such as regional water boards."
    ]
   },
   {
@@ -388,7 +390,35 @@
     "ax.set_title(\"Allocation areas\")\n",
     "\n",
     "mod.grid[\"allocation_areas\"].raster.mask_nodata().plot(ax=ax, add_labels=False)\n",
-    "admin.plot(ax=ax, facecolor=\"none\", edgecolor=\"black\")\n",
+    "admin.plot(ax=ax, facecolor=\"none\", edgecolor=\"red\")\n",
+    "mod.geoms[\"rivers\"].plot(ax=ax)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "When merging the wflow basins with the water regions, some small subbasins can be created that do not contain river cells. These small basins will be merged to larger basins. When merging, you can decide if you prefer to merge with the nearest downstream basin, or with any basin in the same water region that does contain river using the ``priotity_basins`` argument. In the previous map, we gave priority to the basins, here is the results if priority is given to water regions instead:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create allocations areas \n",
+    "mod.setup_allocation_areas(\n",
+    "    waterareas_fn=\"gadm_level2\",\n",
+    "    priority_basins=False,\n",
+    ")\n",
+    "\n",
+    "fig, ax = plt.subplots(1)\n",
+    "\n",
+    "ax.set_title(\"Allocation areas\")\n",
+    "\n",
+    "mod.grid[\"allocation_areas\"].raster.mask_nodata().plot(ax=ax, add_labels=False, cmap=\"viridis_r\")\n",
+    "admin.plot(ax=ax, facecolor=\"none\", edgecolor=\"red\")\n",
     "mod.geoms[\"rivers\"].plot(ax=ax)"
    ]
   },
diff --git a/examples/wflow_update_water_demand.yml b/examples/wflow_update_water_demand.yml
index 66b5a6c7..81e2cc0c 100644
--- a/examples/wflow_update_water_demand.yml
+++ b/examples/wflow_update_water_demand.yml
@@ -1,6 +1,6 @@
 setup_allocation_areas:
-  min_area: 30
-  admin_bounds_fn: gadm_level2
+  waterareas_fn: gadm_level2
+  priority_basins: True
 
 setup_allocation_surfacewaterfrac:
   gwfrac_fn: lisflood_gwfrac
diff --git a/hydromt_wflow/wflow.py b/hydromt_wflow/wflow.py
index e31de358..d5deeefc 100644
--- a/hydromt_wflow/wflow.py
+++ b/hydromt_wflow/wflow.py
@@ -1029,7 +1029,7 @@ def setup_laimaps(
                 logger=self.logger,
             )
             # Save to csv
-            if isinstance(lulc_fn, str):
+            if isinstance(lulc_fn, str) and not isfile(lulc_fn):
                 df_fn = f"lai_per_lulc_{lulc_fn}.csv"
             else:
                 df_fn = "lai_per_lulc.csv"
@@ -3347,55 +3347,58 @@ def setup_1dmodel_connection(
 
     def setup_allocation_areas(
         self,
-        admin_bounds_fn: Union[str, gpd.GeoDataFrame] | None = None,
-        min_area: float | int = 0,
+        waterareas_fn: Union[str, gpd.GeoDataFrame],
+        priority_basins: bool = True,
     ):
         """Create water demand allocation areas.
 
         The areas are based on the wflow model basins (at model resolution), the
-        wflow model rivers and optional provided administrative boundaries.
+        wflow model rivers and water areas or regions for allocation.
+
+        Water regions are generally defined by sub-river-basins within a Country. In
+        order to mimick reality, it is advisable to avoid cross-Country-border
+        abstractions. Whenever information is available, it is strongly recommended to
+        align the water regions with the actual areas managed by water management
+        authorities, such as regional water boards.
 
         The allocation area will be an intersection the the wflow model basins and the
-        administrative boundaries (if provided). Small area that do not contain any
-        river cells will be merged with their larger bordering neighbors. The same
-        is done for those areas (even if they contain river cells) that fall under the
-        `min_area` threshold.
+        water areas. For areas that do not contain river cells after intersection with
+        the water areas, the priority_basins flag can be used to decide if these basins
+        should be merged with the closest downstream basin or with any large enough
+        basin in the same water area.
 
         Parameters
         ----------
-        admin_bounds_fn : Union[str, gpd.GeoDataFrame] | None, optional
+        waterareas_fn : Union[str, gpd.GeoDataFrame]
             Administrative boundaries in geopandas GeoDataFrame format, this could be
-            e.g. the boundaries of sovereign nations, by default None
-        min_area : float | int, optional
-            The minimum area in square kilometers the allocation area is ought to be.
-            A value of 30 sqkm for most cases is adequate, by default 0
+            e.g. water management areas by water boards or the administrative
+            boundaries of countries.
+        priority_basins : bool, optional
+            If True, merge the basins with the closest downstream basin, else merge
+            with any large enough basin in the same water area, by default True.
         """
         self.logger.info("Preparing water demand allocation map.")
-        # Will be fixes but for know this is done like this
-        # TODO fix in the future
-        admin_bounds = None
-        if admin_bounds_fn is not None:
-            admin_bounds = self.data_catalog.get_geodataframe(
-                admin_bounds_fn,
-                geom=self.region,
-            )
-            # Add this identifier for usage in the workflow
-            admin_bounds["admin_id"] = range(len(admin_bounds))
+
+        # Read the data
+        waterareas = self.data_catalog.get_geodataframe(
+            waterareas_fn,
+            geom=self.region,
+        )
 
         # Create the allocation grid
-        alloc = workflows.demand.allocation_areas(
-            da_like=self.grid[self._MAPS["rivmsk"]],
-            min_area=min_area,
-            admin_bounds=admin_bounds,
-            basins=self.geoms["basins"],
+        da_alloc, gdf_alloc = workflows.demand.allocation_areas(
+            ds_like=self.grid,
+            waterareas=waterareas,
+            basins=self.basins,
+            priority_basins=priority_basins,
         )
-        self.set_grid(alloc, name="allocation_areas")
+        self.set_grid(da_alloc, name="allocation_areas")
 
         # Update the settings toml
         self.set_config("input.vertical.allocation.areas", "allocation_areas")
 
         # Add alloc to geoms
-        self.set_geoms(alloc.raster.vectorize(), name="allocation_areas")
+        self.set_geoms(gdf_alloc, name="allocation_areas")
 
     def setup_allocation_surfacewaterfrac(
         self,
@@ -3404,6 +3407,7 @@ def setup_allocation_surfacewaterfrac(
         gwbodies_fn: Optional[Union[str, xr.DataArray]] = None,
         ncfrac_fn: Optional[Union[str, xr.DataArray]] = None,
         interpolate_nodata: bool = False,
+        mask_and_scale_gwfrac: bool = True,
     ):
         """Create the fraction of water allocated from surface water.
 
@@ -3443,6 +3447,11 @@ def setup_allocation_surfacewaterfrac(
             If True, nodata values in the resulting frac_sw_used map will be linearly
             interpolated. Else a default value of 1 will be used for nodata values
             (default).
+        mask_and_scale_gwfrac : bool, optional
+            If True, gwfrac will be masked for areas with no groundwater bodies. To keep
+            the average gwfrac used over waterareas similar after the masking, gwfrac
+            for areas with groundwater bodies can increase. If False, gwfrac will be
+            used as is. By default True.
         """
         self.logger.info("Preparing surface water fraction map.")
         # Load the data
@@ -3493,6 +3502,7 @@ def setup_allocation_surfacewaterfrac(
             gwbodies=gwbodies,
             ncfrac=ncfrac,
             interpolate=interpolate_nodata,
+            mask_and_scale_gwfrac=mask_and_scale_gwfrac,
         )
 
         # Update the settings toml
@@ -3558,6 +3568,15 @@ def setup_domestic_demand(
             buffer=2,
             variables=["dom_gross", "dom_net"],
         )
+        # Increase the buffer if original resolution is provided
+        if domestic_fn_original_res is not None:
+            buffer = np.ceil(domestic_fn_original_res / abs(domestic_raw.raster.res[0]))
+            domestic_raw = self.data_catalog.get_rasterdataset(
+                domestic_fn,
+                geom=self.region,
+                buffer=buffer,
+                variables=["dom_gross", "dom_net"],
+            )
         # Check if data is time dependent
         if "time" in domestic_raw.coords:
             # Check that this is indeed cyclic data
diff --git a/hydromt_wflow/workflows/demand.py b/hydromt_wflow/workflows/demand.py
index f709497d..3f03938d 100644
--- a/hydromt_wflow/workflows/demand.py
+++ b/hydromt_wflow/workflows/demand.py
@@ -1,7 +1,7 @@
 """Workflow for water demand."""
 
 import logging
-from typing import List, Optional
+from typing import List, Optional, Tuple
 
 import dask
 import geopandas as gpd
@@ -9,7 +9,6 @@
 import xarray as xr
 from hydromt import raster
 from hydromt.workflows.grid import grid_from_constant
-from scipy.ndimage import convolve
 
 logger = logging.getLogger(__name__)
 
@@ -254,138 +253,100 @@ def other_demand(
 
 
 def allocation_areas(
-    da_like: xr.DataArray,
-    min_area: float | int,
-    admin_bounds: gpd.GeoDataFrame,
+    ds_like: xr.Dataset,
+    waterareas: gpd.GeoDataFrame,
     basins: gpd.GeoDataFrame,
-) -> xr.DataArray:
+    priority_basins: bool = True,
+) -> Tuple[xr.DataArray, gpd.GeoDataFrame]:
     """Create water allocation area.
 
     Based on current wflow model domain and resolution and making use of
-    the model basins and optional administrative boundaries.
+    the model basins and administrative boundaries.
 
     Parameters
     ----------
-    da_like : xr.DataArray
-        A grid covering the wflow model domain.
-    min_area : float | int
-        The minimum area an allocation area should have.
-    admin_bounds : gpd.GeoDataFrame
+    ds_like : xr.DataArray
+        A grid covering the wflow model domain and the rivers.
+
+        * Required variables: ['wflow_river', 'wflow_subcatch']
+    waterareas : gpd.GeoDataFrame
         Administrative boundaries, e.g. sovereign nations.
     basins : gpd.GeoDataFrame
         The wflow model basins.
+    priority_basins : bool
+        For basins that do not contain river cells after intersection with the admin
+        boundaries, the priority_basins flag can be used to decide if these basins
+        should be merged with the closest downstream basin (True, default) or with any
+        large enough basin in the same administrative area (False).
 
     Returns
     -------
     xr.DataArray
         The water demand allocation areas.
+    gpd.GeoDataFrame
+        The water demand allocation areas as geodataframe.
     """
-    # Set variables
-    nodata = -9999
-    # Add a unique identifier as a means for dissolving later on, bit pro forma here
-    sub_basins = basins.copy()
-    sub_basins["uid"] = range(len(sub_basins))
-
-    # Split based on administrative boundaries
-    if admin_bounds is not None:
-        sub_basins = basins.overlay(
-            admin_bounds,
-            how="union",
-        )
-        sub_basins = sub_basins[~sub_basins["value"].isna()]
-
-        # Remove unneccessary columns
-        cols = sub_basins.columns.drop(["value", "geometry", "admin_id"]).tolist()
-        sub_basins.drop(cols, axis=1, inplace=True)
-        # Use this uid to dissolve on later
-        sub_basins["uid"] = range(len(sub_basins))
-
-        # Dissolve cut pieces back
-        for _, row in sub_basins.iterrows():
-            if not str(row.admin_id).lower() == "nan":
-                continue
-            touched = sub_basins[sub_basins.touches(row.geometry)]
-            uid = touched[touched["value"] == row.value].uid.values[0]
-            sub_basins.loc[sub_basins["uid"] == row.uid, "uid"] = uid
-        sub_basins = sub_basins.dissolve("uid", sort=False, as_index=False)
-
-    # Set the contain flag per geom
-    # TODO figure out the code below for more precise allocation areas
-    sub_basins = sub_basins.explode(index_parts=False, ignore_index=True)
-    sub_basins["uid"] = range(len(sub_basins))
-    admin_bounds = None
-
-    # Setup the allocation grid based on exploded geometries
-    # TODO get exploded geometries from something raster based
-    alloc = full(da_like, fill_value=nodata, dtype=np.int32, name="allocation_areas")
-    alloc = alloc.raster.rasterize(sub_basins, col_name="uid", nodata=nodata)
-
-    # Get the areas per exploded area
-    alloc_area = alloc.to_dataset()
-    alloc_area["area"] = da_like.raster.area_grid()
-    area = alloc_area.groupby("uid").sum().area
-    del alloc_area
-    # To km2
-    area = area.drop_sel(uid=nodata) / 1e6
-
-    _count = 0
-    old_no_riv = None
-
-    # Solve the area iteratively
-    while True:
-        # Break if cannot be solved
-        if _count == 100:
-            break
-
-        # Define the surround matrix for convolution
-        kernel = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]])
-
-        # Get ID's of basins containing a river and those that do not
-        riv = np.setdiff1d(
-            np.unique(alloc.where(da_like == 1, nodata)),
-            [-9999],
-        )
-        no_riv = np.setdiff1d(area.uid.values, riv)
-
-        # Also look at minimum areas
-        area_riv = area.sel(uid=riv)
-        area_mask = area_riv < min_area
-        no_riv = np.append(no_riv, area_riv.uid[area_mask].values)
-
-        # Solved, as there are not more areas with no river, so break
-        if no_riv.size == 0:
-            break
-
-        # Solve with corners included, as the areas remaining touch diagonally
-        if np.array_equal(np.sort(no_riv), old_no_riv):
-            kernel = np.array([[1, 1, 1], [1, 0, 1], [1, 1, 1]])
-
-        # Loop through all the area that have no river and merge those
-        # with the areas that lie besides it
-        for val in no_riv:
-            # Mask based on the value of the subbasin without a river
-            mask = alloc == val
-            # Find the ids of surrouding basins based on the kernel
-            rnd = convolve(mask.astype(int).values, kernel, mode="constant")
-            nbs = np.setdiff1d(
-                np.unique(alloc.values[np.where(rnd > 0)]), [nodata, val]
-            )
-            del rnd
-            # If none are found, continue to the next
-            if nbs.size == 0:
-                continue
-            # Set the new id of this subbasin based on the largest next to it
-            area_nbs = area.sel(uid=nbs)
-            new_uid = area_nbs.uid[area_nbs.argmax(dim="uid")].values
-            alloc = alloc.where(alloc != val, new_uid)
+    # Intersect the basins with the admin boundaries
+    subbasins = basins.overlay(waterareas, how="intersection")
+    # Create a unique index
+    subbasins.index = np.arange(1, len(subbasins) + 1)
+    subbasins.index.name = "uid"
+
+    # After intersection, some cells at the coast are not included in the subbasins
+    # convert to raster to fill na with nearest value
+    subbasins_raster = ds_like.raster.rasterize(subbasins, col_name="uid", nodata=0)
+    subbasins_raster = subbasins_raster.raster.interpolate_na(
+        method="nearest", extrapolate=True
+    )
+    # Mask the wflow subcatch after extrapolation
+    subbasins_raster = subbasins_raster.where(
+        ds_like["wflow_subcatch"] != ds_like["wflow_subcatch"].raster.nodata,
+        subbasins_raster.raster.nodata,
+    )
 
-        # Take out the merged areas and append counter
-        area = area.sel(uid=np.setdiff1d(np.unique(alloc.values), [nodata]))
-        _count += 1
-        old_no_riv = np.sort(no_riv)
+    # Convert back to geodataframe and prepare the new unique index for the subbasins
+    subbasins = subbasins_raster.raster.vectorize()
+    if priority_basins:
+        # Redefine the uid, equivalent to exploding the geometries
+        subbasins.index = np.arange(1, len(subbasins) + 1)
+    else:
+        # Keep the uid of the intersection with the admin bounds
+        subbasins.index = subbasins["value"].astype(int)
+    subbasins.index.name = "uid"
+
+    # Rasterize the subbasins
+    da_subbasins = ds_like.raster.rasterize(subbasins, col_name="uid", nodata=0)
+    # Mask with river cells
+    da_subbasins_to_keep = np.unique(
+        da_subbasins.raster.mask_nodata()
+        .where(ds_like["wflow_river"] > 0, np.nan)
+        .values
+    )
+    # Remove the nodata value from the list
+    da_subbasins_to_keep = np.int32(
+        da_subbasins_to_keep[~np.isnan(da_subbasins_to_keep)]
+    )
+
+    # Create the water allocation map starting with the subbasins that contain a river
+    allocation_areas = da_subbasins.where(
+        da_subbasins.isin(da_subbasins_to_keep), da_subbasins.raster.nodata
+    )
+    # Use nearest to fill the nodata values for subbasins without rivers
+    allocation_areas = allocation_areas.raster.interpolate_na(
+        method="nearest", extrapolate=True
+    )
+    allocation_areas = allocation_areas.where(
+        ds_like["wflow_subcatch"] != ds_like["wflow_subcatch"].raster.nodata,
+        allocation_areas.raster.nodata,
+    )
+    allocation_areas.name = "allocation_areas"
+
+    # Create the equivalent geodataframe
+    allocation_areas_gdf = allocation_areas.raster.vectorize()
+    allocation_areas_gdf.index = allocation_areas_gdf["value"].astype(int)
+    allocation_areas_gdf.index.name = "uid"
 
-    alloc.name = "allocation_areas"
-    return alloc
+    return allocation_areas, allocation_areas_gdf
 
 
 def surfacewaterfrac_used(
@@ -395,6 +356,7 @@ def surfacewaterfrac_used(
     gwbodies: Optional[xr.DataArray] = None,
     ncfrac: Optional[xr.DataArray] = None,
     interpolate: bool = False,
+    mask_and_scale_gwfrac: bool = True,
 ) -> xr.DataArray:
     """Create surface water fraction map.
 
@@ -412,6 +374,11 @@ def surfacewaterfrac_used(
         Non-conventional water fraction. If None, assumes 0.
     interpolate : bool
         Interpolate missing data values within wflow model domain.
+    mask_and_scale_gwfrac : bool, optional
+        If True, gwfrac will be masked for areas with no groundwater bodies. To keep
+        the average gwfrac used over waterareas similar after the masking, gwfrac
+        for areas with groundwater bodies can increase. If False, gwfrac will be
+        used as is. By default True.
 
     Returns
     -------
@@ -466,27 +433,32 @@ def surfacewaterfrac_used(
 
     # Get the fractions based on area count
     x, y = np.where(~np.isnan(gwfrac_raw_mr))
-    gw_pixels = np.take(
-        np.bincount(
+    if mask_and_scale_gwfrac:
+        gw_pixels = np.take(
+            np.bincount(
+                waterareas_mr.values[x, y],
+                weights=gwbodies_mr.values[x, y],
+            ),
             waterareas_mr.values[x, y],
-            weights=gwbodies_mr.values[x, y],
-        ),
-        waterareas_mr.values[x, y],
-    )
-    a_pixels = np.take(
-        np.bincount(
+        )
+        a_pixels = np.take(
+            np.bincount(
+                waterareas_mr.values[x, y],
+                weights=gwbodies_mr.values[x, y] * 0.0 + 1.0,
+            ),
             waterareas_mr.values[x, y],
-            weights=gwbodies_mr.values[x, y] * 0.0 + 1.0,
-        ),
-        waterareas_mr.values[x, y],
-    )
+        )
+        scale_factor = a_pixels / (gw_pixels + 0.01)
+    else:
+        scale_factor = 1.0
 
     # Determine the groundwater fraction
     gwfrac_val = np.minimum(
-        gwfrac_raw_mr.values[x, y] * (a_pixels / (gw_pixels + 0.01)),
+        gwfrac_raw_mr.values[x, y] * scale_factor,
         1 - ncfrac_mr.values[x, y],
     )
-    gwfrac_val[np.where(gwbodies_mr.values[x, y] == 0)] = 0
+    if mask_and_scale_gwfrac:
+        gwfrac_val[np.where(gwbodies_mr.values[x, y] == 0)] = 0
     # invert to get surface water frac
     swfrac_val = np.maximum(np.minimum(1 - gwfrac_val - ncfrac_mr.values[x, y], 1), 0)
 
diff --git a/tests/test_model_methods.py b/tests/test_model_methods.py
index d3ae24c4..c50d1bcd 100644
--- a/tests/test_model_methods.py
+++ b/tests/test_model_methods.py
@@ -906,24 +906,19 @@ def test_setup_allocation_areas(example_wflow_model, tmpdir):
 
     # Use the method
     example_wflow_model.setup_allocation_areas(
-        admin_bounds_fn="gadm_level2",
-        min_area=30,
+        waterareas_fn="gadm_level2",
+        priority_basins=True,
     )
 
     # Assert entries
     assert "allocation_areas" in example_wflow_model.geoms
     assert "allocation_areas" in example_wflow_model.grid
 
-    # Write to the drive
-    example_wflow_model.write_geoms()
-
     # Assert output values
-    assert Path(tmpdir, "staticgeoms", "allocation_areas.geojson").exists()
     assert len(example_wflow_model.geoms["allocation_areas"]) == 3
     # on unique values
-
     uni = example_wflow_model.geoms["allocation_areas"].value.unique()
-    assert np.all(np.sort(uni) == [36, 37, 57])
+    assert np.all(np.sort(uni) == [11, 16, 17])
 
 
 def test_setup_allocation_surfacewaterfrac(example_wflow_model, tmpdir):