Commit 81b85b8
Merge pull request #95 from Deltares/94-bug-memory-error-when-loading-large-raster-datasets-clipped-to-a-geometry

94 bug memory error when loading large raster datasets clipped to a geometry
DirkEilander authored Jul 12, 2023
2 parents 6e5f078 + 16f655a commit 81b85b8
Showing 5 changed files with 157 additions and 89 deletions.
6 changes: 6 additions & 0 deletions docs/changelog.rst
@@ -13,6 +13,7 @@ Bugfix
- bugfix in `SfincsModel.write_forcing` to ensure all NetCDF files are written instead of only the first one. PR #86
- bugfix in `SfincsModel.read_config` & `SfincsInput.read` for relative paths in inp file. PR #88
- bugfix in `SfincsModel.setup_subgrid` to ensure that a 'big geotiff' will be written by default when 'write_dep_tif' or 'write_man_tif' are True
- fix memory issues caused by rasterizing the model region and reprojecting before clipping of rasters. PR #94
- bugfix in `Sfincs.read_forcing` when combining attributes from the locations stored in the gis folder with the actual forcing locations. PR #99
- bugfix in `SfincsModel.setup_discharge_from_grid` when snapping based on upstream area in case a src points is outside of the uparea grid domain. PR #99

@@ -21,10 +22,15 @@ New
- `SfincsModel.setup_cn_infiltration_with_kr` to setup three layers related to the curve number (maximum and effective infiltration capacity; smax and seff) and recovery rate (kr). PR #87
- `SfincsModel.setup_drainage_structures` to setup drainage structures (pumps, culverts) from a geodataframe. PR #90
- Added `SfincsModel.setup_wind_forcing`, `SfincsModel.setup_wind_forcing_from_grid` and `SfincsModel.setup_pressure_forcing_from_grid` methods to easily add wind and pressure forcing. PR #92
- `SfincsModel.read_config` allows using a template input file from a directory other than the model root. PR #102
- Added the option to use landuse/landcover data combined with a reclass table to `SfincsModel.setup_constant_infiltration`. PR #103
- Enabled providing locations only (without timeseries) for `SfincsModel.setup_waterlevel_forcing` and `SfincsModel.setup_discharge_forcing`. PR #104
- New optional buffer argument in `SfincsModel.setup_discharge_forcing` to select gauges around the boundary only. PR #104

Changed
-------
- `SfincsModel.setup_mask_active` argument `reset_mask` now defaults to True. PR #94

v1.0 (17 April 2023)
====================

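The recurring change in this diff replaces `geom=self.region` — a polygon that had to be rasterized and reprojected before the raster could be clipped — with `bbox=self.mask.raster.transform_bounds(4326)`, a plain bounding box expressed in the CRS of the source data. A minimal sketch of the new pattern on a dummy mask grid follows; the catalog call is commented out because it needs a real data catalog, and the source name `merit_hydro` is only an assumed catalog entry.

import numpy as np
import xarray as xr
import hydromt  # registers the .raster accessor used below

# Dummy mask grid in a projected CRS, standing in for SfincsModel.mask.
da_mask = xr.DataArray(
    np.ones((10, 10), dtype="u1"),
    coords={"y": 5.0e6 - np.arange(10) * 100.0, "x": 5.0e5 + np.arange(10) * 100.0},
    dims=("y", "x"),
)
da_mask.raster.set_crs(32631)  # UTM zone 31N

# The fix: express the mask bounds in the CRS of the data source (EPSG:4326)
# and clip with bbox=..., instead of rasterizing/reprojecting a region polygon.
bbox = da_mask.raster.transform_bounds(4326)  # (xmin, ymin, xmax, ymax)
print(bbox)

# With a real data catalog the clipped read then becomes:
# from hydromt import DataCatalog
# cat = DataCatalog("data_catalog.yml")
# da = cat.get_rasterdataset("merit_hydro", bbox=bbox, buffer=5)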
4 changes: 2 additions & 2 deletions hydromt_sfincs/regulargrid.py
@@ -144,7 +144,7 @@ def create_mask_active(
         drop_area: float = 0,
         connectivity: int = 8,
         all_touched: bool = True,
-        reset_mask: bool = False,
+        reset_mask: bool = True,
         logger: logging.Logger = logger,
     ) -> xr.DataArray:
         """Create an integer mask with inactive (msk=0) and active (msk=1) cells, optionally bounded
@@ -179,7 +179,7 @@ def create_mask_active(
             include (or exclude) geometries. If False, include a cell only if its center is
             within one of the shapes, or if it is selected by Bresenham's line algorithm.
         reset_mask: bool, optional
-            If True, reset existing mask layer. If False (default) updating existing mask.
+            If True (default), reset the existing mask layer. If False, update the existing mask.

         Returns
         -------
123 changes: 79 additions & 44 deletions hydromt_sfincs/sfincs.py
@@ -344,14 +344,15 @@ def setup_dep(
         )

         # check if nan data is present in the bed levels
-        if not np.isnan(da_dep).any():
+        if np.isnan(da_dep).any():
             self.logger.warning(
                 f"Interpolate data at {int(np.sum(np.isnan(da_dep.values)))} cells"
             )
+            # TODO add extrapolate=True option when available in hydromt core
             da_dep = da_dep.raster.interpolate_na(method="rio_idw")

         self.set_grid(da_dep, name="dep")
-        # FIXME this shouldn't be necessary, since da_dep should already have the crs
+        # FIXME this shouldn't be necessary, since da_dep should already have a crs
         if self.crs is not None and self.grid.raster.crs is None:
             self.grid.set_crs(self.crs)

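The first hunk above fixes an inverted condition: the interpolation branch previously ran only when no NaNs were present. A small runnable sketch of the corrected logic on dummy data, assuming hydromt's `raster.interpolate_na` as used in the hunk:

import numpy as np
import xarray as xr
import hydromt  # registers the .raster accessor

# Dummy dep grid with one NaN hole, standing in for the merged topobathy.
da_dep = xr.DataArray(
    np.array([[1.0, np.nan], [2.0, 3.0]]),
    coords={"y": [1.0, 0.0], "x": [0.0, 1.0]},
    dims=("y", "x"),
)
da_dep.raster.set_crs(32631)
da_dep.raster.set_nodata(np.nan)

# Fixed condition: interpolate when NaNs ARE present (old code had `not ...`).
if np.isnan(da_dep).any():
    print(f"Interpolate data at {int(np.sum(np.isnan(da_dep.values)))} cells")
    da_dep = da_dep.raster.interpolate_na(method="rio_idw")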
@@ -374,7 +375,7 @@ def setup_mask_active(
         drop_area: float = 0.0,
         connectivity: int = 8,
         all_touched: bool = True,
-        reset_mask: bool = False,
+        reset_mask: bool = True,
     ):
         """Setup active model cells.
@@ -418,7 +419,7 @@ def setup_mask_active(
             include (or exclude) geometries. If False, include a cell only if its center is
             within one of the shapes, or if it is selected by Bresenham's line algorithm.
         reset_mask: bool, optional
-            If True, reset existing mask layer. If False (default) updating existing mask.
+            If True (default), reset the existing mask layer. If False, update the existing mask.
         """
         # read geometries
         gdf_mask, gdf_include, gdf_exclude = None, None, None
@@ -481,9 +482,15 @@ def setup_mask_active(
         if "indexfile" not in self.config:
             self.config.update({"indexfile": "sfincs.ind"})
         # update region
-        self.logger.info("Derive region geometry based on active cells.")
-        region = da_mask.where(da_mask <= 1, 1).raster.vectorize()
-        self.set_geoms(region, "region")
+        if np.any(da_mask >= 1):
+            self.logger.info("Derive region geometry based on active cells.")
+            # make mask with ones and zeros only -> vectorize ones
+            region = da_mask.where(da_mask <= 1, 1).raster.vectorize()
+            if region.empty:
+                raise ValueError("No region found.")
+            self.set_geoms(region, "region")
+        else:
+            self.logger.warning("No active cells found.")

     def setup_mask_bounds(
         self,
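The region-derivation hunk above now guards against empty masks. A dummy-data sketch of the clamp-then-vectorize step, assuming hydromt's `raster.vectorize` (which polygonizes contiguous cells per value, skipping nodata):

import numpy as np
import xarray as xr
import hydromt  # registers the .raster accessor

# Dummy msk grid: 0 = inactive, 1 = active, 2/3 = boundary cells.
da_mask = xr.DataArray(
    np.array([[0, 1], [2, 3]], dtype="u1"),
    coords={"y": [1.0, 0.0], "x": [0.0, 1.0]},
    dims=("y", "x"),
)
da_mask.raster.set_crs(32631)
da_mask.raster.set_nodata(0)

if np.any(da_mask >= 1):
    # clamp boundary codes (2, 3) to 1, then polygonize the active cells
    region = da_mask.where(da_mask <= 1, 1).raster.vectorize()
    if region.empty:
        raise ValueError("No region found.")
    print(region)  # GeoDataFrame with the model region polygon(s)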
@@ -517,11 +524,12 @@ def setup_mask_bounds(
         btype: {'waterlevel', 'outflow'}
             Boundary type
         include_mask, exclude_mask: str, Path, gpd.GeoDataFrame, optional
-            Path or data source name for geometries with areas to include/exclude from the model boundary.
+            Path or data source name for geometries with areas to include/exclude from
+            the model boundary.
         zmin, zmax : float, optional
             Minimum and maximum elevation thresholds for boundary cells.
-            Note that when include and exclude areas are used, the elevation range is only applied
-            on cells within the include area and outside the exclude area.
+            Note that when include and exclude areas are used, the elevation range is
+            only applied on cells within the include area and outside the exclude area.
         reset_bounds: bool, optional
             If True, reset existing boundary cells of the selected boundary
             type (`btype`) before setting new boundary cells, by default False.
@@ -536,7 +544,7 @@ def setup_mask_bounds(

         # get include / exclude geometries
         gdf_include, gdf_exclude = None, None
-        bbox = self.region.to_crs(4326).total_bounds
+        bbox = self.mask.raster.transform_bounds(4326)
         if include_mask is not None:
             if not isinstance(include_mask, gpd.GeoDataFrame) and str(
                 include_mask
@@ -609,9 +617,15 @@ def setup_subgrid(
         ----------
         datasets_dep : List[dict]
             List of dictionaries with topobathy data.
-            Each should minimally contain a data catalog source name, data file path, or xarray raster object ('elevtn')
-            Optional merge arguments include 'zmin', 'zmax', 'mask', 'offset', 'reproj_method', and 'merge_method'.
-            e.g.: [{'elevtn': merit_hydro, 'zmin': 0.01}, {'elevtn': gebco, 'offset': 0, 'merge_method': 'first', reproj_method: 'bilinear'}]
+            Each should minimally contain a data catalog source name, data file path,
+            or xarray raster object ('elevtn').
+            Optional merge arguments include 'zmin', 'zmax', 'mask', 'offset',
+            'reproj_method', and 'merge_method'. e.g.:
+            [
+                {'elevtn': merit_hydro, 'zmin': 0.01},
+                {'elevtn': gebco, 'offset': 0, 'merge_method': 'first', 'reproj_method': 'bilinear'}
+            ]
             For a complete overview of all merge options, see :py:func:`~hydromt.workflows.merge_multi_dataarrays`
         datasets_rgh : List[dict], optional
             List of dictionaries with Manning's n datasets. Each dictionary should at least contain one of the following:
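As a usage illustration of the reflowed `datasets_dep` docstring above, a hypothetical list of topobathy datasets; the source names `merit_hydro` and `gebco` are assumed entries in the user's data catalog:

# Hypothetical datasets_dep list following the docstring; merge keys as documented.
datasets_dep = [
    {"elevtn": "merit_hydro", "zmin": 0.01},
    {"elevtn": "gebco", "offset": 0, "merge_method": "first", "reproj_method": "bilinear"},
]
# sf.setup_subgrid(datasets_dep=datasets_dep)  # sf: an initialized SfincsModel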
@@ -762,7 +776,7 @@ def setup_river_inflow(
         if hydrography is not None:
             ds = self.data_catalog.get_rasterdataset(
                 hydrography,
-                geom=self.region,
+                bbox=self.mask.raster.transform_bounds(4326),
                 variables=["uparea", "flwdir"],
                 buffer=5,
             )
@@ -881,7 +895,7 @@ def setup_river_outflow(
         if hydrography is not None:
             ds = self.data_catalog.get_rasterdataset(
                 hydrography,
-                geom=self.region,
+                bbox=self.mask.raster.transform_bounds(4326),
                 variables=["uparea", "flwdir"],
                 buffer=5,
             )
@@ -1040,7 +1054,9 @@ def setup_cn_infiltration(self, cn, antecedent_moisture="avg", reproj_method="med"):
             By default 'med'. For more information see :py:meth:`hydromt.raster.RasterDataArray.reproject_like`
         """
         # get data
-        da_org = self.data_catalog.get_rasterdataset(cn, geom=self.region, buffer=10)
+        da_org = self.data_catalog.get_rasterdataset(
+            cn, bbox=self.mask.raster.transform_bounds(4326), buffer=10
+        )
         # read variable
         v = "cn"
         if antecedent_moisture:
@@ -1089,10 +1105,14 @@ def setup_cn_infiltration_with_kr(

         # Read the datafiles
         da_landuse = self.data_catalog.get_rasterdataset(
-            lulc, geom=self.region, buffer=10
+            lulc, bbox=self.mask.raster.transform_bounds(4326), buffer=10
         )
-        da_HSG = self.data_catalog.get_rasterdataset(hsg, geom=self.region, buffer=10)
-        da_Ksat = self.data_catalog.get_rasterdataset(ksat, geom=self.region, buffer=10)
+        da_HSG = self.data_catalog.get_rasterdataset(
+            hsg, bbox=self.mask.raster.transform_bounds(4326), buffer=10
+        )
+        da_Ksat = self.data_catalog.get_rasterdataset(
+            ksat, bbox=self.mask.raster.transform_bounds(4326), buffer=10
+        )
         df_map = self.data_catalog.get_dataframe(reclass_table, index_col=0)

         # Define outputs
@@ -1655,7 +1675,9 @@ def setup_waterlevel_forcing(
             df_ts += offset
         else:
             da_offset = self.data_catalog.get_rasterdataset(
-                offset, geom=self.region, buffer=5
+                offset,
+                bbox=self.mask.raster.transform_bounds(4326),
+                buffer=5,
             )
             offset_pnts = da_offset.raster.sample(gdf_locs)
             df_offset = offset_pnts.to_pandas().reindex(df_ts.columns).fillna(0)
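A sketch of the offset-sampling step above with dummy data: `raster.sample` returns the raster values at point geometries, which are then aligned to the forcing columns (the gauge ids 1 and 2 are made up):

import geopandas as gpd
import numpy as np
import xarray as xr
import hydromt  # registers the .raster accessor
from shapely.geometry import Point

# Dummy vertical-offset grid and two gauge locations.
da_offset = xr.DataArray(
    np.array([[0.1, 0.2], [0.3, 0.4]]),
    coords={"y": [1.0, 0.0], "x": [0.0, 1.0]},
    dims=("y", "x"),
)
da_offset.raster.set_crs(4326)
gdf_locs = gpd.GeoDataFrame(
    index=[1, 2], geometry=[Point(0.0, 1.0), Point(1.0, 0.0)], crs=4326
)

# Sample at the gauge points, align to the forcing columns, default to 0.
offset_pnts = da_offset.raster.sample(gdf_locs)
df_offset = offset_pnts.to_pandas().reindex([1, 2]).fillna(0)
print(df_offset)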
@@ -1861,15 +1883,18 @@ def setup_discharge_forcing_from_grid(
         # read data
         ds = self.data_catalog.get_rasterdataset(
             discharge,
-            geom=self.region,
+            bbox=self.mask.raster.transform_bounds(4326),
             buffer=2,
             time_tuple=self.get_model_time(),  # model time
             variables=["discharge"],
             single_var_as_array=False,
         )
         if uparea is not None and "uparea" in gdf.columns:
             da_upa = self.data_catalog.get_rasterdataset(
-                uparea, geom=self.region, buffer=2, variables=["uparea"]
+                uparea,
+                bbox=self.mask.raster.transform_bounds(4326),
+                buffer=2,
+                variables=["uparea"],
             )
             # make sure ds and da_upa align
             ds["uparea"] = da_upa.raster.reproject_like(ds, method="nearest")
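A sketch of the alignment step above: `reproject_like` resamples one grid onto another grid's transform and shape so cells match one-to-one (dummy WGS84 grids, nearest-neighbour resampling as in the hunk):

import numpy as np
import xarray as xr
import hydromt  # registers the .raster accessor

def make_grid(data, n):
    # helper: n x n grid over a ~1 degree extent in WGS84
    da = xr.DataArray(
        data,
        coords={"y": np.linspace(1.0, 0.0, n), "x": np.linspace(0.0, 1.0, n)},
        dims=("y", "x"),
    )
    da.raster.set_crs(4326)
    da.raster.set_nodata(-9999.0)
    return da

da_q = make_grid(np.zeros((4, 4)), 4)    # stands in for the discharge grid
da_upa = make_grid(np.ones((8, 8)), 8)   # stands in for the uparea grid

# Snap the uparea grid onto the (coarser) discharge grid so cells align.
da_upa_aligned = da_upa.raster.reproject_like(da_q, method="nearest")
assert da_upa_aligned.shape == da_q.shape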
@@ -1929,7 +1954,7 @@ def setup_precip_forcing_from_grid(
         # get data for model domain and config time range
         precip = self.data_catalog.get_rasterdataset(
             precip,
-            geom=self.region,
+            bbox=self.mask.raster.transform_bounds(4326),
             buffer=2,
             time_tuple=self.get_model_time(),
             variables=["precip"],
@@ -3189,17 +3214,21 @@ def get_model_time(self):

     ## helper method
     def _parse_datasets_dep(self, datasets_dep, res):
-        """Parse filenames or paths of Datasets in list of dictionaries datasets_dep into xr.DataArray and gdf.GeoDataFrames:
+        """Parse filenames or paths of Datasets in list of dictionaries datasets_dep
+        into xr.DataArray and gdf.GeoDataFrames:

         * "elevtn" is parsed into da (xr.DataArray)
         * "offset" is parsed into da_offset (xr.DataArray)
         * "mask" is parsed into gdf (gpd.GeoDataFrame)

         Parameters
         ----------
         datasets_dep : List[dict]
-            List of dictionaries with topobathy data, each containing a dataset name or Path (dep) and optional merge arguments.
+            List of dictionaries with topobathy data, each containing a dataset name or
+            Path (dep) and optional merge arguments.
         res : float
-            Resolution of the model grid in meters. Used to obtain the correct zoom level of the depth datasets.
+            Resolution of the model grid in meters. Used to obtain the correct zoom
+            level of the depth datasets.
         """
         parse_keys = ["elevtn", "offset", "mask", "da"]
         copy_keys = ["zmin", "zmax", "reproj_method", "merge_method"]
@@ -3212,18 +3241,17 @@ def _parse_datasets_dep(self, datasets_dep, res):
                 try:
                     da_elv = self.data_catalog.get_rasterdataset(
                         dataset.get("elevtn", dataset.get("da")),
-                        geom=self.mask.raster.box,
+                        bbox=self.mask.raster.transform_bounds(4326),
                         buffer=10,
                         variables=["elevtn"],
                         zoom_level=(res, "meter"),
                     )
-                    dd.update({"da": da_elv})
-                except:
+                # TODO remove ValueError after fix in hydromt core
+                except (IndexError, ValueError):
                     data_name = dataset.get("elevtn")
-                    self.logger.warning(
-                        f"{data_name} not used; probably because all the data is outside of the mask."
-                    )
+                    self.logger.warning(f"No data in domain for {data_name}, skipped.")
                     continue
+                dd.update({"da": da_elv})
             else:
                 raise ValueError(
                     "No 'elevtn' (topobathy) dataset provided in datasets_dep."
@@ -3234,16 +3262,16 @@ def _parse_datasets_dep(self, datasets_dep, res):
             if "offset" in dataset and not isinstance(dataset["offset"], (float, int)):
                 da_offset = self.data_catalog.get_rasterdataset(
                     dataset.get("offset"),
-                    geom=self.mask.raster.box,
-                    buffer=20,
+                    bbox=self.mask.raster.transform_bounds(4326),
+                    buffer=10,
                 )
                 dd.update({"offset": da_offset})

             # read geodataframes describing valid areas
             if "mask" in dataset:
                 gdf_valid = self.data_catalog.get_geodataframe(
                     path_or_key=dataset.get("mask"),
-                    geom=self.mask.raster.box,
+                    bbox=self.mask.raster.transform_bounds(4326),
                 )
                 dd.update({"gdf_valid": gdf_valid})

@@ -3258,17 +3286,21 @@ def _parse_datasets_dep(self, datasets_dep, res):
         return datasets_out

     def _parse_datasets_rgh(self, datasets_rgh):
-        """Parse filenames or paths of Datasets in list of dictionaries datasets_rgh into xr.DataArrays and gdf.GeoDataFrames:
+        """Parse filenames or paths of Datasets in list of dictionaries datasets_rgh
+        into xr.DataArrays and gdf.GeoDataFrames:

         * "manning" is parsed into da (xr.DataArray)
-        * "lulc" is parsed into da (xr.DataArray) using reclassify table in "reclass_table"
+        * "lulc" is parsed into da (xr.DataArray) using reclass table in "reclass_table"
         * "mask" is parsed into gdf_valid (gpd.GeoDataFrame)

         Parameters
         ----------
         datasets_rgh : List[dict], optional
-            List of dictionaries with Manning's n datasets. Each dictionary should at least contain one of the following:
+            List of dictionaries with Manning's n datasets. Each dictionary should at
+            least contain one of the following:
             * (1) manning: filename (or Path) of gridded data with manning values
-            * (2) lulc (and reclass_table) :a combination of a filename of gridded landuse/landcover and a reclassify table.
+            * (2) lulc (and reclass_table): a combination of a filename of gridded
+              landuse/landcover and a reclassify table.
             In addition, optional merge arguments can be provided e.g.: merge_method, mask
         """
         parse_keys = ["manning", "lulc", "reclass_table", "mask", "da"]
@@ -3281,7 +3313,7 @@ def _parse_datasets_rgh(self, datasets_rgh):
             if "manning" in dataset or "da" in dataset:
                 da_man = self.data_catalog.get_rasterdataset(
                     dataset.get("manning", dataset.get("da")),
-                    geom=self.mask.raster.box,
+                    bbox=self.mask.raster.transform_bounds(4326),
                     buffer=10,
                 )
                 dd.update({"da": da_man})
@@ -3291,12 +3323,15 @@ def _parse_datasets_rgh(self, datasets_rgh):
                 reclass_table = dataset.get("reclass_table", None)
                 if reclass_table is None and isinstance(lulc, str):
                     reclass_table = join(DATADIR, "lulc", f"{lulc}_mapping.csv")
-                if not os.path.isfile(reclass_table) and isinstance(lulc, str):
+                if reclass_table is None:
                     raise IOError(
                         f"Manning roughness mapping file not found: {reclass_table}"
                     )
                 da_lulc = self.data_catalog.get_rasterdataset(
-                    lulc, geom=self.mask.raster.box, buffer=10, variables=["lulc"]
+                    lulc,
+                    bbox=self.mask.raster.transform_bounds(4326),
+                    buffer=10,
+                    variables=["lulc"],
                 )
                 df_map = self.data_catalog.get_dataframe(reclass_table, index_col=0)
                 # reclassify
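A sketch of the reclassification that follows these lines: landuse codes are mapped to Manning's n through a table indexed by lulc class. It assumes hydromt's `raster.reclassify`, which returns a Dataset with one variable per table column; the codes and roughness values below are made up:

import numpy as np
import pandas as pd
import xarray as xr
import hydromt  # registers the .raster accessor

# Dummy landuse grid; codes stand in for real land cover classes.
da_lulc = xr.DataArray(
    np.array([[10, 20], [20, 30]]),
    coords={"y": [1.0, 0.0], "x": [0.0, 1.0]},
    dims=("y", "x"),
)
da_lulc.raster.set_crs(4326)

# Reclass table: index = lulc code, column "N" = Manning's n (illustrative).
df_map = pd.DataFrame({"N": [0.10, 0.04, 0.025]}, index=[10, 20, 30])

ds_man = da_lulc.raster.reclassify(df_map)  # Dataset with variable "N"
da_man = ds_man["N"]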
@@ -3309,7 +3344,7 @@ def _parse_datasets_rgh(self, datasets_rgh):
             if "mask" in dataset:
                 gdf_valid = self.data_catalog.get_geodataframe(
                     path_or_key=dataset.get("mask"),
-                    geom=self.mask.raster.box,
+                    bbox=self.mask.raster.transform_bounds(4326),
                 )
                 dd.update({"gdf_valid": gdf_valid})

