From ac42882381af5ef40748faffbdce1062a5eab5cd Mon Sep 17 00:00:00 2001 From: "Leaf, Andrew T" Date: Tue, 29 Aug 2023 13:06:10 -0500 Subject: [PATCH 1/4] Improvements to lake package * Add automatic writing of lake polygon and lake cell connection shapefiles * Add name_column arg to shapefile input (which adds names to lake auxiliary tables) * Allow PRISM input to be specified for all lakes (via single filename instead of dictionary) * Allow specification of lakes_shapefile: without include_ids: item * Move output tables to tables/ folder --- mfsetup/lakes.py | 65 +++++++++++++++++++++------ mfsetup/mf6_defaults.yml | 3 +- mfsetup/mfmodel.py | 18 +++++--- mfsetup/mfnwt_defaults.yml | 5 ++- mfsetup/tests/data/pfl_nwt_test.yml | 1 + mfsetup/tests/test_pfl_mfnwt_inset.py | 5 ++- 6 files changed, 74 insertions(+), 23 deletions(-) diff --git a/mfsetup/lakes.py b/mfsetup/lakes.py index f6047ee5..4d3f901c 100644 --- a/mfsetup/lakes.py +++ b/mfsetup/lakes.py @@ -1,6 +1,7 @@ import os import time import warnings +from pathlib import Path import flopy import geopandas as gpd @@ -10,7 +11,7 @@ from shapely.geometry import Polygon fm = flopy.modflow -from gisutils import get_proj_str, project, shp2df +from gisutils import project, shp2df from mfsetup.evaporation import hamon_evaporation from mfsetup.fileio import save_array @@ -26,7 +27,7 @@ def make_lakarr2d(grid, lakesdata, - include_ids, id_column='hydroid'): + include_ids=None, id_column='hydroid'): """ Make a nrow x ncol array with lake package extent for each lake, using the numbers in the 'id' column in the lakes shapefile. @@ -44,7 +45,10 @@ def make_lakarr2d(grid, lakesdata, id_column = id_column.lower() lakes.columns = [c.lower() for c in lakes.columns] lakes.index = lakes[id_column] - lakes = lakes.loc[include_ids] + if include_ids is not None: + lakes = lakes.loc[include_ids] + else: + include_ids = lakes[id_column].tolist() lakes['lakid'] = np.arange(1, len(lakes) + 1) lakes['geometry'] = [Polygon(g.exterior) for g in lakes.geometry] arr = rasterize(lakes, grid=grid, id_column='lakid') @@ -155,15 +159,15 @@ def setup_lake_info(model): source_data = model.cfg.get('lak', {}).get('source_data') if source_data is None or 'lak' not in model.package_list: return - lakesdata = model.load_features(**source_data['lakes_shapefile']) - lakesdata_proj_str = get_proj_str(source_data['lakes_shapefile']['filename']) + kwargs = get_input_arguments(source_data['lakes_shapefile'], model.load_features) + lakesdata = model.load_features(**kwargs) id_column = source_data['lakes_shapefile']['id_column'].lower() name_column = source_data['lakes_shapefile'].get('name_column', 'name').lower() nlakes = len(lakesdata) # make dataframe with lake IDs, names and locations centroids = project([g.centroid for g in lakesdata.geometry], - lakesdata_proj_str, 'epsg:4269') + lakesdata.crs, 4269) # boundnames for lakes # from source shapefile lak_ids = np.arange(1, nlakes + 1) @@ -194,8 +198,16 @@ def setup_lake_info(model): df['strt'] = np.array(stages) # save a lookup file mapping lake ids to hydroids - lookup_file = model.cfg['lak']['output_files']['lookup_file'].format(model.name) - df.drop('geometry', axis=1).to_csv(os.path.join(model._tables_path, lookup_file), index=False) + lookup_file = Path( + model.cfg['lak']['output_files']['lookup_file'].format(model.name)).name + out_table = Path(model._tables_path) / lookup_file + df.drop('geometry', axis=1).to_csv(out_table) + print(f'wrote {out_table}') + poly_shp = Path( + 
model.cfg['lak']['output_files']['lak_polygons_shapefile'].format(model.name)).name + out_shapefile = Path(model._shapefiles_path) / poly_shp + df.to_file(out_shapefile) + print(f'wrote {out_shapefile}') # clean up names #df['name'].replace('nan', '', inplace=True) @@ -245,6 +257,15 @@ def setup_lake_fluxes(model, block='lak'): if format == 'prism': sd = PrismSourceData.from_config(cfg, dest_model=model) data = sd.get_data() + # if PRISM source data were specified without any lake IDs + # assign to all lakes + if data['lake_id'].sum() == 0: + dfs = [] + for lake_id in model.lake_info['lak_id']: + data_lake_id = data.copy() + data_lake_id['lak_id'] = lake_id + dfs.append(data_lake_id) + data = pd.concat(dfs, axis=0) tmid = data['start_datetime'] + (data['end_datetime'] - data['start_datetime'])/2 data['day_of_year'] = tmid.dt.dayofyear id_lookup = {'feat_id': dict(zip(model.lake_info['lak_id'], @@ -262,9 +283,12 @@ def setup_lake_fluxes(model, block='lak'): data['latitude'], # DD dest_length_units=model.length_units) # update flux dataframe - for c in columns: - if c in data: - df[c] = data[c] + data.sort_values(by=['lak_id', 'per'], inplace=True) + df.sort_values(by=['lak_id', 'per'], inplace=True) + assert np.all(data[['lak_id', 'per']].values == \ + df[['lak_id', 'per']].values) + data.reset_index(drop=True, inplace=True) + df.update(data) else: # TODO: option 3; general csv input for lake fluxes @@ -414,6 +438,21 @@ def setup_lake_connectiondata(model, for_external_file=True, connections_lookup_file = os.path.join(model._tables_path, os.path.split(connections_lookup_file)[1]) model.cfg['lak']['output_files']['connections_lookup_file'] = connections_lookup_file df.to_csv(connections_lookup_file, index=False) + # write shapefile version + k, i, j = map(np.array, zip(*df['cellid'])) + df['k'] = k + df['i'] = i + df['j'] = j + ncol = model.modelgrid.ncol + gwf_nodes = i * ncol + j + gdf = gpd.GeoDataFrame(df.drop('cellid', axis=1), + geometry=np.array(model.modelgrid.polygons)[gwf_nodes], + crs=model.modelgrid.crs) + connections_shapefile = Path( + model.cfg['lak']['output_files']['connections_shapefile'].format(model.name)).name + out_shapefile = Path(model._shapefiles_path) / connections_shapefile + gdf.to_file(out_shapefile) + print(f'wrote {out_shapefile}') # convert to one-based and comment out header if df will be written straight to external file if for_external_file: @@ -698,7 +737,7 @@ def get_data(self): # aggregate the data from multiple files dfs = [] - for id, f in self.filenames.items(): + for lake_id, f in self.filenames.items(): meta = self.parse_header(f) df = pd.read_csv(f, skiprows=meta['skiprows'], header=None, names=meta['column_names']) @@ -727,7 +766,7 @@ def get_data(self): df[meta['column_names'][2]] = meta['temp_conversion'](df[meta['column_names'][2]]) # record lake ID - df[self.id_column] = id + df[self.id_column] = lake_id dfs.append(df) df = pd.concat(dfs) diff --git a/mfsetup/mf6_defaults.yml b/mfsetup/mf6_defaults.yml index 868cf3e4..ce9b0e43 100644 --- a/mfsetup/mf6_defaults.yml +++ b/mfsetup/mf6_defaults.yml @@ -142,8 +142,9 @@ lak: littoral_zone_buffer_width: 20 output_files: lookup_file: '{}_lak_lookup.csv' # output file that maps lake ids to source polygon feature ids + lak_polygons_shapefile: '{}_lak_polygons.shp' connections_lookup_file: '{}_lak_connections_lookup.csv' # output file that maps lake/gw connections to zones - + connections_shapefile: '{}_lak_cells.shp' mvr: options: print_flows: True diff --git a/mfsetup/mfmodel.py b/mfsetup/mfmodel.py 
index 673d33d5..0fe3ccbd 100644 --- a/mfsetup/mfmodel.py +++ b/mfsetup/mfmodel.py @@ -573,8 +573,8 @@ def load_features(self, filename, bbox_filter=None, bbox = self.bbox elif self.parent.modelgrid is not None: bbox = self.parent.modelgrid.bbox - model_proj_str = self.parent.modelgrid.proj_str - assert model_proj_str is not None + model_crs = self.parent.modelgrid.crs + assert model_crs is not None if features_crs != self.modelgrid.crs: bbox_filter = project(bbox, self.modelgrid.crs, features_crs).bounds @@ -600,10 +600,14 @@ def load_features(self, filename, bbox_filter=None, return else: df = self._features[f] - if id_column is not None and include_ids is not None: + if id_column is not None: id_column = id_column.lower() + # convert any floating point dtypes to integer + if df[id_column].dtype == float: + df[id_column] = df[id_column].astype(int) df.index = df[id_column] - df = df.loc[include_ids] + if include_ids is not None: + df = df.loc[include_ids].copy() dfs_list.append(df) df = pd.concat(dfs_list) if len(df) == 0: @@ -942,10 +946,12 @@ def _set_lakarr2d(self): if 'lak' in self.package_list: lakes_shapefile = self.cfg['lak'].get('source_data', {}).get('lakes_shapefile', {}).copy() if lakes_shapefile: - lakesdata = self.load_features(**lakes_shapefile) # caches loaded features + kwargs = get_input_arguments(lakes_shapefile, self.load_features) + lakesdata = self.load_features(**kwargs) # caches loaded features lakes_shapefile['lakesdata'] = lakesdata lakes_shapefile.pop('filename') - lakarr2d = make_lakarr2d(self.modelgrid, **lakes_shapefile) + kwargs = get_input_arguments(lakes_shapefile, make_lakarr2d) + lakarr2d = make_lakarr2d(self.modelgrid, **kwargs) self._lakarr_2d = lakarr2d self._set_isbc2d() diff --git a/mfsetup/mfnwt_defaults.yml b/mfsetup/mfnwt_defaults.yml index edfa1d1b..0daae2fd 100644 --- a/mfsetup/mfnwt_defaults.yml +++ b/mfsetup/mfnwt_defaults.yml @@ -92,7 +92,10 @@ lak: source_data: littoral_zone_buffer_width: 20 output_files: - lookup_file: 'lak_lookup.csv' + lookup_file: '{}_lak_lookup.csv' # output file that maps lake ids to source polygon feature ids + lak_polygons_shapefile: '{}_lak_polygons.shp' + connections_lookup_file: '{}_lak_connections_lookup.csv' # output file that maps lake/gw connections to zones + connections_shapefile: '{}_lak_cells.shp' sfr: diff --git a/mfsetup/tests/data/pfl_nwt_test.yml b/mfsetup/tests/data/pfl_nwt_test.yml index 9620c8ae..2334a674 100644 --- a/mfsetup/tests/data/pfl_nwt_test.yml +++ b/mfsetup/tests/data/pfl_nwt_test.yml @@ -167,6 +167,7 @@ lak: filename: 'plainfieldlakes/source_data/all_lakes.shp' id_column: 'HYDROID' include_ids: [600054357, 600054355, 600054434, 600054319] # list of WDNR HYDROIDs + name_column: 'Name' bathymetry_raster: filename: 'plainfieldlakes/source_data/pfl_bathymetry.tif' length_units: 'meters' diff --git a/mfsetup/tests/test_pfl_mfnwt_inset.py b/mfsetup/tests/test_pfl_mfnwt_inset.py index d2f23251..c3002bf5 100644 --- a/mfsetup/tests/test_pfl_mfnwt_inset.py +++ b/mfsetup/tests/test_pfl_mfnwt_inset.py @@ -461,7 +461,8 @@ def test_lak_setup(pfl_nwt_with_dis): assert not np.any(np.isnan(lak.bdlknc.array)) assert np.any(lak.bdlknc.array == m.cfg['lak']['source_data']['littoral_leakance']) assert np.any(lak.bdlknc.array == m.cfg['lak']['source_data']['profundal_leakance']) - assert os.path.exists(m.cfg['lak']['output_files']['lookup_file']) + lookup_file = Path(m._tables_path, Path(m.cfg['lak']['output_files']['lookup_file']).name) + assert lookup_file.exists() assert lak.lakarr.array.sum() > 0 tabfiles 
= m.cfg['lak']['tab_files'] for f in tabfiles: @@ -492,7 +493,7 @@ def test_lak_setup(pfl_nwt_with_dis): assert len(ds9_entries) == 6 # check that order in lake lookup file is same as specified in include_ids - lookup = pd.read_csv(m.cfg['lak']['output_files']['lookup_file']) + lookup = pd.read_csv(lookup_file) include_ids = m.cfg['lak']['source_data']['lakes_shapefile']['include_ids'] assert lookup.feat_id.tolist() == include_ids From 3f8ac2efb95574f1ef623d6fe67df06b6b72aada Mon Sep 17 00:00:00 2001 From: "Leaf, Andrew T" Date: Tue, 29 Aug 2023 15:49:07 -0500 Subject: [PATCH 2/4] fix(mf6model): issue related to numpy change, where axis=0 now has to be specified for evaluating np.any along the 0th axis; the returned scalar 'True' was resulting in no head observation input getting written --- mfsetup/mf6model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mfsetup/mf6model.py b/mfsetup/mf6model.py index 99dc4d58..6ce25a53 100644 --- a/mfsetup/mf6model.py +++ b/mfsetup/mf6model.py @@ -747,7 +747,7 @@ def setup_obs(self, **kwargs): # including lake package lakes and non lake, non well BCs # (high-K lakes are excluded, since we may want head obs at those locations, # to serve as pseudo lake stage observations) - iobs_domain = ~((self.isbc == 1) | np.any(self.isbc > 2)) + iobs_domain = ~((self.isbc == 1) | np.any(self.isbc > 2, axis=0)) # munge the observation data df = setup_head_observations(self, From d5638b56729dcb0c911358ab36eba8a883bb8765 Mon Sep 17 00:00:00 2001 From: "Leaf, Andrew T" Date: Tue, 29 Aug 2023 20:39:32 -0500 Subject: [PATCH 3/4] feat(utils.get_input_arguments): add errors arg so that function calls can be set to fail if invalid arguments are passed, with the valid args printed --- mfsetup/tdis.py | 3 ++- mfsetup/utils.py | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/mfsetup/tdis.py b/mfsetup/tdis.py index cd37a014..2ce95dc4 100644 --- a/mfsetup/tdis.py +++ b/mfsetup/tdis.py @@ -102,7 +102,8 @@ def parse_perioddata_groups(perioddata_dict, data = defaults.copy() data.update(v) if is_valid_perioddata(data): - data = get_input_arguments(data, setup_perioddata_group) + data = get_input_arguments(data, setup_perioddata_group, + errors='raise') perioddata_groups.append(data) else: print_item(k, data) diff --git a/mfsetup/utils.py b/mfsetup/utils.py index 99e0835a..10aceeb9 100644 --- a/mfsetup/utils.py +++ b/mfsetup/utils.py @@ -41,7 +41,8 @@ def update(d, u): return d -def get_input_arguments(kwargs, function, verbose=False, warn=False, exclude=None): +def get_input_arguments(kwargs, function, verbose=False, warn=False, errors='coerce', + exclude=None): """Return subset of keyword arguments in kwargs dict that are valid parameters to a function or method. 
@@ -91,6 +92,11 @@ def get_input_arguments(kwargs, function, verbose=False, warn=False, exclude=Non
         for k, v in not_arguments.items():
             #print('{}: {}'.format(k, v))
             print_item(k, v)
+    if errors == 'raise' and len(not_arguments) > 0:
+        raise ValueError(
+            f'Invalid input arguments to {function.__name__}(): '
+            f"{', '.join(not_arguments.keys())}\n"
+            f"Valid arguments: {', '.join(params.parameters.keys())}")
     if verbose:
         print('\n')
     return input_kwargs

From bf13083003ca498812fe7d540932d039bbbc17ed Mon Sep 17 00:00:00 2001
From: "Leaf, Andrew T"
Date: Thu, 21 Sep 2023 09:43:23 -0500
Subject: [PATCH 4/4] fix(tdis.py::aggregate_dataframe_to_stress_period): cast
 start_datetime and end_datetime cols to datetime64[ns] so that pd.concat
 doesn't fail with heterogeneous dtypes

---
 mfsetup/tdis.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/mfsetup/tdis.py b/mfsetup/tdis.py
index 2ce95dc4..81d7d7fd 100644
--- a/mfsetup/tdis.py
+++ b/mfsetup/tdis.py
@@ -708,7 +708,12 @@ def aggregate_dataframe_to_stress_period(data, id_column, data_column, datetime_
     aggregated.reset_index(inplace=True)
 
     # add datetime back in
-    aggregated['start_datetime'] = pd.Timestamp(start) if start is not None else start_datetime
+    aggregated['start_datetime'] = start if start is not None else start_datetime
+    # enforce consistent datetime dtypes
+    # (otherwise pd.concat of multiple outputs from this function may fail)
+    for col in 'start_datetime', 'end_datetime':
+        if col in aggregated.columns:
+            aggregated[col] = aggregated[col].astype('datetime64[ns]')
 
     # drop original datetime column, which doesn't reflect dates for period averages
     drop_cols = [datetime_column]
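
A note on the new lake cell connection shapefile in PATCH 1/4: the zero-based (k, i, j) cellids are unpacked into columns, and the within-layer node number i * ncol + j is used to pull the matching cell polygon out of a flat, row-major list of grid-cell polygons. A stripped-down sketch of that indexing, using a tiny invented 2x2 grid and made-up cellid/lakeno values in place of model.modelgrid and the connection table:

import geopandas as gpd
import numpy as np
import pandas as pd
from shapely.geometry import box

nrow, ncol = 2, 2
# flat, row-major list of cell polygons, standing in for model.modelgrid.polygons
polygons = [box(j, -i - 1, j + 1, -i) for i in range(nrow) for j in range(ncol)]

# stand-in for the lake connection table; cellid holds zero-based (k, i, j) tuples
df = pd.DataFrame({'cellid': [(0, 0, 1), (0, 1, 0)], 'lakeno': [1, 1]})
k, i, j = map(np.array, zip(*df['cellid']))
gwf_nodes = i * ncol + j  # within-layer node number for row-major ordering

gdf = gpd.GeoDataFrame(df.drop('cellid', axis=1).assign(k=k, i=i, j=j),
                       geometry=np.array(polygons)[gwf_nodes])
print(gdf)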
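
On the np.any() fix in PATCH 2/4: without axis=0, np.any() reduces the whole 3D isbc array to a single scalar, and or-ing that scalar with (isbc == 1) broadcasts to an all-True array, so iobs_domain comes out all False and no head observation input gets written. A minimal sketch of the difference; the array shape and flag values below are invented, only the axis=0 behavior comes from the patch:

import numpy as np

# invented stand-in for model.isbc: (nlay, nrow, ncol) boundary-condition flags
isbc = np.zeros((2, 3, 3), dtype=int)
isbc[0, 1, 1] = 1  # a lake-package cell
isbc[1, 2, 2] = 3  # some other non-lake, non-well boundary cell

# old expression: np.any over the whole array returns a scalar True,
# which broadcasts so that iobs_domain is False everywhere
iobs_domain_old = ~((isbc == 1) | np.any(isbc > 2))
assert not iobs_domain_old.any()  # no cells left for head observations

# patched expression: reduce across layers only, keeping a (nrow, ncol) mask
iobs_domain_new = ~((isbc == 1) | np.any(isbc > 2, axis=0))
assert iobs_domain_new.sum() == 15  # 18 cells minus the lake cell and the
                                    # (row 2, col 2) location in both layers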
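
PATCH 3/4 makes get_input_arguments() optionally strict: with errors='raise', keyword keys that don't match the target function's signature raise a ValueError listing the invalid and valid argument names, instead of being silently dropped as with the default errors='coerce'. A rough usage sketch; make_grid and cfg are made up, only get_input_arguments and its new errors argument come from the patch:

from mfsetup.utils import get_input_arguments


def make_grid(nrow, ncol, delr=250., delc=250.):
    """Made-up function; only its signature matters here."""
    return {'nrow': nrow, 'ncol': ncol, 'delr': delr, 'delc': delc}


cfg = {'nrow': 100, 'ncol': 120, 'delr': 500., 'rotation': 20.}  # 'rotation' is invalid

# default ('coerce'): the invalid key is simply filtered out
grid = make_grid(**get_input_arguments(cfg, make_grid))

# strict: fail immediately, reporting 'rotation' along with the valid argument names
try:
    get_input_arguments(cfg, make_grid, errors='raise')
except ValueError as err:
    print(err)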
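
The dtype normalization in PATCH 4/4 keeps the start_datetime and end_datetime columns returned by aggregate_dataframe_to_stress_period() on a single datetime64[ns] dtype, so that concatenating results from multiple calls doesn't trip over mixed representations (e.g., object-dtype strings from one call and datetime64[ns] values from another). A small self-contained sketch of the same pattern; the frames are invented:

import pandas as pd

# two aggregated period tables with inconsistent datetime representations
df_a = pd.DataFrame({'per': [0],
                     'start_datetime': [pd.Timestamp('2012-01-01')],  # datetime64[ns]
                     'end_datetime': [pd.Timestamp('2012-12-31')]})
df_b = pd.DataFrame({'per': [1],
                     'start_datetime': ['2013-01-01'],  # object dtype (strings)
                     'end_datetime': ['2013-12-31']})

# normalize both to datetime64[ns] before concatenating,
# mirroring the loop added to aggregate_dataframe_to_stress_period
for df in (df_a, df_b):
    for col in 'start_datetime', 'end_datetime':
        if col in df.columns:
            df[col] = df[col].astype('datetime64[ns]')

combined = pd.concat([df_a, df_b], ignore_index=True)
assert combined['start_datetime'].dtype == 'datetime64[ns]'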