From ac42882381af5ef40748faffbdce1062a5eab5cd Mon Sep 17 00:00:00 2001 From: "Leaf, Andrew T" Date: Tue, 29 Aug 2023 13:06:10 -0500 Subject: [PATCH 1/4] Improvements to lake package * Add automatic writing of lake polygon and lake cell connection shapefiles * Add name_column arg to shapefile input (which adds names to lake auxiliary tables) * Allow PRISM input to be specified for all lakes (via single filename instead of dictionary) * Allow specification of lakes_shapefile: without include_ids: item * Move output tables to tables/ folder --- mfsetup/lakes.py | 65 +++++++++++++++++++++------ mfsetup/mf6_defaults.yml | 3 +- mfsetup/mfmodel.py | 18 +++++--- mfsetup/mfnwt_defaults.yml | 5 ++- mfsetup/tests/data/pfl_nwt_test.yml | 1 + mfsetup/tests/test_pfl_mfnwt_inset.py | 5 ++- 6 files changed, 74 insertions(+), 23 deletions(-) diff --git a/mfsetup/lakes.py b/mfsetup/lakes.py index f6047ee5..4d3f901c 100644 --- a/mfsetup/lakes.py +++ b/mfsetup/lakes.py @@ -1,6 +1,7 @@ import os import time import warnings +from pathlib import Path import flopy import geopandas as gpd @@ -10,7 +11,7 @@ from shapely.geometry import Polygon fm = flopy.modflow -from gisutils import get_proj_str, project, shp2df +from gisutils import project, shp2df from mfsetup.evaporation import hamon_evaporation from mfsetup.fileio import save_array @@ -26,7 +27,7 @@ def make_lakarr2d(grid, lakesdata, - include_ids, id_column='hydroid'): + include_ids=None, id_column='hydroid'): """ Make a nrow x ncol array with lake package extent for each lake, using the numbers in the 'id' column in the lakes shapefile. @@ -44,7 +45,10 @@ def make_lakarr2d(grid, lakesdata, id_column = id_column.lower() lakes.columns = [c.lower() for c in lakes.columns] lakes.index = lakes[id_column] - lakes = lakes.loc[include_ids] + if include_ids is not None: + lakes = lakes.loc[include_ids] + else: + include_ids = lakes[id_column].tolist() lakes['lakid'] = np.arange(1, len(lakes) + 1) lakes['geometry'] = [Polygon(g.exterior) for g in lakes.geometry] arr = rasterize(lakes, grid=grid, id_column='lakid') @@ -155,15 +159,15 @@ def setup_lake_info(model): source_data = model.cfg.get('lak', {}).get('source_data') if source_data is None or 'lak' not in model.package_list: return - lakesdata = model.load_features(**source_data['lakes_shapefile']) - lakesdata_proj_str = get_proj_str(source_data['lakes_shapefile']['filename']) + kwargs = get_input_arguments(source_data['lakes_shapefile'], model.load_features) + lakesdata = model.load_features(**kwargs) id_column = source_data['lakes_shapefile']['id_column'].lower() name_column = source_data['lakes_shapefile'].get('name_column', 'name').lower() nlakes = len(lakesdata) # make dataframe with lake IDs, names and locations centroids = project([g.centroid for g in lakesdata.geometry], - lakesdata_proj_str, 'epsg:4269') + lakesdata.crs, 4269) # boundnames for lakes # from source shapefile lak_ids = np.arange(1, nlakes + 1) @@ -194,8 +198,16 @@ def setup_lake_info(model): df['strt'] = np.array(stages) # save a lookup file mapping lake ids to hydroids - lookup_file = model.cfg['lak']['output_files']['lookup_file'].format(model.name) - df.drop('geometry', axis=1).to_csv(os.path.join(model._tables_path, lookup_file), index=False) + lookup_file = Path( + model.cfg['lak']['output_files']['lookup_file'].format(model.name)).name + out_table = Path(model._tables_path) / lookup_file + df.drop('geometry', axis=1).to_csv(out_table) + print(f'wrote {out_table}') + poly_shp = Path( + 
model.cfg['lak']['output_files']['lak_polygons_shapefile'].format(model.name)).name + out_shapefile = Path(model._shapefiles_path) / poly_shp + df.to_file(out_shapefile) + print(f'wrote {out_shapefile}') # clean up names #df['name'].replace('nan', '', inplace=True) @@ -245,6 +257,15 @@ def setup_lake_fluxes(model, block='lak'): if format == 'prism': sd = PrismSourceData.from_config(cfg, dest_model=model) data = sd.get_data() + # if PRISM source data were specified without any lake IDs + # assign to all lakes + if data['lake_id'].sum() == 0: + dfs = [] + for lake_id in model.lake_info['lak_id']: + data_lake_id = data.copy() + data_lake_id['lak_id'] = lake_id + dfs.append(data_lake_id) + data = pd.concat(dfs, axis=0) tmid = data['start_datetime'] + (data['end_datetime'] - data['start_datetime'])/2 data['day_of_year'] = tmid.dt.dayofyear id_lookup = {'feat_id': dict(zip(model.lake_info['lak_id'], @@ -262,9 +283,12 @@ def setup_lake_fluxes(model, block='lak'): data['latitude'], # DD dest_length_units=model.length_units) # update flux dataframe - for c in columns: - if c in data: - df[c] = data[c] + data.sort_values(by=['lak_id', 'per'], inplace=True) + df.sort_values(by=['lak_id', 'per'], inplace=True) + assert np.all(data[['lak_id', 'per']].values == \ + df[['lak_id', 'per']].values) + data.reset_index(drop=True, inplace=True) + df.update(data) else: # TODO: option 3; general csv input for lake fluxes @@ -414,6 +438,21 @@ def setup_lake_connectiondata(model, for_external_file=True, connections_lookup_file = os.path.join(model._tables_path, os.path.split(connections_lookup_file)[1]) model.cfg['lak']['output_files']['connections_lookup_file'] = connections_lookup_file df.to_csv(connections_lookup_file, index=False) + # write shapefile version + k, i, j = map(np.array, zip(*df['cellid'])) + df['k'] = k + df['i'] = i + df['j'] = j + ncol = model.modelgrid.ncol + gwf_nodes = i * ncol + j + gdf = gpd.GeoDataFrame(df.drop('cellid', axis=1), + geometry=np.array(model.modelgrid.polygons)[gwf_nodes], + crs=model.modelgrid.crs) + connections_shapefile = Path( + model.cfg['lak']['output_files']['connections_shapefile'].format(model.name)).name + out_shapefile = Path(model._shapefiles_path) / connections_shapefile + gdf.to_file(out_shapefile) + print(f'wrote {out_shapefile}') # convert to one-based and comment out header if df will be written straight to external file if for_external_file: @@ -698,7 +737,7 @@ def get_data(self): # aggregate the data from multiple files dfs = [] - for id, f in self.filenames.items(): + for lake_id, f in self.filenames.items(): meta = self.parse_header(f) df = pd.read_csv(f, skiprows=meta['skiprows'], header=None, names=meta['column_names']) @@ -727,7 +766,7 @@ def get_data(self): df[meta['column_names'][2]] = meta['temp_conversion'](df[meta['column_names'][2]]) # record lake ID - df[self.id_column] = id + df[self.id_column] = lake_id dfs.append(df) df = pd.concat(dfs) diff --git a/mfsetup/mf6_defaults.yml b/mfsetup/mf6_defaults.yml index 868cf3e4..ce9b0e43 100644 --- a/mfsetup/mf6_defaults.yml +++ b/mfsetup/mf6_defaults.yml @@ -142,8 +142,9 @@ lak: littoral_zone_buffer_width: 20 output_files: lookup_file: '{}_lak_lookup.csv' # output file that maps lake ids to source polygon feature ids + lak_polygons_shapefile: '{}_lak_polygons.shp' connections_lookup_file: '{}_lak_connections_lookup.csv' # output file that maps lake/gw connections to zones - + connections_shapefile: '{}_lak_cells.shp' mvr: options: print_flows: True diff --git a/mfsetup/mfmodel.py b/mfsetup/mfmodel.py 
index 673d33d5..0fe3ccbd 100644 --- a/mfsetup/mfmodel.py +++ b/mfsetup/mfmodel.py @@ -573,8 +573,8 @@ def load_features(self, filename, bbox_filter=None, bbox = self.bbox elif self.parent.modelgrid is not None: bbox = self.parent.modelgrid.bbox - model_proj_str = self.parent.modelgrid.proj_str - assert model_proj_str is not None + model_crs = self.parent.modelgrid.crs + assert model_crs is not None if features_crs != self.modelgrid.crs: bbox_filter = project(bbox, self.modelgrid.crs, features_crs).bounds @@ -600,10 +600,14 @@ def load_features(self, filename, bbox_filter=None, return else: df = self._features[f] - if id_column is not None and include_ids is not None: + if id_column is not None: id_column = id_column.lower() + # convert any floating point dtypes to integer + if df[id_column].dtype == float: + df[id_column] = df[id_column].astype(int) df.index = df[id_column] - df = df.loc[include_ids] + if include_ids is not None: + df = df.loc[include_ids].copy() dfs_list.append(df) df = pd.concat(dfs_list) if len(df) == 0: @@ -942,10 +946,12 @@ def _set_lakarr2d(self): if 'lak' in self.package_list: lakes_shapefile = self.cfg['lak'].get('source_data', {}).get('lakes_shapefile', {}).copy() if lakes_shapefile: - lakesdata = self.load_features(**lakes_shapefile) # caches loaded features + kwargs = get_input_arguments(lakes_shapefile, self.load_features) + lakesdata = self.load_features(**kwargs) # caches loaded features lakes_shapefile['lakesdata'] = lakesdata lakes_shapefile.pop('filename') - lakarr2d = make_lakarr2d(self.modelgrid, **lakes_shapefile) + kwargs = get_input_arguments(lakes_shapefile, make_lakarr2d) + lakarr2d = make_lakarr2d(self.modelgrid, **kwargs) self._lakarr_2d = lakarr2d self._set_isbc2d() diff --git a/mfsetup/mfnwt_defaults.yml b/mfsetup/mfnwt_defaults.yml index edfa1d1b..0daae2fd 100644 --- a/mfsetup/mfnwt_defaults.yml +++ b/mfsetup/mfnwt_defaults.yml @@ -92,7 +92,10 @@ lak: source_data: littoral_zone_buffer_width: 20 output_files: - lookup_file: 'lak_lookup.csv' + lookup_file: '{}_lak_lookup.csv' # output file that maps lake ids to source polygon feature ids + lak_polygons_shapefile: '{}_lak_polygons.shp' + connections_lookup_file: '{}_lak_connections_lookup.csv' # output file that maps lake/gw connections to zones + connections_shapefile: '{}_lak_cells.shp' sfr: diff --git a/mfsetup/tests/data/pfl_nwt_test.yml b/mfsetup/tests/data/pfl_nwt_test.yml index 9620c8ae..2334a674 100644 --- a/mfsetup/tests/data/pfl_nwt_test.yml +++ b/mfsetup/tests/data/pfl_nwt_test.yml @@ -167,6 +167,7 @@ lak: filename: 'plainfieldlakes/source_data/all_lakes.shp' id_column: 'HYDROID' include_ids: [600054357, 600054355, 600054434, 600054319] # list of WDNR HYDROIDs + name_column: 'Name' bathymetry_raster: filename: 'plainfieldlakes/source_data/pfl_bathymetry.tif' length_units: 'meters' diff --git a/mfsetup/tests/test_pfl_mfnwt_inset.py b/mfsetup/tests/test_pfl_mfnwt_inset.py index d2f23251..c3002bf5 100644 --- a/mfsetup/tests/test_pfl_mfnwt_inset.py +++ b/mfsetup/tests/test_pfl_mfnwt_inset.py @@ -461,7 +461,8 @@ def test_lak_setup(pfl_nwt_with_dis): assert not np.any(np.isnan(lak.bdlknc.array)) assert np.any(lak.bdlknc.array == m.cfg['lak']['source_data']['littoral_leakance']) assert np.any(lak.bdlknc.array == m.cfg['lak']['source_data']['profundal_leakance']) - assert os.path.exists(m.cfg['lak']['output_files']['lookup_file']) + lookup_file = Path(m._tables_path, Path(m.cfg['lak']['output_files']['lookup_file']).name) + assert lookup_file.exists() assert lak.lakarr.array.sum() > 0 tabfiles 
= m.cfg['lak']['tab_files'] for f in tabfiles: @@ -492,7 +493,7 @@ def test_lak_setup(pfl_nwt_with_dis): assert len(ds9_entries) == 6 # check that order in lake lookup file is same as specified in include_ids - lookup = pd.read_csv(m.cfg['lak']['output_files']['lookup_file']) + lookup = pd.read_csv(lookup_file) include_ids = m.cfg['lak']['source_data']['lakes_shapefile']['include_ids'] assert lookup.feat_id.tolist() == include_ids From 3f8ac2efb95574f1ef623d6fe67df06b6b72aada Mon Sep 17 00:00:00 2001 From: "Leaf, Andrew T" Date: Tue, 29 Aug 2023 15:49:07 -0500 Subject: [PATCH 2/4] fix(mf6model): issue related to numpy change, where axis=0 now has to be specified for evaluating np.any along the 0th axis; the returned scalar 'True' was resulting in no head observation input getting written --- mfsetup/mf6model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mfsetup/mf6model.py b/mfsetup/mf6model.py index 99dc4d58..6ce25a53 100644 --- a/mfsetup/mf6model.py +++ b/mfsetup/mf6model.py @@ -747,7 +747,7 @@ def setup_obs(self, **kwargs): # including lake package lakes and non lake, non well BCs # (high-K lakes are excluded, since we may want head obs at those locations, # to serve as pseudo lake stage observations) - iobs_domain = ~((self.isbc == 1) | np.any(self.isbc > 2)) + iobs_domain = ~((self.isbc == 1) | np.any(self.isbc > 2, axis=0)) # munge the observation data df = setup_head_observations(self, From d5638b56729dcb0c911358ab36eba8a883bb8765 Mon Sep 17 00:00:00 2001 From: "Leaf, Andrew T" Date: Tue, 29 Aug 2023 20:39:32 -0500 Subject: [PATCH 3/4] feat(utils.get_input_arguments): add errors arg so that function calls can be set to fail if invalid arguments are passed, with the valid args printed --- mfsetup/tdis.py | 3 ++- mfsetup/utils.py | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/mfsetup/tdis.py b/mfsetup/tdis.py index cd37a014..2ce95dc4 100644 --- a/mfsetup/tdis.py +++ b/mfsetup/tdis.py @@ -102,7 +102,8 @@ def parse_perioddata_groups(perioddata_dict, data = defaults.copy() data.update(v) if is_valid_perioddata(data): - data = get_input_arguments(data, setup_perioddata_group) + data = get_input_arguments(data, setup_perioddata_group, + errors='raise') perioddata_groups.append(data) else: print_item(k, data) diff --git a/mfsetup/utils.py b/mfsetup/utils.py index 99e0835a..10aceeb9 100644 --- a/mfsetup/utils.py +++ b/mfsetup/utils.py @@ -41,7 +41,8 @@ def update(d, u): return d -def get_input_arguments(kwargs, function, verbose=False, warn=False, exclude=None): +def get_input_arguments(kwargs, function, verbose=False, warn=False, errors='coerce', + exclude=None): """Return subset of keyword arguments in kwargs dict that are valid parameters to a function or method. 
@@ -91,6 +92,11 @@ def get_input_arguments(kwargs, function, verbose=False, warn=False, exclude=Non
         for k, v in not_arguments.items():
             #print('{}: {}'.format(k, v))
             print_item(k, v)
+    if errors == 'raise' and len(not_arguments) > 0:
+        raise ValueError(
+            f'Invalid input arguments to {function.__name__}(): '
+            f"{', '.join(not_arguments.keys())}\n"
+            f"Valid arguments: {', '.join(params.parameters.keys())}")
     if verbose:
         print('\n')
     return input_kwargs

From bf13083003ca498812fe7d540932d039bbbc17ed Mon Sep 17 00:00:00 2001
From: "Leaf, Andrew T"
Date: Thu, 21 Sep 2023 09:43:23 -0500
Subject: [PATCH 4/4] fix(tdis.py::aggregate_dataframe_to_stress_period): cast
 start_datetime and end_datetime cols to datetime64[ns] so that pd.concat
 doesn't fail with heterogeneous dtypes

---
 mfsetup/tdis.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/mfsetup/tdis.py b/mfsetup/tdis.py
index 2ce95dc4..81d7d7fd 100644
--- a/mfsetup/tdis.py
+++ b/mfsetup/tdis.py
@@ -708,7 +708,12 @@ def aggregate_dataframe_to_stress_period(data, id_column, data_column, datetime_
     aggregated.reset_index(inplace=True)
 
     # add datetime back in
-    aggregated['start_datetime'] = pd.Timestamp(start) if start is not None else start_datetime
+    aggregated['start_datetime'] = start if start is not None else start_datetime
+    # enforce consistent datetime dtypes
+    # (otherwise pd.concat of multiple outputs from this function may fail)
+    for col in 'start_datetime', 'end_datetime':
+        if col in aggregated.columns:
+            aggregated[col] = aggregated[col].astype('datetime64[ns]')
 
     # drop original datetime column, which doesn't reflect dates for period averages
     drop_cols = [datetime_column]
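
A note on the new lake cell connection shapefile in PATCH 1/4: the zero-based (k, i, j) cellids are unpacked into columns, and the within-layer node number i * ncol + j is used to pull the matching cell polygon out of a flat, row-major list of grid-cell polygons. A stripped-down sketch of that indexing, using a tiny invented 2x2 grid and made-up cellid/lakeno values in place of model.modelgrid and the connection table:

import geopandas as gpd
import numpy as np
import pandas as pd
from shapely.geometry import box

nrow, ncol = 2, 2
# flat, row-major list of cell polygons, standing in for model.modelgrid.polygons
polygons = [box(j, -i - 1, j + 1, -i) for i in range(nrow) for j in range(ncol)]

# stand-in for the lake connection table; cellid holds zero-based (k, i, j) tuples
df = pd.DataFrame({'cellid': [(0, 0, 1), (0, 1, 0)], 'lakeno': [1, 1]})
k, i, j = map(np.array, zip(*df['cellid']))
gwf_nodes = i * ncol + j  # within-layer node number for row-major ordering

gdf = gpd.GeoDataFrame(df.drop('cellid', axis=1).assign(k=k, i=i, j=j),
                       geometry=np.array(polygons)[gwf_nodes])
print(gdf)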
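
On the np.any() fix in PATCH 2/4: without axis=0, np.any() reduces the whole 3D isbc array to a single scalar, and or-ing that scalar with (isbc == 1) broadcasts to an all-True array, so iobs_domain comes out all False and no head observation input gets written. A minimal sketch of the difference; the array shape and flag values below are invented, only the axis=0 behavior comes from the patch:

import numpy as np

# invented stand-in for model.isbc: (nlay, nrow, ncol) boundary-condition flags
isbc = np.zeros((2, 3, 3), dtype=int)
isbc[0, 1, 1] = 1  # a lake-package cell
isbc[1, 2, 2] = 3  # some other non-lake, non-well boundary cell

# old expression: np.any over the whole array returns a scalar True,
# which broadcasts so that iobs_domain is False everywhere
iobs_domain_old = ~((isbc == 1) | np.any(isbc > 2))
assert not iobs_domain_old.any()  # no cells left for head observations

# patched expression: reduce across layers only, keeping a (nrow, ncol) mask
iobs_domain_new = ~((isbc == 1) | np.any(isbc > 2, axis=0))
assert iobs_domain_new.sum() == 15  # 18 cells minus the lake cell and the
                                    # (row 2, col 2) location in both layers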
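
PATCH 3/4 makes get_input_arguments() optionally strict: with errors='raise', keyword keys that don't match the target function's signature raise a ValueError listing the invalid and valid argument names, instead of being silently dropped as with the default errors='coerce'. A rough usage sketch; make_grid and cfg are made up, only get_input_arguments and its new errors argument come from the patch:

from mfsetup.utils import get_input_arguments


def make_grid(nrow, ncol, delr=250., delc=250.):
    """Made-up function; only its signature matters here."""
    return {'nrow': nrow, 'ncol': ncol, 'delr': delr, 'delc': delc}


cfg = {'nrow': 100, 'ncol': 120, 'delr': 500., 'rotation': 20.}  # 'rotation' is invalid

# default ('coerce'): the invalid key is simply filtered out
grid = make_grid(**get_input_arguments(cfg, make_grid))

# strict: fail immediately, reporting 'rotation' along with the valid argument names
try:
    get_input_arguments(cfg, make_grid, errors='raise')
except ValueError as err:
    print(err)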
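
The dtype normalization in PATCH 4/4 keeps the start_datetime and end_datetime columns returned by aggregate_dataframe_to_stress_period() on a single datetime64[ns] dtype, so that concatenating results from multiple calls doesn't trip over mixed representations (e.g., object-dtype strings from one call and datetime64[ns] values from another). A small self-contained sketch of the same pattern; the frames are invented:

import pandas as pd

# two aggregated period tables with inconsistent datetime representations
df_a = pd.DataFrame({'per': [0],
                     'start_datetime': [pd.Timestamp('2012-01-01')],  # datetime64[ns]
                     'end_datetime': [pd.Timestamp('2012-12-31')]})
df_b = pd.DataFrame({'per': [1],
                     'start_datetime': ['2013-01-01'],  # object dtype (strings)
                     'end_datetime': ['2013-12-31']})

# normalize both to datetime64[ns] before concatenating,
# mirroring the loop added to aggregate_dataframe_to_stress_period
for df in (df_a, df_b):
    for col in 'start_datetime', 'end_datetime':
        if col in df.columns:
            df[col] = df[col].astype('datetime64[ns]')

combined = pd.concat([df_a, df_b], ignore_index=True)
assert combined['start_datetime'].dtype == 'datetime64[ns]'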