Lake Package improvements; some fixes for recent numpy and pandas changes #72

Merged: 4 commits, Sep 21, 2023
65 changes: 52 additions & 13 deletions mfsetup/lakes.py
@@ -1,6 +1,7 @@
import os
import time
import warnings
from pathlib import Path

import flopy
import geopandas as gpd
@@ -10,7 +11,7 @@
from shapely.geometry import Polygon

fm = flopy.modflow
from gisutils import get_proj_str, project, shp2df
from gisutils import project, shp2df

from mfsetup.evaporation import hamon_evaporation
from mfsetup.fileio import save_array
@@ -26,7 +27,7 @@


def make_lakarr2d(grid, lakesdata,
include_ids, id_column='hydroid'):
include_ids=None, id_column='hydroid'):
"""
Make a nrow x ncol array with lake package extent for each lake,
using the numbers in the 'id' column in the lakes shapefile.
@@ -44,7 +45,10 @@ def make_lakarr2d(grid, lakesdata,
id_column = id_column.lower()
lakes.columns = [c.lower() for c in lakes.columns]
lakes.index = lakes[id_column]
lakes = lakes.loc[include_ids]
if include_ids is not None:
lakes = lakes.loc[include_ids]
else:
include_ids = lakes[id_column].tolist()
lakes['lakid'] = np.arange(1, len(lakes) + 1)
lakes['geometry'] = [Polygon(g.exterior) for g in lakes.geometry]
arr = rasterize(lakes, grid=grid, id_column='lakid')
@@ -155,15 +159,15 @@ def setup_lake_info(model):
source_data = model.cfg.get('lak', {}).get('source_data')
if source_data is None or 'lak' not in model.package_list:
return
lakesdata = model.load_features(**source_data['lakes_shapefile'])
lakesdata_proj_str = get_proj_str(source_data['lakes_shapefile']['filename'])
kwargs = get_input_arguments(source_data['lakes_shapefile'], model.load_features)
lakesdata = model.load_features(**kwargs)
id_column = source_data['lakes_shapefile']['id_column'].lower()
name_column = source_data['lakes_shapefile'].get('name_column', 'name').lower()
nlakes = len(lakesdata)

# make dataframe with lake IDs, names and locations
centroids = project([g.centroid for g in lakesdata.geometry],
lakesdata_proj_str, 'epsg:4269')
lakesdata.crs, 4269)
# boundnames for lakes
# from source shapefile
lak_ids = np.arange(1, nlakes + 1)
@@ -194,8 +198,16 @@ def setup_lake_info(model):
df['strt'] = np.array(stages)

# save a lookup file mapping lake ids to hydroids
lookup_file = model.cfg['lak']['output_files']['lookup_file'].format(model.name)
df.drop('geometry', axis=1).to_csv(os.path.join(model._tables_path, lookup_file), index=False)
lookup_file = Path(
model.cfg['lak']['output_files']['lookup_file'].format(model.name)).name
out_table = Path(model._tables_path) / lookup_file
df.drop('geometry', axis=1).to_csv(out_table)
print(f'wrote {out_table}')
poly_shp = Path(
model.cfg['lak']['output_files']['lak_polygons_shapefile'].format(model.name)).name
out_shapefile = Path(model._shapefiles_path) / poly_shp
df.to_file(out_shapefile)
print(f'wrote {out_shapefile}')

# clean up names
#df['name'].replace('nan', '', inplace=True)
@@ -245,6 +257,15 @@ def setup_lake_fluxes(model, block='lak'):
if format == 'prism':
sd = PrismSourceData.from_config(cfg, dest_model=model)
data = sd.get_data()
# if PRISM source data were specified without any lake IDs
# assign to all lakes
if data['lake_id'].sum() == 0:
dfs = []
for lake_id in model.lake_info['lak_id']:
data_lake_id = data.copy()
data_lake_id['lak_id'] = lake_id
dfs.append(data_lake_id)
data = pd.concat(dfs, axis=0)
tmid = data['start_datetime'] + (data['end_datetime'] - data['start_datetime'])/2
data['day_of_year'] = tmid.dt.dayofyear
id_lookup = {'feat_id': dict(zip(model.lake_info['lak_id'],
@@ -262,9 +283,12 @@
data['latitude'], # DD
dest_length_units=model.length_units)
# update flux dataframe
for c in columns:
if c in data:
df[c] = data[c]
data.sort_values(by=['lak_id', 'per'], inplace=True)
df.sort_values(by=['lak_id', 'per'], inplace=True)
assert np.all(data[['lak_id', 'per']].values == \
df[['lak_id', 'per']].values)
data.reset_index(drop=True, inplace=True)
df.update(data)

else:
# TODO: option 3; general csv input for lake fluxes
@@ -414,6 +438,21 @@ def setup_lake_connectiondata(model, for_external_file=True,
connections_lookup_file = os.path.join(model._tables_path, os.path.split(connections_lookup_file)[1])
model.cfg['lak']['output_files']['connections_lookup_file'] = connections_lookup_file
df.to_csv(connections_lookup_file, index=False)
# write shapefile version
k, i, j = map(np.array, zip(*df['cellid']))
df['k'] = k
df['i'] = i
df['j'] = j
ncol = model.modelgrid.ncol
gwf_nodes = i * ncol + j
gdf = gpd.GeoDataFrame(df.drop('cellid', axis=1),
geometry=np.array(model.modelgrid.polygons)[gwf_nodes],
crs=model.modelgrid.crs)
connections_shapefile = Path(
model.cfg['lak']['output_files']['connections_shapefile'].format(model.name)).name
out_shapefile = Path(model._shapefiles_path) / connections_shapefile
gdf.to_file(out_shapefile)
print(f'wrote {out_shapefile}')

# convert to one-based and comment out header if df will be written straight to external file
if for_external_file:
@@ -698,7 +737,7 @@ def get_data(self):

# aggregate the data from multiple files
dfs = []
for id, f in self.filenames.items():
for lake_id, f in self.filenames.items():
meta = self.parse_header(f)
df = pd.read_csv(f, skiprows=meta['skiprows'],
header=None, names=meta['column_names'])
@@ -727,7 +766,7 @@ def get_data(self):
df[meta['column_names'][2]] = meta['temp_conversion'](df[meta['column_names'][2]])

# record lake ID
df[self.id_column] = id
df[self.id_column] = lake_id
dfs.append(df)
df = pd.concat(dfs)

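Aside on the connections-shapefile hunk in lakes.py above: the (layer, row, column) cellid tuples are unzipped into arrays and converted to within-layer node numbers so the matching cell polygons can be pulled from the model grid. A minimal sketch of that indexing (the cellid values and ncol below are made up for illustration):

import numpy as np

# hypothetical MODFLOW 6 cellids as (layer, row, column) tuples
cellids = [(0, 0, 0), (0, 0, 1), (1, 2, 3)]
ncol = 10  # number of model columns (assumed)

# unzip the tuples into parallel layer/row/column arrays
k, i, j = map(np.array, zip(*cellids))

# within-layer node numbers, usable to index modelgrid.polygons
gwf_nodes = i * ncol + j
print(gwf_nodes)  # [ 0  1 23]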
3 changes: 2 additions & 1 deletion mfsetup/mf6_defaults.yml
@@ -142,8 +142,9 @@ lak:
littoral_zone_buffer_width: 20
output_files:
lookup_file: '{}_lak_lookup.csv' # output file that maps lake ids to source polygon feature ids
lak_polygons_shapefile: '{}_lak_polygons.shp'
connections_lookup_file: '{}_lak_connections_lookup.csv' # output file that maps lake/gw connections to zones

connections_shapefile: '{}_lak_cells.shp'
mvr:
options:
print_flows: True
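The '{}' placeholders in these default file names are filled with the model name (see the .format(model.name) calls in lakes.py above). For a hypothetical model named 'pleasant':

'{}_lak_polygons.shp'.format('pleasant')  # -> 'pleasant_lak_polygons.shp'
'{}_lak_cells.shp'.format('pleasant')     # -> 'pleasant_lak_cells.shp'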
2 changes: 1 addition & 1 deletion mfsetup/mf6model.py
@@ -747,7 +747,7 @@ def setup_obs(self, **kwargs):
# including lake package lakes and non lake, non well BCs
# (high-K lakes are excluded, since we may want head obs at those locations,
# to serve as pseudo lake stage observations)
iobs_domain = ~((self.isbc == 1) | np.any(self.isbc > 2))
iobs_domain = ~((self.isbc == 1) | np.any(self.isbc > 2, axis=0))

# munge the observation data
df = setup_head_observations(self,
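A minimal sketch of why the added axis=0 matters in the iobs_domain line above, assuming isbc is a 3D (nlay, nrow, ncol) array of boundary-condition flags (shapes and values here are made up):

import numpy as np

isbc = np.zeros((3, 2, 2), dtype=int)  # hypothetical (nlay, nrow, ncol) flags
isbc[1, 0, 0] = 3                      # e.g. one non-well, non-lake boundary cell

# without an axis, np.any reduces to a single scalar (True here),
# which then masks every cell in ~((isbc == 1) | ...):
print(np.any(isbc > 2))          # True

# with axis=0, the reduction is over layers only, giving a per-cell mask:
print(np.any(isbc > 2, axis=0))  # 2x2 boolean array, True only at [0, 0]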
18 changes: 12 additions & 6 deletions mfsetup/mfmodel.py
@@ -573,8 +573,8 @@ def load_features(self, filename, bbox_filter=None,
bbox = self.bbox
elif self.parent.modelgrid is not None:
bbox = self.parent.modelgrid.bbox
model_proj_str = self.parent.modelgrid.proj_str
assert model_proj_str is not None
model_crs = self.parent.modelgrid.crs
assert model_crs is not None

if features_crs != self.modelgrid.crs:
bbox_filter = project(bbox, self.modelgrid.crs, features_crs).bounds
@@ -600,10 +600,14 @@
return
else:
df = self._features[f]
if id_column is not None and include_ids is not None:
if id_column is not None:
id_column = id_column.lower()
# convert any floating point dtypes to integer
if df[id_column].dtype == float:
df[id_column] = df[id_column].astype(int)
df.index = df[id_column]
df = df.loc[include_ids]
if include_ids is not None:
df = df.loc[include_ids].copy()
dfs_list.append(df)
df = pd.concat(dfs_list)
if len(df) == 0:
@@ -942,10 +946,12 @@ def _set_lakarr2d(self):
if 'lak' in self.package_list:
lakes_shapefile = self.cfg['lak'].get('source_data', {}).get('lakes_shapefile', {}).copy()
if lakes_shapefile:
lakesdata = self.load_features(**lakes_shapefile) # caches loaded features
kwargs = get_input_arguments(lakes_shapefile, self.load_features)
lakesdata = self.load_features(**kwargs) # caches loaded features
lakes_shapefile['lakesdata'] = lakesdata
lakes_shapefile.pop('filename')
lakarr2d = make_lakarr2d(self.modelgrid, **lakes_shapefile)
kwargs = get_input_arguments(lakes_shapefile, make_lakarr2d)
lakarr2d = make_lakarr2d(self.modelgrid, **kwargs)
self._lakarr_2d = lakarr2d
self._set_isbc2d()

5 changes: 4 additions & 1 deletion mfsetup/mfnwt_defaults.yml
@@ -92,7 +92,10 @@ lak:
source_data:
littoral_zone_buffer_width: 20
output_files:
lookup_file: 'lak_lookup.csv'
lookup_file: '{}_lak_lookup.csv' # output file that maps lake ids to source polygon feature ids
lak_polygons_shapefile: '{}_lak_polygons.shp'
connections_lookup_file: '{}_lak_connections_lookup.csv' # output file that maps lake/gw connections to zones
connections_shapefile: '{}_lak_cells.shp'

sfr:

10 changes: 8 additions & 2 deletions mfsetup/tdis.py
@@ -102,7 +102,8 @@ def parse_perioddata_groups(perioddata_dict,
data = defaults.copy()
data.update(v)
if is_valid_perioddata(data):
data = get_input_arguments(data, setup_perioddata_group)
data = get_input_arguments(data, setup_perioddata_group,
errors='raise')
perioddata_groups.append(data)
else:
print_item(k, data)
@@ -707,7 +708,12 @@ def aggregate_dataframe_to_stress_period(data, id_column, data_column, datetime_
aggregated.reset_index(inplace=True)

# add datetime back in
aggregated['start_datetime'] = pd.Timestamp(start) if start is not None else start_datetime
aggregated['start_datetime'] = start if start is not None else start_datetime
# enforce consistent datetime dtypes
# (otherwise pd.concat of multiple outputs from this function may fail)
for col in 'start_datetime', 'end_datetime':
if col in aggregated.columns:
aggregated[col] = aggregated[col].astype('datetime64[ns]')

# drop original datetime column, which doesn't reflect dates for period averages
drop_cols = [datetime_column]
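A minimal sketch of the dtype issue the new cast guards against (column values are made up): if one call returns start_datetime as datetime64[ns] and another as plain strings, concatenating the results yields an object column or can otherwise misbehave, whereas casting both to datetime64[ns] first keeps the column a true datetime dtype.

import pandas as pd

a = pd.DataFrame({'per': [0], 'start_datetime': [pd.Timestamp('2012-01-01')]})
b = pd.DataFrame({'per': [1], 'start_datetime': ['2012-02-01']})  # strings

print(pd.concat([a, b])['start_datetime'].dtype)  # object

for df in (a, b):
    df['start_datetime'] = df['start_datetime'].astype('datetime64[ns]')
print(pd.concat([a, b])['start_datetime'].dtype)  # datetime64[ns]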
1 change: 1 addition & 0 deletions mfsetup/tests/data/pfl_nwt_test.yml
@@ -167,6 +167,7 @@ lak:
filename: 'plainfieldlakes/source_data/all_lakes.shp'
id_column: 'HYDROID'
include_ids: [600054357, 600054355, 600054434, 600054319] # list of WDNR HYDROIDs
name_column: 'Name'
bathymetry_raster:
filename: 'plainfieldlakes/source_data/pfl_bathymetry.tif'
length_units: 'meters'
5 changes: 3 additions & 2 deletions mfsetup/tests/test_pfl_mfnwt_inset.py
@@ -461,7 +461,8 @@ def test_lak_setup(pfl_nwt_with_dis):
assert not np.any(np.isnan(lak.bdlknc.array))
assert np.any(lak.bdlknc.array == m.cfg['lak']['source_data']['littoral_leakance'])
assert np.any(lak.bdlknc.array == m.cfg['lak']['source_data']['profundal_leakance'])
assert os.path.exists(m.cfg['lak']['output_files']['lookup_file'])
lookup_file = Path(m._tables_path, Path(m.cfg['lak']['output_files']['lookup_file']).name)
assert lookup_file.exists()
assert lak.lakarr.array.sum() > 0
tabfiles = m.cfg['lak']['tab_files']
for f in tabfiles:
@@ -492,7 +493,7 @@ def test_lak_setup(pfl_nwt_with_dis):
assert len(ds9_entries) == 6

# check that order in lake lookup file is same as specified in include_ids
lookup = pd.read_csv(m.cfg['lak']['output_files']['lookup_file'])
lookup = pd.read_csv(lookup_file)
include_ids = m.cfg['lak']['source_data']['lakes_shapefile']['include_ids']
assert lookup.feat_id.tolist() == include_ids

8 changes: 7 additions & 1 deletion mfsetup/utils.py
@@ -41,7 +41,8 @@ def update(d, u):
return d


def get_input_arguments(kwargs, function, verbose=False, warn=False, exclude=None):
def get_input_arguments(kwargs, function, verbose=False, warn=False, errors='coerce',
exclude=None):
"""Return subset of keyword arguments in kwargs dict
that are valid parameters to a function or method.

@@ -91,6 +92,11 @@ def get_input_arguments(kwargs, function, verbose=False, warn=False, exclude=Non
for k, v in not_arguments.items():
#print('{}: {}'.format(k, v))
print_item(k, v)
if errors == 'raise' and len(not_arguments) > 0:
raise ValueError(
f'Invalid input arguments to {function.__name__}(): '
f"{', '.join(not_arguments.keys())}\n"
f"Valid arguments: {', '.join(params.parameters.keys())}")
if verbose:
print('\n')
return input_kwargs
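A minimal sketch of how the new errors='raise' option might be used; the target function and config dict below are hypothetical:

from mfsetup.utils import get_input_arguments

def make_grid(nrow, ncol, delr=250., delc=250.):  # hypothetical target function
    return nrow, ncol, delr, delc

cfg = {'nrow': 100, 'ncol': 120, 'delr': 250., 'rotation': 20.}

# default behavior: the unrecognized 'rotation' key is quietly dropped
kwargs = get_input_arguments(cfg, make_grid)

# errors='raise': the unrecognized key raises a ValueError that lists
# the invalid and the valid argument names
try:
    kwargs = get_input_arguments(cfg, make_grid, errors='raise')
except ValueError as e:
    print(e)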