coastal_boundary dataframe with hourly SCHISM data in Hyfeature unit test #601

Open · wants to merge 1 commit into base: master
76 changes: 70 additions & 6 deletions src/troute-network/troute/hyfeature_network_utilities.py
@@ -11,6 +11,7 @@
from joblib import delayed, Parallel
import pyarrow as pa
import pyarrow.parquet as pq
import xarray as xr

import troute.nhd_io as nhd_io

@@ -23,11 +24,12 @@ def build_forcing_sets(
t0
):

run_sets = forcing_parameters.get("qlat_forcing_sets", None)
nexus_input_folder = forcing_parameters.get("nexus_input_folder", None)
nts = forcing_parameters.get("nts", None)
max_loop_size = forcing_parameters.get("max_loop_size", 12)
dt = forcing_parameters.get("dt", None)
run_sets = forcing_parameters.get("qlat_forcing_sets", None)
nexus_input_folder = forcing_parameters.get("nexus_input_folder", None)
downstream_boundary_input_folder = forcing_parameters.get("downstream_boundary_input_folder", None)
nts = forcing_parameters.get("nts", None)
max_loop_size = forcing_parameters.get("max_loop_size", 12)
dt = forcing_parameters.get("dt", None)

try:
nexus_input_folder = pathlib.Path(nexus_input_folder)
@@ -38,7 +40,8 @@ def build_forcing_sets(
raise AssertionError("Aborting simulation because the nexus_input_folder:", qlat_input_folder,"does not exist. Please check the the nexus_input_folder variable is correctly entered in the .yaml control file") from None

forcing_glob_filter = forcing_parameters.get("nexus_file_pattern_filter", "*.NEXOUT")

downstream_boundary_glob_filter = forcing_parameters.get("downstream_boundary_file_pattern_filter", "*SCHISM.nc")

if forcing_glob_filter=="nex-*":
print("Reformating qlat nexus files as hourly binary files...")
binary_folder = forcing_parameters.get('binary_nexus_file_folder', None)
@@ -161,6 +164,64 @@
k += max_loop_size
j += 1

# create the downstream boundary forcing file list
if downstream_boundary_input_folder:
# get the first and second files from an ordered list of all forcing files
downstream_boundary_input_folder = pathlib.Path(downstream_boundary_input_folder)
all_files = sorted(downstream_boundary_input_folder.glob(downstream_boundary_glob_filter))
first_file = all_files[0]
second_file = all_files[1]

# Deduce the time interval of the forcing data from the output timestamps of the first two files
df = read_file(first_file)
t1_str = pd.to_datetime(df.time.iloc[0]).strftime("%Y-%m-%d_%H:%M:%S")
t1 = datetime.strptime(t1_str,"%Y-%m-%d_%H:%M:%S")
df = read_file(second_file)
t2_str = pd.to_datetime(df.time.iloc[0]).strftime("%Y-%m-%d_%H:%M:%S")
t2 = datetime.strptime(t2_str,"%Y-%m-%d_%H:%M:%S")
dt_downstream_boundary_timedelta = t2 - t1
dt_downstream_boundary = dt_downstream_boundary_timedelta.seconds

bts_subdivisions = dt_downstream_boundary / dt

# the number of files required for the simulation.
# For example, for a 4 hr total simulation and a 2 hr max_loop, two sets of boundary data are needed:
# one at (t0, t0+1hr, t0+2hr) and the other at (t0+2hr, t0+3hr, t0+4hr).
nfiles = int(np.ceil(nts / bts_subdivisions)) + 1

# list of downstream boundary file datetimes
datetime_list = [t0 + dt_downstream_boundary_timedelta * n for n in range(nfiles)]
datetime_list_str = [datetime.strftime(d, '%Y%m%d%H%M') for d in datetime_list]
# list of downstream boundary files
downstream_boundary_filename_list = [d_str + downstream_boundary_glob_filter[1:] for d_str in datetime_list_str]

# check that all forcing files exist
for f in downstream_boundary_filename_list:
try:
J = pathlib.Path(downstream_boundary_input_folder.joinpath(f))
assert J.is_file()
except AssertionError:
raise AssertionError("Aborting simulation because downstream boundary file", J, "cannot be found.") from None

# build run sets list (run_sets already exists from the qlat pass above;
# this loop only attaches the boundary files to each set)
k = 0
j = 0
nts_accum = 0
nts_last = 0

while k < len(downstream_boundary_filename_list)-1:
if k + max_loop_size < len(downstream_boundary_filename_list):
run_sets[j]['downstream_boundary_files'] = downstream_boundary_filename_list[k:k + max_loop_size + 1]
else:
run_sets[j]['downstream_boundary_files'] = downstream_boundary_filename_list[k:]
k += max_loop_size
j += 1

return run_sets

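To make the run-set arithmetic above concrete, here is a minimal standalone sketch of the same scheduling logic (all values are hypothetical and independent of t-route):

```python
import math
from datetime import datetime, timedelta

dt = 300                    # routing timestep [s] (hypothetical)
nts = 48                    # 48 * 300 s = 4 hr total simulation
dt_boundary = 3600          # deduced SCHISM output interval [s]
max_loop_size = 2           # boundary steps per run set

bts_subdivisions = dt_boundary / dt                    # routing steps per boundary step -> 12.0
nfiles = int(math.ceil(nts / bts_subdivisions)) + 1    # -> 5 files (t0 .. t0+4hr)

t0 = datetime(2023, 4, 1, 0, 0)
names = [(t0 + timedelta(seconds=dt_boundary) * n).strftime('%Y%m%d%H%M') + '.SCHISM.nc'
         for n in range(nfiles)]

# Consecutive run sets share one file at the seam, hence the "+1" in the slice:
k, sets = 0, []
while k < len(names) - 1:
    sets.append(names[k:k + max_loop_size + 1])
    k += max_loop_size
# sets -> [['...0000.SCHISM.nc', '...0100...', '...0200...'],
#          ['...0200...', '...0300...', '...0400...']]
```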
def build_qlateral_array(
Expand Down Expand Up @@ -296,5 +357,8 @@ def read_file(file_name):
elif extension=='.parquet':
df = pq.read_table(file_name).to_pandas().reset_index()
df.index.name = None
elif extension=='.nc':
df = xr.open_dataset(file_name)
df = df.to_dataframe()

return df
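For reference, a hypothetical use of `read_file` that mirrors how `build_forcing_sets` deduces the boundary timestep above (the paths are made up; the new `.nc` branch returns the flattened xarray Dataset as a DataFrame with a `time` column):

```python
import pathlib
import pandas as pd

# Made-up paths following the "*SCHISM.nc" pattern assumed by the glob filter.
first = pathlib.Path("boundary/202304010100.SCHISM.nc")
second = pathlib.Path("boundary/202304010200.SCHISM.nc")

t1 = pd.to_datetime(read_file(first).time.iloc[0])
t2 = pd.to_datetime(read_file(second).time.iloc[0])
dt_boundary = (t2 - t1).seconds   # 3600 for hourly SCHISM output
```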
49 changes: 30 additions & 19 deletions src/troute-network/troute/hyfeature_preprocess.py
@@ -694,30 +694,31 @@ def hyfeature_forcing(
"""

# Unpack user-specified forcing parameters
dt = forcing_parameters.get("dt", None)
qts_subdivisions = forcing_parameters.get("qts_subdivisions", None)
nexus_input_folder = forcing_parameters.get("nexus_input_folder", None)
qlat_file_index_col = forcing_parameters.get("qlat_file_index_col", "feature_id")
qlat_file_value_col = forcing_parameters.get("qlat_file_value_col", "q_lateral")
qlat_file_gw_bucket_flux_col = forcing_parameters.get("qlat_file_gw_bucket_flux_col", "qBucket")
qlat_file_terrain_runoff_col = forcing_parameters.get("qlat_file_terrain_runoff_col", "qSfcLatRunoff")
dt = forcing_parameters.get("dt", None)
qts_subdivisions = forcing_parameters.get("qts_subdivisions", None)
nexus_input_folder = forcing_parameters.get("nexus_input_folder", None)
qlat_file_index_col = forcing_parameters.get("qlat_file_index_col", "feature_id")
qlat_file_value_col = forcing_parameters.get("qlat_file_value_col", "q_lateral")
qlat_file_gw_bucket_flux_col = forcing_parameters.get("qlat_file_gw_bucket_flux_col", "qBucket")
qlat_file_terrain_runoff_col = forcing_parameters.get("qlat_file_terrain_runoff_col", "qSfcLatRunoff")
downstream_boundary_input_folder = forcing_parameters.get("downstream_boundary_input_folder", None)


# TODO: find a better way to deal with these defaults and overrides.
run["t0"] = run.get("t0", t0)
run["nts"] = run.get("nts")
run["dt"] = run.get("dt", dt)
run["qts_subdivisions"] = run.get("qts_subdivisions", qts_subdivisions)
run["nexus_input_folder"] = run.get("nexus_input_folder", nexus_input_folder)
run["qlat_file_index_col"] = run.get("qlat_file_index_col", qlat_file_index_col)
run["qlat_file_value_col"] = run.get("qlat_file_value_col", qlat_file_value_col)
run["qlat_file_gw_bucket_flux_col"] = run.get("qlat_file_gw_bucket_flux_col", qlat_file_gw_bucket_flux_col)
run["qlat_file_terrain_runoff_col"] = run.get("qlat_file_terrain_runoff_col", qlat_file_terrain_runoff_col)
run["t0"] = run.get("t0", t0)
run["nts"] = run.get("nts")
run["dt"] = run.get("dt", dt)
run["qts_subdivisions"] = run.get("qts_subdivisions", qts_subdivisions)
run["nexus_input_folder"] = run.get("nexus_input_folder", nexus_input_folder)
run["qlat_file_index_col"] = run.get("qlat_file_index_col", qlat_file_index_col)
run["qlat_file_value_col"] = run.get("qlat_file_value_col", qlat_file_value_col)
run["qlat_file_gw_bucket_flux_col"] = run.get("qlat_file_gw_bucket_flux_col", qlat_file_gw_bucket_flux_col)
run["qlat_file_terrain_runoff_col"] = run.get("qlat_file_terrain_runoff_col", qlat_file_terrain_runoff_col)
run["downstream_boundary_input_folder"] = run.get("downstream_boundary_input_folder", downstream_boundary_input_folder)

#---------------------------------------------------------------------------
# Assemble lateral inflow data
#---------------------------------------------------------------------------

start_time = time.time()
LOG.info("Creating a DataFrame of lateral inflow forcings ...")

@@ -741,7 +742,8 @@ def hyfeature_forcing(
# Assemble coastal coupling data [WIP]
#---------------------------------------------------------------------
# Run if coastal_boundary_depth_df has not already been created:
if coastal_boundary_depth_df.empty:
#if coastal_boundary_depth_df.empty:
Contributor commented:
Should this be uncommented?

Contributor (Author) replied:
It's a temporary solution: for hourly SCHISM data, coastal_boundary_depth_df has to be updated at every time loop, so the lines under the if statement need to be executed on every loop. For multi-hourly SCHISM data, they need to be executed only once, so the if statement should stay.
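For illustration only, a hypothetical permanent form of this guard (the hourly threshold and the `dt_boundary_s` argument are assumptions, not part of this PR):

```python
import pandas as pd

def needs_coastal_rebuild(coastal_boundary_depth_df: pd.DataFrame,
                          dt_boundary_s: int) -> bool:
    # Hourly SCHISM data must be rebuilt on every time loop; multi-hourly
    # data only needs to be built once, i.e. while the frame is still empty.
    return coastal_boundary_depth_df.empty or dt_boundary_s <= 3600
```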

if 1==1:
coastal_boundary_elev_files = forcing_parameters.get('coastal_boundary_input_file', None)
coastal_boundary_domain_files = hybrid_parameters.get('coastal_boundary_domain', None)

@@ -750,11 +752,20 @@ def hyfeature_forcing(
LOG.info("creating coastal dataframe ...")

coastal_boundary_domain = nhd_io.read_coastal_boundary_domain(coastal_boundary_domain_files)

# create dataframe for hourly SCHISM data
coastal_boundary_depth_df = nhd_io.build_coastal_dataframe(
run,
coastal_boundary_domain,
)

# create dataframe for multi-hourly SCHISM data
'''
coastal_boundary_depth_df = nhd_io.build_coastal_ncdf_dataframe(
coastal_boundary_elev_files,
coastal_boundary_domain,
)
'''
LOG.debug(
"coastal boundary elevation observation DataFrame creation complete in %s seconds." \
% (time.time() - start_time)
44 changes: 42 additions & 2 deletions src/troute-network/troute/nhd_io.py
@@ -1563,11 +1563,12 @@ def build_coastal_dataframe(coastal_boundary_elev):
)
return coastal_df


'''
def build_coastal_ncdf_dataframe(coastal_ncdf):
with xr.open_dataset(coastal_ncdf) as ds:
coastal_ncdf_df = ds[["elev", "depth"]]
return coastal_ncdf_df.to_dataframe()
'''

def build_coastal_ncdf_dataframe(
coastal_files,
@@ -1594,7 +1595,7 @@ def build_coastal_ncdf_dataframe(
tfin = start_date + dt_timeslice*len(timesteps)
timestamps = pd.date_range(start_date, tfin, freq=dt_timeslice)
timestamps = timestamps.strftime('%Y-%m-%d %H:%M:%S')

# create a dataframe of water depth at coastal domain nodes
timeslice_schism_list=[]
for t in range(0, len(timesteps)+1):
@@ -1619,6 +1620,45 @@
# linearly extrapolate depth value at start date
coastal_boundary_depth_df.iloc[:,0] = 2.0*coastal_boundary_depth_df.iloc[:,1] - coastal_boundary_depth_df.iloc[:,2]

return coastal_boundary_depth_df

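The start-date fill above is one-step backward linear extrapolation on a uniform time grid, x(t0) = 2·x(t0+Δt) − x(t0+2Δt); a quick numeric check (made-up depths):

```python
# x0 = x1 - (x2 - x1) = 2*x1 - x2
x1, x2 = 1.8, 2.0       # depths at t0+dt and t0+2*dt (made-up values)
x0 = 2.0 * x1 - x2      # extrapolated depth at t0 -> 1.6
```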
def build_coastal_dataframe(
run,
coastal_boundary_domain,
):

downstream_boundary_input_folder = run.get("downstream_boundary_input_folder",None)

if downstream_boundary_input_folder:
downstream_boundary_input_folder = pathlib.Path(downstream_boundary_input_folder)
if "downstream_boundary_files" in run:
downstream_boundary_files = run.get("downstream_boundary_files")
downstream_boundary_files = [downstream_boundary_input_folder.joinpath(f) for f in downstream_boundary_files]

timeslice_schism_list=[]
for f in downstream_boundary_files:
ds = xr.open_dataset(f)
df = ds.to_dataframe()
tws = []
timestamps = []
depths = []
for tw, boundary_node in coastal_boundary_domain.items():
tws.append(tw)
df2 = df[df['schism_id']==boundary_node]
date = df2.time.iloc[0]
timestamps.append(pd.to_datetime(date).strftime('%Y-%m-%d %H:%M:%S'))
depths.append(df2.elev.iat[0] + df2.depth.iat[0])
timeslice_schism = (pd.DataFrame({
'stationId' : tws,
'datetime' : timestamps,
'depth' : depths
}).
set_index(['stationId', 'datetime']).
unstack(1, fill_value = np.nan)['depth'])
timeslice_schism_list.append(timeslice_schism)
coastal_boundary_depth_df = pd.concat(timeslice_schism_list, axis=1, ignore_index=False)

return coastal_boundary_depth_df

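For orientation, the `set_index(...).unstack(...)` step in `build_coastal_dataframe` produces a wide frame with one row per tailwater `stationId` and one column per timestamp; one such slice is built per boundary file and the slices are concatenated column-wise. A toy reconstruction (station IDs and depths are made up):

```python
import numpy as np
import pandas as pd

tws = ['tw-1', 'tw-2']
timestamps = ['2023-04-01 01:00:00', '2023-04-01 01:00:00']
depths = [1.8, 2.4]   # elev + depth per boundary node (made-up values)

timeslice_schism = (pd.DataFrame({'stationId': tws,
                                  'datetime': timestamps,
                                  'depth': depths})
                      .set_index(['stationId', 'datetime'])
                      .unstack(1, fill_value=np.nan)['depth'])
# timeslice_schism: index = stationId, columns = datetime.
# pd.concat(list_of_slices, axis=1) then stacks the per-file columns side by side.
```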
def lastobs_df_output(
72 changes: 72 additions & 0 deletions src/troute-network/troute/nhd_network_utilities_v02.py
@@ -8,6 +8,7 @@
import numpy as np
import netCDF4
from joblib import delayed, Parallel
import xarray as xr

import troute.nhd_io as nhd_io
import troute.nhd_network as nhd_network
@@ -253,6 +254,7 @@ def build_forcing_sets(

run_sets = forcing_parameters.get("qlat_forcing_sets", None)
qlat_input_folder = forcing_parameters.get("qlat_input_folder", None)
downstream_boundary_input_folder = forcing_parameters.get("downstream_boundary_input_folder", None)
nts = forcing_parameters.get("nts", None)
max_loop_size = forcing_parameters.get("max_loop_size", 12)
dt = forcing_parameters.get("dt", None)
@@ -266,6 +268,7 @@
raise AssertionError("Aborting simulation because the qlat_input_folder:", qlat_input_folder,"does not exist. Please check the the qlat_input_folder variable is correctly entered in the .yaml control file") from None

forcing_glob_filter = forcing_parameters.get("qlat_file_pattern_filter", "*.CHRTOUT_DOMAIN1")
downstream_boundary_glob_filter = forcing_parameters.get("downstream_boundary_file_pattern_filter", "*SCHISM.nc")

# TODO: Throw errors if insufficient input data are available
if run_sets:
@@ -357,6 +360,64 @@
nts_last = nts_accum
k += max_loop_size
j += 1

# create the downstream boundary forcing file list
if downstream_boundary_input_folder:
# get the first and second files from an ordered list of all forcing files
downstream_boundary_input_folder = pathlib.Path(downstream_boundary_input_folder)
all_files = sorted(downstream_boundary_input_folder.glob(downstream_boundary_glob_filter))
first_file = all_files[0]
second_file = all_files[1]

# Deduce the time interval of the forcing data from the output timestamps of the first two files
df = read_file(first_file)
t1_str = pd.to_datetime(df.time.iloc[0]).strftime("%Y-%m-%d_%H:%M:%S")
t1 = datetime.strptime(t1_str,"%Y-%m-%d_%H:%M:%S")
df = read_file(second_file)
t2_str = pd.to_datetime(df.time.iloc[0]).strftime("%Y-%m-%d_%H:%M:%S")
t2 = datetime.strptime(t2_str,"%Y-%m-%d_%H:%M:%S")
dt_downstream_boundary_timedelta = t2 - t1
dt_downstream_boundary = dt_downstream_boundary_timedelta.seconds

bts_subdivisions = dt_downstream_boundary / dt
# the number of files required for the simulation
nfiles = int(np.ceil(nts / bts_subdivisions)) + 1

# list of downstream boundary file datetimes
# n+1 because the downstream boundary values for, e.g., the next 2 hr of simulation are
# the values at t0, t0+1hr, and t0+2hr, and SCHISM currently does not store the value at
# t0 (it has to be interpolated)
datetime_list = [t0 + dt_downstream_boundary_timedelta * (n+1) for n in range(nfiles)]
datetime_list_str = [datetime.strftime(d, '%Y%m%d%H%M') for d in datetime_list]
# list of downstream boundary files
downstream_boundary_filename_list = [d_str + downstream_boundary_glob_filter[1:] for d_str in datetime_list_str]

# check that all forcing files exist
for f in downstream_boundary_filename_list:
try:
J = pathlib.Path(downstream_boundary_input_folder.joinpath(f))
assert J.is_file()
except AssertionError:
raise AssertionError("Aborting simulation because downstream boundary file", J, "cannot be found.") from None

# build run sets list (run_sets already exists from the qlat pass above;
# this loop only attaches the boundary files to each set)
k = 0
j = 0
nts_accum = 0
nts_last = 0
while k < len(downstream_boundary_filename_list)-1:
if k + max_loop_size < len(downstream_boundary_filename_list):
run_sets[j]['downstream_boundary_files'] = downstream_boundary_filename_list[k:k + max_loop_size + 1]
else:
run_sets[j]['downstream_boundary_files'] = downstream_boundary_filename_list[k:]
k += max_loop_size
j += 1

return run_sets

def build_qlateral_array(
Expand Down Expand Up @@ -872,3 +933,14 @@ def build_refac_connections(diff_network_parameters):
)

return connections

def read_file(file_name):
extension = file_name.suffix
if extension=='.csv':
df = pd.read_csv(file_name)
elif extension=='.parquet':
df = pq.read_table(file_name).to_pandas().reset_index()
df.index.name = None
elif extension=='.nc':
df = xr.open_dataset(file_name)
df = df.to_dataframe()
return df