remove geopandas and fiona + bugfixes #833

Closed · wants to merge 4 commits
2 changes: 1 addition & 1 deletion .github/workflows/ubuntu-latest.yml
@@ -40,7 +40,7 @@ jobs:
sudo apt-get install libstdc++-10-dev libgfortran-10-dev glibc-source openmpi-bin openmpi-common libopenmpi-dev libopenmpi3 libgtk-3-bin libgtk-3-common libgtk-3-dev -y
sudo apt-get install netcdf-bin libnetcdf-dev libnetcdff-dev libnetcdf-c++4 libnetcdf-c++4-dev -y
python -m pip install --upgrade pip
pip3 install wheel dask pyproj fiona bmipy opencv-contrib-python-headless
pip3 install wheel dask pyproj bmipy opencv-contrib-python-headless
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

- name: Install t-route
1 change: 1 addition & 0 deletions .gitignore
@@ -53,6 +53,7 @@ __pycache__/
.env
.python-version
.ipynb_checkpoints/
build/

# pyenv #
#########
3 changes: 1 addition & 2 deletions readme.md
@@ -66,7 +66,6 @@ joblib
toolz
Cython
pyyaml
geopandas
pyarrow
deprecated
```
@@ -84,7 +83,7 @@ To get a sense of the operation of the routing scheme, follow this sequence of commands

```shell
# install required python modules
pip3 install numpy pandas xarray netcdf4 joblib toolz pyyaml Cython>3,!=3.0.4 geopandas pyarrow deprecated wheel
pip3 install numpy pandas xarray netcdf4 joblib toolz pyyaml 'Cython>3,!=3.0.4' pyarrow deprecated wheel

# clone t-route
git clone --progress --single-branch --branch master http://github.com/NOAA-OWP/t-route.git
1 change: 0 additions & 1 deletion requirements.txt
@@ -11,4 +11,3 @@ toolz
joblib
deprecated
pyarrow<12.0
geopandas
2 changes: 0 additions & 2 deletions src/troute-network/pyproject.toml
@@ -7,8 +7,6 @@ name = "troute.network"
version = "0.0.0"
dependencies = [
"deprecated",
"geopandas",
"fiona",
"joblib",
"netcdf4",
"numpy",
10 changes: 6 additions & 4 deletions src/troute-network/tests/make_test_network.py
@@ -1,5 +1,5 @@
import geopandas as gpd
import pandas as pd
import sqlite3
from pathlib import Path
import sys

@@ -45,9 +45,11 @@ def make_network_from_segment(flowpaths, edges, attributes, depth, segment):
sub_edges.drop('geometry', axis=1).to_json("flowpath_edge_list.json", orient='records', indent=2)

def make_network_from_geopkg(file_path, depth, segment=None):
flowpaths = gpd.read_file(file_path, layer="flowpaths")
attributes = gpd.read_file(file_path, layer="flowpath_attributes")
edges = gpd.read_file(file_path, layer="flowpath_edge_list")
with sqlite3.connect(file_path) as conn:
flowpaths = pd.read_sql_query("SELECT * FROM flowpaths", conn)
attributes = pd.read_sql_query("SELECT * FROM flowpath_attributes", conn)
edges = pd.read_sql_query("SELECT * FROM flowpath_edge_list", conn)

if segment is None:
segment = flowpaths[flowpaths['toid'].str.startswith('tnex')].iloc[0]['id']
make_network_from_segment(flowpaths, edges, attributes, depth, segment)
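The change above swaps `gpd.read_file(file_path, layer=...)` for plain `sqlite3` plus `pandas`; note that `pd.read_sql_query` takes a live connection (or SQLAlchemy engine), not a file path, as its second argument, which is fixed above. A minimal sketch of the pattern, with an invented file name and an assumed `flowpaths` table:

```python
import sqlite3
import pandas as pd

gpkg = "network.gpkg"  # hypothetical GeoPackage path

with sqlite3.connect(gpkg) as conn:
    # a GeoPackage is an ordinary SQLite database; gpkg_contents lists its layers
    layers = [r[0] for r in conn.execute("SELECT table_name FROM gpkg_contents").fetchall()]

    # read_sql_query requires a connection object, not a path
    flowpaths = pd.read_sql_query("SELECT * FROM flowpaths", conn)

# Unlike geopandas, this returns any geometry column as a raw GeoPackage BLOB
# (a gpkg header followed by WKB), so spatial work must happen in SQL.
```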
8 changes: 1 addition & 7 deletions src/troute-network/troute/AbstractNetwork.py
@@ -926,13 +926,7 @@ def filter_diffusive_nexus_pts(self,):
diff_tw_ids = ['nex-' + str(s) for s in diff_tw_ids]
nexus_latlon = nexus_latlon[nexus_latlon['id'].isin(diff_tw_ids)]
nexus_latlon['id'] = nexus_latlon['id'].str.split('-',expand=True).loc[:,1].astype(float).astype(int)
lat_lon_crs = nexus_latlon[['id','geometry']]
lat_lon_crs = lat_lon_crs.to_crs(crs=4326)
lat_lon_crs['lon'] = lat_lon_crs.geometry.x
lat_lon_crs['lat'] = lat_lon_crs.geometry.y
lat_lon_crs['crs'] = str(lat_lon_crs.crs)
lat_lon_crs = lat_lon_crs[['lon','lat','crs']]
self._nexus_latlon = nexus_latlon[['id']].join(lat_lon_crs)
self._nexus_latlon = nexus_latlon[['id','lon','lat', 'crs']]

def get_timesteps_from_nex(nexus_files):
# Return a list of output files
122 changes: 88 additions & 34 deletions src/troute-network/troute/HYFeaturesNetwork.py
@@ -1,7 +1,6 @@
from .AbstractNetwork import AbstractNetwork
import pandas as pd
import numpy as np
import geopandas as gpd
import time
import json
from pathlib import Path
@@ -13,7 +12,7 @@
from datetime import datetime
from pprint import pformat
import os
import fiona
import sqlite3
import troute.nhd_io as nhd_io #FIXME
from troute.nhd_network import reverse_dict, extract_connections, reverse_network, reachable
from .rfc_lake_gage_crosswalk import get_rfc_lake_gage_crosswalk, get_great_lakes_climatology
@@ -31,12 +30,17 @@ def find_layer_name(layers, pattern):
return None

def read_geopkg(file_path, compute_parameters, waterbody_parameters, cpu_pool):
# Retrieve available layers from the GeoPackage
available_layers = fiona.listlayers(file_path)
with sqlite3.connect(file_path) as conn:
# gpkg_contents, gpkg_ogr_contents, and sqlite_sequence all contain the layer names
result = conn.execute("SELECT table_name FROM gpkg_contents;").fetchall()
# fetchall returns a list of tuples
available_layers = [r[0] for r in result]

# patterns for the layers we want to find
layer_patterns = {
'flowpaths': r'flow[-_]?paths?|flow[-_]?lines?',
# without the $ anchor, the flowpaths pattern would also match flowpath_attributes
'flowpaths': r'flow[-_]?paths?$|flow[-_]?lines?$',
'flowpath_attributes': r'flow[-_]?path[-_]?attributes?|flow[-_]?line[-_]?attributes?',
'lakes': r'lakes?',
'nexus': r'nexus?',
@@ -65,15 +69,43 @@ def read_geopkg(file_path, compute_parameters, waterbody_parameters, cpu_pool):
if hybrid_parameters.get('run_hybrid_routing', False) and 'nexus' not in layers_to_read:
layers_to_read.append('nexus')

# Function that read a layer from the geopackage
# Function that reads a layer from the geopackage
def read_layer(layer_name):
if layer_name:
try:
return gpd.read_file(file_path, layer=layer_name)
except Exception as e:
print(f"Error reading {layer_name}: {e}")
return pd.DataFrame()
return pd.DataFrame()
if not layer_name:
return pd.DataFrame()
try:
with sqlite3.connect(file_path) as conn:
has_spatial_metadata = False
# try to get the name of the geometry column and its crs
geometry_columns = conn.execute(f"""
SELECT c.column_name,g.definition
FROM gpkg_geometry_columns AS c
JOIN gpkg_spatial_ref_sys AS g ON c.srs_id = g.srs_id
WHERE c.table_name = '{layer_name}'""").fetchall()

if len(geometry_columns) > 0:
has_spatial_metadata = True
geom_column = geometry_columns[0][0]
crs = geometry_columns[0][1]

if has_spatial_metadata:
# select everything from the layer, plus the midpoint of its bounding box;
# decoding the geometry blob would also work (it's just a gpkg header + WKB),
# but reading the bounds from the rtree index table is much faster
sql_query = f"""SELECT d.*,
(r.minx + r.maxx) / 2.0 AS lon,
(r.miny + r.maxy) / 2.0 AS lat
FROM {layer_name} AS d
JOIN rtree_{layer_name}_{geom_column} AS r ON d.fid = r.id"""
df = pd.read_sql_query(sql_query, conn)
df['crs'] = crs
return df
else:
return pd.read_sql_query(f"SELECT * FROM {layer_name}", conn)
except Exception as e:
print(f"Error reading layer {layer_name} from {file_path}: {e}")
return pd.DataFrame()


# Retrieve geopackage information using matched layer names
if cpu_pool > 1:
@@ -83,26 +115,34 @@ def read_layer(layer_name):
table_dict = {layers_to_read[i]: gpkg_list[i] for i in range(len(layers_to_read))}
else:
table_dict = {layer: read_layer(matched_layers[layer]) for layer in layers_to_read}

# Handle different key column names between flowpaths and flowpath_attributes
flowpaths_df = table_dict.get('flowpaths', pd.DataFrame())
flowpath_attributes_df = table_dict.get('flowpath_attributes', pd.DataFrame())

# Check if 'link' column exists and rename it to 'id'
if 'link' in flowpath_attributes_df.columns:
flowpath_attributes_df.rename(columns={'link': 'id'}, inplace=True)

# Merge flowpaths and flowpath_attributes
flowpaths = pd.merge(
flowpaths_df,
flowpath_attributes_df,
on='id',
how='inner'
)

lakes = table_dict.get('lakes', pd.DataFrame())
network = table_dict.get('network', pd.DataFrame())
nexus = table_dict.get('nexus', pd.DataFrame())

# Handle different key column names between flowpaths and flowpath_attributes
flowpaths_df = table_dict.get('flowpaths', None)
flowpath_attributes_df = table_dict.get('flowpath_attributes', None)

if flowpath_attributes_df is not None:
# Check if 'link' column exists and rename it to 'id'
if 'link' in flowpath_attributes_df.columns:
flowpath_attributes_df.rename(columns={'link': 'id'}, inplace=True)

if flowpaths_df is not None and flowpath_attributes_df is not None:
# Merge flowpaths and flowpath_attributes
flowpaths = pd.merge(
flowpaths_df,
flowpath_attributes_df,
on='id',
how='inner'
)
elif flowpaths_df is not None:
flowpaths = flowpaths_df
elif flowpath_attributes_df is not None:
flowpaths = flowpath_attributes_df
else:
raise ValueError("No flowpaths or flowpath_attributes found in the geopackage")

return flowpaths, lakes, network, nexus
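The rtree join in `read_layer` leans on a GeoPackage convention: each spatially indexed layer has an SQLite R*Tree virtual table named `rtree_<layer>_<geom_column>` holding feature bounding boxes keyed by feature id. A toy reproduction of the lookup (table names and coordinates invented; requires an SQLite build with the rtree module, which most CPython distributions include):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE nexus (fid INTEGER PRIMARY KEY, id TEXT)")
# GeoPackage spatial indexes are R*Tree virtual tables: (id, minx, maxx, miny, maxy)
conn.execute("CREATE VIRTUAL TABLE rtree_nexus_geom USING rtree(id, minx, maxx, miny, maxy)")
conn.execute("INSERT INTO nexus VALUES (1, 'nex-10')")
# a point feature has a degenerate bounding box: minx == maxx and miny == maxy
conn.execute("INSERT INTO rtree_nexus_geom VALUES (1, -97.5, -97.5, 30.25, 30.25)")

rows = conn.execute("""
    SELECT d.id,
           (r.minx + r.maxx) / 2.0 AS lon,
           (r.miny + r.maxy) / 2.0 AS lat
    FROM nexus AS d
    JOIN rtree_nexus_geom AS r ON d.fid = r.id
""").fetchall()
print(rows)  # [('nex-10', -97.5, 30.25)]
conn.close()
```

For point layers such as `nexus` the bounding-box midpoint is the point itself; for lines and polygons it is only the center of the bounding box, which is what the lon/lat columns above represent.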

Expand All @@ -126,8 +166,17 @@ def read_json(file_path, edge_list):
return df_main

def read_geojson(file_path):
flowpaths = gpd.read_file(file_path)
return flowpaths
with open(file_path) as f:
data = json.load(f)
data = data['features']
df = pd.json_normalize(data, max_level=1)
df.columns = df.columns.str.replace('properties.', '', regex=False)
df = df.drop(columns=['type'])
# The geometry type (e.g. MULTIPOLYGON) is stored in geometry.type
# and the coordinates in geometry.coordinates.
# The crs is stored in data['crs'], e.g.
# data['crs'] = { "type": "name", "properties": { "name": "urn:ogc:def:crs:EPSG::5070" } }
return df

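A worked example of the flattening `read_geojson` performs, using an invented single-feature collection:

```python
import pandas as pd

data = {
    "type": "FeatureCollection",
    "crs": {"type": "name", "properties": {"name": "urn:ogc:def:crs:EPSG::5070"}},
    "features": [
        {"type": "Feature",
         "properties": {"id": "wb-100", "toid": "nex-10"},
         "geometry": {"type": "Point", "coordinates": [-97.5, 30.25]}},
    ],
}

df = pd.json_normalize(data["features"], max_level=1)
df.columns = df.columns.str.replace("properties.", "", regex=False)
df = df.drop(columns=["type"])
print(df.columns.tolist())
# ['id', 'toid', 'geometry.type', 'geometry.coordinates']
```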
def numeric_id(flowpath):
id = flowpath['key'].split('-')[-1]
@@ -138,15 +187,18 @@

def read_ngen_waterbody_df(parm_file, lake_index_field="wb-id", lake_id_mask=None):
"""
FIXME FUNCTION NEVER CALLED
Reads .gpkg or lake.json file and prepares a dataframe, filtered
to the relevant reservoirs, to provide the parameters
for level-pool reservoir computation.
"""
def node_key_func(x):
return int( x.split('-')[-1] )
if Path(parm_file).suffix=='.gpkg':
df = gpd.read_file(parm_file, layer='lakes')

# This could be made more efficient by reading only the necessary columns,
# but there is no reference for which columns remain after the drop below
with sqlite3.connect(parm_file) as conn:
df = pd.read_sql_query('SELECT * from lakes', conn)
df = (
df.drop(['id','toid','hl_id','hl_reference','hl_uri','geometry'], axis=1)
.rename(columns={'hl_link': 'lake_id'})
@@ -164,6 +216,7 @@ def node_key_func(x):

def read_ngen_waterbody_type_df(parm_file, lake_index_field="wb-id", lake_id_mask=None):
"""
FIXME FUNCTION NEVER CALLED
"""
#FIXME: this function is likely not correct. Unclear how we will get
# reservoir type from the gpkg files. Information should be in 'crosswalk'
@@ -173,7 +226,8 @@ def node_key_func(x):
return int( x.split('-')[-1] )

if Path(parm_file).suffix=='.gpkg':
df = gpd.read_file(parm_file, layer="crosswalk").set_index('id')
with sqlite3.connect(parm_file) as conn:
df = pd.read_sql_query('SELECT * FROM crosswalk', conn).set_index('id')
elif Path(parm_file).suffix=='.json':
df = pd.read_json(parm_file, orient="index")

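For orientation, a hypothetical call to the refactored `read_geopkg`; the file name is invented, the dicts are minimal placeholders (real t-route configurations carry many more keys), and the nested `hybrid_parameters` key is inferred from the hunk above:

```python
flowpaths, lakes, network, nexus = read_geopkg(
    "domain.gpkg",  # invented GeoPackage path
    compute_parameters={"hybrid_parameters": {"run_hybrid_routing": False}},
    waterbody_parameters={},
    cpu_pool=1,  # values > 1 read layers in parallel
)
```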
4 changes: 1 addition & 3 deletions test/LowerColorado_TX_v4/run_BMI_Coastal.py
@@ -3,8 +3,6 @@
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import pickle
from datetime import datetime, timedelta
import time

@@ -20,7 +18,7 @@
#import troute_model

from troute.HYFeaturesNetwork import HYFeaturesNetwork
from troute.AbstractNetwork import *
from troute.AbstractNetwork import read_coastal_output

import bmi_df2array as df2a

2 changes: 0 additions & 2 deletions test/LowerColorado_TX_v4/run_with_BMI.py
@@ -3,8 +3,6 @@
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import pickle
from datetime import datetime, timedelta
import time
