From 75111870f2f63a82eaae6b65cfd52d9e2e7c4d9d Mon Sep 17 00:00:00 2001
From: Robert  Wilson <robertwilson190@gmail.com>
Date: Wed, 13 Nov 2024 14:09:50 +0000
Subject: [PATCH] add benthic oxygen consumption

---
 ecoval/__init__.py               |  2 ++
 ecoval/data/point_template.ipynb | 35 ++++++++++++++++++--------------
 ecoval/matchall.py               | 18 +++++++++++-----
 ecoval/parsers.py                | 14 ++++++++++++-
 4 files changed, 48 insertions(+), 21 deletions(-)

diff --git a/ecoval/__init__.py b/ecoval/__init__.py
index b87812b..bc59329 100755
--- a/ecoval/__init__.py
+++ b/ecoval/__init__.py
@@ -587,6 +587,8 @@ def validate(title="Automated model evaluation", author=None, variables = "all",
                     Variable = "pCO2"
                 if vv == "benbio":
                     Variable = "macrobenthos biomass"
+                if vv == "oxycons":
+                    Variable = "oxygen consumption"
                 if vv == "susfrac":
                     Variable = "suspension feeding fraction"
                 if vv == "pft":
diff --git a/ecoval/data/point_template.ipynb b/ecoval/data/point_template.ipynb
index 2ceef51..3bb4117 100644
--- a/ecoval/data/point_template.ipynb
+++ b/ecoval/data/point_template.ipynb
@@ -48,7 +48,7 @@
    "source": [
     "variable = \"point_variable\".lower()\n",
     "vv_name = variable\n",
-    "if vv_name in [\"benbio\", \"carbon\", \"susfrac\"]:\n",
+    "if vv_name in [\"benbio\", \"carbon\", \"susfrac\", \"oxycons\"]:\n",
     "    compact = True\n",
     "if vv_name.lower() == \"ph\":\n",
     "    vv_name = \"pH\"\n",
@@ -58,6 +58,8 @@
     "    vv_name = \"biomass of macrobenthos\"\n",
     "if vv_name == \"susfrac\":\n",
     "    vv_name = \"Suspension feeding fraction\"\n",
+    "if vv_name == \"oxycons\":\n",
+    "    vv_name = \"Benthic oxygen consumption\"\n",
     "layer = \"point_layer\"\n",
     "# get the units. File inspection could be randomized in case people have put loose files in there...\n",
     "import glob\n",
@@ -132,7 +134,7 @@
     "df[\"lat\"] = df[\"lat\"].apply(lambda x: bin_value(x, 0.5))\n",
     "if variable == \"benbio\":\n",
     "    df = df.assign(observation = lambda x: 1000 * 0.45 * x.observation) \n",
-    "if variable not in  [\"carbon\", \"benbio\", \"susfrac\"]:\n",
+    "if variable not in  [\"carbon\", \"benbio\", \"susfrac\", \"oxycons\"]:\n",
     "    df = df.groupby([\"lon\", \"lat\", \"year\", \"month\"]).mean().reset_index()\n",
     "else:\n",
     "    df = df.groupby([\"lon\", \"lat\"]).mean().reset_index()"
@@ -382,7 +384,7 @@
     "  )+\n",
     "    theme(\n",
     "\n",
-    "    legend.position = \"bottom\", legend.direction = \"horizontal\", legend.box = \"horizontal\", legend.key.width = unit(6.0, \"cm\"),\n",
+    "    legend.position = \"bottom\", legend.direction = \"horizontal\", legend.box = \"horizontal\", legend.key.width = unit(3.0, \"cm\"),\n",
     "    legend.key.height = unit(1.0, \"cm\"))\n",
     "    # use ggtext to ensure things are superscripted\n",
     "    #theme(legend.title = element_markdown()) \n",
@@ -783,7 +785,7 @@
    },
    "outputs": [],
    "source": [
-    "if variable not in [\"carbon\", \"benbio\", \"susfrac\"]:\n",
+    "if variable not in [\"carbon\", \"benbio\", \"susfrac\", \"oxycons\"]:\n",
     "    if compact is False:\n",
     "        if layer == \"surface\":\n",
     "            md(f\"**Figure {chapter}{i_figure}**: Simulated versus observed {vv_name} in the top 5 m of the water column. The blue curve is a generalized additive model (GAM) fit to the data, and the black line represents 1-1 relationship between the simulation and observations. The data has been binned to 0.5 degree resolution.\") \n",
@@ -890,7 +892,7 @@
    },
    "outputs": [],
    "source": [
-    "if variable not in [\"carbon\", \"benbio\", \"susfrac\"]:\n",
+    "if variable not in [\"carbon\", \"benbio\", \"susfrac\", \"oxycons\"]:\n",
     "    df_bias = (\n",
     "        df_raw\n",
     "        .assign(bias = lambda x: x.model - x.observation)\n",
@@ -910,7 +912,7 @@
     "else:\n",
     "    # only want annual\n",
     "    df_bias = pd.DataFrame({\"month\": [\"All\"], \"bias\": [df_raw.model.mean() - df_raw.observation.mean()]})\n",
-    "if variable not in [\"carbon\", \"benbio\", \"susfrac\"]:\n",
+    "if variable not in [\"carbon\", \"benbio\", \"susfrac\", \"oxycons\"]:\n",
     "    # now create an rmse dataframe\n",
     "    df_rmse = (\n",
     "        df_raw\n",
@@ -933,7 +935,7 @@
     "df_table = copy.deepcopy(df_bias).merge(df_rmse)\n",
     "df_table = df_table.round(2)\n",
     "# create df_corr\n",
-    "if variable not in [\"carbon\", \"benbio\", \"susfrac\"]:\n",
+    "if variable not in [\"carbon\", \"benbio\", \"susfrac\", \"oxycons\"]:\n",
     "    df_corr = (\n",
     "        df_raw\n",
     "        .groupby(\"month\")\n",
@@ -960,7 +962,7 @@
     "# change Month to Period\n",
     "df_table = df_table.rename(columns={\"Month\": \"Time period\"})\n",
     "\n",
-    "if variable not in [\"carbon\", \"benbio\", \"susfrac\"]:\n",
+    "if variable not in [\"carbon\", \"benbio\", \"susfrac\", \"oxycons\"]:\n",
     "    # add commas to bias and rmse\n",
     "    df_number = df_raw.groupby(\"month\").count().reset_index().loc[:,[\"month\", \"observation\"]]\n",
     "# convert month number to name\n",
@@ -971,7 +973,7 @@
     "\n",
     "# add total number of observations\n",
     "annual_number = len(df_raw)\n",
-    "if variable not in [\"carbon\", \"benbio\", \"susfrac\"]:\n",
+    "if variable not in [\"carbon\", \"benbio\", \"susfrac\", \"oxycons\"]:\n",
     "    df_number = pd.concat([df_number, pd.DataFrame({\"Time period\": [\"All\"], \"Number of observations\": [annual_number]})])\n",
     "# df_number = df_number.append({\"Time period\": \"All\", \"Number of observations\": annual_number}, ignore_index=True)\n",
     "df_table = df_table.merge(df_number)\n",
@@ -1008,7 +1010,7 @@
    },
    "outputs": [],
    "source": [
-    "if variable not in [\"carbon\", \"benbio\", \"susfrac\"]:\n",
+    "if variable not in [\"carbon\", \"benbio\", \"susfrac\", \"oxycons\"]:\n",
     "    df_bias = (\n",
     "        df\n",
     "        .assign(bias = lambda x: x.model - x.observation)\n",
@@ -1028,7 +1030,7 @@
     "else:\n",
     "    # only want annual\n",
     "    df_bias = pd.DataFrame({\"month\": [\"All\"], \"bias\": [df.model.mean() - df.observation.mean()]})\n",
-    "if variable not in [\"carbon\", \"benbio\", \"susfrac\"]:\n",
+    "if variable not in [\"carbon\", \"benbio\", \"susfrac\", \"oxycons\"]:\n",
     "    # now create an rmse dataframe\n",
     "    df_rmse = (\n",
     "        df\n",
@@ -1051,7 +1053,7 @@
     "df_table = copy.deepcopy(df_bias).merge(df_rmse)\n",
     "df_table = df_table.round(2)\n",
     "# create df_corr\n",
-    "if variable not in [\"carbon\", \"benbio\", \"susfrac\"]:\n",
+    "if variable not in [\"carbon\", \"benbio\", \"susfrac\", \"oxycons\"]:\n",
     "    df_corr = (\n",
     "        df\n",
     "        .groupby(\"month\")\n",
@@ -1078,7 +1080,7 @@
     "# change Month to Period\n",
     "df_table = df_table.rename(columns={\"Month\": \"Time period\"})\n",
     "\n",
-    "if variable not in [\"carbon\", \"benbio\", \"susfrac\"]:\n",
+    "if variable not in [\"carbon\", \"benbio\", \"susfrac\", \"oxycons\"]:\n",
     "    # add commas to bias and rmse\n",
     "    df_number = df.groupby(\"month\").count().reset_index().loc[:,[\"month\", \"observation\"]]\n",
     "# convert month number to name\n",
@@ -1089,7 +1091,7 @@
     "\n",
     "# add total number of observations\n",
     "annual_number = len(df)\n",
-    "if variable not in [\"carbon\", \"benbio\", \"susfrac\"]:\n",
+    "if variable not in [\"carbon\", \"benbio\", \"susfrac\", \"oxycons\"]:\n",
     "    df_number = pd.concat([df_number, pd.DataFrame({\"Time period\": [\"All\"], \"Number of observations\": [annual_number]})])\n",
     "# df_number = df_number.append({\"Time period\": \"All\", \"Number of observations\": annual_number}, ignore_index=True)\n",
     "df_table = df_table.merge(df_number)\n",
@@ -1319,7 +1321,10 @@
    "outputs": [],
    "source": [
     "if variable in [\"benbio\", \"susfrac\"]:\n",
-    "    md(\"URL: https://www.vliz.be/vmdcdata/nsbs/about.php\")"
+    "    md(\"URL: https://www.vliz.be/vmdcdata/nsbs/about.php\")\n",
+    "if variable == \"oxycons\":\n",
+    "    md(\"Stratmann, Tanja; Soetaert, Karline; Wei, Chih-Lin et al. (2022). Data from: The SCOC database – a large, open and global database with sediment community oxygen consumption rates [Dataset]. Dryad. https://doi.org/10.5061/dryad.25nd083\")\n",
+    "\n"
    ]
   },
   {
diff --git a/ecoval/matchall.py b/ecoval/matchall.py
index db2a19d..abd0d64 100644
--- a/ecoval/matchall.py
+++ b/ecoval/matchall.py
@@ -38,7 +38,8 @@
     "carbon",
     "benbio",
     "benthic_carbon_flux",
-    "mesozoo"
+    "mesozoo",
+    "oxycons"
 
 ]
 
@@ -152,7 +153,11 @@ def mm_match(
 
                         ds.set_units({"total":"kg/m3"})
                     else:
-                        ds.sum_all()
+                        if variable == "oxycons":
+                            #Y2_fYG3c/12.011 + Y3_fYG3c/12.011 + Y4_fYG3c/12.011 + H1_fHG3c/12.011 + H2_fHG3c/12.011 + 2.0 * ben_nit_nrate
+                            ds.assign(total = lambda x: x.Ymacro_fYG3c_result/12.011 + x.Y4_fYG3c/12.011 + x.H1_fHG3c/12.011 + x.H2_fHG3c/12.011 + 2.0 * x.ben_nit_nrate, drop = True) 
+                        else:
+                            ds.sum_all()
 
             if len(df_locs) > 0:
                 if top_layer:
@@ -285,7 +290,7 @@ def extract_variable_mapping(folder, exclude=[], n_check=None, fvcom = False):
                         pass
         options = glob.glob(new_directory + "/**.nc")
         if True:
-            options = [x for x in options if "part" not in os.path.basename(x)]
+            #options = [x for x in options if "part" not in os.path.basename(x)]
             options = [x for x in options if "restart" not in os.path.basename(x)]
 
         if len([x for x in options if ".nc" in x]) > 0:
@@ -1288,7 +1293,7 @@ def write_report(x):
                 return None
     
     # check if benbio is in all_df variable column
-    if "benbio" in list(all_df.variable):
+    if "benbio" in point_benthic:
         # add another column using benbio row
         df_benbio = all_df.query("variable == 'benbio'").reset_index(drop=True)
         df_benbio["variable"] = ["susfrac"]
@@ -1617,6 +1622,8 @@ def point_match(variable, layer="all", ds_depths=None, df_times = None):
                                     "variable == @point_variable"
                                 ).model_variable
                             )[0]
+
+
                             if session_info["user_dir"]:
                                 paths = glob.glob(
                                     f"{obs_dir}/point/user/**/{variable}/**{variable}**.feather"
@@ -1629,6 +1636,7 @@ def point_match(variable, layer="all", ds_depths=None, df_times = None):
                                 paths = glob.glob(
                                     f"{obs_dir}/point/nws/**/{variable}/**{variable}**.feather"
                                 )
+
                             if variable == "pft":
                                 point_variable = "pft"
                             source = os.path.basename(paths[0]).split("_")[0]
@@ -1707,7 +1715,7 @@ def point_match(variable, layer="all", ds_depths=None, df_times = None):
                                 df = df.merge(df_include).reset_index(drop=True)
                             sel_these = point_time_res
                             sel_these = [x for x in df.columns if x in sel_these]
-                            if variable not in ["carbon", "benbio", "susfrac"]:
+                            if variable not in ["carbon", "benbio", "susfrac", "oxycons"]:
                                 paths = list(
                                     set(
                                         df.loc[:, sel_these]
diff --git a/ecoval/parsers.py b/ecoval/parsers.py
index a923222..0c26eae 100644
--- a/ecoval/parsers.py
+++ b/ecoval/parsers.py
@@ -56,7 +56,8 @@ def generate_mapping(ds, fvcom = False):
         "nano",
         "pico",
         "benthic_carbon_flux",
-        "mesozoo"
+        "mesozoo",
+        "oxycons"
     ]
     if fvcom is False:
         ds1 = nc.open_data(ds[0], checks=False)
@@ -221,6 +222,14 @@ def generate_mapping(ds, fvcom = False):
                 for x in ds_contents.long_name
                 if "chloroph" in x.lower() and "pico" in x
             ]
+        
+        if vv == "oxycons":
+            oxy_con_vars = list(set(["Ymacro_fYG3c_result", "Y4_fYG3c", "H1_fHG3c", "H2_fHG3c", "ben_nit_nrate"]))
+            the_vars = [x for x in ds_contents_top.variable if x in oxy_con_vars]
+            print(the_vars)
+            if len(the_vars) != len(oxy_con_vars):
+                the_vars = []
+
 
         if vv in ["carbon", "benbio", "benthic_carbon_flux"]:
             model_vars = ds_contents_top.query("long_name in @the_vars").variable
@@ -241,6 +250,8 @@ def generate_mapping(ds, fvcom = False):
                     ).variable
 
         add = True
+        if vv == "oxycons":
+            model_vars = the_vars
 
         if len(model_vars) > 1 and vv not in [
             "doc",
@@ -248,6 +259,7 @@ def generate_mapping(ds, fvcom = False):
             "carbon",
             "benbio",
             "micro",
+            "oxycons"
         ]:
             add = False