Review changes, black/ruff formats. Updated tests. Fixes #300

CABLE-LSM · Oct 9, 2024 · 88056b0 · 88056b0
1 parent 0508cdb
commit 88056b0
Show file tree

Hide file tree

Showing 18 changed files with 285 additions and 178 deletions.
diff --git a/.conda/meta.yaml b/.conda/meta.yaml
@@ -29,5 +29,5 @@ requirements:
         - cerberus >=1.3.5
         - gitpython
         - jinja2
-        - hpcpy>=0.3.0
+        - hpcpy>=0.5.0
         - meorg_client
diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md
@@ -68,6 +68,7 @@ fluxsite:
     walltime: 06:00:00
     storage: [scratch/a00, gdata/xy11]
   multiprocess: True
+  meorg_model_output_id: XXXXXXXX
 ```
 
 ### [experiment](#experiment)
@@ -154,7 +155,7 @@ fluxsite:
 
 ### [multiprocess](#multiprocess)
 
-: **Default:** True, _optional key_. :octicons-dash-24: Enables or disables multiprocessing for executing embarrassingly parallel tasks.
+
 
 ```yaml
 
@@ -163,6 +164,14 @@ fluxsites:
 
 ```
 
+### [meorg_model_output_id](#meorg_model_output_id)
+
+: **Default:** False, _optional key_. :octicons-dash-24: The unique Model Output ID from modelevaluation.org to which output files will be automatically uploaded for analysis.
+
+A separate upload job will be submitted at the successful completion of benchcab tasks if this key is present, however, the validity is not checked by benchcab at this stage.
+
+Note: It is the user's responsbility to ensure the model output is configured on modelevaluation.org.
+
 ## spatial
 
 Contains settings specific to spatial tests.
@@ -493,4 +502,14 @@ codecov:
 [f90nml-github]: https://github.com/marshallward/f90nml
 [environment-modules]: https://modules.sourceforge.net/
 [nci-pbs-directives]: https://opus.nci.org.au/display/Help/PBS+Directives+Explained
-[cable-github]: https://github.com/CABLE-LSM/CABLE
+[cable-github]: https://github.com/CABLE-LSM/CABLE
+
+## meorg_bin
+
+: **Default:** False, _optional key. :octicons-dash-24: Specifies the absolute system path to the ME.org client executable. In the absence of this key it will be inferred from the same directory as benchcab should `meorg_model_output_id` be set in `fluxsite` above.
+
+``` yaml
+
+meorg_bin: /path/to/meorg
+
+```
diff --git a/src/benchcab/benchcab.py b/src/benchcab/benchcab.py
@@ -11,6 +11,7 @@
 from subprocess import CalledProcessError
 from typing import Optional
 
+import benchcab.utils.meorg as bm
 from benchcab import fluxsite, internal, spatial
 from benchcab.comparison import run_comparisons, run_comparisons_in_parallel
 from benchcab.config import read_config
@@ -24,7 +25,6 @@
 from benchcab.model import Model
 from benchcab.utils import is_verbose, task_summary
 from benchcab.utils.fs import mkdir, next_path
-import benchcab.utils.meorg as bm
 from benchcab.utils.pbs import render_job_script
 from benchcab.utils.repo import create_repo
 from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface
@@ -245,15 +245,14 @@ def fluxsite_submit_job(self, config_path: str, skip: list[str]) -> None:
         logger.info(f"{internal.FLUXSITE_DIRS['TASKS']}/<task_name>/out.txt")
         logger.info("The NetCDF output for each task is written to:")
         logger.info(f"{internal.FLUXSITE_DIRS['OUTPUT']}/<task_name>_out.nc")
-        
+
         # Upload to meorg by default
         bm.do_meorg(
             config,
-            upload_dir=internal.FLUXSITE_DIRS['OUTPUT'],
+            upload_dir=internal.FLUXSITE_DIRS["OUTPUT"],
             benchcab_bin=str(self.benchcab_exe_path),
-            benchcab_job_id=job_id
+            benchcab_job_id=job_id,
         )
-
 
     def gen_codecov(self, config_path: str):
         """Endpoint for `benchcab codecov`."""
@@ -360,7 +359,7 @@ def fluxsite_run_tasks(self, config_path: str):
         else:
             fluxsite.run_tasks(tasks)
 
-        n_tasks, n_success, n_failed, all_complete = task_summary(tasks)
+        _, n_success, n_failed, _ = task_summary(tasks)
         logger.info(f"{n_failed} failed, {n_success} passed")
 
     def fluxsite_bitwise_cmp(self, config_path: str):
@@ -386,9 +385,9 @@ def fluxsite_bitwise_cmp(self, config_path: str):
             ncpus = config["fluxsite"]["pbs"]["ncpus"]
             run_comparisons_in_parallel(comparisons, n_processes=ncpus)
         else:
-            run_comparisons(comparisons)        
-        
-        n_tasks, n_success, n_failed, all_complete = task_summary(comparisons)
+            run_comparisons(comparisons)
+
+        _, n_success, n_failed, _ = task_summary(comparisons)
         logger.info(f"{n_failed} failed, {n_success} passed")
 
     def fluxsite(self, config_path: str, no_submit: bool, skip: list[str]):

diff --git a/src/benchcab/config.py b/src/benchcab/config.py
@@ -119,6 +119,9 @@ def read_optional_key(config: dict):
     config["fluxsite"]["pbs"] = internal.FLUXSITE_DEFAULT_PBS | config["fluxsite"].get(
         "pbs", {}
     )
+    config["fluxsite"]["meorg_model_output_id"] = config["fluxsite"].get(
+        "meorg_model_output_id", internal.FLUXSITE_DEFAULT_MEORG_MODEL_OUTPUT_ID
+    )
 
     config["codecov"] = config.get("codecov", False)
 

diff --git a/src/benchcab/data/config-schema.yml b/src/benchcab/data/config-schema.yml
@@ -108,8 +108,11 @@ fluxsite:
             type: "string"
             required: false
     meorg_model_output_id:
-      type: "string"
+      type: 
+       - "boolean"
+       - "string"
       required: false
+      default: false
 
 spatial:
   type: "dict"
@@ -137,4 +140,11 @@ spatial:
 
 codecov:
   type: "boolean"
-  required: false
+  required: false
+
+meorg_bin:
+  type:
+   - "boolean"
+   - "string"
+  required: False
+  default: False
diff --git a/src/benchcab/data/test/config-optional.yml b/src/benchcab/data/test/config-optional.yml
@@ -3,6 +3,7 @@ project: hh5
 
 fluxsite:
   experiment: AU-Tum
+  meorg_model_output_id: False
   multiprocess: False
   pbs:
     ncpus: 6

diff --git a/src/benchcab/data/test/integration_meorg.sh b/src/benchcab/data/test/integration_meorg.sh
@@ -46,6 +46,7 @@ fluxsite:
     storage:
       - scratch/$PROJECT
       - gdata/$PROJECT
+  # This ID is currently configured on the me.org server.
   meorg_model_output_id: Sss7qupAHEZ8ovbCv
 EOL
 

diff --git a/src/benchcab/internal.py b/src/benchcab/internal.py
@@ -252,6 +252,7 @@
 }
 
 FLUXSITE_DEFAULT_EXPERIMENT = "forty-two-site-test"
+FLUXSITE_DEFAULT_MEORG_MODEL_OUTPUT_ID = False
 
 OPTIONAL_COMMANDS = ["fluxsite-bitwise-cmp", "gen_codecov"]
 
@@ -275,11 +276,12 @@ def get_met_forcing_file_names(experiment: str) -> list[str]:
 
     return file_names
 
+
 # Configuration for the client upload
 MEORG_CLIENT = dict(
-    num_threads=1, # Parallel uploads over 4 cores
-    cache_delay=60*5, # 5mins between upload and analysis triggering
+    num_threads=1,  # Parallel uploads over 4 cores
+    cache_delay=60 * 5,  # 5mins between upload and analysis triggering
     mem="8G",
     walltime="01:00:00",
-    storage=["gdata/ks32", "gdata/hh5", "gdata/wd9", "gdata/rp23"]
-)
+    storage=["gdata/ks32", "gdata/hh5", "gdata/wd9", "gdata/rp23"],
+)
diff --git a/src/benchcab/utils/__init__.py b/src/benchcab/utils/__init__.py
@@ -11,7 +11,7 @@
 import sys
 from importlib import resources
 from pathlib import Path
-from typing import Union, Iterable
+from typing import Iterable, Union
 
 import yaml
 from jinja2 import BaseLoader, Environment
@@ -162,9 +162,10 @@ def task_summary(tasks: Iterable) -> tuple:
     -------
     tuple
         num_tasks, num_complete, num_failed, all_complete
+
     """
     num_tasks = len(tasks)
     num_complete = len([task for task in tasks if task.is_done()])
     num_failed = num_tasks - num_complete
-    
-    return num_tasks, num_complete, num_failed, num_complete == num_tasks
+
+    return num_tasks, num_complete, num_failed, num_complete == num_tasks
diff --git a/src/benchcab/utils/meorg.py b/src/benchcab/utils/meorg.py
@@ -1,10 +1,13 @@
 """Utility methods for interacting with the ME.org client."""
-from benchcab.internal import MEORG_CLIENT
-from meorg_client.client import Client as MeorgClient
+
+import os
+
 from hpcpy import get_client
+from meorg_client.client import Client as MeorgClient
+
 import benchcab.utils as bu
-import os
-from glob import glob
+from benchcab.internal import MEORG_CLIENT
+
 
 def do_meorg(config: dict, upload_dir: str, benchcab_bin: str, benchcab_job_id: str):
     """Perform the upload of model outputs to modelevaluation.org
@@ -22,29 +25,29 @@ def do_meorg(config: dict, upload_dir: str, benchcab_bin: str, benchcab_job_id:
     -------
     bool
         True if successful, False otherwise
-    """
 
+    """
     logger = bu.get_logger()
 
-    model_output_id = config.get("fluxsite").get("meorg_model_output_id", False)
+    model_output_id = config["fluxsite"]["meorg_model_output_id"]
     num_threads = MEORG_CLIENT["num_threads"]
-    
+
     # Check if a model output id has been assigned
     if model_output_id == False:
         logger.info("No model_output_id found in fluxsite configuration.")
         logger.info("NOT uploading to modelevaluation.org")
         return False
-    
+
     # Allow the user to specify an absolute path to the meorg bin in config
     meorg_bin = config.get("meorg_bin", False)
-    
+
     # Otherwise infer the path from the benchcab installation
     if meorg_bin == False:
         logger.debug(f"Inferring meorg bin from {benchcab_bin}")
         bin_segments = benchcab_bin.split("/")
         bin_segments[-1] = "meorg"
         meorg_bin = "/".join(bin_segments)
-    
+
     logger.debug(f"meorg_bin = {meorg_bin}")
 
     # Now, check if that actually exists
@@ -56,41 +59,47 @@ def do_meorg(config: dict, upload_dir: str, benchcab_bin: str, benchcab_job_id:
     # Also only run if the client is initialised
     if MeorgClient().is_initialised() == False:
 
-        logger.warn("A model_output_id has been supplied, but the meorg_client is not initialised.")
-        logger.warn("To initialise, run `meorg initialise` in the installation environment.")
-        logger.warn("Once initialised, the outputs from this run can be uploaded with the following command:")
-        logger.warn(f"meorg file upload {upload_dir}/*.nc -n {num_threads} --attach_to {model_output_id}")
+        logger.warn(
+            "A model_output_id has been supplied, but the meorg_client is not initialised."
+        )
+        logger.warn(
+            "To initialise, run `meorg initialise` in the installation environment."
+        )
+        logger.warn(
+            "Once initialised, the outputs from this run can be uploaded with the following command:"
+        )
+        logger.warn(
+            f"meorg file upload {upload_dir}/*.nc -n {num_threads} --attach_to {model_output_id}"
+        )
         logger.warn("Then the analysis can be triggered with:")
         logger.warn(f"meorg analysis start {model_output_id}")
         return False
 
     # Finally, attempt the upload!
     else:
-    
+
         logger.info("Uploading outputs to modelevaluation.org")
 
         # Submit the outputs
         client = get_client()
         meorg_jobid = client.submit(
-
             bu.get_installed_root() / "data" / "meorg_jobscript.j2",
             render=True,
             dry_run=False,
             depends_on=benchcab_job_id,
-
             # Interpolate into the job script
             model_output_id=model_output_id,
             data_dir=upload_dir,
             cache_delay=MEORG_CLIENT["cache_delay"],
             mem=MEORG_CLIENT["mem"],
             num_threads=MEORG_CLIENT["num_threads"],
             walltime=MEORG_CLIENT["walltime"],
-            storage=MEORG_CLIENT['storage'],
-            project=config['project'],
-            modules=config['modules'],
+            storage=MEORG_CLIENT["storage"],
+            project=config["project"],
+            modules=config["modules"],
             purge_outputs=True,
-            meorg_bin=meorg_bin
+            meorg_bin=meorg_bin,
         )
 
         logger.info(f"Upload job submitted: {meorg_jobid}")
-        return True
+        return True
diff --git a/tests/test_comparison.py b/tests/test_comparison.py
@@ -8,6 +8,7 @@
 from pathlib import Path
 
 import pytest
+
 from benchcab import internal
 from benchcab.comparison import ComparisonTask
 

diff --git a/tests/test_config.py b/tests/test_config.py
@@ -75,6 +75,7 @@ def all_optional_default_config(no_optional_config) -> dict:
             "experiment": bi.FLUXSITE_DEFAULT_EXPERIMENT,
             "multiprocess": bi.FLUXSITE_DEFAULT_MULTIPROCESS,
             "pbs": bi.FLUXSITE_DEFAULT_PBS,
+            "meorg_model_output_id": bi.FLUXSITE_DEFAULT_MEORG_MODEL_OUTPUT_ID
         },
         "science_configurations": bi.DEFAULT_SCIENCE_CONFIGURATIONS,
         "spatial": {
@@ -106,6 +107,7 @@ def all_optional_custom_config(no_optional_config) -> dict:
                 "walltime": "10:00:00",
                 "storage": ["scratch/$PROJECT"],
             },
+            "meorg_model_output_id": False
         },
         "science_configurations": [
             {

diff --git a/tests/test_fluxsite.py b/tests/test_fluxsite.py
@@ -10,6 +10,7 @@
 import f90nml
 import netCDF4
 import pytest
+
 from benchcab import __version__, internal
 from benchcab.fluxsite import (
     CableError,

diff --git a/tests/test_fs.py b/tests/test_fs.py
@@ -10,6 +10,7 @@
 from pathlib import Path
 
 import pytest
+
 from benchcab.utils.fs import chdir, mkdir, next_path, prepend_path
 
 

diff --git a/tests/test_spatial.py b/tests/test_spatial.py
@@ -10,6 +10,7 @@
 import f90nml
 import pytest
 import yaml
+
 from benchcab import internal
 from benchcab.model import Model
 from benchcab.spatial import SpatialTask, get_spatial_tasks

diff --git a/tests/test_state.py b/tests/test_state.py
@@ -1,9 +1,10 @@
 import time
 from pathlib import Path
+from tempfile import TemporaryDirectory
 
 import pytest
+
 from benchcab.utils.state import State, StateAttributeError
-from tempfile import TemporaryDirectory
 
 
 def test_state_is_set():
@@ -39,4 +40,4 @@ def test_state_get_raises_exception():
     with TemporaryDirectory() as tmp_dir:
         state = State(state_dir=Path(tmp_dir))
         with pytest.raises(StateAttributeError):
-            state.get()
+            state.get()