From 2f6da74131a0114b9d38ed76c41c42e9a76919f7 Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Mon, 18 Dec 2023 11:31:13 +1100 Subject: [PATCH 01/50] Added baseline logging implementation, tests relying on stdout need to be refactored to take advantage of this. #103 --- benchcab/__init__.py | 3 +- benchcab/benchcab.py | 88 +++++++++++++------------- benchcab/utils/__init__.py | 123 +++++++++++++++++++++++++++++++++++- benchcab/utils/repo.py | 9 +-- benchcab/utils/singleton.py | 12 ++++ tests/test_utils.py | 10 ++- 6 files changed, 195 insertions(+), 50 deletions(-) create mode 100644 benchcab/utils/singleton.py diff --git a/benchcab/__init__.py b/benchcab/__init__.py index ef612f0c..8617e75f 100644 --- a/benchcab/__init__.py +++ b/benchcab/__init__.py @@ -2,11 +2,12 @@ # SPDX-License-Identifier: Apache-2.0 import importlib.metadata +from benchcab.utils import get_logger try: __version__ = importlib.metadata.version("benchcab") except importlib.metadata.PackageNotFoundError: __version__ = "" - print("Warning: unable to interrogate version string from installed distribution.") + get_logger().warn('unable to interrogate version string from installed distribution.') # Note: cannot re-raise exception here as this will break pytest # when running without first installing the package diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index 6b2a7ead..cf27c599 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -28,6 +28,7 @@ from benchcab.utils.repo import SVNRepo, create_repo from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface from benchcab.workdir import setup_fluxsite_directory_tree +from benchcab.utils import get_logger class Benchcab: @@ -48,35 +49,38 @@ def __init__( self._models: list[Model] = [] self.tasks: list[Task] = [] # initialise fluxsite tasks lazily + # Get the logger object + self.logger = get_logger() + def _validate_environment(self, project: str, modules: list): """Performs checks on current user 
environment.""" if not self.validate_env: return if "gadi.nci" not in internal.NODENAME: - print("Error: benchcab is currently implemented only on Gadi") + self.logger.error("benchcab is currently implemented only on Gadi") sys.exit(1) namelist_dir = Path(internal.CWD / internal.NAMELIST_DIR) if not namelist_dir.exists(): - print( - "Error: cannot find 'namelists' directory in current working directory" + self.logger.error( + "Cannot find 'namelists' directory in current working directory" ) sys.exit(1) required_groups = [project, "ks32", "hh5"] groups = [grp.getgrgid(gid).gr_name for gid in os.getgroups()] if not set(required_groups).issubset(groups): - print( - "Error: user does not have the required group permissions.", - "The required groups are:", - ", ".join(required_groups), - ) + self.logger.error([ + 'User does not have the required group permissions.', + 'The required groups are:', + ' ,'.join(required_groups) + ]) sys.exit(1) for modname in modules: if not self.modules_handler.module_is_avail(modname): - print(f"Error: module ({modname}) is not available.") + self.logger.error(f"Module ({modname}) is not available.") sys.exit(1) all_site_ids = set( @@ -86,14 +90,14 @@ def _validate_environment(self, project: str, modules: list): for site_id in all_site_ids: paths = list(internal.MET_DIR.glob(f"{site_id}*")) if not paths: - print( - f"Error: failed to infer met file for site id '{site_id}' in " + self.logger.error([ + f"Failed to infer met file for site id '{site_id}' in " f"{internal.MET_DIR}." - ) + ]) sys.exit(1) if len(paths) > 1: - print( - f"Error: multiple paths infered for site id: '{site_id}' in {internal.MET_DIR}." + self.logger.error( + f"Multiple paths infered for site id: '{site_id}' in {internal.MET_DIR}." 
) sys.exit(1) @@ -143,10 +147,10 @@ def fluxsite_submit_job( raise RuntimeError(msg) job_script_path = Path(internal.QSUB_FNAME) - print( + self.logger.debug([ "Creating PBS job script to run fluxsite tasks on compute " f"nodes: {job_script_path}" - ) + ]) with job_script_path.open("w", encoding="utf-8") as file: contents = render_job_script( project=config["project"], @@ -166,19 +170,20 @@ def fluxsite_submit_job( verbose=verbose, ) except CalledProcessError as exc: - print("Error when submitting job to NCI queue") - print(exc.output) + self.logger.error("when submitting job to NCI queue, details to follow") + self.logger.error(exc.output) raise - print( - f"PBS job submitted: {proc.stdout.strip()}\n" - "The CABLE log file for each task is written to " - f"{internal.FLUXSITE_DIRS['LOG']}/_log.txt\n" - "The CABLE standard output for each task is written to " - f"{internal.FLUXSITE_DIRS['TASKS']}//out.txt\n" - "The NetCDF output for each task is written to " + self.logger.debug([ + f"PBS job submitted: {proc.stdout.strip()}", + "The CABLE log file for each task is written to", + f"{internal.FLUXSITE_DIRS['LOG']}/_log.txt", + "The CABLE standard output for each task is written to", + f"{internal.FLUXSITE_DIRS['TASKS']}//out.txt", + "The NetCDF output for each task is written to", f"{internal.FLUXSITE_DIRS['OUTPUT']}/_out.nc" - ) + ]) + def checkout(self, config_path: str, verbose: bool): """Endpoint for `benchcab checkout`.""" @@ -187,7 +192,7 @@ def checkout(self, config_path: str, verbose: bool): mkdir(internal.SRC_DIR, exist_ok=True, verbose=True) - print("Checking out repositories...") + self.logger.debug("Checking out repositories...") rev_number_log = "" for model in self._get_models(config): model.repo.checkout(verbose=verbose) @@ -202,11 +207,10 @@ def checkout(self, config_path: str, verbose: bool): cable_aux_repo.checkout(verbose=verbose) rev_number_log_path = next_path("rev_number-*.log") - print(f"Writing revision number info to {rev_number_log_path}") + 
self.loffer.debug(f"Writing revision number info to {rev_number_log_path}") with rev_number_log_path.open("w", encoding="utf-8") as file: file.write(rev_number_log) - print("") def build(self, config_path: str, verbose: bool): """Endpoint for `benchcab build`.""" @@ -215,19 +219,19 @@ def build(self, config_path: str, verbose: bool): for repo in self._get_models(config): if repo.build_script: - print( + self.logger.debug([ "Compiling CABLE using custom build script for " f"realisation {repo.name}..." - ) + ]) repo.custom_build(modules=config["modules"], verbose=verbose) else: build_mode = "with MPI" if internal.MPI else "serially" - print(f"Compiling CABLE {build_mode} for realisation {repo.name}...") + self.logger.debug(f"Compiling CABLE {build_mode} for realisation {repo.name}...") repo.pre_build(verbose=verbose) repo.run_build(modules=config["modules"], verbose=verbose) repo.post_build(verbose=verbose) - print(f"Successfully compiled CABLE for realisation {repo.name}") - print("") + self.logger.debug(f"Successfully compiled CABLE for realisation {repo.name}") + def fluxsite_setup_work_directory(self, config_path: str, verbose: bool): """Endpoint for `benchcab fluxsite-setup-work-dir`.""" @@ -235,13 +239,12 @@ def fluxsite_setup_work_directory(self, config_path: str, verbose: bool): self._validate_environment(project=config["project"], modules=config["modules"]) tasks = self.tasks if self.tasks else self._initialise_tasks(config) - print("Setting up run directory tree for fluxsite tests...") + self.logger.debug("Setting up run directory tree for fluxsite tests...") setup_fluxsite_directory_tree(verbose=verbose) - print("Setting up tasks...") + self.logger.debug("Setting up tasks...") for task in tasks: task.setup_task(verbose=verbose) - print("Successfully setup fluxsite tasks") - print("") + self.logger.debug("Successfully setup fluxsite tasks") def fluxsite_run_tasks(self, config_path: str, verbose: bool): """Endpoint for `benchcab fluxsite-run-tasks`.""" @@ 
-249,7 +252,7 @@ def fluxsite_run_tasks(self, config_path: str, verbose: bool): self._validate_environment(project=config["project"], modules=config["modules"]) tasks = self.tasks if self.tasks else self._initialise_tasks(config) - print("Running fluxsite tasks...") + self.logger.debug("Running fluxsite tasks...") try: multiprocess = config["fluxsite"]["multiprocess"] except KeyError: @@ -261,8 +264,7 @@ def fluxsite_run_tasks(self, config_path: str, verbose: bool): run_tasks_in_parallel(tasks, n_processes=ncpus, verbose=verbose) else: run_tasks(tasks, verbose=verbose) - print("Successfully ran fluxsite tasks") - print("") + self.logger.debug("Successfully ran fluxsite tasks") def fluxsite_bitwise_cmp(self, config_path: str, verbose: bool): """Endpoint for `benchcab fluxsite-bitwise-cmp`.""" @@ -277,7 +279,7 @@ def fluxsite_bitwise_cmp(self, config_path: str, verbose: bool): tasks = self.tasks if self.tasks else self._initialise_tasks(config) comparisons = get_fluxsite_comparisons(tasks) - print("Running comparison tasks...") + self.logger.debug("Running comparison tasks...") try: multiprocess = config["fluxsite"]["multiprocess"] except KeyError: @@ -290,7 +292,7 @@ def fluxsite_bitwise_cmp(self, config_path: str, verbose: bool): run_comparisons_in_parallel(comparisons, n_processes=ncpus, verbose=verbose) else: run_comparisons(comparisons, verbose=verbose) - print("Successfully ran comparison tasks") + self.logger.debug("Successfully ran comparison tasks") def fluxsite( self, config_path: str, no_submit: bool, verbose: bool, skip: list[str] diff --git a/benchcab/utils/__init__.py b/benchcab/utils/__init__.py index 9ac1e955..37b41ff5 100644 --- a/benchcab/utils/__init__.py +++ b/benchcab/utils/__init__.py @@ -6,9 +6,12 @@ import json import yaml import os +import sys from importlib import resources from pathlib import Path - +import logging +from benchcab.utils.singleton import Singleton +from typing import Union # List of one-argument decoding functions. 
PACKAGE_DATA_DECODERS = dict(json=json.loads, yml=yaml.safe_load) @@ -44,3 +47,121 @@ def load_package_data(filename: str) -> dict: # Decode and return. return PACKAGE_DATA_DECODERS[ext](raw) + + +def get_logger(name='benchcab', level='debug'): + """Get a singleton logger object. + + Parameters + ---------- + name : str, optional + Name of the logger, by default 'benchcab' + level : str, optional + Level of logging, by default 'debug' + + Returns + ------- + benchcab.utils.SingletonLogger + Logger instance. + """ + return SingletonLogger(name=name, level=level) + + +class SingletonLogger(logging.Logger, metaclass=Singleton): + + def __init__(self, name : str = 'benchcab', level : str = 'debug'): + + super(SingletonLogger, self).__init__(name=name) + + # Set level + level = getattr(logging, level.upper()) + self.setLevel(level) + + # Create the formatter + log_format = '%(asctime)s - %(levelname)s - %(module)s.%(filename)s:%(lineno)s - %(message)s' + formatter = logging.Formatter(log_format) + + # Create the handler + handler = logging.StreamHandler(sys.stdout) + handler.setFormatter(formatter) + self.addHandler(handler) + + + def _check_multiline(self, msg : Union[str, list, tuple]) -> str: + """Automatically join multiline output. + + Parameters + ---------- + msg : str, list or tuple + Message or message fragments. + + Returns + ------- + str + Original string or fragments joined with \n + """ + + if type(msg) in [list, tuple]: + return '\n'.join([str(m) for m in msg]) + + return msg + + + def debug(self, msg, *args, **kwargs): + """Emit a debug line, with multiline support. + + Parameters + ---------- + msg : str or list + Message or message fragments for additional detail. + """ + msg = self._check_multiline(msg) + super().debug(msg, *args, **kwargs) + + + def info(self, msg, *args, **kwargs): + """Emit a info line, with multiline support. + + Parameters + ---------- + msg : str or list + Message or message fragments for additional detail. 
+ """ + msg = self._check_multiline(msg) + super().info(msg, *args, **kwargs) + + + def warn(self, msg, *args, **kwargs): + """Emit a warn line, with multiline support. + + Parameters + ---------- + msg : str or list + Message or message fragments for additional detail. + """ + msg = self._check_multiline(msg) + super().warn(msg, *args, **kwargs) + + + def error(self, msg, *args, **kwargs): + """Emit a error line, with multiline support. + + Parameters + ---------- + msg : str or list + Message or message fragments for additional detail. + """ + msg = self._check_multiline(msg) + super().error(msg, *args, **kwargs) + + + def critical(self, msg, *args, **kwargs): + """Emit a critical line, with multiline support. + + Parameters + ---------- + msg : str or list + Message or message fragments for additional detail. + """ + msg = self._check_multiline(msg) + super().critical(msg, *args, **kwargs) \ No newline at end of file diff --git a/benchcab/utils/repo.py b/benchcab/utils/repo.py index f2d40a3c..45aa6b7b 100644 --- a/benchcab/utils/repo.py +++ b/benchcab/utils/repo.py @@ -9,6 +9,7 @@ from benchcab import internal from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface +from benchcab.utils import get_logger class Repo(AbstractBaseClass): @@ -83,6 +84,7 @@ def __init__( self.branch = branch self.path = path / branch if path.is_dir() else path self.commit = commit + self.logger = get_logger() def checkout(self, verbose=False): """Checkout the source code. 
@@ -100,11 +102,10 @@ def checkout(self, verbose=False): verbose=verbose, ) if self.commit: - if verbose: - print(f"Reset to commit {self.commit} (hard reset)") + self.logger.debug(f"Reset to commit {self.commit} (hard reset)") repo = git.Repo(self.path) repo.head.reset(self.commit, working_tree=True) - print(f"Successfully checked out {self.branch} - {self.get_revision()}") + self.logger.debug(f"Successfully checked out {self.branch} - {self.get_revision()}") def get_revision(self) -> str: """Return the latest revision of the source code. @@ -185,7 +186,7 @@ def checkout(self, verbose=False): self.subprocess_handler.run_cmd(cmd, verbose=verbose) - print(f"Successfully checked out {self.path.name} - {self.get_revision()}") + self.logger.debug(f"Successfully checked out {self.path.name} - {self.get_revision()}") def get_revision(self) -> str: """Return the latest revision of the source code. diff --git a/benchcab/utils/singleton.py b/benchcab/utils/singleton.py new file mode 100644 index 00000000..e76d08bf --- /dev/null +++ b/benchcab/utils/singleton.py @@ -0,0 +1,12 @@ +"""Singleton Object.""" + +class Singleton(type): + + _instances = {} + + def __call__(cls, *args, **kwargs): + + if cls not in cls._instances: + cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) + + return cls._instances[cls] \ No newline at end of file diff --git a/tests/test_utils.py b/tests/test_utils.py index 848161db..f97d93d5 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -20,4 +20,12 @@ def test_load_package_data_fail(): """Test load_package_data() fails as expected.""" with pytest.raises(FileNotFoundError): - missing = bu.load_package_data('config-missing.yml') \ No newline at end of file + missing = bu.load_package_data('config-missing.yml') + + +def test_get_logger_singleton(): + """Test get_logger() returns a singleton object...""" + logger1 = bu.get_logger(name='benchcab') + logger2 = bu.get_logger(name='benchcab') + + assert logger1 is logger2 \ No 
newline at end of file From 59e56feb22f56b9e87f3afc44cdbec6b46a89be1 Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Wed, 20 Dec 2023 11:53:23 +1100 Subject: [PATCH 02/50] Reorganised logger, updated most debugs to infos. Fixes #103 --- benchcab/benchcab.py | 28 +++---- benchcab/utils/__init__.py | 104 +------------------------- benchcab/utils/repo.py | 4 +- benchcab/utils/singleton_logger.py | 115 +++++++++++++++++++++++++++++ 4 files changed, 134 insertions(+), 117 deletions(-) create mode 100644 benchcab/utils/singleton_logger.py diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index cf27c599..34a17119 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -147,7 +147,7 @@ def fluxsite_submit_job( raise RuntimeError(msg) job_script_path = Path(internal.QSUB_FNAME) - self.logger.debug([ + self.logger.info([ "Creating PBS job script to run fluxsite tasks on compute " f"nodes: {job_script_path}" ]) @@ -174,7 +174,7 @@ def fluxsite_submit_job( self.logger.error(exc.output) raise - self.logger.debug([ + self.logger.info([ f"PBS job submitted: {proc.stdout.strip()}", "The CABLE log file for each task is written to", f"{internal.FLUXSITE_DIRS['LOG']}/_log.txt", @@ -192,7 +192,7 @@ def checkout(self, config_path: str, verbose: bool): mkdir(internal.SRC_DIR, exist_ok=True, verbose=True) - self.logger.debug("Checking out repositories...") + self.logger.info("Checking out repositories...") rev_number_log = "" for model in self._get_models(config): model.repo.checkout(verbose=verbose) @@ -207,7 +207,7 @@ def checkout(self, config_path: str, verbose: bool): cable_aux_repo.checkout(verbose=verbose) rev_number_log_path = next_path("rev_number-*.log") - self.loffer.debug(f"Writing revision number info to {rev_number_log_path}") + self.logger.info(f"Writing revision number info to {rev_number_log_path}") with rev_number_log_path.open("w", encoding="utf-8") as file: file.write(rev_number_log) @@ -219,18 +219,18 @@ def build(self, config_path: str, verbose: 
bool): for repo in self._get_models(config): if repo.build_script: - self.logger.debug([ + self.logger.info([ "Compiling CABLE using custom build script for " f"realisation {repo.name}..." ]) repo.custom_build(modules=config["modules"], verbose=verbose) else: build_mode = "with MPI" if internal.MPI else "serially" - self.logger.debug(f"Compiling CABLE {build_mode} for realisation {repo.name}...") + self.logger.info(f"Compiling CABLE {build_mode} for realisation {repo.name}...") repo.pre_build(verbose=verbose) repo.run_build(modules=config["modules"], verbose=verbose) repo.post_build(verbose=verbose) - self.logger.debug(f"Successfully compiled CABLE for realisation {repo.name}") + self.logger.info(f"Successfully compiled CABLE for realisation {repo.name}") def fluxsite_setup_work_directory(self, config_path: str, verbose: bool): @@ -239,12 +239,12 @@ def fluxsite_setup_work_directory(self, config_path: str, verbose: bool): self._validate_environment(project=config["project"], modules=config["modules"]) tasks = self.tasks if self.tasks else self._initialise_tasks(config) - self.logger.debug("Setting up run directory tree for fluxsite tests...") + self.logger.info("Setting up run directory tree for fluxsite tests...") setup_fluxsite_directory_tree(verbose=verbose) - self.logger.debug("Setting up tasks...") + self.logger.info("Setting up tasks...") for task in tasks: task.setup_task(verbose=verbose) - self.logger.debug("Successfully setup fluxsite tasks") + self.logger.info("Successfully setup fluxsite tasks") def fluxsite_run_tasks(self, config_path: str, verbose: bool): """Endpoint for `benchcab fluxsite-run-tasks`.""" @@ -252,7 +252,7 @@ def fluxsite_run_tasks(self, config_path: str, verbose: bool): self._validate_environment(project=config["project"], modules=config["modules"]) tasks = self.tasks if self.tasks else self._initialise_tasks(config) - self.logger.debug("Running fluxsite tasks...") + self.logger.info("Running fluxsite tasks...") try: multiprocess = 
config["fluxsite"]["multiprocess"] except KeyError: @@ -264,7 +264,7 @@ def fluxsite_run_tasks(self, config_path: str, verbose: bool): run_tasks_in_parallel(tasks, n_processes=ncpus, verbose=verbose) else: run_tasks(tasks, verbose=verbose) - self.logger.debug("Successfully ran fluxsite tasks") + self.logger.info("Successfully ran fluxsite tasks") def fluxsite_bitwise_cmp(self, config_path: str, verbose: bool): """Endpoint for `benchcab fluxsite-bitwise-cmp`.""" @@ -279,7 +279,7 @@ def fluxsite_bitwise_cmp(self, config_path: str, verbose: bool): tasks = self.tasks if self.tasks else self._initialise_tasks(config) comparisons = get_fluxsite_comparisons(tasks) - self.logger.debug("Running comparison tasks...") + self.logger.info("Running comparison tasks...") try: multiprocess = config["fluxsite"]["multiprocess"] except KeyError: @@ -292,7 +292,7 @@ def fluxsite_bitwise_cmp(self, config_path: str, verbose: bool): run_comparisons_in_parallel(comparisons, n_processes=ncpus, verbose=verbose) else: run_comparisons(comparisons, verbose=verbose) - self.logger.debug("Successfully ran comparison tasks") + self.logger.info("Successfully ran comparison tasks") def fluxsite( self, config_path: str, no_submit: bool, verbose: bool, skip: list[str] diff --git a/benchcab/utils/__init__.py b/benchcab/utils/__init__.py index 37b41ff5..cf5e354d 100644 --- a/benchcab/utils/__init__.py +++ b/benchcab/utils/__init__.py @@ -11,8 +11,10 @@ from pathlib import Path import logging from benchcab.utils.singleton import Singleton +from benchcab.utils.singleton_logger import SingletonLogger from typing import Union + # List of one-argument decoding functions. PACKAGE_DATA_DECODERS = dict(json=json.loads, yml=yaml.safe_load) @@ -64,104 +66,4 @@ def get_logger(name='benchcab', level='debug'): benchcab.utils.SingletonLogger Logger instance. 
""" - return SingletonLogger(name=name, level=level) - - -class SingletonLogger(logging.Logger, metaclass=Singleton): - - def __init__(self, name : str = 'benchcab', level : str = 'debug'): - - super(SingletonLogger, self).__init__(name=name) - - # Set level - level = getattr(logging, level.upper()) - self.setLevel(level) - - # Create the formatter - log_format = '%(asctime)s - %(levelname)s - %(module)s.%(filename)s:%(lineno)s - %(message)s' - formatter = logging.Formatter(log_format) - - # Create the handler - handler = logging.StreamHandler(sys.stdout) - handler.setFormatter(formatter) - self.addHandler(handler) - - - def _check_multiline(self, msg : Union[str, list, tuple]) -> str: - """Automatically join multiline output. - - Parameters - ---------- - msg : str, list or tuple - Message or message fragments. - - Returns - ------- - str - Original string or fragments joined with \n - """ - - if type(msg) in [list, tuple]: - return '\n'.join([str(m) for m in msg]) - - return msg - - - def debug(self, msg, *args, **kwargs): - """Emit a debug line, with multiline support. - - Parameters - ---------- - msg : str or list - Message or message fragments for additional detail. - """ - msg = self._check_multiline(msg) - super().debug(msg, *args, **kwargs) - - - def info(self, msg, *args, **kwargs): - """Emit a info line, with multiline support. - - Parameters - ---------- - msg : str or list - Message or message fragments for additional detail. - """ - msg = self._check_multiline(msg) - super().info(msg, *args, **kwargs) - - - def warn(self, msg, *args, **kwargs): - """Emit a warn line, with multiline support. - - Parameters - ---------- - msg : str or list - Message or message fragments for additional detail. - """ - msg = self._check_multiline(msg) - super().warn(msg, *args, **kwargs) - - - def error(self, msg, *args, **kwargs): - """Emit a error line, with multiline support. 
- - Parameters - ---------- - msg : str or list - Message or message fragments for additional detail. - """ - msg = self._check_multiline(msg) - super().error(msg, *args, **kwargs) - - - def critical(self, msg, *args, **kwargs): - """Emit a critical line, with multiline support. - - Parameters - ---------- - msg : str or list - Message or message fragments for additional detail. - """ - msg = self._check_multiline(msg) - super().critical(msg, *args, **kwargs) \ No newline at end of file + return SingletonLogger(name=name, level=level) \ No newline at end of file diff --git a/benchcab/utils/repo.py b/benchcab/utils/repo.py index 45aa6b7b..b583c771 100644 --- a/benchcab/utils/repo.py +++ b/benchcab/utils/repo.py @@ -105,7 +105,7 @@ def checkout(self, verbose=False): self.logger.debug(f"Reset to commit {self.commit} (hard reset)") repo = git.Repo(self.path) repo.head.reset(self.commit, working_tree=True) - self.logger.debug(f"Successfully checked out {self.branch} - {self.get_revision()}") + self.logger.info(f"Successfully checked out {self.branch} - {self.get_revision()}") def get_revision(self) -> str: """Return the latest revision of the source code. @@ -186,7 +186,7 @@ def checkout(self, verbose=False): self.subprocess_handler.run_cmd(cmd, verbose=verbose) - self.logger.debug(f"Successfully checked out {self.path.name} - {self.get_revision()}") + self.logger.info(f"Successfully checked out {self.path.name} - {self.get_revision()}") def get_revision(self) -> str: """Return the latest revision of the source code. 
diff --git a/benchcab/utils/singleton_logger.py b/benchcab/utils/singleton_logger.py new file mode 100644 index 00000000..6c1e715a --- /dev/null +++ b/benchcab/utils/singleton_logger.py @@ -0,0 +1,115 @@ +"""Singleton Logging Object.""" +import sys +import logging +from typing import Union +from benchcab.utils.singleton import Singleton + + +class SingletonLogger(logging.Logger, metaclass=Singleton): + + def __init__(self, name : str = 'benchcab', level : str = 'debug'): + """Singleton logging instance. + + Parameters + ---------- + name : str, optional + Name of the logger, by default 'benchcab' (this ensures singleton-ness) + level : str, optional + Log level, by default 'debug' + """ + + # Initialise the logger + super(SingletonLogger, self).__init__(name=name) + + # Set level + level = getattr(logging, level.upper()) + self.setLevel(level) + + # Create the formatter + log_format = '%(asctime)s - %(levelname)s - %(module)s.%(filename)s:%(lineno)s - %(message)s' + formatter = logging.Formatter(log_format) + + # Create/set the handler to point to stdout + handler = logging.StreamHandler(sys.stdout) + handler.setFormatter(formatter) + self.addHandler(handler) + + + def _check_multiline(self, msg : Union[str, list, tuple]) -> str: + """Automatically join multiline output. + + Parameters + ---------- + msg : str, list or tuple + Message or message fragments. + + Returns + ------- + str + Original string or fragments joined with \n + """ + + if type(msg) in [list, tuple]: + return '\n'.join([str(m) for m in msg]) + + return msg + + + def debug(self, msg, *args, **kwargs): + """Emit a debug line, with multiline support. + + Parameters + ---------- + msg : str or list + Message or message fragments for additional detail. + """ + msg = self._check_multiline(msg) + super().debug(msg, *args, **kwargs) + + + def info(self, msg, *args, **kwargs): + """Emit a info line, with multiline support. 
+ + Parameters + ---------- + msg : str or list + Message or message fragments for additional detail. + """ + msg = self._check_multiline(msg) + super().info(msg, *args, **kwargs) + + + def warn(self, msg, *args, **kwargs): + """Emit a warn line, with multiline support. + + Parameters + ---------- + msg : str or list + Message or message fragments for additional detail. + """ + msg = self._check_multiline(msg) + super().warn(msg, *args, **kwargs) + + + def error(self, msg, *args, **kwargs): + """Emit a error line, with multiline support. + + Parameters + ---------- + msg : str or list + Message or message fragments for additional detail. + """ + msg = self._check_multiline(msg) + super().error(msg, *args, **kwargs) + + + def critical(self, msg, *args, **kwargs): + """Emit a critical line, with multiline support. + + Parameters + ---------- + msg : str or list + Message or message fragments for additional detail. + """ + msg = self._check_multiline(msg) + super().critical(msg, *args, **kwargs) \ No newline at end of file From 43deac85d620b4d10c7b4365c5ac7d0892d55c00 Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Wed, 20 Dec 2023 12:01:35 +1100 Subject: [PATCH 03/50] Added logger instantiation to CLI. Fixes #103 --- benchcab/main.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/benchcab/main.py b/benchcab/main.py index 3962b6b6..57576a62 100644 --- a/benchcab/main.py +++ b/benchcab/main.py @@ -5,6 +5,7 @@ from benchcab.benchcab import Benchcab from benchcab.cli import generate_parser +from benchcab.utils import get_logger def parse_and_dispatch(parser): @@ -16,6 +17,13 @@ def parse_and_dispatch(parser): Parser object. 
""" args = vars(parser.parse_args(sys.argv[1:] if sys.argv[1:] else ["-h"])) + + # Intercept the verbosity flag to engage the logger + log_level = 'debug' if args.get('verbose', False) == True else 'info' + + # We just need to instantiate this with the desired level + logger = get_logger(level=log_level) + func = args.pop("func") func(**args) From 63b41f8f3a376cfd46fb536507db82671cd9df15 Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Thu, 21 Dec 2023 11:48:05 +1100 Subject: [PATCH 04/50] Automatic application of ruff/black suggestions. Manual edits to follow. #103 --- benchcab/__init__.py | 5 ++- benchcab/benchcab.py | 71 +++++++++++++++++------------- benchcab/config.py | 5 +-- benchcab/main.py | 2 +- benchcab/utils/__init__.py | 15 +++---- benchcab/utils/fs.py | 2 - benchcab/utils/repo.py | 10 +++-- benchcab/utils/singleton.py | 9 ++-- benchcab/utils/singleton_logger.py | 26 ++++------- setup.py | 4 +- tests/test_config.py | 13 +++--- tests/test_utils.py | 14 +++--- 12 files changed, 87 insertions(+), 89 deletions(-) diff --git a/benchcab/__init__.py b/benchcab/__init__.py index 8617e75f..3de2bfe5 100644 --- a/benchcab/__init__.py +++ b/benchcab/__init__.py @@ -2,12 +2,15 @@ # SPDX-License-Identifier: Apache-2.0 import importlib.metadata + from benchcab.utils import get_logger try: __version__ = importlib.metadata.version("benchcab") except importlib.metadata.PackageNotFoundError: __version__ = "" - get_logger().warn('unable to interrogate version string from installed distribution.') + get_logger().warn( + "unable to interrogate version string from installed distribution." 
+ ) # Note: cannot re-raise exception here as this will break pytest # when running without first installing the package diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index 34a17119..cae441a7 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -23,12 +23,12 @@ ) from benchcab.internal import get_met_forcing_file_names from benchcab.model import Model +from benchcab.utils import get_logger from benchcab.utils.fs import mkdir, next_path from benchcab.utils.pbs import render_job_script from benchcab.utils.repo import SVNRepo, create_repo from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface from benchcab.workdir import setup_fluxsite_directory_tree -from benchcab.utils import get_logger class Benchcab: @@ -71,11 +71,13 @@ def _validate_environment(self, project: str, modules: list): required_groups = [project, "ks32", "hh5"] groups = [grp.getgrgid(gid).gr_name for gid in os.getgroups()] if not set(required_groups).issubset(groups): - self.logger.error([ - 'User does not have the required group permissions.', - 'The required groups are:', - ' ,'.join(required_groups) - ]) + self.logger.error( + [ + "User does not have the required group permissions.", + "The required groups are:", + " ,".join(required_groups), + ] + ) sys.exit(1) for modname in modules: @@ -90,10 +92,12 @@ def _validate_environment(self, project: str, modules: list): for site_id in all_site_ids: paths = list(internal.MET_DIR.glob(f"{site_id}*")) if not paths: - self.logger.error([ - f"Failed to infer met file for site id '{site_id}' in " - f"{internal.MET_DIR}." - ]) + self.logger.error( + [ + f"Failed to infer met file for site id '{site_id}' in " + f"{internal.MET_DIR}." 
+ ] + ) sys.exit(1) if len(paths) > 1: self.logger.error( @@ -147,10 +151,12 @@ def fluxsite_submit_job( raise RuntimeError(msg) job_script_path = Path(internal.QSUB_FNAME) - self.logger.info([ - "Creating PBS job script to run fluxsite tasks on compute " - f"nodes: {job_script_path}" - ]) + self.logger.info( + [ + "Creating PBS job script to run fluxsite tasks on compute " + f"nodes: {job_script_path}" + ] + ) with job_script_path.open("w", encoding="utf-8") as file: contents = render_job_script( project=config["project"], @@ -174,16 +180,17 @@ def fluxsite_submit_job( self.logger.error(exc.output) raise - self.logger.info([ - f"PBS job submitted: {proc.stdout.strip()}", - "The CABLE log file for each task is written to", - f"{internal.FLUXSITE_DIRS['LOG']}/_log.txt", - "The CABLE standard output for each task is written to", - f"{internal.FLUXSITE_DIRS['TASKS']}//out.txt", - "The NetCDF output for each task is written to", - f"{internal.FLUXSITE_DIRS['OUTPUT']}/_out.nc" - ]) - + self.logger.info( + [ + f"PBS job submitted: {proc.stdout.strip()}", + "The CABLE log file for each task is written to", + f"{internal.FLUXSITE_DIRS['LOG']}/_log.txt", + "The CABLE standard output for each task is written to", + f"{internal.FLUXSITE_DIRS['TASKS']}//out.txt", + "The NetCDF output for each task is written to", + f"{internal.FLUXSITE_DIRS['OUTPUT']}/_out.nc", + ] + ) def checkout(self, config_path: str, verbose: bool): """Endpoint for `benchcab checkout`.""" @@ -211,7 +218,6 @@ def checkout(self, config_path: str, verbose: bool): with rev_number_log_path.open("w", encoding="utf-8") as file: file.write(rev_number_log) - def build(self, config_path: str, verbose: bool): """Endpoint for `benchcab build`.""" config = self._get_config(config_path) @@ -219,20 +225,23 @@ def build(self, config_path: str, verbose: bool): for repo in self._get_models(config): if repo.build_script: - self.logger.info([ - "Compiling CABLE using custom build script for " - f"realisation {repo.name}..." 
- ]) + self.logger.info( + [ + "Compiling CABLE using custom build script for " + f"realisation {repo.name}..." + ] + ) repo.custom_build(modules=config["modules"], verbose=verbose) else: build_mode = "with MPI" if internal.MPI else "serially" - self.logger.info(f"Compiling CABLE {build_mode} for realisation {repo.name}...") + self.logger.info( + f"Compiling CABLE {build_mode} for realisation {repo.name}..." + ) repo.pre_build(verbose=verbose) repo.run_build(modules=config["modules"], verbose=verbose) repo.post_build(verbose=verbose) self.logger.info(f"Successfully compiled CABLE for realisation {repo.name}") - def fluxsite_setup_work_directory(self, config_path: str, verbose: bool): """Endpoint for `benchcab fluxsite-setup-work-dir`.""" config = self._get_config(config_path) diff --git a/benchcab/config.py b/benchcab/config.py index f2daa281..01757d8d 100644 --- a/benchcab/config.py +++ b/benchcab/config.py @@ -5,8 +5,8 @@ from pathlib import Path import yaml -from benchcab import internal from cerberus import Validator + import benchcab.utils as bu @@ -19,7 +19,6 @@ def __init__(self, validator: Validator): validator: cerberus.Validator A validation object that has been used and has the errors attribute. """ - # Nicely format the errors. errors = [f"{k} = {v}" for k, v in validator.errors.items()] @@ -49,7 +48,6 @@ def validate_config(config: dict) -> bool: ConfigValidationException Raised when the configuration file fails validation. """ - # Load the schema schema = bu.load_package_data("config-schema.yml") @@ -85,7 +83,6 @@ def read_config(config_path: str) -> dict: ConfigValidationError Raised when the configuration file fails validation. """ - # Load the configuration file. 
with open(Path(config_path), "r", encoding="utf-8") as file: config = yaml.safe_load(file) diff --git a/benchcab/main.py b/benchcab/main.py index 57576a62..f40956fe 100644 --- a/benchcab/main.py +++ b/benchcab/main.py @@ -19,7 +19,7 @@ def parse_and_dispatch(parser): args = vars(parser.parse_args(sys.argv[1:] if sys.argv[1:] else ["-h"])) # Intercept the verbosity flag to engage the logger - log_level = 'debug' if args.get('verbose', False) == True else 'info' + log_level = "debug" if args.get("verbose", False) == True else "info" # We just need to instantiate this with the desired level logger = get_logger(level=log_level) diff --git a/benchcab/utils/__init__.py b/benchcab/utils/__init__.py index cf5e354d..306478c6 100644 --- a/benchcab/utils/__init__.py +++ b/benchcab/utils/__init__.py @@ -2,18 +2,15 @@ # SPDX-License-Identifier: Apache-2.0 """Top-level utilities.""" -import pkgutil import json -import yaml import os -import sys +import pkgutil from importlib import resources from pathlib import Path -import logging -from benchcab.utils.singleton import Singleton -from benchcab.utils.singleton_logger import SingletonLogger -from typing import Union +import yaml + +from benchcab.utils.singleton_logger import SingletonLogger # List of one-argument decoding functions. PACKAGE_DATA_DECODERS = dict(json=json.loads, yml=yaml.safe_load) @@ -51,7 +48,7 @@ def load_package_data(filename: str) -> dict: return PACKAGE_DATA_DECODERS[ext](raw) -def get_logger(name='benchcab', level='debug'): +def get_logger(name="benchcab", level="debug"): """Get a singleton logger object. Parameters @@ -66,4 +63,4 @@ def get_logger(name='benchcab', level='debug'): benchcab.utils.SingletonLogger Logger instance. 
""" - return SingletonLogger(name=name, level=level) \ No newline at end of file + return SingletonLogger(name=name, level=level) diff --git a/benchcab/utils/fs.py b/benchcab/utils/fs.py index 13726835..d4c19c49 100644 --- a/benchcab/utils/fs.py +++ b/benchcab/utils/fs.py @@ -44,7 +44,6 @@ def next_path(path_pattern: str, path: Path = Path(), sep: str = "-") -> Path: file-2.txt file-3.txt """ - loc_pattern = Path(path_pattern) new_file_index = 1 common_filename, _ = loc_pattern.stem.split(sep) @@ -69,7 +68,6 @@ def mkdir(new_path: Path, verbose=False, **kwargs): **kwargs : dict, optional Additional options for `pathlib.Path.mkdir()` """ - if verbose: print(f"Creating {new_path} directory") new_path.mkdir(**kwargs) diff --git a/benchcab/utils/repo.py b/benchcab/utils/repo.py index b583c771..bffbb8df 100644 --- a/benchcab/utils/repo.py +++ b/benchcab/utils/repo.py @@ -8,8 +8,8 @@ import git from benchcab import internal -from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface from benchcab.utils import get_logger +from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface class Repo(AbstractBaseClass): @@ -105,7 +105,9 @@ def checkout(self, verbose=False): self.logger.debug(f"Reset to commit {self.commit} (hard reset)") repo = git.Repo(self.path) repo.head.reset(self.commit, working_tree=True) - self.logger.info(f"Successfully checked out {self.branch} - {self.get_revision()}") + self.logger.info( + f"Successfully checked out {self.branch} - {self.get_revision()}" + ) def get_revision(self) -> str: """Return the latest revision of the source code. @@ -186,7 +188,9 @@ def checkout(self, verbose=False): self.subprocess_handler.run_cmd(cmd, verbose=verbose) - self.logger.info(f"Successfully checked out {self.path.name} - {self.get_revision()}") + self.logger.info( + f"Successfully checked out {self.path.name} - {self.get_revision()}" + ) def get_revision(self) -> str: """Return the latest revision of the source code. 
diff --git a/benchcab/utils/singleton.py b/benchcab/utils/singleton.py index e76d08bf..ce2960b0 100644 --- a/benchcab/utils/singleton.py +++ b/benchcab/utils/singleton.py @@ -1,12 +1,11 @@ """Singleton Object.""" -class Singleton(type): +class Singleton(type): _instances = {} - + def __call__(cls, *args, **kwargs): - if cls not in cls._instances: cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) - - return cls._instances[cls] \ No newline at end of file + + return cls._instances[cls] diff --git a/benchcab/utils/singleton_logger.py b/benchcab/utils/singleton_logger.py index 6c1e715a..fca1ec11 100644 --- a/benchcab/utils/singleton_logger.py +++ b/benchcab/utils/singleton_logger.py @@ -1,13 +1,13 @@ """Singleton Logging Object.""" -import sys import logging +import sys from typing import Union + from benchcab.utils.singleton import Singleton class SingletonLogger(logging.Logger, metaclass=Singleton): - - def __init__(self, name : str = 'benchcab', level : str = 'debug'): + def __init__(self, name: str = "benchcab", level: str = "debug"): """Singleton logging instance. Parameters @@ -17,25 +17,23 @@ def __init__(self, name : str = 'benchcab', level : str = 'debug'): level : str, optional Log level, by default 'debug' """ - # Initialise the logger super(SingletonLogger, self).__init__(name=name) # Set level level = getattr(logging, level.upper()) self.setLevel(level) - + # Create the formatter - log_format = '%(asctime)s - %(levelname)s - %(module)s.%(filename)s:%(lineno)s - %(message)s' + log_format = "%(asctime)s - %(levelname)s - %(module)s.%(filename)s:%(lineno)s - %(message)s" formatter = logging.Formatter(log_format) # Create/set the handler to point to stdout handler = logging.StreamHandler(sys.stdout) handler.setFormatter(formatter) self.addHandler(handler) - - def _check_multiline(self, msg : Union[str, list, tuple]) -> str: + def _check_multiline(self, msg: Union[str, list, tuple]) -> str: """Automatically join multiline output. 
Parameters @@ -48,13 +46,11 @@ def _check_multiline(self, msg : Union[str, list, tuple]) -> str: str Original string or fragments joined with \n """ - if type(msg) in [list, tuple]: - return '\n'.join([str(m) for m in msg]) - + return "\n".join([str(m) for m in msg]) + return msg - def debug(self, msg, *args, **kwargs): """Emit a debug line, with multiline support. @@ -66,7 +62,6 @@ def debug(self, msg, *args, **kwargs): msg = self._check_multiline(msg) super().debug(msg, *args, **kwargs) - def info(self, msg, *args, **kwargs): """Emit a info line, with multiline support. @@ -78,7 +73,6 @@ def info(self, msg, *args, **kwargs): msg = self._check_multiline(msg) super().info(msg, *args, **kwargs) - def warn(self, msg, *args, **kwargs): """Emit a warn line, with multiline support. @@ -90,7 +84,6 @@ def warn(self, msg, *args, **kwargs): msg = self._check_multiline(msg) super().warn(msg, *args, **kwargs) - def error(self, msg, *args, **kwargs): """Emit a error line, with multiline support. @@ -102,7 +95,6 @@ def error(self, msg, *args, **kwargs): msg = self._check_multiline(msg) super().error(msg, *args, **kwargs) - def critical(self, msg, *args, **kwargs): """Emit a critical line, with multiline support. @@ -112,4 +104,4 @@ def critical(self, msg, *args, **kwargs): Message or message fragments for additional detail. 
""" msg = self._check_multiline(msg) - super().critical(msg, *args, **kwargs) \ No newline at end of file + super().critical(msg, *args, **kwargs) diff --git a/setup.py b/setup.py index 84088388..d5f86ab3 100644 --- a/setup.py +++ b/setup.py @@ -3,6 +3,6 @@ from setuptools import setup setup( - setup_requires=['setuptools', 'pbr'], + setup_requires=["setuptools", "pbr"], pbr=True, -) \ No newline at end of file +) diff --git a/tests/test_config.py b/tests/test_config.py index b6e0d8a8..8b53fe7d 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,13 +1,14 @@ """`pytest` tests for config.py""" import pytest -import benchcab.utils as bu + import benchcab.config as bc +import benchcab.utils as bu def test_read_config_pass(): """Test read_config() passes as expected.""" - existent_path = bu.get_installed_root() / 'data' / 'test' / 'config-valid.yml' - + existent_path = bu.get_installed_root() / "data" / "test" / "config-valid.yml" + # Test for a path that exists config = bc.read_config(existent_path) assert config @@ -15,7 +16,7 @@ def test_read_config_pass(): def test_read_config_fail(): """Test that read_config() fails as expected.""" - nonexistent_path = bu.get_installed_root() / 'data' / 'test' / 'config-missing.yml' + nonexistent_path = bu.get_installed_root() / "data" / "test" / "config-missing.yml" # Test for a path that does not exist. 
with pytest.raises(FileNotFoundError): @@ -24,12 +25,12 @@ def test_read_config_fail(): def test_validate_config_valid(): """Test validate_config() for a valid config file.""" - valid_config = bu.load_package_data('test/config-valid.yml') + valid_config = bu.load_package_data("test/config-valid.yml") assert bc.validate_config(valid_config) def test_validate_config_invalid(): """Test validate_config() for an invalid config file.""" - invalid_config = bu.load_package_data('test/config-invalid.yml') + invalid_config = bu.load_package_data("test/config-invalid.yml") with pytest.raises(bc.ConfigValidationException): bc.validate_config(invalid_config) diff --git a/tests/test_utils.py b/tests/test_utils.py index f97d93d5..ad39db03 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,31 +1,29 @@ """Tests for utilities.""" import pytest + import benchcab.utils as bu def test_get_installed_root(): """Test get_installed_root().""" - # Test it actually returns something. We should be able to mock this. 
assert bu.get_installed_root() def test_load_package_data_pass(): """Test load_package_data() passes as expected.""" - - assert isinstance(bu.load_package_data('config-schema.yml'), dict) + assert isinstance(bu.load_package_data("config-schema.yml"), dict) def test_load_package_data_fail(): """Test load_package_data() fails as expected.""" - with pytest.raises(FileNotFoundError): - missing = bu.load_package_data('config-missing.yml') + missing = bu.load_package_data("config-missing.yml") def test_get_logger_singleton(): """Test get_logger() returns a singleton object...""" - logger1 = bu.get_logger(name='benchcab') - logger2 = bu.get_logger(name='benchcab') + logger1 = bu.get_logger(name="benchcab") + logger2 = bu.get_logger(name="benchcab") - assert logger1 is logger2 \ No newline at end of file + assert logger1 is logger2 From 023c4d4fa0f6d31f8086c655e406b5df10e500d1 Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Thu, 21 Dec 2023 15:16:43 +1100 Subject: [PATCH 05/50] Manual ruff modifications. #103 --- benchcab/benchcab.py | 12 +++++++++--- benchcab/comparison.py | 9 +++++++++ benchcab/config.py | 10 ++++++---- benchcab/environment_modules.py | 28 ++++++++++++++++++++++++++++ benchcab/fluxsite.py | 27 +++++++++++++++++++++++++++ benchcab/main.py | 4 ++-- benchcab/model.py | 17 +++++++++++++++++ benchcab/utils/__init__.py | 2 +- benchcab/utils/fs.py | 4 +++- benchcab/utils/singleton.py | 9 +++++++++ benchcab/utils/singleton_logger.py | 24 +++++++++++++++++++++++- benchcab/utils/subprocess.py | 20 ++++++++++++++++++++ ruff.toml | 3 +++ tests/test_config.py | 6 +++--- tests/test_fluxsite.py | 2 +- tests/test_utils.py | 2 +- 16 files changed, 162 insertions(+), 17 deletions(-) diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index cae441a7..d02b94fd 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -1,6 +1,3 @@ -# Copyright 2022 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. 
-# SPDX-License-Identifier: Apache-2.0 - """Contains the benchcab application class.""" import grp @@ -42,6 +39,15 @@ def __init__( benchcab_exe_path: Optional[Path], validate_env: bool = True, ) -> None: + """Constructor. + + Parameters + ---------- + benchcab_exe_path : Optional[Path] + Path to the executable. + validate_env : bool, optional + Validate the environment, by default True + """ self.benchcab_exe_path = benchcab_exe_path self.validate_env = validate_env diff --git a/benchcab/comparison.py b/benchcab/comparison.py index e3f1200d..1af9ca9c 100644 --- a/benchcab/comparison.py +++ b/benchcab/comparison.py @@ -23,6 +23,15 @@ def __init__( files: tuple[Path, Path], task_name: str, ) -> None: + """Constructor. + + Parameters + ---------- + files : tuple[Path, Path] + Files. + task_name : str + Name of the task. + """ self.files = files self.task_name = task_name diff --git a/benchcab/config.py b/benchcab/config.py index 01757d8d..37079bd7 100644 --- a/benchcab/config.py +++ b/benchcab/config.py @@ -10,7 +10,9 @@ import benchcab.utils as bu -class ConfigValidationException(Exception): +class ConfigValidationError(Exception): + """Config validation error class.""" + def __init__(self, validator: Validator): """Config validation exception. @@ -45,7 +47,7 @@ def validate_config(config: dict) -> bool: Raises ------ - ConfigValidationException + ConfigValidationError Raised when the configuration file fails validation. """ # Load the schema @@ -62,7 +64,7 @@ def validate_config(config: dict) -> bool: return True # Invalid - raise ConfigValidationException(v) + raise ConfigValidationError(v) def read_config(config_path: str) -> dict: @@ -84,7 +86,7 @@ def read_config(config_path: str) -> dict: Raised when the configuration file fails validation. """ # Load the configuration file. - with open(Path(config_path), "r", encoding="utf-8") as file: + with Path.open(Path(config_path), "r", encoding="utf-8") as file: config = yaml.safe_load(file) # Validate and return. 
diff --git a/benchcab/environment_modules.py b/benchcab/environment_modules.py index 9af465b6..c0a91ece 100644 --- a/benchcab/environment_modules.py +++ b/benchcab/environment_modules.py @@ -65,15 +65,43 @@ class EnvironmentModules(EnvironmentModulesInterface): """A concrete implementation of the `EnvironmentModulesInterface` abstract class.""" def module_is_avail(self, *args: str) -> bool: + """Check if module is available. + + Returns + ------- + bool + True if available, False otherwise. + """ return module("is-avail", *args) def module_is_loaded(self, *args: str) -> bool: + """Check if module is loaded. + + Returns + ------- + bool + True if loaded, False otherwise. + """ return module("is-loaded", *args) def module_load(self, *args: str) -> None: + """Load a module. + + Raises + ------ + EnvironmentModulesError + Raised when module fails to load. + """ if not module("load", *args): raise EnvironmentModulesError("Failed to load modules: " + " ".join(args)) def module_unload(self, *args: str) -> None: + """Unload a module. + + Raises + ------ + EnvironmentModulesError + Raised when module fails to unload. + """ if not module("unload", *args): raise EnvironmentModulesError("Failed to unload modules: " + " ".join(args)) diff --git a/benchcab/fluxsite.py b/benchcab/fluxsite.py index f455a2b0..a37265f7 100644 --- a/benchcab/fluxsite.py +++ b/benchcab/fluxsite.py @@ -30,6 +30,20 @@ def deep_update(mapping: Dict[KeyType, Any], *updating_mappings: Dict[KeyType, Any]) -> Dict[KeyType, Any]: + """Perform a deep update of a mapping. + + Parameters + ---------- + mapping : Dict[KeyType, Any] + Mapping. + *updating_mappings : Dict[KeyType, Any] + Mapping updates. + + Returns + ------- + Dict[KeyType, Any] + Updated mapping. + """ updated_mapping = mapping.copy() for updating_mapping in updating_mappings: for k, v in updating_mapping.items(): @@ -102,6 +116,19 @@ def __init__( sci_conf_id: int, sci_config: dict, ) -> None: + """Constructor. 
+ + Parameters + ---------- + model : Model + Model. + met_forcing_file : str + Met forcinf file. + sci_conf_id : int + Science configuration ID. + sci_config : dict + Science configuration. + """ self.model = model self.met_forcing_file = met_forcing_file self.sci_conf_id = sci_conf_id diff --git a/benchcab/main.py b/benchcab/main.py index f40956fe..762ee0ee 100644 --- a/benchcab/main.py +++ b/benchcab/main.py @@ -19,10 +19,10 @@ def parse_and_dispatch(parser): args = vars(parser.parse_args(sys.argv[1:] if sys.argv[1:] else ["-h"])) # Intercept the verbosity flag to engage the logger - log_level = "debug" if args.get("verbose", False) == True else "info" + log_level = "debug" if args.get("verbose", False) is True else "info" # We just need to instantiate this with the desired level - logger = get_logger(level=log_level) + get_logger(level=log_level) func = args.pop("func") func(**args) diff --git a/benchcab/model.py b/benchcab/model.py index 473dca0d..e4ea88c0 100644 --- a/benchcab/model.py +++ b/benchcab/model.py @@ -32,6 +32,23 @@ def __init__( build_script: Optional[str] = None, model_id: Optional[int] = None, ) -> None: + """Constructor. + + Parameters + ---------- + repo : Repo + Respository. + name : Optional[str], optional + Name, by default None + patch : Optional[dict], optional + Patch, by default None + patch_remove : Optional[dict], optional + Patch remove, by default None + build_script : Optional[str], optional + Build script, by default None + model_id : Optional[int], optional + Model ID, by default None + """ self.repo = repo self.name = name if name else repo.get_branch_name() self.patch = patch diff --git a/benchcab/utils/__init__.py b/benchcab/utils/__init__.py index 306478c6..2e6efd0c 100644 --- a/benchcab/utils/__init__.py +++ b/benchcab/utils/__init__.py @@ -42,7 +42,7 @@ def load_package_data(filename: str) -> dict: ext = ext if ext != "yaml" else "yml" # Extract from the installations data directory. 
- raw = pkgutil.get_data("benchcab", os.path.join("data", filename)).decode("utf-8") + raw = pkgutil.get_data("benchcab", os.path.join("data", filename)).decode("utf-8") # noqa: PTH118 # Decode and return. return PACKAGE_DATA_DECODERS[ext](raw) diff --git a/benchcab/utils/fs.py b/benchcab/utils/fs.py index d4c19c49..61323352 100644 --- a/benchcab/utils/fs.py +++ b/benchcab/utils/fs.py @@ -35,7 +35,9 @@ def copy2(src: Path, dest: Path, verbose=False): def next_path(path_pattern: str, path: Path = Path(), sep: str = "-") -> Path: - """Finds the next free path in a sequentially named list of + """Find the next free path. + + Finds the next free path in a sequentially named list of files with the following pattern in the `path` directory: path_pattern = 'file{sep}*.suf': diff --git a/benchcab/utils/singleton.py b/benchcab/utils/singleton.py index ce2960b0..c3318337 100644 --- a/benchcab/utils/singleton.py +++ b/benchcab/utils/singleton.py @@ -2,9 +2,18 @@ class Singleton(type): + """Singleton base (meta) class.""" + _instances = {} def __call__(cls, *args, **kwargs): + """Create the object on first call, return otherwise. + + Returns + ------- + object + The object that metaclasses this base class. + """ if cls not in cls._instances: cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) diff --git a/benchcab/utils/singleton_logger.py b/benchcab/utils/singleton_logger.py index fca1ec11..ed7ad01f 100644 --- a/benchcab/utils/singleton_logger.py +++ b/benchcab/utils/singleton_logger.py @@ -7,6 +7,8 @@ class SingletonLogger(logging.Logger, metaclass=Singleton): + """A singleton logging interface.""" + def __init__(self, name: str = "benchcab", level: str = "debug"): """Singleton logging instance. @@ -44,7 +46,7 @@ def _check_multiline(self, msg: Union[str, list, tuple]) -> str: Returns ------- str - Original string or fragments joined with \n + Original string or fragments joined with newlines. 
""" if type(msg) in [list, tuple]: return "\n".join([str(m) for m in msg]) @@ -58,6 +60,10 @@ def debug(self, msg, *args, **kwargs): ---------- msg : str or list Message or message fragments for additional detail. + *args : + Passed to super().debug() + **kwargs : + Passed to super().debug() """ msg = self._check_multiline(msg) super().debug(msg, *args, **kwargs) @@ -69,6 +75,10 @@ def info(self, msg, *args, **kwargs): ---------- msg : str or list Message or message fragments for additional detail. + *args : + Passed to super().debug() + **kwargs : + Passed to super().debug() """ msg = self._check_multiline(msg) super().info(msg, *args, **kwargs) @@ -80,6 +90,10 @@ def warn(self, msg, *args, **kwargs): ---------- msg : str or list Message or message fragments for additional detail. + *args : + Passed to super().debug() + **kwargs : + Passed to super().debug() """ msg = self._check_multiline(msg) super().warn(msg, *args, **kwargs) @@ -91,6 +105,10 @@ def error(self, msg, *args, **kwargs): ---------- msg : str or list Message or message fragments for additional detail. + *args : + Passed to super().debug() + **kwargs : + Passed to super().debug() """ msg = self._check_multiline(msg) super().error(msg, *args, **kwargs) @@ -102,6 +120,10 @@ def critical(self, msg, *args, **kwargs): ---------- msg : str or list Message or message fragments for additional detail. + *args : + Passed to super().debug() + **kwargs : + Passed to super().debug() """ msg = self._check_multiline(msg) super().critical(msg, *args, **kwargs) diff --git a/benchcab/utils/subprocess.py b/benchcab/utils/subprocess.py index 7feeda6c..fe7e250b 100644 --- a/benchcab/utils/subprocess.py +++ b/benchcab/utils/subprocess.py @@ -41,6 +41,26 @@ def run_cmd( verbose: bool = False, env: Optional[dict] = None, ) -> subprocess.CompletedProcess: + """Constructor. + + Parameters + ---------- + cmd : str + Command to run. 
+ capture_output : bool, optional + Capture the output, by default False + output_file : Optional[pathlib.Path], optional + Output file, by default None + verbose : bool, optional + Verbose output, by default False + env : Optional[dict], optional + Environment vars to pass, by default None + + Returns + ------- + subprocess.CompletedProcess + _description_ + """ kwargs: Any = {} with contextlib.ExitStack() as stack: if capture_output: diff --git a/ruff.toml b/ruff.toml index 64ca71c7..733b6572 100644 --- a/ruff.toml +++ b/ruff.toml @@ -3,4 +3,7 @@ select = ["F", "E", "I", "PL", "PTH", "PT", "N", "EM", "D"] ignore = [ "E501", # https://docs.astral.sh/ruff/rules/line-too-long/ "D401", # https://docs.astral.sh/ruff/rules/non-imperative-mood/ + "D104", # D104 Missing docstring in public package + "PLR0913", # PLR0913 Too many arguments in function definition + "D100", # D100 Missing docstring in public module ] \ No newline at end of file diff --git a/tests/test_config.py b/tests/test_config.py index 8b53fe7d..6f2d3d6a 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,4 +1,4 @@ -"""`pytest` tests for config.py""" +"""`pytest` tests for config.py.""" import pytest import benchcab.config as bc @@ -20,7 +20,7 @@ def test_read_config_fail(): # Test for a path that does not exist. 
with pytest.raises(FileNotFoundError): - config = bc.read_config(nonexistent_path) + bc.read_config(nonexistent_path) def test_validate_config_valid(): @@ -32,5 +32,5 @@ def test_validate_config_valid(): def test_validate_config_invalid(): """Test validate_config() for an invalid config file.""" invalid_config = bu.load_package_data("test/config-invalid.yml") - with pytest.raises(bc.ConfigValidationException): + with pytest.raises(bc.ConfigValidationError): bc.validate_config(invalid_config) diff --git a/tests/test_fluxsite.py b/tests/test_fluxsite.py index e186641d..7c12acaf 100644 --- a/tests/test_fluxsite.py +++ b/tests/test_fluxsite.py @@ -29,7 +29,7 @@ @pytest.fixture() -def mock_repo(): +def mock_repo(): #noqa: D103 class MockRepo(Repo): def __init__(self) -> None: self.branch = "test-branch" diff --git a/tests/test_utils.py b/tests/test_utils.py index ad39db03..61514e4a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -18,7 +18,7 @@ def test_load_package_data_pass(): def test_load_package_data_fail(): """Test load_package_data() fails as expected.""" with pytest.raises(FileNotFoundError): - missing = bu.load_package_data("config-missing.yml") + bu.load_package_data("config-missing.yml") def test_get_logger_singleton(): From ff36e5323ae99e468fedc6ff257d171f6edf0dc7 Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Thu, 21 Dec 2023 15:20:38 +1100 Subject: [PATCH 06/50] Manual black modifications. #103 --- benchcab/utils/__init__.py | 4 +++- benchcab/utils/fs.py | 2 +- tests/test_fluxsite.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/benchcab/utils/__init__.py b/benchcab/utils/__init__.py index 2e6efd0c..c771d96c 100644 --- a/benchcab/utils/__init__.py +++ b/benchcab/utils/__init__.py @@ -1,6 +1,8 @@ # Copyright 2022 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. 
# SPDX-License-Identifier: Apache-2.0 +# ruff: noqa: PTH118 + """Top-level utilities.""" import json import os @@ -42,7 +44,7 @@ def load_package_data(filename: str) -> dict: ext = ext if ext != "yaml" else "yml" # Extract from the installations data directory. - raw = pkgutil.get_data("benchcab", os.path.join("data", filename)).decode("utf-8") # noqa: PTH118 + raw = pkgutil.get_data("benchcab", os.path.join("data", filename)).decode("utf-8") # Decode and return. return PACKAGE_DATA_DECODERS[ext](raw) diff --git a/benchcab/utils/fs.py b/benchcab/utils/fs.py index 61323352..d3eee46a 100644 --- a/benchcab/utils/fs.py +++ b/benchcab/utils/fs.py @@ -36,7 +36,7 @@ def copy2(src: Path, dest: Path, verbose=False): def next_path(path_pattern: str, path: Path = Path(), sep: str = "-") -> Path: """Find the next free path. - + Finds the next free path in a sequentially named list of files with the following pattern in the `path` directory: diff --git a/tests/test_fluxsite.py b/tests/test_fluxsite.py index 7c12acaf..ac0906c9 100644 --- a/tests/test_fluxsite.py +++ b/tests/test_fluxsite.py @@ -29,7 +29,7 @@ @pytest.fixture() -def mock_repo(): #noqa: D103 +def mock_repo(): # noqa: D103 class MockRepo(Repo): def __init__(self) -> None: self.branch = "test-branch" From cb5c5cc8441d24c6e20cbfadcf73a3348b956d30 Mon Sep 17 00:00:00 2001 From: Sean Bryan Date: Fri, 1 Dec 2023 11:06:56 +1100 Subject: [PATCH 07/50] Add payu test suite for spatial configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spatial tests use the [payu framework][payu]. 
The payu framework was chosen so that we: - Encourage uptake of payu amongst users of CABLE - Have the foundations in place for running coupled models (atmosphere + land) with payu - Can easily test longer running simulations (payu makes it easy to run a model multiple times and have state persist in the model via restart files) The design of the spatial tests assumes each payu experiment is tailored to running CABLE with a specific meteorological forcing. This has the benefit that all the required inputs are already defined in the payu configuration file. An alternative would be to build up the spatial namelist configurations from scratch. This would be problematic as it is unclear if CABLE requires 'forcing specific' namelist options to be enabled to run with a particular met forcing. That is, CABLE does not allow for easy plug and play with different met forcings via the namelist file. The run directory structure is organised as follows: runs/ ├── spatial │ └── tasks │ ├── (a payu control / experiment directory) │ └── ... ├── payu-laboratory │ └── ... └── fluxsite └── ... Note we have a separate payu-laboratory directory. This is so we keep all CABLE outputs produced by benchcab under the bench_example work directory. This change includes the following additional features: - Add the ability to build the CABLE executable with MPI at runtime so that we run the spatial configurations with MPI. - Add the --mpi flag to benchcab build command so that the user can run the MPI build step independently. - Add subcommands to run each step of the spatial workflow in isolation. - Add payu key in the benchcab config file so that users can easily configure payu experiments and add optional command line arguments to the payu run command. - Add met_forcings key to specify different met forcings and their respective payu experiment. 
Fixes #5 [payu]: https://github.com/payu-org/payu [cable_example]: https://github.com/CABLE-LSM/cable_example --- benchcab/benchcab.py | 128 +++++++++++----- benchcab/cli.py | 43 ++++-- benchcab/data/config-schema.yml | 24 ++- benchcab/fluxsite.py | 79 ++-------- benchcab/internal.py | 31 +++- benchcab/model.py | 41 ++++-- benchcab/spatial.py | 172 ++++++++++++++++++++++ benchcab/utils/dict.py | 41 ++++++ benchcab/utils/namelist.py | 36 +++++ benchcab/workdir.py | 11 ++ docs/user_guide/config_options.md | 67 ++++++++- docs/user_guide/expected_output.md | 31 ++-- docs/user_guide/index.md | 96 ++++++++---- tests/conftest.py | 14 ++ tests/test_cli.py | 19 ++- tests/test_fluxsite.py | 120 +++------------ tests/test_model.py | 102 +++++++++---- tests/test_namelist.py | 87 +++++++++++ tests/test_spatial.py | 225 +++++++++++++++++++++++++++++ tests/test_workdir.py | 20 +++ 20 files changed, 1076 insertions(+), 311 deletions(-) create mode 100644 benchcab/spatial.py create mode 100644 benchcab/utils/dict.py create mode 100644 benchcab/utils/namelist.py create mode 100644 tests/test_namelist.py create mode 100644 tests/test_spatial.py diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index 6b2a7ead..6acbe2b5 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -10,24 +10,20 @@ from subprocess import CalledProcessError from typing import Optional -from benchcab import internal +from benchcab import fluxsite, internal, spatial from benchcab.comparison import run_comparisons, run_comparisons_in_parallel from benchcab.config import read_config from benchcab.environment_modules import EnvironmentModules, EnvironmentModulesInterface -from benchcab.fluxsite import ( - Task, - get_fluxsite_comparisons, - get_fluxsite_tasks, - run_tasks, - run_tasks_in_parallel, -) from benchcab.internal import get_met_forcing_file_names from benchcab.model import Model from benchcab.utils.fs import mkdir, next_path from benchcab.utils.pbs import render_job_script from 
benchcab.utils.repo import SVNRepo, create_repo from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface -from benchcab.workdir import setup_fluxsite_directory_tree +from benchcab.workdir import ( + setup_fluxsite_directory_tree, + setup_spatial_directory_tree, +) class Benchcab: @@ -46,7 +42,8 @@ def __init__( self._config: Optional[dict] = None self._models: list[Model] = [] - self.tasks: list[Task] = [] # initialise fluxsite tasks lazily + self._fluxsite_tasks: list[fluxsite.FluxsiteTask] = [] + self._spatial_tasks: list[spatial.SpatialTask] = [] def _validate_environment(self, project: str, modules: list): """Performs checks on current user environment.""" @@ -113,20 +110,34 @@ def _get_models(self, config: dict) -> list[Model]: self._models.append(Model(repo=repo, model_id=id, **sub_config)) return self._models - def _initialise_tasks(self, config: dict) -> list[Task]: - """A helper method that initialises and returns the `tasks` attribute.""" - self.tasks = get_fluxsite_tasks( - models=self._get_models(config), - science_configurations=config.get( - "science_configurations", internal.DEFAULT_SCIENCE_CONFIGURATIONS - ), - fluxsite_forcing_file_names=get_met_forcing_file_names( - config.get("fluxsite", {}).get( - "experiment", internal.FLUXSITE_DEFAULT_EXPERIMENT - ) - ), - ) - return self.tasks + def _get_fluxsite_tasks(self, config: dict) -> list[fluxsite.FluxsiteTask]: + if not self._fluxsite_tasks: + self._fluxsite_tasks = fluxsite.get_fluxsite_tasks( + models=self._get_models(config), + science_configurations=config.get( + "science_configurations", internal.DEFAULT_SCIENCE_CONFIGURATIONS + ), + fluxsite_forcing_file_names=get_met_forcing_file_names( + config.get("fluxsite", {}).get( + "experiment", internal.FLUXSITE_DEFAULT_EXPERIMENT + ) + ), + ) + return self._fluxsite_tasks + + def _get_spatial_tasks(self, config) -> list[spatial.SpatialTask]: + if not self._spatial_tasks: + self._spatial_tasks = spatial.get_spatial_tasks( + 
models=self._get_models(config), + met_forcings=config.get("spatial", {}).get( + "met_forcings", internal.SPATIAL_DEFAULT_MET_FORCINGS + ), + science_configurations=config.get( + "science_configurations", internal.DEFAULT_SCIENCE_CONFIGURATIONS + ), + payu_args=config.get("spatial", {}).get("payu", {}).get("args"), + ) + return self._spatial_tasks def validate_config(self, config_path: str, verbose: bool): """Endpoint for `benchcab validate_config`.""" @@ -179,6 +190,7 @@ def fluxsite_submit_job( "The NetCDF output for each task is written to " f"{internal.FLUXSITE_DIRS['OUTPUT']}/_out.nc" ) + print("") def checkout(self, config_path: str, verbose: bool): """Endpoint for `benchcab checkout`.""" @@ -208,7 +220,7 @@ def checkout(self, config_path: str, verbose: bool): print("") - def build(self, config_path: str, verbose: bool): + def build(self, config_path: str, verbose: bool, mpi=False): """Endpoint for `benchcab build`.""" config = self._get_config(config_path) self._validate_environment(project=config["project"], modules=config["modules"]) @@ -221,11 +233,11 @@ def build(self, config_path: str, verbose: bool): ) repo.custom_build(modules=config["modules"], verbose=verbose) else: - build_mode = "with MPI" if internal.MPI else "serially" + build_mode = "with MPI" if mpi else "serially" print(f"Compiling CABLE {build_mode} for realisation {repo.name}...") - repo.pre_build(verbose=verbose) - repo.run_build(modules=config["modules"], verbose=verbose) - repo.post_build(verbose=verbose) + repo.pre_build(verbose=verbose, mpi=mpi) + repo.run_build(modules=config["modules"], verbose=verbose, mpi=mpi) + repo.post_build(verbose=verbose, mpi=mpi) print(f"Successfully compiled CABLE for realisation {repo.name}") print("") @@ -234,11 +246,10 @@ def fluxsite_setup_work_directory(self, config_path: str, verbose: bool): config = self._get_config(config_path) self._validate_environment(project=config["project"], modules=config["modules"]) - tasks = self.tasks if self.tasks else 
self._initialise_tasks(config) print("Setting up run directory tree for fluxsite tests...") setup_fluxsite_directory_tree(verbose=verbose) print("Setting up tasks...") - for task in tasks: + for task in self._get_fluxsite_tasks(config): task.setup_task(verbose=verbose) print("Successfully setup fluxsite tasks") print("") @@ -247,8 +258,8 @@ def fluxsite_run_tasks(self, config_path: str, verbose: bool): """Endpoint for `benchcab fluxsite-run-tasks`.""" config = self._get_config(config_path) self._validate_environment(project=config["project"], modules=config["modules"]) + tasks = self._get_fluxsite_tasks(config) - tasks = self.tasks if self.tasks else self._initialise_tasks(config) print("Running fluxsite tasks...") try: multiprocess = config["fluxsite"]["multiprocess"] @@ -258,9 +269,9 @@ def fluxsite_run_tasks(self, config_path: str, verbose: bool): ncpus = config.get("pbs", {}).get( "ncpus", internal.FLUXSITE_DEFAULT_PBS["ncpus"] ) - run_tasks_in_parallel(tasks, n_processes=ncpus, verbose=verbose) + fluxsite.run_tasks_in_parallel(tasks, n_processes=ncpus, verbose=verbose) else: - run_tasks(tasks, verbose=verbose) + fluxsite.run_tasks(tasks, verbose=verbose) print("Successfully ran fluxsite tasks") print("") @@ -274,8 +285,9 @@ def fluxsite_bitwise_cmp(self, config_path: str, verbose: bool): "nccmp/1.8.5.0" ) # use `nccmp -df` for bitwise comparisons - tasks = self.tasks if self.tasks else self._initialise_tasks(config) - comparisons = get_fluxsite_comparisons(tasks) + comparisons = fluxsite.get_fluxsite_comparisons( + self._get_fluxsite_tasks(config) + ) print("Running comparison tasks...") try: @@ -306,10 +318,46 @@ def fluxsite( else: self.fluxsite_submit_job(config_path, verbose, skip) - def spatial(self, config_path: str, verbose: bool): + def spatial_setup_work_directory(self, config_path: str, verbose: bool): + """Endpoint for `benchcab spatial-setup-work-dir`.""" + config = self._get_config(config_path) + 
self._validate_environment(project=config["project"], modules=config["modules"]) + + print("Setting up run directory tree for spatial tests...") + setup_spatial_directory_tree() + print("Setting up tasks...") + try: + payu_config = config["spatial"]["payu"]["config"] + except KeyError: + payu_config = None + for task in self._get_spatial_tasks(config): + task.setup_task(payu_config=payu_config, verbose=verbose) + print("Successfully setup spatial tasks") + print("") + + def spatial_run_tasks(self, config_path: str, verbose: bool): + """Endpoint for `benchcab spatial-run-tasks`.""" + config = self._get_config(config_path) + self._validate_environment(project=config["project"], modules=config["modules"]) + + print("Running spatial tasks...") + spatial.run_tasks(tasks=self._get_spatial_tasks(config), verbose=verbose) + print("Successfully dispatched payu jobs") + print("") + + def spatial(self, config_path: str, verbose: bool, skip: list): """Endpoint for `benchcab spatial`.""" + self.checkout(config_path, verbose) + self.build(config_path, verbose, mpi=True) + self.spatial_setup_work_directory(config_path, verbose) + self.spatial_run_tasks(config_path, verbose) - def run(self, config_path: str, no_submit: bool, verbose: bool, skip: list[str]): + def run(self, config_path: str, verbose: bool, skip: list[str]): """Endpoint for `benchcab run`.""" - self.fluxsite(config_path, no_submit, verbose, skip) - self.spatial(config_path, verbose) + self.checkout(config_path, verbose) + self.build(config_path, verbose) + self.build(config_path, verbose, mpi=True) + self.fluxsite_setup_work_directory(config_path, verbose) + self.spatial_setup_work_directory(config_path, verbose) + self.fluxsite_submit_job(config_path, verbose, skip) + self.spatial_run_tasks(config_path, verbose) diff --git a/benchcab/cli.py b/benchcab/cli.py index 4e19d268..fee7086f 100644 --- a/benchcab/cli.py +++ b/benchcab/cli.py @@ -38,9 +38,9 @@ def generate_parser(app: Benchcab) -> argparse.ArgumentParser: 
action="store_true", ) - # parent parser that contains arguments common to all run specific subcommands - args_run_subcommand = argparse.ArgumentParser(add_help=False) - args_run_subcommand.add_argument( + # parent parser that contains the argument for --no-submit + args_no_submit = argparse.ArgumentParser(add_help=False) + args_no_submit.add_argument( "--no-submit", action="store_true", help="Force benchcab to execute tasks on the current compute node.", @@ -80,7 +80,6 @@ def generate_parser(app: Benchcab) -> argparse.ArgumentParser: parents=[ args_help, args_subcommand, - args_run_subcommand, args_composite_subcommand, ], help="Run all test suites for CABLE.", @@ -109,7 +108,7 @@ def generate_parser(app: Benchcab) -> argparse.ArgumentParser: parents=[ args_help, args_subcommand, - args_run_subcommand, + args_no_submit, args_composite_subcommand, ], help="Run the fluxsite test suite for CABLE.", @@ -140,6 +139,11 @@ def generate_parser(app: Benchcab) -> argparse.ArgumentParser: config file.""", add_help=False, ) + parser_build.add_argument( + "--mpi", + action="store_true", + help="Enable MPI build.", + ) parser_build.set_defaults(func=app.build) # subcommand: 'benchcab fluxsite-setup-work-dir' @@ -168,9 +172,9 @@ def generate_parser(app: Benchcab) -> argparse.ArgumentParser: "fluxsite-run-tasks", parents=[args_help, args_subcommand], help="Run the fluxsite tasks of the main fluxsite command.", - description="""Runs the fluxsite tasks for the fluxsite test suite. Note, this command should - ideally be run inside a PBS job. This command is invoked by the PBS job script generated by - `benchcab run`.""", + description="""Runs the fluxsite tasks for the fluxsite test suite. + Note, this command should ideally be run inside a PBS job. 
This command + is invoked by the PBS job script generated by `benchcab run`.""", add_help=False, ) parser_fluxsite_run_tasks.set_defaults(func=app.fluxsite_run_tasks) @@ -192,11 +196,32 @@ def generate_parser(app: Benchcab) -> argparse.ArgumentParser: # subcommand: 'benchcab spatial' parser_spatial = subparsers.add_parser( "spatial", - parents=[args_help, args_subcommand], + parents=[args_help, args_subcommand, args_composite_subcommand], help="Run the spatial tests only.", description="""Runs the default spatial test suite for CABLE.""", add_help=False, ) parser_spatial.set_defaults(func=app.spatial) + # subcommand: 'benchcab spatial-setup-work-dir' + parser_spatial_setup_work_dir = subparsers.add_parser( + "spatial-setup-work-dir", + parents=[args_help, args_subcommand], + help="Run the work directory setup step of the spatial command.", + description="""Generates the spatial run directory tree in the current working + directory so that spatial tasks can be run.""", + add_help=False, + ) + parser_spatial_setup_work_dir.set_defaults(func=app.spatial_setup_work_directory) + + # subcommand 'benchcab spatial-run-tasks' + parser_spatial_run_tasks = subparsers.add_parser( + "spatial-run-tasks", + parents=[args_help, args_subcommand], + help="Run the spatial tasks of the main spatial command.", + description="Runs the spatial tasks for the spatial test suite.", + add_help=False, + ) + parser_spatial_run_tasks.set_defaults(func=app.spatial_run_tasks) + return main_parser diff --git a/benchcab/data/config-schema.yml b/benchcab/data/config-schema.yml index 892d3abf..7826ac99 100644 --- a/benchcab/data/config-schema.yml +++ b/benchcab/data/config-schema.yml @@ -95,4 +95,26 @@ fluxsite: schema: type: "string" required: false - \ No newline at end of file + +spatial: + type: "dict" + required: false + schema: + met_forcings: + type: "dict" + required: false + minlength: 1 + keysrules: + type: "string" + valuesrules: + type: "string" + payu: + type: "dict" + required: false + 
schema: + config: + type: "dict" + required: false + args: + type: "string" + required: false \ No newline at end of file diff --git a/benchcab/fluxsite.py b/benchcab/fluxsite.py index f455a2b0..da28940c 100644 --- a/benchcab/fluxsite.py +++ b/benchcab/fluxsite.py @@ -7,9 +7,7 @@ import operator import shutil import sys -from pathlib import Path from subprocess import CalledProcessError -from typing import Any, Dict, TypeVar import f90nml import flatdict @@ -19,70 +17,9 @@ from benchcab.comparison import ComparisonTask from benchcab.model import Model from benchcab.utils.fs import chdir, mkdir +from benchcab.utils.namelist import patch_namelist, patch_remove_namelist from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface -# fmt: off -# ====================================================== -# Copyright (c) 2017 - 2022 Samuel Colvin and other contributors -# from https://github.com/pydantic/pydantic/blob/fd2991fe6a73819b48c906e3c3274e8e47d0f761/pydantic/utils.py#L200 - -KeyType = TypeVar('KeyType') - - -def deep_update(mapping: Dict[KeyType, Any], *updating_mappings: Dict[KeyType, Any]) -> Dict[KeyType, Any]: - updated_mapping = mapping.copy() - for updating_mapping in updating_mappings: - for k, v in updating_mapping.items(): - if k in updated_mapping and isinstance(updated_mapping[k], dict) and isinstance(v, dict): - updated_mapping[k] = deep_update(updated_mapping[k], v) - else: - updated_mapping[k] = v - return updated_mapping - -# ====================================================== -# fmt: on - - -def deep_del( - mapping: Dict[KeyType, Any], *updating_mappings: Dict[KeyType, Any] -) -> Dict[KeyType, Any]: - """Deletes all key-value 'leaf nodes' in `mapping` specified by `updating_mappings`.""" - updated_mapping = mapping.copy() - for updating_mapping in updating_mappings: - for key, value in updating_mapping.items(): - if isinstance(updated_mapping[key], dict) and isinstance(value, dict): - updated_mapping[key] = 
deep_del(updated_mapping[key], value) - else: - del updated_mapping[key] - return updated_mapping - - -def patch_namelist(nml_path: Path, patch: dict): - """Writes a namelist patch specified by `patch` to `nml_path`. - - The `patch` dictionary must comply with the `f90nml` api. - """ - if not nml_path.exists(): - f90nml.write(patch, nml_path) - return - - nml = f90nml.read(nml_path) - f90nml.write(deep_update(nml, patch), nml_path, force=True) - - -def patch_remove_namelist(nml_path: Path, patch_remove: dict): - """Removes a subset of namelist parameters specified by `patch_remove` from `nml_path`. - - The `patch_remove` dictionary must comply with the `f90nml` api. - """ - nml = f90nml.read(nml_path) - try: - f90nml.write(deep_del(nml, patch_remove), nml_path, force=True) - except KeyError as exc: - msg = f"Namelist parameters specified in `patch_remove` do not exist in {nml_path.name}." - raise KeyError(msg) from exc - - f90_logical_repr = {True: ".true.", False: ".false."} @@ -90,7 +27,7 @@ class CableError(Exception): """Custom exception class for CABLE errors.""" -class Task: +class FluxsiteTask: """A class used to represent a single fluxsite task.""" subprocess_handler: SubprocessWrapperInterface = SubprocessWrapper() @@ -335,10 +272,10 @@ def get_fluxsite_tasks( models: list[Model], science_configurations: list[dict], fluxsite_forcing_file_names: list[str], -) -> list[Task]: +) -> list[FluxsiteTask]: """Returns a list of fluxsite tasks to run.""" tasks = [ - Task( + FluxsiteTask( model=model, met_forcing_file=file_name, sci_conf_id=sci_conf_id, @@ -351,14 +288,16 @@ def get_fluxsite_tasks( return tasks -def run_tasks(tasks: list[Task], verbose=False): +def run_tasks(tasks: list[FluxsiteTask], verbose=False): """Runs tasks in `tasks` serially.""" for task in tasks: task.run(verbose=verbose) def run_tasks_in_parallel( - tasks: list[Task], n_processes=internal.FLUXSITE_DEFAULT_PBS["ncpus"], verbose=False + tasks: list[FluxsiteTask], + 
n_processes=internal.FLUXSITE_DEFAULT_PBS["ncpus"], + verbose=False, ): """Runs tasks in `tasks` in parallel across multiple processes.""" run_task = operator.methodcaller("run", verbose=verbose) @@ -366,7 +305,7 @@ def run_tasks_in_parallel( pool.map(run_task, tasks, chunksize=1) -def get_fluxsite_comparisons(tasks: list[Task]) -> list[ComparisonTask]: +def get_fluxsite_comparisons(tasks: list[FluxsiteTask]) -> list[ComparisonTask]: """Returns a list of `ComparisonTask` objects to run comparisons with. Pairs should be matching in science configurations and meteorological diff --git a/benchcab/internal.py b/benchcab/internal.py index 365f4481..a42077a5 100644 --- a/benchcab/internal.py +++ b/benchcab/internal.py @@ -19,7 +19,6 @@ "walltime": "6:00:00", "storage": [], } -MPI = False FLUXSITE_DEFAULT_MULTIPROCESS = True # DIRECTORY PATHS/STRUCTURE: @@ -81,17 +80,43 @@ # Relative path to directory that stores bitwise comparison results FLUXSITE_DIRS["BITWISE_CMP"] = FLUXSITE_DIRS["ANALYSIS"] / "bitwise-comparisons" -# Path to met files: +# Relative path to root directory for CABLE spatial runs +SPATIAL_RUN_DIR = RUN_DIR / "spatial" + +# Relative path to tasks directory (contains payu control directories configured +# for each spatial task) +SPATIAL_TASKS_DIR = SPATIAL_RUN_DIR / "tasks" + +# A custom payu laboratory directory for payu runs +PAYU_LABORATORY_DIR = RUN_DIR / "payu-laboratory" + +# Path to PLUMBER2 site forcing data directory (doi: 10.25914/5fdb0902607e1): MET_DIR = Path("/g/data/ks32/CLEX_Data/PLUMBER2/v1-0/Met/") +# Default met forcings to use in the spatial test suite. Each met +# forcing has a corresponding payu experiment that is configured to run CABLE +# with that forcing. 
+SPATIAL_DEFAULT_MET_FORCINGS = { + "crujra_access": "https://github.com/CABLE-LSM/cable_example.git", +} + # CABLE SVN root url: CABLE_SVN_ROOT = "https://trac.nci.org.au/svn/cable" +# Relative path to temporary build directory (serial) +TMP_BUILD_DIR = Path("offline", ".tmp") + +# Relative path to temporary build directory (MPI) +TMP_BUILD_DIR_MPI = Path("offline", ".mpitmp") + # CABLE GitHub URL: CABLE_GIT_URL = "https://github.com/CABLE-LSM/CABLE.git" # CABLE executable file name: -CABLE_EXE = "cable-mpi" if MPI else "cable" +CABLE_EXE = "cable" + +# CABLE MPI executable file name: +CABLE_MPI_EXE = "cable-mpi" # CABLE namelist file name: CABLE_NML = "cable.nml" diff --git a/benchcab/model.py b/benchcab/model.py index f97c9831..1c93ab14 100644 --- a/benchcab/model.py +++ b/benchcab/model.py @@ -57,10 +57,14 @@ def model_id(self) -> int: def model_id(self, value: int): self._model_id = value - def get_exe_path(self) -> Path: + def get_exe_path(self, mpi=False) -> Path: """Return the path to the built executable.""" return ( - internal.SRC_DIR / self.name / self.src_dir / "offline" / internal.CABLE_EXE + internal.SRC_DIR + / self.name + / self.src_dir + / "offline" + / (internal.CABLE_MPI_EXE if mpi else internal.CABLE_EXE) ) def custom_build(self, modules: list[str], verbose=False): @@ -100,10 +104,14 @@ def custom_build(self, modules: list[str], verbose=False): verbose=verbose, ) - def pre_build(self, verbose=False): + def pre_build(self, mpi=False, verbose=False): """Runs CABLE pre-build steps.""" path_to_repo = internal.SRC_DIR / self.name - tmp_dir = path_to_repo / self.src_dir / "offline" / ".tmp" + tmp_dir = ( + path_to_repo + / self.src_dir + / (internal.TMP_BUILD_DIR_MPI if mpi else internal.TMP_BUILD_DIR) + ) if not tmp_dir.exists(): if verbose: print(f"mkdir {tmp_dir}") @@ -121,10 +129,14 @@ def pre_build(self, verbose=False): verbose=verbose, ) - def run_build(self, modules: list[str], verbose=False): + def run_build(self, modules: list[str], 
mpi=False, verbose=False): """Runs CABLE build scripts.""" path_to_repo = internal.SRC_DIR / self.name - tmp_dir = path_to_repo / self.src_dir / "offline" / ".tmp" + tmp_dir = ( + path_to_repo + / self.src_dir + / (internal.TMP_BUILD_DIR_MPI if mpi else internal.TMP_BUILD_DIR) + ) with chdir(tmp_dir), self.modules_handler.load(modules, verbose=verbose): env = os.environ.copy() @@ -133,20 +145,25 @@ def run_build(self, modules: list[str], verbose=False): env["CFLAGS"] = "-O2 -fp-model precise" env["LDFLAGS"] = f"-L{env['NETCDF_ROOT']}/lib/Intel -O0" env["LD"] = "-lnetcdf -lnetcdff" - env["FC"] = "mpif90" if internal.MPI else "ifort" + env["FC"] = "mpif90" if mpi else "ifort" self.subprocess_handler.run_cmd( - "make mpi" if internal.MPI else "make", env=env, verbose=verbose + "make mpi" if mpi else "make", env=env, verbose=verbose ) - def post_build(self, verbose=False): + def post_build(self, mpi=False, verbose=False): """Runs CABLE post-build steps.""" path_to_repo = internal.SRC_DIR / self.name - tmp_dir = path_to_repo / self.src_dir / "offline" / ".tmp" + tmp_dir = ( + path_to_repo + / self.src_dir + / (internal.TMP_BUILD_DIR_MPI if mpi else internal.TMP_BUILD_DIR) + ) + exe = internal.CABLE_MPI_EXE if mpi else internal.CABLE_EXE rename( - tmp_dir / internal.CABLE_EXE, - path_to_repo / self.src_dir / "offline" / internal.CABLE_EXE, + tmp_dir / exe, + path_to_repo / self.src_dir / "offline" / exe, verbose=verbose, ) diff --git a/benchcab/spatial.py b/benchcab/spatial.py new file mode 100644 index 00000000..f1c1f894 --- /dev/null +++ b/benchcab/spatial.py @@ -0,0 +1,172 @@ +# Copyright 2022 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. 
+# SPDX-License-Identifier: Apache-2.0 + +"""A module containing functions and data structures for running spatial tasks.""" + +from typing import Optional + +import git +import yaml + +from benchcab import internal +from benchcab.model import Model +from benchcab.utils.dict import deep_update +from benchcab.utils.fs import chdir +from benchcab.utils.namelist import patch_namelist, patch_remove_namelist +from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface + + +class SpatialTask: + """A class used to represent a single spatial task.""" + + subprocess_handler: SubprocessWrapperInterface = SubprocessWrapper() + + def __init__( + self, + model: Model, + met_forcing_name: str, + met_forcing_payu_experiment: str, + sci_conf_id: int, + sci_config: dict, + payu_args: Optional[str] = None, + ) -> None: + self.model = model + self.met_forcing_name = met_forcing_name + self.met_forcing_payu_experiment = met_forcing_payu_experiment + self.sci_conf_id = sci_conf_id + self.sci_config = sci_config + self.payu_args = payu_args + + def get_task_name(self) -> str: + """Returns the file name convention used for this task.""" + return f"{self.met_forcing_name}_R{self.model.model_id}_S{self.sci_conf_id}" + + def setup_task(self, payu_config: Optional[dict] = None, verbose=False): + """Does all file manipulations to run cable with payu for this task.""" + + if verbose: + print(f"Setting up task: {self.get_task_name()}") + + self.clone_experiment(verbose=verbose) + self.configure_experiment(payu_config, verbose=verbose) + self.update_namelist(verbose=verbose) + + def clone_experiment(self, verbose=False): + """Clone the payu experiment from GitHub.""" + url = self.met_forcing_payu_experiment + path = internal.SPATIAL_TASKS_DIR / self.get_task_name() + if verbose: + print(f"git clone {url} {path}") + _ = git.Repo.clone_from(url, path) + + def configure_experiment(self, payu_config: Optional[dict] = None, verbose=False): + """Configure the payu experiment 
for this task.""" + task_dir = internal.SPATIAL_TASKS_DIR / self.get_task_name() + exp_config_path = task_dir / "config.yaml" + with exp_config_path.open("r", encoding="utf-8") as file: + config = yaml.safe_load(file) + if config is None: + config = {} + + if verbose: + print( + " Updating experiment config parameters in", + task_dir / "config.yaml", + ) + + if payu_config: + config = deep_update(config, payu_config) + + config["exe"] = str(self.model.get_exe_path(mpi=True).absolute()) + + # Here we prepend inputs to the `input` list so that payu knows to use + # our inputs over the pre-existing inputs in the config file: + config["input"] = [ + # Note: only necessary for CABLE v2 + str( + ( + internal.CABLE_AUX_DIR + / "core" + / "biogeophys" + / "def_veg_params_zr_clitt_albedo_fix.txt" + ).absolute() + ), + # Note: only necessary for CABLE v2 + str( + ( + internal.CABLE_AUX_DIR + / "core" + / "biogeophys" + / "def_soil_params.txt" + ).absolute() + ), + *config.get("input", []), + ] + + config["laboratory"] = str(internal.PAYU_LABORATORY_DIR.absolute()) + + with exp_config_path.open("w", encoding="utf-8") as file: + yaml.dump(config, file) + + def update_namelist(self, verbose=False): + """Update the namelist file for this task.""" + nml_path = ( + internal.SPATIAL_TASKS_DIR / self.get_task_name() / internal.CABLE_NML + ) + if verbose: + print(f" Adding science configurations to CABLE namelist file {nml_path}") + patch_namelist(nml_path, self.sci_config) + + if self.model.patch: + if verbose: + print( + f" Adding branch specific configurations to CABLE namelist file {nml_path}" + ) + patch_namelist(nml_path, self.model.patch) + + if self.model.patch_remove: + if verbose: + print( + f" Removing branch specific configurations from CABLE namelist file {nml_path}" + ) + patch_remove_namelist(nml_path, self.model.patch_remove) + + def run(self, verbose=False) -> None: + """Runs a single spatial task.""" + + task_dir = internal.SPATIAL_TASKS_DIR / self.get_task_name() + 
with chdir(task_dir): + self.subprocess_handler.run_cmd( + f"payu run {self.payu_args}" if self.payu_args else "payu run", + verbose=verbose, + ) + + +def run_tasks(tasks: list[SpatialTask], verbose=False): + """Runs tasks in `tasks` sequentially.""" + + for task in tasks: + task.run(verbose=verbose) + + +def get_spatial_tasks( + models: list[Model], + met_forcings: dict[str, str], + science_configurations: list[dict], + payu_args: Optional[str] = None, +): + """Returns a list of spatial tasks to run.""" + tasks = [ + SpatialTask( + model=model, + met_forcing_name=met_forcing_name, + met_forcing_payu_experiment=met_forcing_payu_experiment, + sci_conf_id=sci_conf_id, + sci_config=sci_config, + payu_args=payu_args, + ) + for model in models + for met_forcing_name, met_forcing_payu_experiment in met_forcings.items() + for sci_conf_id, sci_config in enumerate(science_configurations) + ] + return tasks diff --git a/benchcab/utils/dict.py b/benchcab/utils/dict.py new file mode 100644 index 00000000..c76e6c78 --- /dev/null +++ b/benchcab/utils/dict.py @@ -0,0 +1,41 @@ +# Copyright 2022 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Utility functions for manipulating nested dictionaries.""" + +from typing import Any, Dict, TypeVar + +# fmt: off +# ====================================================== +# Copyright (c) 2017 - 2022 Samuel Colvin and other contributors +# from https://github.com/pydantic/pydantic/blob/fd2991fe6a73819b48c906e3c3274e8e47d0f761/pydantic/utils.py#L200 + +KeyType = TypeVar('KeyType') + + +def deep_update(mapping: Dict[KeyType, Any], *updating_mappings: Dict[KeyType, Any]) -> Dict[KeyType, Any]: # noqa + updated_mapping = mapping.copy() + for updating_mapping in updating_mappings: + for k, v in updating_mapping.items(): + if k in updated_mapping and isinstance(updated_mapping[k], dict) and isinstance(v, dict): + updated_mapping[k] = deep_update(updated_mapping[k], v) + else: + updated_mapping[k] = v + return updated_mapping + +# ====================================================== +# fmt: on + + +def deep_del( + mapping: Dict[KeyType, Any], *updating_mappings: Dict[KeyType, Any] +) -> Dict[KeyType, Any]: + """Deletes all key-value 'leaf nodes' in `mapping` specified by `updating_mappings`.""" + updated_mapping = mapping.copy() + for updating_mapping in updating_mappings: + for key, value in updating_mapping.items(): + if isinstance(updated_mapping[key], dict) and isinstance(value, dict): + updated_mapping[key] = deep_del(updated_mapping[key], value) + else: + del updated_mapping[key] + return updated_mapping diff --git a/benchcab/utils/namelist.py b/benchcab/utils/namelist.py new file mode 100644 index 00000000..d6a6de48 --- /dev/null +++ b/benchcab/utils/namelist.py @@ -0,0 +1,36 @@ +# Copyright 2022 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Contains utility functions for manipulating Fortran namelist files.""" + +from pathlib import Path + +import f90nml + +from benchcab.utils.dict import deep_del, deep_update + + +def patch_namelist(nml_path: Path, patch: dict): + """Writes a namelist patch specified by `patch` to `nml_path`. + + The `patch` dictionary must comply with the `f90nml` api. + """ + if not nml_path.exists(): + f90nml.write(patch, nml_path) + return + + nml = f90nml.read(nml_path) + f90nml.write(deep_update(nml, patch), nml_path, force=True) + + +def patch_remove_namelist(nml_path: Path, patch_remove: dict): + """Removes a subset of namelist parameters specified by `patch_remove` from `nml_path`. + + The `patch_remove` dictionary must comply with the `f90nml` api. + """ + nml = f90nml.read(nml_path) + try: + f90nml.write(deep_del(nml, patch_remove), nml_path, force=True) + except KeyError as exc: + msg = f"Namelist parameters specified in `patch_remove` do not exist in {nml_path.name}." + raise KeyError(msg) from exc diff --git a/benchcab/workdir.py b/benchcab/workdir.py index d695337b..f2b32a25 100644 --- a/benchcab/workdir.py +++ b/benchcab/workdir.py @@ -28,3 +28,14 @@ def setup_fluxsite_directory_tree(verbose=False): """ for path in internal.FLUXSITE_DIRS.values(): mkdir(path, verbose=verbose, parents=True, exist_ok=True) + + +def setup_spatial_directory_tree(verbose=False): + """Generate the directory structure for running spatial tests.""" + + for path in [ + internal.SPATIAL_RUN_DIR, + internal.SPATIAL_TASKS_DIR, + internal.PAYU_LABORATORY_DIR, + ]: + mkdir(path, verbose=verbose, parents=True, exist_ok=True) diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index 1d22c314..3a60a5cc 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -163,6 +163,69 @@ fluxsites: ``` +## spatial + +Contains settings specific to spatial tests. + +This key is _optional_. 
**Default** settings for the spatial tests will be used if it is not present. + +```yaml +spatial: + met_forcings: + crujra_access: https://github.com/CABLE-LSM/cable_example.git + payu: + config: + walltime: 1:00:00 + args: -n 2 +``` + +### [met_forcings](#met_forcings) + +Specify one or more spatial met forcings to use in the spatial test suite. Each entry is a key-value pair where the key is the name of the met forcing and the value is a URL to a payu experiment that is configured to run CABLE with that forcing. + +This key is _optional_. **Default** values for the `met_forcings` key are as follows: + +```yaml +spatial: + met_forcings: + crujra_access: https://github.com/CABLE-LSM/cable_example.git +``` + +### [payu](#payu) + +Contains settings specific to the payu workflow manager. + +This key is _optional_. **Default** values for the payu settings will apply if not specified. + +```yaml +spatial: + payu: + config: + walltime: 1:00:00 + args: -n 2 +``` + +[`config`](#+payu.config){ #+payu.config } + +: **Default:** unset, _optional key_. :octicons-dash-24: Specify global configuration options for running payu. Settings specified here are passed into the payu configuration file for each experiment. + +```yaml +spatial: + payu: + config: + walltime: 1:00:00 +``` + +[`args`](#+payu.args){ #+payu.args } + +: **Default:** unset, _optional key_. :octicons-dash-24: Specify command line arguments to the `payu run` command in the form of a string. Arguments are used for all spatial payu runs. + +```yaml +spatial: + payu: + args: -n 2 +``` + ## realisations Entries for each CABLE branch to use. Each entry is a key-value pair and are listed as follows: @@ -337,7 +400,7 @@ realisations: ### [patch_remove](#patch_remove) -: **Default:** unset, no effect, _optional key. :octicons-dash-24: Specifies branch-specific namelist settings to be removed from the `cable.nml` namelist settings. 
When the `patch_remove` key is specified, the specified namelists are removed from all namelist files for this branch for all science configurations run by `benchcab`. When specifying a namelist parameter in `patch_remove`, the value of the namelist parameter is ignored. +: **Default:** unset, _optional key_. :octicons-dash-24: Specifies branch-specific namelist settings to be removed from the `cable.nml` namelist settings. When the `patch_remove` key is specified, the specified namelists are removed from all namelist files for this branch for all science configurations run by `benchcab`. When specifying a namelist parameter in `patch_remove`, the value of the namelist parameter is ignored. : The `patch_remove` key must be a dictionary-like data structure that is compliant with the [`f90nml`][f90nml-github] python package. ```yaml @@ -356,7 +419,7 @@ realisations: ## science_configurations -: **Default:** unset, no impact, _optional key_. :octicons-dash-24: User defined science configurations. Science configurations that are specified here will replace [the default science configurations](default_science_configurations.md). In the output filenames, each configuration is identified with S where N is an integer starting from 0 for the first listed configuration and increasing by 1 for each subsequent configuration. +: **Default:** unset, _optional key_. :octicons-dash-24: User defined science configurations. Science configurations that are specified here will replace [the default science configurations](default_science_configurations.md). In the output filenames, each configuration is identified with S where N is an integer starting from 0 for the first listed configuration and increasing by 1 for each subsequent configuration. 
```yaml science_configurations: [ diff --git a/docs/user_guide/expected_output.md b/docs/user_guide/expected_output.md index a126fb69..66963a4d 100644 --- a/docs/user_guide/expected_output.md +++ b/docs/user_guide/expected_output.md @@ -6,11 +6,11 @@ Other sub-commands should print out part of this output. ``` $ benchcab run -Creating src directory: /scratch/tm70/sb8430/bench_example/src +Creating src directory Checking out repositories... -Successfully checked out trunk at revision 9550 -Successfully checked out test-branch at revision 9550 -Successfully checked out CABLE-AUX at revision 9550 +Successfully checked out trunk at revision 9672 +Successfully checked out test-branch at revision 9672 +Successfully checked out CABLE-AUX at revision 9672 Writing revision number info to rev_number-1.log Compiling CABLE serially for realisation trunk... @@ -18,24 +18,31 @@ Successfully compiled CABLE for realisation trunk Compiling CABLE serially for realisation test-branch... Successfully compiled CABLE for realisation test-branch +Compiling CABLE with MPI for realisation trunk... +Successfully compiled CABLE for realisation trunk +Compiling CABLE with MPI for realisation test-branch... +Successfully compiled CABLE for realisation test-branch + Setting up run directory tree for fluxsite tests... -Creating runs/fluxsite/logs directory: /scratch/tm70/sb8430/bench_example/runs/fluxsite/logs -Creating runs/fluxsite/outputs directory: /scratch/tm70/sb8430/bench_example/runs/fluxsite/outputs -Creating runs/fluxsite/tasks directory: /scratch/tm70/sb8430/bench_example/runs/fluxsite/tasks -Creating runs/fluxsite/analysis directory: /scratch/tm70/sb8430/bench_example/runs/fluxsite/analysis -Creating runs/fluxsite/analysis/bitwise-comparisons directory: /scratch/tm70/sb8430/bench_example/runs/fluxsite/analysis/bitwise-comparisons -Creating task directories... Setting up tasks... Successfully setup fluxsite tasks +Setting up run directory tree for spatial tests... +Setting up tasks... 
+Successfully setup spatial tasks + Creating PBS job script to run fluxsite tasks on compute nodes: benchmark_cable_qsub.sh -PBS job submitted: 82479088.gadi-pbs +PBS job submitted: 100563227.gadi-pbs The CABLE log file for each task is written to runs/fluxsite/logs/_log.txt The CABLE standard output for each task is written to runs/fluxsite/tasks//out.txt The NetCDF output for each task is written to runs/fluxsite/outputs/_out.nc + +Running spatial tasks... +Successfully dispatched payu jobs + ``` -The PBS schedule job should print out the following to the job log file: +The benchmark_cable_qsub.sh PBS job should print out the following to the job log file: ``` Running fluxsite tasks... Successfully ran fluxsite tasks diff --git a/docs/user_guide/index.md b/docs/user_guide/index.md index e45e4f2c..3d93b9fd 100644 --- a/docs/user_guide/index.md +++ b/docs/user_guide/index.md @@ -8,12 +8,6 @@ In this guide, we will describe: `benchcab` has been designed to work on NCI machine exclusively. It might be extended later on to other systems. -!!! warning "Limitations" - Currently, - - * `benchcab` can only run simulations at flux sites. - * spin-up for CASA simulations are not supported. - ## Pre-requisites To use `benchcab`, you need to join the following projects at NCI: @@ -44,15 +38,24 @@ You need to load the module on each new session at NCI on login or compute nodes - *New feature:* running two versions of CABLE with the same standard set of science configurations except one version is patched to use a new feature. - *Ensemble run:* running any number of versions of CABLE with the same set of customised science configurations. -The regression and new feature run modes should be used as necessary when evaluating new development in CABLE. +The regression and new feature run modes should be used as necessary when evaluating new developments in CABLE. -The code will: (i) check out and (ii) build the code branches. 
Then it will run each executable across N standard science configurations for a given number of sites. It is possible to produce some plots locally from the output produced. But [the modelevaluation website][meorg] can be used for further benchmarking and evaluation. +The `benchcab` tool: + +- checks out the model versions specified by the user +- builds the required executables +- runs each model version across N standard science configurations for a variety of meteorological forcings +- performs bitwise comparison checks on model outputs across model versions + +The user can then pipe the model outputs into a benchmark analysis via [modelevaluation.org][meorg] to assess model performance. ### Create a work directory #### Choose a location -You can run the benchmark from any directory you want under `/scratch` or `/g/data`. `/scratch` is preferred as the data in the run directory does not need to be preserved for a long time. The code will create sub-directories as needed. Please ensure you have enough space to store the CABLE outputs in your directory, at least temporary, until you upload them to [modelevaluation.org][meorg]. You will need about 16GB for the outputs for the `forty-two-site` experiment (with 4 different science configurations). +You can run the benchmark from any directory you want under `/scratch` or `/g/data`. `/scratch` is preferred as the data in the run directory does not need to be preserved for a long time. The code will create sub-directories as needed. Please ensure you have enough space to store the CABLE outputs in your directory, at least temporarily until you upload them to [modelevaluation.org][meorg]. + +The full test suite will require about 22GB of storage space. !!! Warning "The HOME directory is unsuitable" @@ -83,7 +86,7 @@ cd bench_example !!! warning `benchcab` will stop if it is not run within a work directory with the proper structure. -Currently, `benchcab` can only run CABLE for flux sites. 
**To run the whole workflow**, run +Currently, `benchcab` can only run CABLE for flux site and offline spatial configurations. **To run the whole workflow**, run ```bash benchcab run @@ -93,7 +96,8 @@ The tool will follow the steps: 1. Checkout the code branches. The codes will be stored under `src/` directory in your work directory. The sub-directories are created automatically. 2. Compile the source code from all branches -3. Setup and launch a PBS job to run the simulations in parallel. When `benchcab` launches the PBS job, it will print out the job ID to the terminal. You can check the status of the job with `qstat`. `benchcab` will not warn you when the simulations are over. +3. Setup and launch a PBS job to run the flux site simulations in parallel. When `benchcab` launches the PBS job, it will print out the job ID to the terminal. You can check the status of the job with `qstat`. `benchcab` will not warn you when the simulations are over. +4. Setup and run an ensemble of offline spatial runs using the [`payu`][payu-github] framework. !!! tip "Expected output" @@ -119,22 +123,27 @@ The following files and directories are created when `benchcab run` executes suc ├── benchmark_cable_qsub.sh.o ├── rev_number-1.log ├── runs -│   └── fluxsite -│   ├── logs -│ │ ├── _log.txt -│ │ └── ... -│   ├── outputs -│ │ ├── _out.nc -│ │ └── ... -│   ├── analysis -│ │ └── bitwise-comparisons -│   └── tasks -│ ├── -│ │ ├── cable (executable) -│ │ ├── cable.nml -│ │ ├── cable_soilparm.nml -│ │ └── pft_params.nml -│ └── ... +│   ├── fluxsite +│   │ ├── logs +│ │ │ ├── _log.txt +│ │ │ └── ... +│   │ ├── outputs +│ │ │ ├── _out.nc +│ │ │ └── ... +│   │ ├── analysis +│ │ │ └── bitwise-comparisons +│   │ └── tasks +│ │ ├── +│ │ │ ├── cable (executable) +│ │ │ ├── cable.nml +│ │ │ ├── cable_soilparm.nml +│ │ │ └── pft_params.nml +│ │ └── ... +│   ├── spatial +│   │ └── tasks +│ │ ├── (a payu control / experiment directory) +│ │ └── ... 
+│   └── payu-laboratory └── src ├── CABLE-AUX ├── @@ -155,11 +164,11 @@ The following files and directories are created when `benchcab run` executes suc `runs/fluxsite/` -: directory that contains the log files, output files, and tasks for running CABLE. +: directory that contains the log files, output files, and tasks for running CABLE in the fluxsite configuration. `runs/fluxsite/tasks` -: directory that contains task directories. A task consists of a CABLE run for a branch (realisation), a meteorological forcing, and a science configuration. In the above directory structure, `` uses the following naming convention: +: directory that contains fluxsite task directories. A task consists of a CABLE run for a branch (realisation), a meteorological forcing, and a science configuration. In the above directory structure, `` uses the following naming convention: ``` _R_S @@ -183,6 +192,29 @@ The following files and directories are created when `benchcab run` executes suc : directory that contains the standard output produced by the bitwise comparison command: `benchcab fluxsite-bitwise-cmp`. Standard output is only saved when the netcdf files being compared differ from each other +`runs/spatial/` + +: directory that contains task directories for running CABLE in the offline spatial configuration. + +`runs/spatial/tasks` + +: directory that contains payu control directories (or experiments) configured for each spatial task. A task consists of a CABLE run for a branch (realisation), a meteorological forcing, and a science configuration. In the above directory structure, `` uses the following naming convention: + +``` +_R_S +``` + +: where `met_forcing_name` is the name of the spatial met forcing, `realisation_key` is the branch key specified in the config file, and `science_config_key` identifies the science configuration used. See the [`met_forcings`](config_options.md#met_forcings) option for more information on how to configure the met forcings used. 
+ + +`runs/spatial/tasks//` + +: a payu control directory (or experiment). See [Configuring your experiment](https://payu.readthedocs.io/en/latest/config.html) for more information on payu experiments. + +`runs/payu-laboratory/` + +: a custom payu laboratory directory. See [Laboratory Structure](https://payu.readthedocs.io/en/latest/design.html#laboratory-structure) for more information on the payu laboratory directory. + !!! warning "Re-running `benchcab` multiple times in the same working directory" We recommend the user to manually delete the generated files when re-running `benchcab`. Re-running `benchcab` multiple times in the same working directory is currently not yet supported (see issue [CABLE-LSM/benchcab#20](https://github.com/CABLE-LSM/benchcab/issues/20)). To clean the current working directory, run the following command in the working directory @@ -192,10 +224,11 @@ The following files and directories are created when `benchcab run` executes suc ## Analyse the output with [modelevaluation.org][meorg] - - Once the benchmarking has finished running all the simulations, you need to upload the output files to [modelevaluation.org][meorg] via the web interface. To do this: +!!! warning "Limitations" + Model evaluation for offline spatial outputs is not yet available (see issue [CABLE-LSM/benchcab#193](https://github.com/CABLE-LSM/benchcab/issues/193)). + 1. Go to [modelevaluation.org][meorg] and login or create a new account. 2. Navigate to the `benchcab-evaluation` workspace. To do this, click the **Current Workspace** button at the top of the page, and select `benchcab-evaluation` under "Workspaces Shared With Me".
@@ -272,3 +305,4 @@ Alternatively, you can also access the ACCESS-NRI User support via [the ACCESS-H [benchmark_5]: https://modelevaluation.org/modelOutput/display/diLdf49PfpEwZemTz [benchmark_42]: https://modelevaluation.org/modelOutput/display/pvkuY5gpR2n4FKZw3 [run_CABLE_v2]: running_CABLE_v2.md +[payu-github]: https://github.com/payu-org/payu diff --git a/tests/conftest.py b/tests/conftest.py index 6f20dc20..a36a37e7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -90,6 +90,20 @@ def config(): }, "multiprocessing": True, }, + "spatial": { + "met_forcings": { + "crujra_access": "https://github.com/CABLE-LSM/cable_example.git", + "gswp": "foo", + }, + "payu": { + "config": { + "ncpus": 16, + "walltime": "1:00:00", + "mem": "64GB", + }, + "args": "-n 2", + }, + }, } diff --git a/tests/test_cli.py b/tests/test_cli.py index f4c4e038..c190d25c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -15,7 +15,6 @@ def test_cli_parser(): res = vars(parser.parse_args(["run"])) assert res == { "config_path": "config.yaml", - "no_submit": False, "verbose": False, "skip": [], "func": app.run, @@ -42,6 +41,7 @@ def test_cli_parser(): assert res == { "config_path": "config.yaml", "verbose": False, + "mpi": False, "func": app.build, } @@ -93,9 +93,26 @@ def test_cli_parser(): assert res == { "config_path": "config.yaml", "verbose": False, + "skip": [], "func": app.spatial, } + # Success case: default spatial-setup-work-dir command + res = vars(parser.parse_args(["spatial-setup-work-dir"])) + assert res == { + "config_path": "config.yaml", + "verbose": False, + "func": app.spatial_setup_work_directory, + } + + # Success case: default spatial-run-tasks command + res = vars(parser.parse_args(["spatial-run-tasks"])) + assert res == { + "config_path": "config.yaml", + "verbose": False, + "func": app.spatial_run_tasks, + } + # Failure case: pass --no-submit to a non 'run' command with pytest.raises(SystemExit): parser.parse_args(["fluxsite-setup-work-dir", 
"--no-submit"]) diff --git a/tests/test_fluxsite.py b/tests/test_fluxsite.py index 606b7d09..17c45881 100644 --- a/tests/test_fluxsite.py +++ b/tests/test_fluxsite.py @@ -6,7 +6,6 @@ """ import math -from pathlib import Path import f90nml import netCDF4 @@ -15,12 +14,10 @@ from benchcab import __version__, internal from benchcab.fluxsite import ( CableError, - Task, + FluxsiteTask, get_comparison_name, get_fluxsite_comparisons, get_fluxsite_tasks, - patch_namelist, - patch_remove_namelist, ) from benchcab.model import Model from benchcab.utils.repo import Repo @@ -59,8 +56,8 @@ def model(mock_subprocess_handler, mock_repo): @pytest.fixture() def task(model, mock_subprocess_handler): - """Returns a mock `Task` instance.""" - _task = Task( + """Returns a mock `FluxsiteTask` instance.""" + _task = FluxsiteTask( model=model, met_forcing_file="forcing-file.nc", sci_conf_id=0, @@ -71,7 +68,7 @@ def task(model, mock_subprocess_handler): class TestGetTaskName: - """tests for `Task.get_task_name()`.""" + """tests for `FluxsiteTask.get_task_name()`.""" def test_task_name_convention(self, task): """Success case: check task name convention.""" @@ -79,7 +76,7 @@ def test_task_name_convention(self, task): class TestGetLogFilename: - """Tests for `Task.get_log_filename()`.""" + """Tests for `FluxsiteTask.get_log_filename()`.""" def test_log_filename_convention(self, task): """Success case: check log file name convention.""" @@ -87,7 +84,7 @@ def test_log_filename_convention(self, task): class TestGetOutputFilename: - """Tests for `Task.get_output_filename()`.""" + """Tests for `FluxsiteTask.get_output_filename()`.""" def test_output_filename_convention(self, task): """Success case: check output file name convention.""" @@ -95,11 +92,11 @@ def test_output_filename_convention(self, task): class TestFetchFiles: - """Tests for `Task.fetch_files()`.""" + """Tests for `FluxsiteTask.fetch_files()`.""" @pytest.fixture(autouse=True) def _setup(self, task): - """Setup precondition for 
`Task.fetch_files()`.""" + """Setup precondition for `FluxsiteTask.fetch_files()`.""" internal.NAMELIST_DIR.mkdir() (internal.NAMELIST_DIR / internal.CABLE_NML).touch() (internal.NAMELIST_DIR / internal.CABLE_SOIL_NML).touch() @@ -125,11 +122,11 @@ def test_required_files_are_copied_to_task_dir(self, task): class TestCleanTask: - """Tests for `Task.clean_task()`.""" + """Tests for `FluxsiteTask.clean_task()`.""" @pytest.fixture(autouse=True) def _setup(self, task): - """Setup precondition for `Task.clean_task()`.""" + """Setup precondition for `FluxsiteTask.clean_task()`.""" task_dir = internal.FLUXSITE_DIRS["TASKS"] / task.get_task_name() task_dir.mkdir(parents=True) (task_dir / internal.CABLE_NML).touch() @@ -157,91 +154,12 @@ def test_clean_files(self, task): assert not (internal.FLUXSITE_DIRS["LOG"] / task.get_log_filename()).exists() -class TestPatchNamelist: - """Tests for `patch_namelist()`.""" - - @pytest.fixture() - def nml_path(self): - """Return a path to a namelist file used for testing.""" - return Path("test.nml") - - def test_patch_on_non_existing_namelist_file(self, nml_path): - """Success case: patch non-existing namelist file.""" - patch = {"cable": {"file": "/path/to/file", "bar": 123}} - patch_namelist(nml_path, patch) - assert f90nml.read(nml_path) == patch - - def test_patch_on_non_empty_namelist_file(self, nml_path): - """Success case: patch non-empty namelist file.""" - f90nml.write({"cable": {"file": "/path/to/file", "bar": 123}}, nml_path) - patch_namelist(nml_path, {"cable": {"some": {"parameter": True}, "bar": 456}}) - assert f90nml.read(nml_path) == { - "cable": { - "file": "/path/to/file", - "bar": 456, - "some": {"parameter": True}, - } - } - - def test_empty_patch_does_nothing(self, nml_path): - """Success case: empty patch does nothing.""" - f90nml.write({"cable": {"file": "/path/to/file", "bar": 123}}, nml_path) - prev = f90nml.read(nml_path) - patch_namelist(nml_path, {}) - assert f90nml.read(nml_path) == prev - - -class 
TestPatchRemoveNamelist: - """Tests for `patch_remove_namelist()`.""" - - @pytest.fixture() - def nml(self): - """Return a namelist dictionary used for testing.""" - return { - "cable": { - "cable_user": { - "some_parameter": True, - "new_feature": True, - }, - }, - } - - @pytest.fixture() - def nml_path(self, nml): - """Create a namelist file and return its path.""" - _nml_path = Path("test.nml") - f90nml.write(nml, _nml_path) - return _nml_path - - def test_remove_namelist_parameter_from_derived_type(self, nml_path): - """Success case: remove a namelist parameter from derrived type.""" - patch_remove_namelist( - nml_path, {"cable": {"cable_user": {"new_feature": True}}} - ) - assert f90nml.read(nml_path) == { - "cable": {"cable_user": {"some_parameter": True}} - } - - def test_empty_patch_remove_does_nothing(self, nml_path, nml): - """Success case: empty patch_remove does nothing.""" - patch_remove_namelist(nml_path, {}) - assert f90nml.read(nml_path) == nml - - def test_key_error_raised_for_non_existent_namelist_parameter(self, nml_path): - """Failure case: test patch_remove KeyError exeption.""" - with pytest.raises( - KeyError, - match=f"Namelist parameters specified in `patch_remove` do not exist in {nml_path.name}.", - ): - patch_remove_namelist(nml_path, {"cable": {"foo": {"bar": True}}}) - - class TestSetupTask: - """Tests for `Task.setup_task()`.""" + """Tests for `FluxsiteTask.setup_task()`.""" @pytest.fixture(autouse=True) def _setup(self, task): - """Setup precondition for `Task.setup_task()`.""" + """Setup precondition for `FluxsiteTask.setup_task()`.""" (internal.NAMELIST_DIR).mkdir() (internal.NAMELIST_DIR / internal.CABLE_NML).touch() (internal.NAMELIST_DIR / internal.CABLE_SOIL_NML).touch() @@ -288,11 +206,11 @@ def test_all_settings_are_patched_into_namelist_file(self, task): class TestRunCable: - """Tests for `Task.run_cable()`.""" + """Tests for `FluxsiteTask.run_cable()`.""" @pytest.fixture(autouse=True) def _setup(self, task): - """Setup 
precondition for `Task.run_cable()`.""" + """Setup precondition for `FluxsiteTask.run_cable()`.""" task_dir = internal.FLUXSITE_DIRS["TASKS"] / task.get_task_name() task_dir.mkdir(parents=True) @@ -314,7 +232,7 @@ def test_cable_error_exception(self, task, mock_subprocess_handler): class TestAddProvenanceInfo: - """Tests for `Task.add_provenance_info()`.""" + """Tests for `FluxsiteTask.add_provenance_info()`.""" @pytest.fixture() def nml(self): @@ -338,7 +256,7 @@ def nc_output_path(self, task): @pytest.fixture(autouse=True) def _setup(self, task, nml): - """Setup precondition for `Task.add_provenance_info()`.""" + """Setup precondition for `FluxsiteTask.add_provenance_info()`.""" task_dir = internal.FLUXSITE_DIRS["TASKS"] / task.get_task_name() task_dir.mkdir(parents=True) fluxsite_output_dir = internal.FLUXSITE_DIRS["OUTPUT"] @@ -365,7 +283,7 @@ class TestGetFluxsiteTasks: @pytest.fixture() def models(self, mock_repo): - """Return a list of `CableRepository` instances used for testing.""" + """Return a list of `Model` instances used for testing.""" return [Model(repo=mock_repo, model_id=id) for id in range(2)] @pytest.fixture() @@ -407,7 +325,7 @@ class TestGetFluxsiteComparisons: def test_comparisons_for_two_branches_with_two_tasks(self, mock_repo): """Success case: comparisons for two branches with two tasks.""" tasks = [ - Task( + FluxsiteTask( model=Model(repo=mock_repo, model_id=model_id), met_forcing_file="foo.nc", sci_config={"foo": "bar"}, @@ -430,7 +348,7 @@ def test_comparisons_for_two_branches_with_two_tasks(self, mock_repo): def test_comparisons_for_three_branches_with_three_tasks(self, mock_repo): """Success case: comparisons for three branches with three tasks.""" tasks = [ - Task( + FluxsiteTask( model=Model(repo=mock_repo, model_id=model_id), met_forcing_file="foo.nc", sci_config={"foo": "bar"}, diff --git a/tests/test_model.py b/tests/test_model.py index a1c68580..b3e19af0 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -25,18 
+25,24 @@ class MockRepo(Repo): def __init__(self) -> None: self.handle = "trunk" - def checkout(self, verbose=False): + def checkout(self, path: Path, verbose=False): pass def get_branch_name(self) -> str: return self.handle - def get_revision(self) -> str: + def get_revision(self, path: Path) -> str: pass return MockRepo() +@pytest.fixture(params=[False, True]) +def mpi(request): + """Return a parametrized mpi flag for testing.""" + return request.param + + @pytest.fixture() def model(mock_repo, mock_subprocess_handler, mock_environment_modules_handler): """Return a mock `Model` instance for testing against.""" @@ -67,11 +73,15 @@ def test_undefined_model_id(self, model): class TestGetExePath: """Tests for `Model.get_exe_path()`.""" - def test_serial_exe_path(self, model): - """Success case: get path to serial executable.""" + @pytest.mark.parametrize( + ("mpi", "expected_exe"), + [(False, internal.CABLE_EXE), (True, internal.CABLE_MPI_EXE)], + ) + def test_get_exe_path(self, model, mpi, expected_exe): + """Success case: get path to executable.""" assert ( - model.get_exe_path() - == internal.SRC_DIR / model.name / "offline" / internal.CABLE_EXE + model.get_exe_path(mpi=mpi) + == internal.SRC_DIR / model.name / "offline" / expected_exe ) @@ -142,10 +152,18 @@ def _setup(self, model): (internal.SRC_DIR / model.name / "offline" / "Makefile").touch() (internal.SRC_DIR / model.name / "offline" / "foo.f90").touch() - def test_source_files_and_scripts_are_copied_to_tmp_dir(self, model): + @pytest.fixture() + def tmp_dir(self, model, mpi): + """Return the relative path to the temporary build directory.""" + return ( + internal.SRC_DIR + / model.name + / (internal.TMP_BUILD_DIR_MPI if mpi else internal.TMP_BUILD_DIR) + ) + + def test_source_files_and_scripts_are_copied_to_tmp_dir(self, model, mpi, tmp_dir): """Success case: test source files and scripts are copied to .tmp.""" - model.pre_build() - tmp_dir = internal.SRC_DIR / model.name / "offline" / ".tmp" + 
model.pre_build(mpi=mpi) assert (tmp_dir / "Makefile").exists() assert (tmp_dir / "foo.f90").exists() @@ -164,30 +182,36 @@ def modules(self): return ["foo", "bar"] @pytest.fixture() - def env(self, netcdf_root): - """Return a dictionary containing the required environment variables.""" + def expected_env(self, netcdf_root, mpi): + """Return a dictionary of expected environment variables to be defined.""" return { "NCDIR": f"{netcdf_root}/lib/Intel", "NCMOD": f"{netcdf_root}/include/Intel", "CFLAGS": "-O2 -fp-model precise", "LDFLAGS": f"-L{netcdf_root}/lib/Intel -O0", "LD": "-lnetcdf -lnetcdff", - "FC": "ifort", + "FC": "mpif90" if mpi else "ifort", } @pytest.fixture(autouse=True) def _setup(self, model, netcdf_root): """Setup precondition for `Model.run_build()`.""" - (internal.SRC_DIR / model.name / "offline" / ".tmp").mkdir(parents=True) + (internal.SRC_DIR / model.name / internal.TMP_BUILD_DIR).mkdir(parents=True) + (internal.SRC_DIR / model.name / internal.TMP_BUILD_DIR_MPI).mkdir(parents=True) # This is required so that we can use the NETCDF_ROOT environment # variable when running `make`: os.environ["NETCDF_ROOT"] = netcdf_root - def test_build_command_execution(self, model, mock_subprocess_handler, modules): + @pytest.mark.parametrize( + ("mpi", "expected_commands"), [(False, ["make"]), (True, ["make mpi"])] + ) + def test_build_command_execution( + self, model, mock_subprocess_handler, modules, mpi, expected_commands + ): """Success case: test build commands are run.""" - model.run_build(modules) - assert mock_subprocess_handler.commands == ["make"] + model.run_build(modules, mpi=mpi) + assert mock_subprocess_handler.commands == expected_commands def test_modules_loaded_at_runtime( self, model, mock_environment_modules_handler, modules @@ -202,11 +226,11 @@ def test_modules_loaded_at_runtime( ) in mock_environment_modules_handler.commands def test_commands_are_run_with_environment_variables( - self, model, mock_subprocess_handler, modules, env + self, 
model, mock_subprocess_handler, modules, mpi, expected_env ): """Success case: test commands are run with the correct environment variables.""" - model.run_build(modules) - for kv in env.items(): + model.run_build(modules, mpi=mpi) + for kv in expected_env.items(): assert kv in mock_subprocess_handler.env.items() @@ -216,18 +240,38 @@ class TestPostBuild: @pytest.fixture(autouse=True) def _setup(self, model): """Setup precondition for `Model.post_build()`.""" - (internal.SRC_DIR / model.name / "offline" / ".tmp").mkdir(parents=True) - ( - internal.SRC_DIR / model.name / "offline" / ".tmp" / internal.CABLE_EXE - ).touch() + tmp_build_dir = internal.SRC_DIR / model.name / internal.TMP_BUILD_DIR + tmp_build_dir.mkdir(parents=True) + (tmp_build_dir / internal.CABLE_EXE).touch() + + tmp_build_dir_mpi = internal.SRC_DIR / model.name / internal.TMP_BUILD_DIR_MPI + tmp_build_dir_mpi.mkdir(parents=True) + (tmp_build_dir_mpi / internal.CABLE_MPI_EXE).touch() + + @pytest.fixture() + def tmp_dir(self, model, mpi): + """Return the relative path to the temporary build directory.""" + return ( + internal.SRC_DIR + / model.name + / (internal.TMP_BUILD_DIR_MPI if mpi else internal.TMP_BUILD_DIR) + ) + + @pytest.fixture() + def exe(self, mpi): + """Return the name of the CABLE executable.""" + return internal.CABLE_MPI_EXE if mpi else internal.CABLE_EXE + + @pytest.fixture() + def offline_dir(self, model): + """Return the relative path to the offline source directory.""" + return internal.SRC_DIR / model.name / "offline" - def test_exe_moved_to_offline_dir(self, model): + def test_exe_moved_to_offline_dir(self, model, mpi, tmp_dir, exe, offline_dir): """Success case: test executable is moved to offline directory.""" - model.post_build() - tmp_dir = internal.SRC_DIR / model.name / "offline" / ".tmp" - assert not (tmp_dir / internal.CABLE_EXE).exists() - offline_dir = internal.SRC_DIR / model.name / "offline" - assert (offline_dir / internal.CABLE_EXE).exists() + 
model.post_build(mpi=mpi) + assert not (tmp_dir / exe).exists() + assert (offline_dir / exe).exists() class TestCustomBuild: diff --git a/tests/test_namelist.py b/tests/test_namelist.py new file mode 100644 index 00000000..87d629cf --- /dev/null +++ b/tests/test_namelist.py @@ -0,0 +1,87 @@ +"""`pytest` tests for namelist.py.""" + +from pathlib import Path + +import f90nml +import pytest + +from benchcab.utils.namelist import patch_namelist, patch_remove_namelist + + +class TestPatchNamelist: + """Tests for `patch_namelist()`.""" + + @pytest.fixture() + def nml_path(self): + """Return a path to a namelist file used for testing.""" + return Path("test.nml") + + def test_patch_on_non_existing_namelist_file(self, nml_path): + """Success case: patch non-existing namelist file.""" + patch = {"cable": {"file": "/path/to/file", "bar": 123}} + patch_namelist(nml_path, patch) + assert f90nml.read(nml_path) == patch + + def test_patch_on_non_empty_namelist_file(self, nml_path): + """Success case: patch non-empty namelist file.""" + f90nml.write({"cable": {"file": "/path/to/file", "bar": 123}}, nml_path) + patch_namelist(nml_path, {"cable": {"some": {"parameter": True}, "bar": 456}}) + assert f90nml.read(nml_path) == { + "cable": { + "file": "/path/to/file", + "bar": 456, + "some": {"parameter": True}, + } + } + + def test_empty_patch_does_nothing(self, nml_path): + """Success case: empty patch does nothing.""" + f90nml.write({"cable": {"file": "/path/to/file", "bar": 123}}, nml_path) + prev = f90nml.read(nml_path) + patch_namelist(nml_path, {}) + assert f90nml.read(nml_path) == prev + + +class TestPatchRemoveNamelist: + """Tests for `patch_remove_namelist()`.""" + + @pytest.fixture() + def nml(self): + """Return a namelist dictionary used for testing.""" + return { + "cable": { + "cable_user": { + "some_parameter": True, + "new_feature": True, + }, + }, + } + + @pytest.fixture() + def nml_path(self, nml): + """Create a namelist file and return its path.""" + _nml_path = 
Path("test.nml") + f90nml.write(nml, _nml_path) + return _nml_path + + def test_remove_namelist_parameter_from_derived_type(self, nml_path): + """Success case: remove a namelist parameter from derrived type.""" + patch_remove_namelist( + nml_path, {"cable": {"cable_user": {"new_feature": True}}} + ) + assert f90nml.read(nml_path) == { + "cable": {"cable_user": {"some_parameter": True}} + } + + def test_empty_patch_remove_does_nothing(self, nml_path, nml): + """Success case: empty patch_remove does nothing.""" + patch_remove_namelist(nml_path, {}) + assert f90nml.read(nml_path) == nml + + def test_key_error_raised_for_non_existent_namelist_parameter(self, nml_path): + """Failure case: test patch_remove KeyError exeption.""" + with pytest.raises( + KeyError, + match=f"Namelist parameters specified in `patch_remove` do not exist in {nml_path.name}.", + ): + patch_remove_namelist(nml_path, {"cable": {"foo": {"bar": True}}}) diff --git a/tests/test_spatial.py b/tests/test_spatial.py new file mode 100644 index 00000000..71a4bc9a --- /dev/null +++ b/tests/test_spatial.py @@ -0,0 +1,225 @@ +"""`pytest` tests for spatial.py. + +Note: explicit teardown for generated files and directories are not required as +the working directory used for testing is cleaned up in the `_run_around_tests` +pytest autouse fixture. 
+""" + +import contextlib +import io +from pathlib import Path + +import f90nml +import pytest +import yaml + +from benchcab import internal +from benchcab.model import Model +from benchcab.spatial import SpatialTask, get_spatial_tasks +from benchcab.utils.repo import Repo + + +@pytest.fixture() +def mock_repo(): + class MockRepo(Repo): + def __init__(self) -> None: + self.branch = "test-branch" + self.revision = "1234" + + def checkout(self, verbose=False): + pass + + def get_branch_name(self) -> str: + return self.branch + + def get_revision(self) -> str: + return self.revision + + return MockRepo() + + +@pytest.fixture() +def model(mock_subprocess_handler, mock_repo): + """Returns a `Model` instance.""" + _model = Model( + model_id=1, + repo=mock_repo, + patch={"cable": {"some_branch_specific_setting": True}}, + ) + _model.subprocess_handler = mock_subprocess_handler + return _model + + +@pytest.fixture() +def task(model, mock_subprocess_handler): + """Returns a mock `SpatialTask` instance.""" + _task = SpatialTask( + model=model, + met_forcing_name="crujra_access", + met_forcing_payu_experiment="https://github.com/CABLE-LSM/cable_example.git", + sci_conf_id=0, + sci_config={"cable": {"some_setting": True}}, + ) + _task.subprocess_handler = mock_subprocess_handler + return _task + + +class TestGetTaskName: + """Tests for `SpatialTask.get_task_name()`.""" + + def test_task_name_convention(self, task): + """Success case: check task name convention.""" + assert task.get_task_name() == "crujra_access_R1_S0" + + +class TestConfigureExperiment: + """Tests for `SpatialTask.configure_experiment()`.""" + + @pytest.fixture(autouse=True) + def _create_task_dir(self): + task_dir = internal.SPATIAL_TASKS_DIR / "crujra_access_R1_S0" + task_dir.mkdir(parents=True) + (task_dir / "config.yaml").touch() + (task_dir / "cable.nml").touch() + + def test_payu_config_parameters(self, task): + """Success case: check config.yaml parameters.""" + 
task.configure_experiment(payu_config={"some_parameter": "foo"}) + config_path = internal.SPATIAL_TASKS_DIR / task.get_task_name() / "config.yaml" + with config_path.open("r", encoding="utf-8") as file: + config = yaml.safe_load(file) + assert config["exe"] == str( + ( + internal.SRC_DIR / "test-branch" / "offline" / internal.CABLE_MPI_EXE + ).absolute() + ) + assert config["input"] == [ + str( + Path( + "src/CABLE-AUX/core/biogeophys/def_veg_params_zr_clitt_albedo_fix.txt" + ).absolute() + ), + str(Path("src/CABLE-AUX/core/biogeophys/def_soil_params.txt").absolute()), + ] + assert config["laboratory"] == str(internal.PAYU_LABORATORY_DIR.absolute()) + assert config["some_parameter"] == "foo" + + @pytest.mark.parametrize( + ("verbosity", "expected"), + [ + (False, ""), + ( + True, + " Updating experiment config parameters in " + "runs/spatial/tasks/crujra_access_R1_S0/config.yaml\n", + ), + ], + ) + def test_standard_output(self, task, verbosity, expected): + """Success case: test standard output.""" + with contextlib.redirect_stdout(io.StringIO()) as buf: + task.configure_experiment(verbose=verbosity) + assert buf.getvalue() == expected + + +class TestUpdateNamelist: + """Tests for `SpatialTask.update_namelist()`.""" + + @pytest.fixture(autouse=True) + def _create_task_dir(self): + task_dir = internal.SPATIAL_TASKS_DIR / "crujra_access_R1_S0" + task_dir.mkdir(parents=True) + (task_dir / "config.yaml").touch() + (task_dir / "cable.nml").touch() + + def test_namelist_parameters_are_patched(self, task): + """Success case: test namelist parameters are patched.""" + task.update_namelist() + res_nml = f90nml.read( + str(internal.SPATIAL_TASKS_DIR / task.get_task_name() / internal.CABLE_NML) + ) + assert res_nml["cable"] == { + "some_setting": True, + "some_branch_specific_setting": True, + } + + @pytest.mark.parametrize( + ("verbosity", "expected"), + [ + (False, ""), + ( + True, + " Adding science configurations to CABLE namelist file " + 
"runs/spatial/tasks/crujra_access_R1_S0/cable.nml\n" + " Adding branch specific configurations to CABLE namelist file " + "runs/spatial/tasks/crujra_access_R1_S0/cable.nml\n", + ), + ], + ) + def test_standard_output(self, task, verbosity, expected): + """Success case: test standard output.""" + with contextlib.redirect_stdout(io.StringIO()) as buf: + task.update_namelist(verbose=verbosity) + assert buf.getvalue() == expected + + +class TestRun: + """Tests for `SpatialTask.run()`.""" + + @pytest.fixture(autouse=True) + def _setup(self, task): + task_dir = internal.SPATIAL_TASKS_DIR / task.get_task_name() + task_dir.mkdir(parents=True, exist_ok=True) + + def test_payu_run_command(self, task, mock_subprocess_handler): + """Success case: test payu run command.""" + task.run() + assert "payu run" in mock_subprocess_handler.commands + + def test_payu_run_with_optional_arguments(self, task, mock_subprocess_handler): + """Success case: test payu run command with optional arguments.""" + task.payu_args = "--some-flag" + task.run() + assert "payu run --some-flag" in mock_subprocess_handler.commands + + +class TestGetSpatialTasks: + """Tests for `get_spatial_tasks()`.""" + + @pytest.fixture() + def models(self, mock_repo): + """Return a list of `Model` instances used for testing.""" + return [Model(repo=mock_repo, model_id=id) for id in range(2)] + + @pytest.fixture() + def met_forcings(self, config): + """Return a list of spatial met forcing specifications.""" + return config["spatial"]["met_forcings"] + + @pytest.fixture() + def science_configurations(self, config): + """Return a list of science configurations used for testing.""" + return config["science_configurations"] + + def test_task_product_across_branches_forcings_and_configurations( + self, models, met_forcings, science_configurations + ): + """Success case: test task product across branches, forcings and configurations.""" + tasks = get_spatial_tasks( + models=models, + met_forcings=met_forcings, + 
science_configurations=science_configurations, + ) + met_forcing_names = list(met_forcings.keys()) + assert [ + (task.model, task.met_forcing_name, task.sci_config) for task in tasks + ] == [ + (models[0], met_forcing_names[0], science_configurations[0]), + (models[0], met_forcing_names[0], science_configurations[1]), + (models[0], met_forcing_names[1], science_configurations[0]), + (models[0], met_forcing_names[1], science_configurations[1]), + (models[1], met_forcing_names[0], science_configurations[0]), + (models[1], met_forcing_names[0], science_configurations[1]), + (models[1], met_forcing_names[1], science_configurations[0]), + (models[1], met_forcing_names[1], science_configurations[1]), + ] diff --git a/tests/test_workdir.py b/tests/test_workdir.py index 62dd710c..ae67676e 100644 --- a/tests/test_workdir.py +++ b/tests/test_workdir.py @@ -12,6 +12,7 @@ from benchcab.workdir import ( clean_directory_tree, setup_fluxsite_directory_tree, + setup_spatial_directory_tree, ) @@ -37,6 +38,25 @@ def test_directory_structure_generated(self, fluxsite_directory_list): assert path.exists() +class TestSetupSpatialDirectoryTree: + """Tests for `setup_spatial_directory_tree()`.""" + + @pytest.fixture() + def spatial_directory_list(self): + """Return the list of work directories we want benchcab to create.""" + return [ + Path("runs", "spatial"), + Path("runs", "spatial", "tasks"), + Path("runs", "payu-laboratory"), + ] + + def test_directory_structure_generated(self, spatial_directory_list): + """Success case: generate spatial directory structure.""" + setup_spatial_directory_tree() + for path in spatial_directory_list: + assert path.exists() + + class TestCleanDirectoryTree: """Tests for `clean_directory_tree()`.""" From 2b9ee09e66aadc5c9e55e7d112a9104bbf8e1dd0 Mon Sep 17 00:00:00 2001 From: Sean Bryan Date: Fri, 1 Dec 2023 11:54:18 +1100 Subject: [PATCH 08/50] Use one branch in integration test This is to reduce the compilation work as we now compile the serial and MPI 
executable. --- benchcab/data/test/integration.sh | 7 ------- 1 file changed, 7 deletions(-) diff --git a/benchcab/data/test/integration.sh b/benchcab/data/test/integration.sh index fd8cc6f0..e672c887 100644 --- a/benchcab/data/test/integration.sh +++ b/benchcab/data/test/integration.sh @@ -18,13 +18,6 @@ cat > config.yaml << EOL project: $PROJECT realisations: - - repo: - svn: - branch_path: trunk - # TODO(Sean): This is required to compile legacy versions. - # We should probably deprecate support for SVN branches - # and remove the SVN trunk from our integration tests. - build_script: offline/build3.sh - repo: git: branch: main From 80ecad7a9504002c78c132be9e23921da0cab951 Mon Sep 17 00:00:00 2001 From: Sean Bryan Date: Tue, 5 Dec 2023 17:15:29 +1100 Subject: [PATCH 09/50] Remove repeated content --- docs/user_guide/index.md | 9 --------- 1 file changed, 9 deletions(-) diff --git a/docs/user_guide/index.md b/docs/user_guide/index.md index 3d93b9fd..badd66fc 100644 --- a/docs/user_guide/index.md +++ b/docs/user_guide/index.md @@ -40,15 +40,6 @@ You need to load the module on each new session at NCI on login or compute nodes The regression and new feature run modes should be used as necessary when evaluating new developments in CABLE. -The `benchcab` tool: - -- checks out the model versions specified by the user -- builds the required executables -- runs each model version across N standard science configurations for a variety of meteorological forcings -- performs bitwise comparison checks on model outputs across model versions - -The user can then pipe the model outputs into a benchmark analysis via [modelevaluation.org][meorg] to assess model performance. 
- ### Create a work directory #### Choose a location From 58e08b642de5453bf5c36b9e3328a10b4da41703 Mon Sep 17 00:00:00 2001 From: Sean Bryan Date: Tue, 5 Dec 2023 17:15:50 +1100 Subject: [PATCH 10/50] Add section on supported configurations --- README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4f3965bb..28df6dfc 100644 --- a/README.md +++ b/README.md @@ -7,13 +7,20 @@ - checks out the model versions specified by the user - builds the required executables -- runs each model version across N standard science configurations +- runs each model version across N standard science configurations for a variety of meteorological forcings - performs bitwise comparison checks on model outputs across model versions The user can then pipe the model outputs into a benchmark analysis via [modelevaluation.org][meorg] to assess model performance. The full documentation is available at [benchcab.readthedocs.io][docs]. +## Supported configurations + +`benchcab` currently tests the following model configurations for CABLE: + +- **Flux site simulations (offline)** - running CABLE forced with observed eddy covariance data at a single site +- **Global/regional simulations (offline)** - running CABLE forced with observational products over a region (global or regional) + ## License `benchcab` is distributed under [an Apache License v2.0][apache-license]. From 27977463357210ed1cd4a4f1c4d3bce35e5e7f79 Mon Sep 17 00:00:00 2001 From: Sean Bryan <39685865+SeanBryan51@users.noreply.github.com> Date: Mon, 11 Dec 2023 16:25:10 +1100 Subject: [PATCH 11/50] Update README.md Co-authored-by: Claire Carouge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 28df6dfc..a057102b 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ The full documentation is available at [benchcab.readthedocs.io][docs]. 
`benchcab` currently tests the following model configurations for CABLE: - **Flux site simulations (offline)** - running CABLE forced with observed eddy covariance data at a single site -- **Global/regional simulations (offline)** - running CABLE forced with observational products over a region (global or regional) +- **Global/regional simulations (offline)** - running CABLE forced with meteorological fields over a region (global or regional) ## License From ddf21cb036971f2950c417873af2dbaacfc1fd57 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Fri, 19 Jan 2024 12:49:52 +1100 Subject: [PATCH 12/50] Add function for reading optional data --- benchcab/config.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/benchcab/config.py b/benchcab/config.py index f2daa281..cc0c7bcd 100644 --- a/benchcab/config.py +++ b/benchcab/config.py @@ -66,6 +66,21 @@ def validate_config(config: dict) -> bool: # Invalid raise ConfigValidationException(v) +def read_optional_data(config: dict): + + config["name"] = config.get("name", Path(".")) + config["science_configurations"] = config.get("science_configurations", internal.DEFAULT_SCIENCE_CONFIGURATIONS) + + config["fluxsite"] = config.get("fluxsite", {}) + config["fluxsite"]["experiment"] = config["fluxsite"].get("experiment", internal.FLUXSITE_DEFAULT_EXPERIMENT) + config["fluxsite"]["pbs"] = config["fluxsite"].get("pbs", {}) + + pbs_config = config["fluxsite"]["pbs"] + pbs_config_params = ["mem", "ncpus", "storage", "walltime"] + for pcp in pbs_config_params: + pbs_config[pcp] = pbs_config.get(pcp, internal.FLUXSITE_DEFAULT_PBS[pcp]) + + pbs_config["multiprocess"] = internal.FLUXSITE_DEFAULT_MULTIPROCESS def read_config(config_path: str) -> dict: """Reads the config file and returns a dictionary containing the configurations. 
@@ -90,6 +105,8 @@ def read_config(config_path: str) -> dict: with open(Path(config_path), "r", encoding="utf-8") as file: config = yaml.safe_load(file) + read_optional_data(config) + # Validate and return. validate_config(config) return config From 55b4323bdde583fbf61b9fbb9b5b4974524ee366 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Fri, 19 Jan 2024 13:07:50 +1100 Subject: [PATCH 13/50] Use parameters assuming initialisation --- benchcab/benchcab.py | 35 +++++++++-------------------------- benchcab/utils/pbs.py | 11 ++++------- 2 files changed, 13 insertions(+), 33 deletions(-) diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index 6b2a7ead..b519af2e 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -105,10 +105,9 @@ def _get_config(self, config_path: str) -> dict: def _get_models(self, config: dict) -> list[Model]: if not self._models: for id, sub_config in enumerate(config["realisations"]): - name = sub_config.get("name") repo = create_repo( spec=sub_config.pop("repo"), - path=internal.SRC_DIR / name if name else internal.SRC_DIR, + path=internal.SRC_DIR / config["name"], ) self._models.append(Model(repo=repo, model_id=id, **sub_config)) return self._models @@ -117,13 +116,9 @@ def _initialise_tasks(self, config: dict) -> list[Task]: """A helper method that initialises and returns the `tasks` attribute.""" self.tasks = get_fluxsite_tasks( models=self._get_models(config), - science_configurations=config.get( - "science_configurations", internal.DEFAULT_SCIENCE_CONFIGURATIONS - ), + science_configurations=config["science_configurations"], fluxsite_forcing_file_names=get_met_forcing_file_names( - config.get("fluxsite", {}).get( - "experiment", internal.FLUXSITE_DEFAULT_EXPERIMENT - ) + config["fluxsite"]["experiment"] ), ) return self.tasks @@ -155,7 +150,7 @@ def fluxsite_submit_job( verbose=verbose, skip_bitwise_cmp="fluxsite-bitwise-cmp" in skip, benchcab_path=str(self.benchcab_exe_path), - pbs_config=config.get("fluxsite", 
{}).get("pbs"), + pbs_config=config["fluxsite"]["pbs"], ) file.write(contents) @@ -201,6 +196,7 @@ def checkout(self, config_path: str, verbose: bool): ) cable_aux_repo.checkout(verbose=verbose) + rev_number_log_path = next_path("rev_number-*.log") print(f"Writing revision number info to {rev_number_log_path}") with rev_number_log_path.open("w", encoding="utf-8") as file: @@ -250,14 +246,8 @@ def fluxsite_run_tasks(self, config_path: str, verbose: bool): tasks = self.tasks if self.tasks else self._initialise_tasks(config) print("Running fluxsite tasks...") - try: - multiprocess = config["fluxsite"]["multiprocess"] - except KeyError: - multiprocess = internal.FLUXSITE_DEFAULT_MULTIPROCESS - if multiprocess: - ncpus = config.get("pbs", {}).get( - "ncpus", internal.FLUXSITE_DEFAULT_PBS["ncpus"] - ) + if config["multiprocess"]: + ncpus = config["pbs"]["ncpus"] run_tasks_in_parallel(tasks, n_processes=ncpus, verbose=verbose) else: run_tasks(tasks, verbose=verbose) @@ -278,15 +268,8 @@ def fluxsite_bitwise_cmp(self, config_path: str, verbose: bool): comparisons = get_fluxsite_comparisons(tasks) print("Running comparison tasks...") - try: - multiprocess = config["fluxsite"]["multiprocess"] - except KeyError: - multiprocess = internal.FLUXSITE_DEFAULT_MULTIPROCESS - if multiprocess: - try: - ncpus = config["fluxsite"]["pbs"]["ncpus"] - except KeyError: - ncpus = internal.FLUXSITE_DEFAULT_PBS["ncpus"] + if config["multiprocess"]: + ncpus = config["fluxsite"]["pbs"]["ncpus"] run_comparisons_in_parallel(comparisons, n_processes=ncpus, verbose=verbose) else: run_comparisons(comparisons, verbose=verbose) diff --git a/benchcab/utils/pbs.py b/benchcab/utils/pbs.py index c0386a7d..9118eb42 100644 --- a/benchcab/utils/pbs.py +++ b/benchcab/utils/pbs.py @@ -29,19 +29,16 @@ def render_job_script( f"module load {module_name}" for module_name in modules ) verbose_flag = "-v" if verbose else "" - ncpus = pbs_config.get("ncpus", internal.FLUXSITE_DEFAULT_PBS["ncpus"]) - mem = 
pbs_config.get("mem", internal.FLUXSITE_DEFAULT_PBS["mem"]) - walltime = pbs_config.get("walltime", internal.FLUXSITE_DEFAULT_PBS["walltime"]) storage_flags = [ "gdata/ks32", "gdata/hh5", - *pbs_config.get("storage", internal.FLUXSITE_DEFAULT_PBS["storage"]), + *pbs_config["storage"] ] return f"""#!/bin/bash #PBS -l wd -#PBS -l ncpus={ncpus} -#PBS -l mem={mem} -#PBS -l walltime={walltime} +#PBS -l ncpus={pbs_config["ncpus"]} +#PBS -l mem={pbs_config["mem"]} +#PBS -l walltime={pbs_config["walltime"]} #PBS -q normal #PBS -P {project} #PBS -j oe From 9d7208a33e23e191d6efa842530c7f967bd43cef Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Sun, 21 Jan 2024 15:41:30 +1100 Subject: [PATCH 14/50] Correct config schema for multiprocess and walltime --- benchcab/data/config-schema.yml | 4 ++-- tests/conftest.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/benchcab/data/config-schema.yml b/benchcab/data/config-schema.yml index 892d3abf..755c374e 100644 --- a/benchcab/data/config-schema.yml +++ b/benchcab/data/config-schema.yml @@ -72,7 +72,7 @@ fluxsite: "US-Whs" ] required: false - multiprocessing: + multiprocess: type: "boolean" required: false pbs: @@ -87,7 +87,7 @@ fluxsite: required: false walltime: type: "string" - regex: "^[0-4][0-9]:[0-5][0-9]:[0-5][0-9]$" + regex: "^[0-4]?[0-9]:[0-5]?[0-9]:[0-5]?[0-9]$" required: false storage: type: list diff --git a/tests/conftest.py b/tests/conftest.py index 6f20dc20..a6d06006 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -88,7 +88,7 @@ def config(): "walltime": "01:00:00", "storage": ["gdata/foo123"], }, - "multiprocessing": True, + "multiprocess": True, }, } From 3f59fe3a6d212969e5d845d119b9144018f3e3c6 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Sun, 21 Jan 2024 15:59:08 +1100 Subject: [PATCH 15/50] Make pbs_config as default argument in render_job_script --- benchcab/utils/pbs.py | 8 ++------ tests/test_pbs.py | 5 ++++- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git 
a/benchcab/utils/pbs.py b/benchcab/utils/pbs.py index 9118eb42..99528172 100644 --- a/benchcab/utils/pbs.py +++ b/benchcab/utils/pbs.py @@ -3,27 +3,23 @@ """Contains helper functions for manipulating PBS job scripts.""" -from typing import Optional - from benchcab import internal - def render_job_script( project: str, config_path: str, modules: list, benchcab_path: str, + pbs_config: dict, verbose=False, skip_bitwise_cmp=False, - pbs_config: Optional[dict] = None, ) -> str: """Returns the text for a PBS job script that executes all computationally expensive commands. This includes things such as running CABLE and running bitwise comparison jobs between model output files. """ - if pbs_config is None: - pbs_config = internal.FLUXSITE_DEFAULT_PBS + pbs_config = internal.FLUXSITE_DEFAULT_PBS | pbs_config module_load_lines = "\n".join( f"module load {module_name}" for module_name in modules diff --git a/tests/test_pbs.py b/tests/test_pbs.py index ec53fe7b..e12a94b1 100644 --- a/tests/test_pbs.py +++ b/tests/test_pbs.py @@ -13,6 +13,7 @@ def test_default_job_script(self): project="tm70", config_path="/path/to/config.yaml", modules=["foo", "bar", "baz"], + pbs_config=internal.FLUXSITE_DEFAULT_PBS, benchcab_path="/absolute/path/to/benchcab", ) == ( f"""#!/bin/bash @@ -46,6 +47,7 @@ def test_verbose_flag_added_to_command_line_arguments(self): project="tm70", config_path="/path/to/config.yaml", modules=["foo", "bar", "baz"], + pbs_config=internal.FLUXSITE_DEFAULT_PBS, verbose=True, benchcab_path="/absolute/path/to/benchcab", ) == ( @@ -80,6 +82,7 @@ def test_skip_bitwise_comparison_step(self): project="tm70", config_path="/path/to/config.yaml", modules=["foo", "bar", "baz"], + pbs_config=internal.FLUXSITE_DEFAULT_PBS, skip_bitwise_cmp=True, benchcab_path="/absolute/path/to/benchcab", ) == ( @@ -145,7 +148,7 @@ def test_pbs_config_parameters(self): ) def test_default_pbs_config(self): - """Success case: if the pbs_config is empty, use the default values.""" + """Success 
case: if any key(s) of pbs_config is/are empty, use the default values.""" assert render_job_script( project="tm70", config_path="/path/to/config.yaml", From 39049bdbacac0fd26ede4e91415afec8d7412a82 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Sun, 21 Jan 2024 15:59:47 +1100 Subject: [PATCH 16/50] Clean up parsing optional config data --- benchcab/benchcab.py | 4 ++-- benchcab/config.py | 16 +++++++--------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index b519af2e..b77090a2 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -107,7 +107,7 @@ def _get_models(self, config: dict) -> list[Model]: for id, sub_config in enumerate(config["realisations"]): repo = create_repo( spec=sub_config.pop("repo"), - path=internal.SRC_DIR / config["name"], + path=internal.SRC_DIR / sub_config["name"], ) self._models.append(Model(repo=repo, model_id=id, **sub_config)) return self._models @@ -147,10 +147,10 @@ def fluxsite_submit_job( project=config["project"], config_path=config_path, modules=config["modules"], + pbs_config=config["fluxsite"]["pbs"], verbose=verbose, skip_bitwise_cmp="fluxsite-bitwise-cmp" in skip, benchcab_path=str(self.benchcab_exe_path), - pbs_config=config["fluxsite"]["pbs"], ) file.write(contents) diff --git a/benchcab/config.py b/benchcab/config.py index cc0c7bcd..6e7d7fbd 100644 --- a/benchcab/config.py +++ b/benchcab/config.py @@ -68,19 +68,17 @@ def validate_config(config: dict) -> bool: def read_optional_data(config: dict): - config["name"] = config.get("name", Path(".")) + if "realisations" in config: + for r in config["realisations"]: + r["name"] = r.get("name", str(internal.SRC_DIR)) + config["science_configurations"] = config.get("science_configurations", internal.DEFAULT_SCIENCE_CONFIGURATIONS) config["fluxsite"] = config.get("fluxsite", {}) + + config["fluxsite"]["multiprocess"] = config["fluxsite"].get("multiprocess", internal.FLUXSITE_DEFAULT_MULTIPROCESS) 
config["fluxsite"]["experiment"] = config["fluxsite"].get("experiment", internal.FLUXSITE_DEFAULT_EXPERIMENT) - config["fluxsite"]["pbs"] = config["fluxsite"].get("pbs", {}) - - pbs_config = config["fluxsite"]["pbs"] - pbs_config_params = ["mem", "ncpus", "storage", "walltime"] - for pcp in pbs_config_params: - pbs_config[pcp] = pbs_config.get(pcp, internal.FLUXSITE_DEFAULT_PBS[pcp]) - - pbs_config["multiprocess"] = internal.FLUXSITE_DEFAULT_MULTIPROCESS + config["fluxsite"]["pbs"] = config["fluxsite"].get("pbs", internal.FLUXSITE_DEFAULT_PBS) def read_config(config_path: str) -> dict: """Reads the config file and returns a dictionary containing the configurations. From b4a5069025e809caaa67690388d7874bf0da1156 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Mon, 22 Jan 2024 11:21:20 +1100 Subject: [PATCH 17/50] Add functionality and test for missing pbs keys --- benchcab/utils/pbs.py | 5 ++++- tests/test_pbs.py | 47 +++++++++++++++---------------------------- 2 files changed, 20 insertions(+), 32 deletions(-) diff --git a/benchcab/utils/pbs.py b/benchcab/utils/pbs.py index 99528172..9ed3e203 100644 --- a/benchcab/utils/pbs.py +++ b/benchcab/utils/pbs.py @@ -19,7 +19,10 @@ def render_job_script( This includes things such as running CABLE and running bitwise comparison jobs between model output files. 
""" - pbs_config = internal.FLUXSITE_DEFAULT_PBS | pbs_config + + pbs_missing_keys = internal.FLUXSITE_DEFAULT_PBS.keys() - pbs_config.keys() + if len(pbs_missing_keys) != 0: + raise ValueError(f"Default pbs parameters missing: {sorted(pbs_missing_keys)}") module_load_lines = "\n".join( f"module load {module_name}" for module_name in modules diff --git a/tests/test_pbs.py b/tests/test_pbs.py index e12a94b1..d547dc16 100644 --- a/tests/test_pbs.py +++ b/tests/test_pbs.py @@ -3,6 +3,8 @@ from benchcab import internal from benchcab.utils.pbs import render_job_script +import pytest +import re class TestRenderJobScript: """Tests for `render_job_script()`.""" @@ -148,34 +150,17 @@ def test_pbs_config_parameters(self): ) def test_default_pbs_config(self): - """Success case: if any key(s) of pbs_config is/are empty, use the default values.""" - assert render_job_script( - project="tm70", - config_path="/path/to/config.yaml", - modules=["foo", "bar", "baz"], - skip_bitwise_cmp=True, - benchcab_path="/absolute/path/to/benchcab", - pbs_config={}, - ) == ( - f"""#!/bin/bash -#PBS -l wd -#PBS -l ncpus={internal.FLUXSITE_DEFAULT_PBS["ncpus"]} -#PBS -l mem={internal.FLUXSITE_DEFAULT_PBS["mem"]} -#PBS -l walltime={internal.FLUXSITE_DEFAULT_PBS["walltime"]} -#PBS -q normal -#PBS -P tm70 -#PBS -j oe -#PBS -m e -#PBS -l storage=gdata/ks32+gdata/hh5 - -module purge -module load foo -module load bar -module load baz - -set -ev - -/absolute/path/to/benchcab fluxsite-run-tasks --config=/path/to/config.yaml - -""" - ) + """Failure case: if any key(s) of pbs_config is/are empty, fail the test.""" + pbs_missing_keys = ['mem', 'ncpus', 'storage'] + with pytest.raises( + ValueError, + match=f"Default pbs parameters missing: " + re.escape(str(pbs_missing_keys)) + ): + render_job_script( + project="tm70", + config_path="/path/to/config.yaml", + modules=["foo", "bar", "baz"], + skip_bitwise_cmp=True, + benchcab_path="/absolute/path/to/benchcab", + pbs_config={ "walltime" : "48:00:00" }, + ) \ No 
newline at end of file From 3279972bdc5d96b9df2cdd194a123e89c7664e3b Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Mon, 22 Jan 2024 11:22:14 +1100 Subject: [PATCH 18/50] Add ignoring case flag at the start of RE for mem in config.yml --- benchcab/data/config-schema.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchcab/data/config-schema.yml b/benchcab/data/config-schema.yml index 755c374e..2c8da5bb 100644 --- a/benchcab/data/config-schema.yml +++ b/benchcab/data/config-schema.yml @@ -83,7 +83,7 @@ fluxsite: required: false mem: type: "string" - regex: "^[0-9]+(?i)(mb|gb)$" + regex: "(?i)^[0-9]+(mb|gb)$" required: false walltime: type: "string" From f06a1e9d7451ef162402bd2785edf75ef1c3a9b5 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Mon, 22 Jan 2024 11:38:50 +1100 Subject: [PATCH 19/50] Use black and ruff for relevant code sections --- benchcab/benchcab.py | 1 - benchcab/config.py | 21 +++++++++++++++------ benchcab/utils/pbs.py | 11 ++++------- tests/test_pbs.py | 10 ++++++---- 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index b77090a2..d2b13016 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -196,7 +196,6 @@ def checkout(self, config_path: str, verbose: bool): ) cable_aux_repo.checkout(verbose=verbose) - rev_number_log_path = next_path("rev_number-*.log") print(f"Writing revision number info to {rev_number_log_path}") with rev_number_log_path.open("w", encoding="utf-8") as file: diff --git a/benchcab/config.py b/benchcab/config.py index 6e7d7fbd..17245b54 100644 --- a/benchcab/config.py +++ b/benchcab/config.py @@ -66,19 +66,28 @@ def validate_config(config: dict) -> bool: # Invalid raise ConfigValidationException(v) -def read_optional_data(config: dict): +def read_optional_data(config: dict): if "realisations" in config: for r in config["realisations"]: r["name"] = r.get("name", str(internal.SRC_DIR)) - config["science_configurations"] = 
config.get("science_configurations", internal.DEFAULT_SCIENCE_CONFIGURATIONS) + config["science_configurations"] = config.get( + "science_configurations", internal.DEFAULT_SCIENCE_CONFIGURATIONS + ) config["fluxsite"] = config.get("fluxsite", {}) - - config["fluxsite"]["multiprocess"] = config["fluxsite"].get("multiprocess", internal.FLUXSITE_DEFAULT_MULTIPROCESS) - config["fluxsite"]["experiment"] = config["fluxsite"].get("experiment", internal.FLUXSITE_DEFAULT_EXPERIMENT) - config["fluxsite"]["pbs"] = config["fluxsite"].get("pbs", internal.FLUXSITE_DEFAULT_PBS) + + config["fluxsite"]["multiprocess"] = config["fluxsite"].get( + "multiprocess", internal.FLUXSITE_DEFAULT_MULTIPROCESS + ) + config["fluxsite"]["experiment"] = config["fluxsite"].get( + "experiment", internal.FLUXSITE_DEFAULT_EXPERIMENT + ) + config["fluxsite"]["pbs"] = config["fluxsite"].get( + "pbs", internal.FLUXSITE_DEFAULT_PBS + ) + def read_config(config_path: str) -> dict: """Reads the config file and returns a dictionary containing the configurations. diff --git a/benchcab/utils/pbs.py b/benchcab/utils/pbs.py index 9ed3e203..b85a7aff 100644 --- a/benchcab/utils/pbs.py +++ b/benchcab/utils/pbs.py @@ -5,6 +5,7 @@ from benchcab import internal + def render_job_script( project: str, config_path: str, @@ -19,20 +20,16 @@ def render_job_script( This includes things such as running CABLE and running bitwise comparison jobs between model output files. 
""" - pbs_missing_keys = internal.FLUXSITE_DEFAULT_PBS.keys() - pbs_config.keys() if len(pbs_missing_keys) != 0: - raise ValueError(f"Default pbs parameters missing: {sorted(pbs_missing_keys)}") + msg = f"Default pbs parameters missing: {sorted(pbs_missing_keys)}" + raise ValueError(msg) module_load_lines = "\n".join( f"module load {module_name}" for module_name in modules ) verbose_flag = "-v" if verbose else "" - storage_flags = [ - "gdata/ks32", - "gdata/hh5", - *pbs_config["storage"] - ] + storage_flags = ["gdata/ks32", "gdata/hh5", *pbs_config["storage"]] return f"""#!/bin/bash #PBS -l wd #PBS -l ncpus={pbs_config["ncpus"]} diff --git a/tests/test_pbs.py b/tests/test_pbs.py index d547dc16..2eda43e9 100644 --- a/tests/test_pbs.py +++ b/tests/test_pbs.py @@ -6,6 +6,7 @@ import pytest import re + class TestRenderJobScript: """Tests for `render_job_script()`.""" @@ -151,10 +152,11 @@ def test_pbs_config_parameters(self): def test_default_pbs_config(self): """Failure case: if any key(s) of pbs_config is/are empty, fail the test.""" - pbs_missing_keys = ['mem', 'ncpus', 'storage'] + pbs_missing_keys = ["mem", "ncpus", "storage"] with pytest.raises( ValueError, - match=f"Default pbs parameters missing: " + re.escape(str(pbs_missing_keys)) + match="Default pbs parameters missing: " + + re.escape(str(pbs_missing_keys)), ): render_job_script( project="tm70", @@ -162,5 +164,5 @@ def test_default_pbs_config(self): modules=["foo", "bar", "baz"], skip_bitwise_cmp=True, benchcab_path="/absolute/path/to/benchcab", - pbs_config={ "walltime" : "48:00:00" }, - ) \ No newline at end of file + pbs_config={"walltime": "48:00:00"}, + ) From 0af9e4e6751a4c0adbaeccba88994697b6b9f8f4 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Tue, 23 Jan 2024 04:20:27 +1100 Subject: [PATCH 20/50] Set default alias name of branch as None, and fix default key issues --- benchcab/benchcab.py | 9 +++++---- benchcab/config.py | 6 +++--- benchcab/data/config-schema.yml | 1 + 3 files changed, 9 
insertions(+), 7 deletions(-) diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index d2b13016..dc0c25f7 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -107,7 +107,8 @@ def _get_models(self, config: dict) -> list[Model]: for id, sub_config in enumerate(config["realisations"]): repo = create_repo( spec=sub_config.pop("repo"), - path=internal.SRC_DIR / sub_config["name"], + path=internal.SRC_DIR + / (Path() if sub_config["name"] is None else sub_config["name"]), ) self._models.append(Model(repo=repo, model_id=id, **sub_config)) return self._models @@ -245,8 +246,8 @@ def fluxsite_run_tasks(self, config_path: str, verbose: bool): tasks = self.tasks if self.tasks else self._initialise_tasks(config) print("Running fluxsite tasks...") - if config["multiprocess"]: - ncpus = config["pbs"]["ncpus"] + if config["fluxsite"]["multiprocess"]: + ncpus = config["fluxsite"]["pbs"]["ncpus"] run_tasks_in_parallel(tasks, n_processes=ncpus, verbose=verbose) else: run_tasks(tasks, verbose=verbose) @@ -267,7 +268,7 @@ def fluxsite_bitwise_cmp(self, config_path: str, verbose: bool): comparisons = get_fluxsite_comparisons(tasks) print("Running comparison tasks...") - if config["multiprocess"]: + if config["fluxsite"]["multiprocess"]: ncpus = config["fluxsite"]["pbs"]["ncpus"] run_comparisons_in_parallel(comparisons, n_processes=ncpus, verbose=verbose) else: diff --git a/benchcab/config.py b/benchcab/config.py index 17245b54..bfc4f475 100644 --- a/benchcab/config.py +++ b/benchcab/config.py @@ -70,7 +70,7 @@ def validate_config(config: dict) -> bool: def read_optional_data(config: dict): if "realisations" in config: for r in config["realisations"]: - r["name"] = r.get("name", str(internal.SRC_DIR)) + r["name"] = r.get("name") config["science_configurations"] = config.get( "science_configurations", internal.DEFAULT_SCIENCE_CONFIGURATIONS @@ -84,8 +84,8 @@ def read_optional_data(config: dict): config["fluxsite"]["experiment"] = config["fluxsite"].get( "experiment", 
internal.FLUXSITE_DEFAULT_EXPERIMENT ) - config["fluxsite"]["pbs"] = config["fluxsite"].get( - "pbs", internal.FLUXSITE_DEFAULT_PBS + config["fluxsite"]["pbs"] = internal.FLUXSITE_DEFAULT_PBS | config["fluxsite"].get( + "pbs", {} ) diff --git a/benchcab/data/config-schema.yml b/benchcab/data/config-schema.yml index 2c8da5bb..87235f87 100644 --- a/benchcab/data/config-schema.yml +++ b/benchcab/data/config-schema.yml @@ -44,6 +44,7 @@ realisations: revision: type: "integer" name: + nullable: true type: "string" required: false build_script: From 7686403c0215ec215aaebfa69ecf2f2869951a04 Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Wed, 24 Jan 2024 11:06:55 +1100 Subject: [PATCH 21/50] Refactoring of verbosity --- benchcab/benchcab.py | 69 +++++++++++++++++---------------- benchcab/comparison.py | 25 ++++++------ benchcab/environment_modules.py | 10 ++--- benchcab/fluxsite.py | 68 ++++++++++++++------------------ benchcab/model.py | 41 +++++++++----------- benchcab/utils/fs.py | 18 ++++----- benchcab/utils/repo.py | 36 +++++------------ benchcab/utils/subprocess.py | 8 ++-- benchcab/workdir.py | 12 ++---- tests/conftest.py | 1 - tests/test_fluxsite.py | 2 +- tests/test_model.py | 2 +- tests/test_subprocess.py | 18 +++++++-- 13 files changed, 139 insertions(+), 171 deletions(-) diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index d02b94fd..2c4583b8 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -1,3 +1,6 @@ +# Copyright 2022 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. 
+# SPDX-License-Identifier: Apache-2.0 + """Contains the benchcab application class.""" import grp @@ -142,12 +145,12 @@ def _initialise_tasks(self, config: dict) -> list[Task]: ) return self.tasks - def validate_config(self, config_path: str, verbose: bool): + def validate_config(self, config_path: str): """Endpoint for `benchcab validate_config`.""" _ = self._get_config(config_path) def fluxsite_submit_job( - self, config_path: str, verbose: bool, skip: list[str] + self, config_path: str, skip: list[str] ) -> None: """Submits the PBS job script step in the fluxsite test workflow.""" config = self._get_config(config_path) @@ -168,7 +171,6 @@ def fluxsite_submit_job( project=config["project"], config_path=config_path, modules=config["modules"], - verbose=verbose, skip_bitwise_cmp="fluxsite-bitwise-cmp" in skip, benchcab_path=str(self.benchcab_exe_path), pbs_config=config.get("fluxsite", {}).get("pbs"), @@ -179,7 +181,6 @@ def fluxsite_submit_job( proc = self.subprocess_handler.run_cmd( f"qsub {job_script_path}", capture_output=True, - verbose=verbose, ) except CalledProcessError as exc: self.logger.error("when submitting job to NCI queue, details to follow") @@ -198,17 +199,17 @@ def fluxsite_submit_job( ] ) - def checkout(self, config_path: str, verbose: bool): + def checkout(self, config_path: str): """Endpoint for `benchcab checkout`.""" config = self._get_config(config_path) self._validate_environment(project=config["project"], modules=config["modules"]) - mkdir(internal.SRC_DIR, exist_ok=True, verbose=True) + mkdir(internal.SRC_DIR, exist_ok=True) self.logger.info("Checking out repositories...") rev_number_log = "" for model in self._get_models(config): - model.repo.checkout(verbose=verbose) + model.repo.checkout() rev_number_log += f"{model.name}: {model.repo.get_revision()}\n" # TODO(Sean) we should archive revision numbers for CABLE-AUX @@ -217,14 +218,14 @@ def checkout(self, config_path: str, verbose: bool): 
branch_path=internal.CABLE_AUX_RELATIVE_SVN_PATH, path=internal.SRC_DIR / "CABLE-AUX", ) - cable_aux_repo.checkout(verbose=verbose) + cable_aux_repo.checkout() rev_number_log_path = next_path("rev_number-*.log") self.logger.info(f"Writing revision number info to {rev_number_log_path}") with rev_number_log_path.open("w", encoding="utf-8") as file: file.write(rev_number_log) - def build(self, config_path: str, verbose: bool): + def build(self, config_path: str): """Endpoint for `benchcab build`.""" config = self._get_config(config_path) self._validate_environment(project=config["project"], modules=config["modules"]) @@ -233,35 +234,35 @@ def build(self, config_path: str, verbose: bool): if repo.build_script: self.logger.info( [ - "Compiling CABLE using custom build script for " + "Compiling CABLE using custom build script for ", f"realisation {repo.name}..." ] ) - repo.custom_build(modules=config["modules"], verbose=verbose) + repo.custom_build(modules=config["modules"]) else: build_mode = "with MPI" if internal.MPI else "serially" self.logger.info( f"Compiling CABLE {build_mode} for realisation {repo.name}..." 
) - repo.pre_build(verbose=verbose) - repo.run_build(modules=config["modules"], verbose=verbose) - repo.post_build(verbose=verbose) + repo.pre_build() + repo.run_build(modules=config["modules"]) + repo.post_build() self.logger.info(f"Successfully compiled CABLE for realisation {repo.name}") - def fluxsite_setup_work_directory(self, config_path: str, verbose: bool): + def fluxsite_setup_work_directory(self, config_path: str): """Endpoint for `benchcab fluxsite-setup-work-dir`.""" config = self._get_config(config_path) self._validate_environment(project=config["project"], modules=config["modules"]) tasks = self.tasks if self.tasks else self._initialise_tasks(config) self.logger.info("Setting up run directory tree for fluxsite tests...") - setup_fluxsite_directory_tree(verbose=verbose) + setup_fluxsite_directory_tree() self.logger.info("Setting up tasks...") for task in tasks: - task.setup_task(verbose=verbose) + task.setup_task() self.logger.info("Successfully setup fluxsite tasks") - def fluxsite_run_tasks(self, config_path: str, verbose: bool): + def fluxsite_run_tasks(self, config_path: str): """Endpoint for `benchcab fluxsite-run-tasks`.""" config = self._get_config(config_path) self._validate_environment(project=config["project"], modules=config["modules"]) @@ -276,12 +277,12 @@ def fluxsite_run_tasks(self, config_path: str, verbose: bool): ncpus = config.get("pbs", {}).get( "ncpus", internal.FLUXSITE_DEFAULT_PBS["ncpus"] ) - run_tasks_in_parallel(tasks, n_processes=ncpus, verbose=verbose) + run_tasks_in_parallel(tasks, n_processes=ncpus) else: - run_tasks(tasks, verbose=verbose) + run_tasks(tasks) self.logger.info("Successfully ran fluxsite tasks") - def fluxsite_bitwise_cmp(self, config_path: str, verbose: bool): + def fluxsite_bitwise_cmp(self, config_path: str): """Endpoint for `benchcab fluxsite-bitwise-cmp`.""" config = self._get_config(config_path) self._validate_environment(project=config["project"], modules=config["modules"]) @@ -304,29 +305,29 @@ def 
fluxsite_bitwise_cmp(self, config_path: str, verbose: bool): ncpus = config["fluxsite"]["pbs"]["ncpus"] except KeyError: ncpus = internal.FLUXSITE_DEFAULT_PBS["ncpus"] - run_comparisons_in_parallel(comparisons, n_processes=ncpus, verbose=verbose) + run_comparisons_in_parallel(comparisons, n_processes=ncpus) else: - run_comparisons(comparisons, verbose=verbose) + run_comparisons(comparisons) self.logger.info("Successfully ran comparison tasks") def fluxsite( - self, config_path: str, no_submit: bool, verbose: bool, skip: list[str] + self, config_path: str, no_submit: bool, skip: list[str] ): """Endpoint for `benchcab fluxsite`.""" - self.checkout(config_path, verbose) - self.build(config_path, verbose) - self.fluxsite_setup_work_directory(config_path, verbose) + self.checkout(config_path) + self.build(config_path) + self.fluxsite_setup_work_directory(config_path) if no_submit: - self.fluxsite_run_tasks(config_path, verbose) + self.fluxsite_run_tasks(config_path) if "fluxsite-bitwise-cmp" not in skip: - self.fluxsite_bitwise_cmp(config_path, verbose) + self.fluxsite_bitwise_cmp(config_path) else: - self.fluxsite_submit_job(config_path, verbose, skip) + self.fluxsite_submit_job(config_path, skip) - def spatial(self, config_path: str, verbose: bool): + def spatial(self, config_path: str): """Endpoint for `benchcab spatial`.""" - def run(self, config_path: str, no_submit: bool, verbose: bool, skip: list[str]): + def run(self, config_path: str, no_submit: bool, skip: list[str]): """Endpoint for `benchcab run`.""" - self.fluxsite(config_path, no_submit, verbose, skip) - self.spatial(config_path, verbose) + self.fluxsite(config_path, no_submit, skip) + self.spatial(config_path) diff --git a/benchcab/comparison.py b/benchcab/comparison.py index 1af9ca9c..93216456 100644 --- a/benchcab/comparison.py +++ b/benchcab/comparison.py @@ -11,6 +11,7 @@ from benchcab import internal from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface +from 
benchcab.utils import get_logger class ComparisonTask: @@ -34,45 +35,45 @@ def __init__( """ self.files = files self.task_name = task_name + self.logger = get_logger() - def run(self, verbose=False) -> None: + def run(self) -> None: """Executes `nccmp -df` on the NetCDF files pointed to by `self.files`.""" file_a, file_b = self.files - if verbose: - print(f"Comparing files {file_a.name} and {file_b.name} bitwise...") + self.logger.debug(f"Comparing files {file_a.name} and {file_b.name} bitwise...") try: self.subprocess_handler.run_cmd( f"nccmp -df {file_a} {file_b}", capture_output=True, - verbose=verbose, ) - print(f"Success: files {file_a.name} {file_b.name} are identical") + self.logger.info(f"Success: files {file_a.name} {file_b.name} are identical") except CalledProcessError as exc: output_file = ( internal.FLUXSITE_DIRS["BITWISE_CMP"] / f"{self.task_name}.txt" ) with output_file.open("w", encoding="utf-8") as file: file.write(exc.stdout) - print( - f"Failure: files {file_a.name} {file_b.name} differ. " + + self.logger.error([ + f"Failure: files {file_a.name} {file_b.name} differ. 
", f"Results of diff have been written to {output_file}" - ) + ]) + sys.stdout.flush() -def run_comparisons(comparison_tasks: list[ComparisonTask], verbose=False) -> None: +def run_comparisons(comparison_tasks: list[ComparisonTask]) -> None: """Runs bitwise comparison tasks serially.""" for task in comparison_tasks: - task.run(verbose=verbose) + task.run() def run_comparisons_in_parallel( comparison_tasks: list[ComparisonTask], n_processes=internal.FLUXSITE_DEFAULT_PBS["ncpus"], - verbose=False, ) -> None: """Runs bitwise comparison tasks in parallel across multiple processes.""" - run_task = operator.methodcaller("run", verbose=verbose) + run_task = operator.methodcaller("run") with multiprocessing.Pool(n_processes) as pool: pool.map(run_task, comparison_tasks, chunksize=1) diff --git a/benchcab/environment_modules.py b/benchcab/environment_modules.py index c0a91ece..2f22d451 100644 --- a/benchcab/environment_modules.py +++ b/benchcab/environment_modules.py @@ -7,6 +7,7 @@ import sys from abc import ABC as AbstractBaseClass # noqa: N811 from abc import abstractmethod +from benchcab.utils import get_logger sys.path.append("/opt/Modules/v4.3.0/init") try: @@ -48,16 +49,15 @@ def module_unload(self, *args: str) -> None: """Wrapper around `module unload modulefile...`.""" @contextlib.contextmanager - def load(self, modules: list[str], verbose=False): + def load(self, modules: list[str]): """Context manager for loading and unloading modules.""" - if verbose: - print("Loading modules: " + " ".join(modules)) + logger = get_logger() + logger.debug("Loading modules: " + " ".join(modules)) self.module_load(*modules) try: yield finally: - if verbose: - print("Unloading modules: " + " ".join(modules)) + logger.debug("Unloading modules: " + " ".join(modules)) self.module_unload(*modules) diff --git a/benchcab/fluxsite.py b/benchcab/fluxsite.py index a37265f7..b1d1a0c2 100644 --- a/benchcab/fluxsite.py +++ b/benchcab/fluxsite.py @@ -20,6 +20,7 @@ from benchcab.model import 
Model from benchcab.utils.fs import chdir, mkdir from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface +from benchcab.utils import get_logger # fmt: off # ====================================================== @@ -133,6 +134,7 @@ def __init__( self.met_forcing_file = met_forcing_file self.sci_conf_id = sci_conf_id self.sci_config = sci_config + self.logger = get_logger() def get_task_name(self) -> str: """Returns the file name convention used for this task.""" @@ -147,7 +149,7 @@ def get_log_filename(self) -> str: """Returns the file name convention used for the log file.""" return f"{self.get_task_name()}_log.txt" - def setup_task(self, verbose=False): + def setup_task(self): """Does all file manipulations to run cable in the task directory. These include: @@ -159,25 +161,22 @@ def setup_task(self, verbose=False): 5. make appropriate adjustments to namelist files 6. apply a branch patch if specified """ - if verbose: - print(f"Setting up task: {self.get_task_name()}") + self.logger.debug(f"Setting up task: {self.get_task_name()}") mkdir( internal.FLUXSITE_DIRS["TASKS"] / self.get_task_name(), - verbose=verbose, parents=True, exist_ok=True, ) - self.clean_task(verbose=verbose) - self.fetch_files(verbose=verbose) + self.clean_task() + self.fetch_files() nml_path = ( internal.FLUXSITE_DIRS["TASKS"] / self.get_task_name() / internal.CABLE_NML ) - if verbose: - print(f" Adding base configurations to CABLE namelist file {nml_path}") + self.logger.debug(f" Adding base configurations to CABLE namelist file {nml_path}") patch_namelist( nml_path, { @@ -211,28 +210,24 @@ def setup_task(self, verbose=False): }, ) - if verbose: - print(f" Adding science configurations to CABLE namelist file {nml_path}") + self.logger.debug(f" Adding science configurations to CABLE namelist file {nml_path}") patch_namelist(nml_path, self.sci_config) if self.model.patch: - if verbose: - print( + self.logger.debug( f" Adding branch specific configurations to CABLE 
namelist file {nml_path}" ) patch_namelist(nml_path, self.model.patch) if self.model.patch_remove: - if verbose: - print( + self.logger.debug( f" Removing branch specific configurations from CABLE namelist file {nml_path}" ) patch_remove_namelist(nml_path, self.model.patch_remove) - def clean_task(self, verbose=False): + def clean_task(self): """Cleans output files, namelist files, log files and cable executables if they exist.""" - if verbose: - print(" Cleaning task") + self.logger.debug(" Cleaning task") task_dir = internal.FLUXSITE_DIRS["TASKS"] / self.get_task_name() @@ -262,7 +257,7 @@ def clean_task(self, verbose=False): return self - def fetch_files(self, verbose=False): + def fetch_files(self): """Retrieves all files necessary to run cable in the task directory. Namely: @@ -271,8 +266,7 @@ def fetch_files(self, verbose=False): """ task_dir = internal.FLUXSITE_DIRS["TASKS"] / self.get_task_name() - if verbose: - print( + self.logger.debug( f" Copying namelist files from {internal.NAMELIST_DIR} to {task_dir}" ) @@ -281,25 +275,23 @@ def fetch_files(self, verbose=False): exe_src = self.model.get_exe_path() exe_dest = task_dir / internal.CABLE_EXE - if verbose: - print(f" Copying CABLE executable from {exe_src} to {exe_dest}") + self.logger.debug(f" Copying CABLE executable from {exe_src} to {exe_dest}") shutil.copy(exe_src, exe_dest) return self - def run(self, verbose=False): + def run(self): """Runs a single fluxsite task.""" task_name = self.get_task_name() task_dir = internal.FLUXSITE_DIRS["TASKS"] / task_name - if verbose: - print( + self.logger.debug([ f"Running task {task_name}... CABLE standard output " f"saved in {task_dir / internal.CABLE_STDOUT_FILENAME}" - ) + ]) try: - self.run_cable(verbose=verbose) - self.add_provenance_info(verbose=verbose) + self.run_cable() + self.add_provenance_info() except CableError: # Note: here we suppress CABLE specific errors so that `benchcab` # exits successfully. 
This then allows us to run bitwise comparisons @@ -308,7 +300,7 @@ def run(self, verbose=False): pass sys.stdout.flush() - def run_cable(self, verbose=False): + def run_cable(self): """Run the CABLE executable for the given task. Raises `CableError` when CABLE returns a non-zero exit code. @@ -321,14 +313,13 @@ def run_cable(self, verbose=False): with chdir(task_dir): self.subprocess_handler.run_cmd( f"./{internal.CABLE_EXE} {internal.CABLE_NML}", - output_file=stdout_path.relative_to(task_dir), - verbose=verbose, + output_file=stdout_path.relative_to(task_dir) ) except CalledProcessError as exc: - print(f"Error: CABLE returned an error for task {task_name}") + self.logger.error(f"Error: CABLE returned an error for task {task_name}") raise CableError from exc - def add_provenance_info(self, verbose=False): + def add_provenance_info(self): """Adds provenance information to global attributes of netcdf output file. Attributes include branch url, branch revision number and key value pairs in @@ -338,8 +329,7 @@ def add_provenance_info(self, verbose=False): nml = f90nml.read( internal.FLUXSITE_DIRS["TASKS"] / self.get_task_name() / internal.CABLE_NML ) - if verbose: - print(f"Adding attributes to output file: {nc_output_path}") + self.logger.debug(f"Adding attributes to output file: {nc_output_path}") with netCDF4.Dataset(nc_output_path, "r+") as nc_output: nc_output.setncatts( { @@ -378,17 +368,17 @@ def get_fluxsite_tasks( return tasks -def run_tasks(tasks: list[Task], verbose=False): +def run_tasks(tasks: list[Task]): """Runs tasks in `tasks` serially.""" for task in tasks: - task.run(verbose=verbose) + task.run() def run_tasks_in_parallel( - tasks: list[Task], n_processes=internal.FLUXSITE_DEFAULT_PBS["ncpus"], verbose=False + tasks: list[Task], n_processes=internal.FLUXSITE_DEFAULT_PBS["ncpus"] ): """Runs tasks in `tasks` in parallel across multiple processes.""" - run_task = operator.methodcaller("run", verbose=verbose) + run_task = operator.methodcaller("run") 
with multiprocessing.Pool(n_processes) as pool: pool.map(run_task, tasks, chunksize=1) diff --git a/benchcab/model.py b/benchcab/model.py index 767e1eef..a84f0621 100644 --- a/benchcab/model.py +++ b/benchcab/model.py @@ -15,6 +15,7 @@ from benchcab.utils.fs import chdir, copy2, rename from benchcab.utils.repo import GitRepo, Repo from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface +from benchcab.utils import get_logger class Model: @@ -56,6 +57,7 @@ def __init__( self.build_script = build_script self._model_id = model_id self.src_dir = Path() + self.logger = get_logger() # TODO(Sean) we should not have to know whether `repo` is a `GitRepo` or # `SVNRepo`, we should only be working with the `Repo` interface. # See issue https://github.com/CABLE-LSM/benchcab/issues/210 @@ -80,7 +82,7 @@ def get_exe_path(self) -> Path: internal.SRC_DIR / self.name / self.src_dir / "offline" / internal.CABLE_EXE ) - def custom_build(self, modules: list[str], verbose=False): + def custom_build(self, modules: list[str]): """Build CABLE using a custom build script.""" build_script_path = internal.SRC_DIR / self.name / self.build_script @@ -94,56 +96,50 @@ def custom_build(self, modules: list[str], verbose=False): tmp_script_path = build_script_path.parent / "tmp-build.sh" - if verbose: - print(f"Copying {build_script_path} to {tmp_script_path}") + self.logger.debug(f"Copying {build_script_path} to {tmp_script_path}") shutil.copy(build_script_path, tmp_script_path) - if verbose: - print(f"chmod +x {tmp_script_path}") + self.logger.debug(f"chmod +x {tmp_script_path}") tmp_script_path.chmod(tmp_script_path.stat().st_mode | stat.S_IEXEC) - if verbose: - print( + self.logger.debug([ f"Modifying {tmp_script_path.name}: remove lines that call " "environment modules" - ) + ]) remove_module_lines(tmp_script_path) with chdir(build_script_path.parent), self.modules_handler.load( - modules, verbose=verbose + modules ): self.subprocess_handler.run_cmd( - 
f"./{tmp_script_path.name}", - verbose=verbose, + f"./{tmp_script_path.name}" ) - def pre_build(self, verbose=False): + def pre_build(self): """Runs CABLE pre-build steps.""" path_to_repo = internal.SRC_DIR / self.name tmp_dir = path_to_repo / self.src_dir / "offline" / ".tmp" if not tmp_dir.exists(): - if verbose: - print(f"mkdir {tmp_dir}") + self.logger.debug(f"mkdir {tmp_dir}") tmp_dir.mkdir() for pattern in internal.OFFLINE_SOURCE_FILES: for path in (path_to_repo / self.src_dir).glob(pattern): if not path.is_file(): continue - copy2(path, tmp_dir, verbose=verbose) + copy2(path, tmp_dir) copy2( path_to_repo / self.src_dir / "offline" / "Makefile", - tmp_dir, - verbose=verbose, + tmp_dir ) - def run_build(self, modules: list[str], verbose=False): + def run_build(self, modules: list[str]): """Runs CABLE build scripts.""" path_to_repo = internal.SRC_DIR / self.name tmp_dir = path_to_repo / self.src_dir / "offline" / ".tmp" - with chdir(tmp_dir), self.modules_handler.load(modules, verbose=verbose): + with chdir(tmp_dir), self.modules_handler.load(modules): env = os.environ.copy() env["NCDIR"] = f"{env['NETCDF_ROOT']}/lib/Intel" env["NCMOD"] = f"{env['NETCDF_ROOT']}/include/Intel" @@ -153,18 +149,17 @@ def run_build(self, modules: list[str], verbose=False): env["FC"] = "mpif90" if internal.MPI else "ifort" self.subprocess_handler.run_cmd( - "make mpi" if internal.MPI else "make", env=env, verbose=verbose + "make mpi" if internal.MPI else "make", env=env ) - def post_build(self, verbose=False): + def post_build(self): """Runs CABLE post-build steps.""" path_to_repo = internal.SRC_DIR / self.name tmp_dir = path_to_repo / self.src_dir / "offline" / ".tmp" rename( tmp_dir / internal.CABLE_EXE, - path_to_repo / self.src_dir / "offline" / internal.CABLE_EXE, - verbose=verbose, + path_to_repo / self.src_dir / "offline" / internal.CABLE_EXE ) diff --git a/benchcab/utils/fs.py b/benchcab/utils/fs.py index d3eee46a..8f764d3f 100644 --- a/benchcab/utils/fs.py +++ 
b/benchcab/utils/fs.py @@ -7,6 +7,7 @@ import os import shutil from pathlib import Path +from benchcab.utils import get_logger @contextlib.contextmanager @@ -20,17 +21,15 @@ def chdir(newdir: Path): os.chdir(prevdir) -def rename(src: Path, dest: Path, verbose=False): +def rename(src: Path, dest: Path): """A wrapper around `pathlib.Path.rename` with optional loggging.""" - if verbose: - print(f"mv {src} {dest}") + get_logger().debug(f"mv {src} {dest}") src.rename(dest) -def copy2(src: Path, dest: Path, verbose=False): +def copy2(src: Path, dest: Path): """A wrapper around `shutil.copy2` with optional logging.""" - if verbose: - print(f"cp -p {src} {dest}") + get_logger().debug(f"cp -p {src} {dest}") shutil.copy2(src, dest) @@ -58,18 +57,15 @@ def next_path(path_pattern: str, path: Path = Path(), sep: str = "-") -> Path: return Path(f"{common_filename}{sep}{new_file_index}{loc_pattern.suffix}") -def mkdir(new_path: Path, verbose=False, **kwargs): +def mkdir(new_path: Path, **kwargs): """Create the `new_path` directory. Parameters ---------- new_path : Path Path to the directory to be created. - verbose : bool, default False - Additional level of logging if True **kwargs : dict, optional Additional options for `pathlib.Path.mkdir()` """ - if verbose: - print(f"Creating {new_path} directory") + get_logger().debug(f"Creating {new_path} directory") new_path.mkdir(**kwargs) diff --git a/benchcab/utils/repo.py b/benchcab/utils/repo.py index bffbb8df..f7025be5 100644 --- a/benchcab/utils/repo.py +++ b/benchcab/utils/repo.py @@ -20,14 +20,8 @@ class Repo(AbstractBaseClass): """ @abstractmethod - def checkout(self, verbose=False): - """Checkout the source code. - - Parameters - ---------- - verbose: bool, optional - Enable or disable verbose output. By default `verbose` is `False`. 
- """ + def checkout(self): + """Checkout the source code.""" @abstractmethod def get_revision(self) -> str: @@ -86,20 +80,13 @@ def __init__( self.commit = commit self.logger = get_logger() - def checkout(self, verbose=False): - """Checkout the source code. - - Parameters - ---------- - verbose: bool, optional - Enable or disable verbose output. - """ + def checkout(self): + """Checkout the source code.""" # TODO(Sean) the gitpython package provides an interface for displaying # remote progress. See # https://gitpython.readthedocs.io/en/stable/reference.html#git.remote.RemoteProgress self.subprocess_handler.run_cmd( - f"git clone --branch {self.branch} -- {self.url} {self.path}", - verbose=verbose, + f"git clone --branch {self.branch} -- {self.url} {self.path}" ) if self.commit: self.logger.debug(f"Reset to commit {self.commit} (hard reset)") @@ -170,15 +157,10 @@ def __init__( self.branch_path = branch_path self.revision = revision self.path = path / Path(branch_path).name if path.is_dir() else path + self.logger = get_logger() - def checkout(self, verbose=False): - """Checkout the source code. - - Parameters - ---------- - verbose: bool, optional - Enable or disable verbose output. By default `verbose` is `False`. 
- """ + def checkout(self): + """Checkout the source code.""" cmd = "svn checkout" if self.revision: @@ -186,7 +168,7 @@ def checkout(self, verbose=False): cmd += f" {internal.CABLE_SVN_ROOT}/{self.branch_path} {self.path}" - self.subprocess_handler.run_cmd(cmd, verbose=verbose) + self.subprocess_handler.run_cmd(cmd) self.logger.info( f"Successfully checked out {self.path.name} - {self.get_revision()}" diff --git a/benchcab/utils/subprocess.py b/benchcab/utils/subprocess.py index fe7e250b..d9faa7e2 100644 --- a/benchcab/utils/subprocess.py +++ b/benchcab/utils/subprocess.py @@ -9,6 +9,7 @@ from abc import ABC as AbstractBaseClass # noqa: N811 from abc import abstractmethod from typing import Any, Optional +from benchcab.utils import get_logger class SubprocessWrapperInterface(AbstractBaseClass): @@ -24,7 +25,6 @@ def run_cmd( cmd: str, capture_output: bool = False, output_file: Optional[pathlib.Path] = None, - verbose: bool = False, env: Optional[dict] = None, ) -> subprocess.CompletedProcess: """A wrapper around the `subprocess.run` function for executing system commands.""" @@ -38,7 +38,6 @@ def run_cmd( cmd: str, capture_output: bool = False, output_file: Optional[pathlib.Path] = None, - verbose: bool = False, env: Optional[dict] = None, ) -> subprocess.CompletedProcess: """Constructor. @@ -51,8 +50,6 @@ def run_cmd( Capture the output, by default False output_file : Optional[pathlib.Path], optional Output file, by default None - verbose : bool, optional - Verbose output, by default False env : Optional[dict], optional Environment vars to pass, by default None @@ -61,6 +58,8 @@ def run_cmd( subprocess.CompletedProcess _description_ """ + # Use the logging level (10 = Debug) to determine verbosity. 
+ verbose = get_logger().getEffectiveLevel() == 10 kwargs: Any = {} with contextlib.ExitStack() as stack: if capture_output: @@ -79,6 +78,7 @@ def run_cmd( if verbose: print(cmd) + proc = subprocess.run(cmd, shell=True, check=True, **kwargs) return proc diff --git a/benchcab/workdir.py b/benchcab/workdir.py index d695337b..27a0a1ca 100644 --- a/benchcab/workdir.py +++ b/benchcab/workdir.py @@ -18,13 +18,7 @@ def clean_directory_tree(): shutil.rmtree(internal.RUN_DIR) -def setup_fluxsite_directory_tree(verbose=False): - """Generate the directory structure used by `benchcab`. - - Parameters - ---------- - verbose : bool, default False - Additional level of logging if True - """ +def setup_fluxsite_directory_tree(): + """Generate the directory structure used by `benchcab`.""" for path in internal.FLUXSITE_DIRS.values(): - mkdir(path, verbose=verbose, parents=True, exist_ok=True) + mkdir(path, parents=True, exist_ok=True) diff --git a/tests/conftest.py b/tests/conftest.py index 6f20dc20..5dcadf78 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -111,7 +111,6 @@ def run_cmd( cmd: str, capture_output: bool = False, output_file: Optional[Path] = None, - verbose: bool = False, env: Optional[dict] = None, ) -> CompletedProcess: self.commands.append(cmd) diff --git a/tests/test_fluxsite.py b/tests/test_fluxsite.py index a4988dac..978d5926 100644 --- a/tests/test_fluxsite.py +++ b/tests/test_fluxsite.py @@ -33,7 +33,7 @@ def __init__(self) -> None: self.branch = "test-branch" self.revision = "1234" - def checkout(self, verbose=False): + def checkout(self): pass def get_branch_name(self) -> str: diff --git a/tests/test_model.py b/tests/test_model.py index a1c68580..5ae06dfa 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -25,7 +25,7 @@ class MockRepo(Repo): def __init__(self) -> None: self.handle = "trunk" - def checkout(self, verbose=False): + def checkout(self): pass def get_branch_name(self) -> str: diff --git a/tests/test_subprocess.py 
b/tests/test_subprocess.py index 868d12d2..34148b18 100644 --- a/tests/test_subprocess.py +++ b/tests/test_subprocess.py @@ -7,6 +7,8 @@ import pytest from benchcab.utils.subprocess import SubprocessWrapper +from benchcab.utils import get_logger +import logging class TestRunCmd: @@ -19,6 +21,7 @@ def subprocess_handler(self): def test_stdout_is_suppressed_in_non_verbose_mode(self, subprocess_handler, capfd): """Success case: test stdout is suppressed in non-verbose mode.""" + get_logger().setLevel(logging.INFO) # Set logging to info, disabling verbose output subprocess_handler.run_cmd("echo foo") captured = capfd.readouterr() assert not captured.out @@ -26,6 +29,7 @@ def test_stdout_is_suppressed_in_non_verbose_mode(self, subprocess_handler, capf def test_stderr_is_suppressed_in_non_verbose_mode(self, subprocess_handler, capfd): """Success case: test stderr is suppressed in non-verbose mode.""" + get_logger().setLevel(logging.INFO) # Set logging to info, disabling verbose output subprocess_handler.run_cmd("echo foo 1>&2") captured = capfd.readouterr() assert not captured.out @@ -35,7 +39,8 @@ def test_command_and_stdout_is_printed_in_verbose_mode( self, subprocess_handler, capfd ): """Success case: test command and stdout is printed in verbose mode.""" - subprocess_handler.run_cmd("echo foo", verbose=True) + get_logger().setLevel(logging.DEBUG) # Set logging to debug, enabling verbose output + subprocess_handler.run_cmd("echo foo") captured = capfd.readouterr() assert captured.out == "echo foo\nfoo\n" assert not captured.err @@ -44,7 +49,8 @@ def test_command_and_stderr_is_redirected_to_stdout_in_verbose_mode( self, subprocess_handler, capfd ): """Success case: test command and stderr is redirected to stdout in verbose mode.""" - subprocess_handler.run_cmd("echo foo 1>&2", verbose=True) + get_logger().setLevel(logging.DEBUG) # Set logging to debug, enabling verbose output + subprocess_handler.run_cmd("echo foo 1>&2") captured = capfd.readouterr() assert 
captured.out == "echo foo 1>&2\nfoo\n" assert not captured.err @@ -53,6 +59,7 @@ def test_output_is_captured_with_capture_output_enabled( self, subprocess_handler, capfd ): """Success case: test output is captured with capture_output enabled.""" + get_logger().setLevel(logging.INFO) # Set logging to info, disabling verbose output proc = subprocess_handler.run_cmd("echo foo", capture_output=True) captured = capfd.readouterr() assert not captured.out @@ -73,7 +80,8 @@ def test_command_is_printed_and_stdout_is_captured_in_verbose_mode( self, subprocess_handler, capfd ): """Success case: test command is printed and stdout is captured in verbose mode.""" - proc = subprocess_handler.run_cmd("echo foo", capture_output=True, verbose=True) + get_logger().setLevel(logging.DEBUG) # Set logging to debug, enabling verbose output + proc = subprocess_handler.run_cmd("echo foo", capture_output=True) captured = capfd.readouterr() assert captured.out == "echo foo\n" assert not captured.err @@ -82,6 +90,7 @@ def test_command_is_printed_and_stdout_is_captured_in_verbose_mode( def test_stdout_is_redirected_to_file(self, subprocess_handler, capfd): """Success case: test stdout is redirected to file.""" + get_logger().setLevel(logging.INFO) # Set logging to info, disabling verbose output file_path = Path("out.txt") subprocess_handler.run_cmd("echo foo", output_file=file_path) with file_path.open("r", encoding="utf-8") as file: @@ -94,8 +103,9 @@ def test_command_is_printed_and_stdout_is_redirected_to_file_in_verbose_mode( self, subprocess_handler, capfd ): """Success case: test command is printed and stdout is redirected to file in verbose mode.""" + get_logger().setLevel(logging.DEBUG) # Set logging to debug, enabling verbose output file_path = Path("out.txt") - subprocess_handler.run_cmd("echo foo", output_file=file_path, verbose=True) + subprocess_handler.run_cmd("echo foo", output_file=file_path) with file_path.open("r", encoding="utf-8") as file: assert file.read() == "foo\n" 
captured = capfd.readouterr() From 222ad4378f1b5ade12d2c12b3c5ac693922d1c60 Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Wed, 24 Jan 2024 11:17:29 +1100 Subject: [PATCH 22/50] Black reformatting. #103 --- benchcab/benchcab.py | 10 +++------ benchcab/comparison.py | 18 ++++++++++------- benchcab/environment_modules.py | 1 + benchcab/fluxsite.py | 30 ++++++++++++++++----------- benchcab/model.py | 23 +++++++++------------ benchcab/utils/fs.py | 1 + benchcab/utils/subprocess.py | 7 +++++-- setup.py | 2 +- tests/test_subprocess.py | 36 ++++++++++++++++++++++++--------- 9 files changed, 75 insertions(+), 53 deletions(-) diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index 2c4583b8..9418597e 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -149,9 +149,7 @@ def validate_config(self, config_path: str): """Endpoint for `benchcab validate_config`.""" _ = self._get_config(config_path) - def fluxsite_submit_job( - self, config_path: str, skip: list[str] - ) -> None: + def fluxsite_submit_job(self, config_path: str, skip: list[str]) -> None: """Submits the PBS job script step in the fluxsite test workflow.""" config = self._get_config(config_path) self._validate_environment(project=config["project"], modules=config["modules"]) @@ -235,7 +233,7 @@ def build(self, config_path: str): self.logger.info( [ "Compiling CABLE using custom build script for ", - f"realisation {repo.name}..." 
+ f"realisation {repo.name}...", ] ) repo.custom_build(modules=config["modules"]) @@ -310,9 +308,7 @@ def fluxsite_bitwise_cmp(self, config_path: str): run_comparisons(comparisons) self.logger.info("Successfully ran comparison tasks") - def fluxsite( - self, config_path: str, no_submit: bool, skip: list[str] - ): + def fluxsite(self, config_path: str, no_submit: bool, skip: list[str]): """Endpoint for `benchcab fluxsite`.""" self.checkout(config_path) self.build(config_path) diff --git a/benchcab/comparison.py b/benchcab/comparison.py index 93216456..173ca024 100644 --- a/benchcab/comparison.py +++ b/benchcab/comparison.py @@ -10,8 +10,8 @@ from subprocess import CalledProcessError from benchcab import internal -from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface from benchcab.utils import get_logger +from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface class ComparisonTask: @@ -47,18 +47,22 @@ def run(self) -> None: f"nccmp -df {file_a} {file_b}", capture_output=True, ) - self.logger.info(f"Success: files {file_a.name} {file_b.name} are identical") + self.logger.info( + f"Success: files {file_a.name} {file_b.name} are identical" + ) except CalledProcessError as exc: output_file = ( internal.FLUXSITE_DIRS["BITWISE_CMP"] / f"{self.task_name}.txt" ) with output_file.open("w", encoding="utf-8") as file: file.write(exc.stdout) - - self.logger.error([ - f"Failure: files {file_a.name} {file_b.name} differ. ", - f"Results of diff have been written to {output_file}" - ]) + + self.logger.error( + [ + f"Failure: files {file_a.name} {file_b.name} differ. 
", + f"Results of diff have been written to {output_file}", + ] + ) sys.stdout.flush() diff --git a/benchcab/environment_modules.py b/benchcab/environment_modules.py index 2f22d451..70546a5a 100644 --- a/benchcab/environment_modules.py +++ b/benchcab/environment_modules.py @@ -7,6 +7,7 @@ import sys from abc import ABC as AbstractBaseClass # noqa: N811 from abc import abstractmethod + from benchcab.utils import get_logger sys.path.append("/opt/Modules/v4.3.0/init") diff --git a/benchcab/fluxsite.py b/benchcab/fluxsite.py index b1d1a0c2..39900504 100644 --- a/benchcab/fluxsite.py +++ b/benchcab/fluxsite.py @@ -18,9 +18,9 @@ from benchcab import __version__, internal from benchcab.comparison import ComparisonTask from benchcab.model import Model +from benchcab.utils import get_logger from benchcab.utils.fs import chdir, mkdir from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface -from benchcab.utils import get_logger # fmt: off # ====================================================== @@ -176,7 +176,9 @@ def setup_task(self): internal.FLUXSITE_DIRS["TASKS"] / self.get_task_name() / internal.CABLE_NML ) - self.logger.debug(f" Adding base configurations to CABLE namelist file {nml_path}") + self.logger.debug( + f" Adding base configurations to CABLE namelist file {nml_path}" + ) patch_namelist( nml_path, { @@ -210,19 +212,21 @@ def setup_task(self): }, ) - self.logger.debug(f" Adding science configurations to CABLE namelist file {nml_path}") + self.logger.debug( + f" Adding science configurations to CABLE namelist file {nml_path}" + ) patch_namelist(nml_path, self.sci_config) if self.model.patch: self.logger.debug( - f" Adding branch specific configurations to CABLE namelist file {nml_path}" - ) + f" Adding branch specific configurations to CABLE namelist file {nml_path}" + ) patch_namelist(nml_path, self.model.patch) if self.model.patch_remove: self.logger.debug( - f" Removing branch specific configurations from CABLE namelist file 
{nml_path}" - ) + f" Removing branch specific configurations from CABLE namelist file {nml_path}" + ) patch_remove_namelist(nml_path, self.model.patch_remove) def clean_task(self): @@ -267,8 +271,8 @@ def fetch_files(self): task_dir = internal.FLUXSITE_DIRS["TASKS"] / self.get_task_name() self.logger.debug( - f" Copying namelist files from {internal.NAMELIST_DIR} to {task_dir}" - ) + f" Copying namelist files from {internal.NAMELIST_DIR} to {task_dir}" + ) shutil.copytree(internal.NAMELIST_DIR, task_dir, dirs_exist_ok=True) @@ -285,10 +289,12 @@ def run(self): """Runs a single fluxsite task.""" task_name = self.get_task_name() task_dir = internal.FLUXSITE_DIRS["TASKS"] / task_name - self.logger.debug([ + self.logger.debug( + [ f"Running task {task_name}... CABLE standard output " f"saved in {task_dir / internal.CABLE_STDOUT_FILENAME}" - ]) + ] + ) try: self.run_cable() self.add_provenance_info() @@ -313,7 +319,7 @@ def run_cable(self): with chdir(task_dir): self.subprocess_handler.run_cmd( f"./{internal.CABLE_EXE} {internal.CABLE_NML}", - output_file=stdout_path.relative_to(task_dir) + output_file=stdout_path.relative_to(task_dir), ) except CalledProcessError as exc: self.logger.error(f"Error: CABLE returned an error for task {task_name}") diff --git a/benchcab/model.py b/benchcab/model.py index a84f0621..df78b6fc 100644 --- a/benchcab/model.py +++ b/benchcab/model.py @@ -12,10 +12,10 @@ from benchcab import internal from benchcab.environment_modules import EnvironmentModules, EnvironmentModulesInterface +from benchcab.utils import get_logger from benchcab.utils.fs import chdir, copy2, rename from benchcab.utils.repo import GitRepo, Repo from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface -from benchcab.utils import get_logger class Model: @@ -102,18 +102,16 @@ def custom_build(self, modules: list[str]): self.logger.debug(f"chmod +x {tmp_script_path}") tmp_script_path.chmod(tmp_script_path.stat().st_mode | stat.S_IEXEC) - 
self.logger.debug([ + self.logger.debug( + [ f"Modifying {tmp_script_path.name}: remove lines that call " "environment modules" - ]) + ] + ) remove_module_lines(tmp_script_path) - with chdir(build_script_path.parent), self.modules_handler.load( - modules - ): - self.subprocess_handler.run_cmd( - f"./{tmp_script_path.name}" - ) + with chdir(build_script_path.parent), self.modules_handler.load(modules): + self.subprocess_handler.run_cmd(f"./{tmp_script_path.name}") def pre_build(self): """Runs CABLE pre-build steps.""" @@ -129,10 +127,7 @@ def pre_build(self): continue copy2(path, tmp_dir) - copy2( - path_to_repo / self.src_dir / "offline" / "Makefile", - tmp_dir - ) + copy2(path_to_repo / self.src_dir / "offline" / "Makefile", tmp_dir) def run_build(self, modules: list[str]): """Runs CABLE build scripts.""" @@ -159,7 +154,7 @@ def post_build(self): rename( tmp_dir / internal.CABLE_EXE, - path_to_repo / self.src_dir / "offline" / internal.CABLE_EXE + path_to_repo / self.src_dir / "offline" / internal.CABLE_EXE, ) diff --git a/benchcab/utils/fs.py b/benchcab/utils/fs.py index 8f764d3f..78bc63c0 100644 --- a/benchcab/utils/fs.py +++ b/benchcab/utils/fs.py @@ -7,6 +7,7 @@ import os import shutil from pathlib import Path + from benchcab.utils import get_logger diff --git a/benchcab/utils/subprocess.py b/benchcab/utils/subprocess.py index d9faa7e2..77341411 100644 --- a/benchcab/utils/subprocess.py +++ b/benchcab/utils/subprocess.py @@ -9,8 +9,11 @@ from abc import ABC as AbstractBaseClass # noqa: N811 from abc import abstractmethod from typing import Any, Optional + from benchcab.utils import get_logger +DEBUG_LEVEL = 10 + class SubprocessWrapperInterface(AbstractBaseClass): """An abstract class (interface) that defines abstract methods for running subprocess commands. @@ -59,7 +62,7 @@ def run_cmd( _description_ """ # Use the logging level (10 = Debug) to determine verbosity. 
- verbose = get_logger().getEffectiveLevel() == 10 + verbose = get_logger().getEffectiveLevel() == DEBUG_LEVEL kwargs: Any = {} with contextlib.ExitStack() as stack: if capture_output: @@ -78,7 +81,7 @@ def run_cmd( if verbose: print(cmd) - + proc = subprocess.run(cmd, shell=True, check=True, **kwargs) return proc diff --git a/setup.py b/setup.py index 8f1f4985..87b8dbd9 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,10 @@ #!/usr/bin/env python from setuptools import setup + import versioneer setup( version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), ) - diff --git a/tests/test_subprocess.py b/tests/test_subprocess.py index 34148b18..b3bd55b3 100644 --- a/tests/test_subprocess.py +++ b/tests/test_subprocess.py @@ -1,14 +1,14 @@ """`pytest` tests for `utils/subprocess.py`.""" +import logging import os import subprocess from pathlib import Path import pytest -from benchcab.utils.subprocess import SubprocessWrapper from benchcab.utils import get_logger -import logging +from benchcab.utils.subprocess import SubprocessWrapper class TestRunCmd: @@ -21,7 +21,9 @@ def subprocess_handler(self): def test_stdout_is_suppressed_in_non_verbose_mode(self, subprocess_handler, capfd): """Success case: test stdout is suppressed in non-verbose mode.""" - get_logger().setLevel(logging.INFO) # Set logging to info, disabling verbose output + get_logger().setLevel( + logging.INFO + ) # Set logging to info, disabling verbose output subprocess_handler.run_cmd("echo foo") captured = capfd.readouterr() assert not captured.out @@ -29,7 +31,9 @@ def test_stdout_is_suppressed_in_non_verbose_mode(self, subprocess_handler, capf def test_stderr_is_suppressed_in_non_verbose_mode(self, subprocess_handler, capfd): """Success case: test stderr is suppressed in non-verbose mode.""" - get_logger().setLevel(logging.INFO) # Set logging to info, disabling verbose output + get_logger().setLevel( + logging.INFO + ) # Set logging to info, disabling verbose output 
subprocess_handler.run_cmd("echo foo 1>&2") captured = capfd.readouterr() assert not captured.out @@ -39,7 +43,9 @@ def test_command_and_stdout_is_printed_in_verbose_mode( self, subprocess_handler, capfd ): """Success case: test command and stdout is printed in verbose mode.""" - get_logger().setLevel(logging.DEBUG) # Set logging to debug, enabling verbose output + get_logger().setLevel( + logging.DEBUG + ) # Set logging to debug, enabling verbose output subprocess_handler.run_cmd("echo foo") captured = capfd.readouterr() assert captured.out == "echo foo\nfoo\n" @@ -49,7 +55,9 @@ def test_command_and_stderr_is_redirected_to_stdout_in_verbose_mode( self, subprocess_handler, capfd ): """Success case: test command and stderr is redirected to stdout in verbose mode.""" - get_logger().setLevel(logging.DEBUG) # Set logging to debug, enabling verbose output + get_logger().setLevel( + logging.DEBUG + ) # Set logging to debug, enabling verbose output subprocess_handler.run_cmd("echo foo 1>&2") captured = capfd.readouterr() assert captured.out == "echo foo 1>&2\nfoo\n" @@ -59,7 +67,9 @@ def test_output_is_captured_with_capture_output_enabled( self, subprocess_handler, capfd ): """Success case: test output is captured with capture_output enabled.""" - get_logger().setLevel(logging.INFO) # Set logging to info, disabling verbose output + get_logger().setLevel( + logging.INFO + ) # Set logging to info, disabling verbose output proc = subprocess_handler.run_cmd("echo foo", capture_output=True) captured = capfd.readouterr() assert not captured.out @@ -80,7 +90,9 @@ def test_command_is_printed_and_stdout_is_captured_in_verbose_mode( self, subprocess_handler, capfd ): """Success case: test command is printed and stdout is captured in verbose mode.""" - get_logger().setLevel(logging.DEBUG) # Set logging to debug, enabling verbose output + get_logger().setLevel( + logging.DEBUG + ) # Set logging to debug, enabling verbose output proc = subprocess_handler.run_cmd("echo foo", 
capture_output=True) captured = capfd.readouterr() assert captured.out == "echo foo\n" @@ -90,7 +102,9 @@ def test_command_is_printed_and_stdout_is_captured_in_verbose_mode( def test_stdout_is_redirected_to_file(self, subprocess_handler, capfd): """Success case: test stdout is redirected to file.""" - get_logger().setLevel(logging.INFO) # Set logging to info, disabling verbose output + get_logger().setLevel( + logging.INFO + ) # Set logging to info, disabling verbose output file_path = Path("out.txt") subprocess_handler.run_cmd("echo foo", output_file=file_path) with file_path.open("r", encoding="utf-8") as file: @@ -103,7 +117,9 @@ def test_command_is_printed_and_stdout_is_redirected_to_file_in_verbose_mode( self, subprocess_handler, capfd ): """Success case: test command is printed and stdout is redirected to file in verbose mode.""" - get_logger().setLevel(logging.DEBUG) # Set logging to debug, enabling verbose output + get_logger().setLevel( + logging.DEBUG + ) # Set logging to debug, enabling verbose output file_path = Path("out.txt") subprocess_handler.run_cmd("echo foo", output_file=file_path) with file_path.open("r", encoding="utf-8") as file: From daa30acd218daec6314f4afcaa647a1bd988daa8 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Wed, 24 Jan 2024 11:59:43 +1100 Subject: [PATCH 23/50] Add unit tests for read_optional_key --- tests/test_config.py | 101 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 87 insertions(+), 14 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index b6e0d8a8..ddc7831e 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,35 +1,108 @@ -"""`pytest` tests for config.py""" +"""`pytest` tests for config.py.""" +from copy import deepcopy +from pprint import pformat + import pytest -import benchcab.utils as bu + import benchcab.config as bc +import benchcab.utils as bu + +no_optional_config = { + "project": "w97", + "modules": ["intel-compiler/2021.1.1", "netcdf/4.7.4", "openmpi/4.1.0"], 
+ "realisations": [ + {"repo": {"svn": {"branch_path": "trunk"}}}, + { + "repo": {"svn": {"branch_path": "branches/Users/ccc561/v3.0-YP-changes"}}, + }, + ], +} + +all_optional_config = { + "project": "w97", + "fluxsite": { + "experiment": "forty-two-site-test", + "multiprocess": True, + "pbs": {"ncpus": 18, "mem": "30GB", "walltime": "6:00:00", "storage": []}, + }, + "modules": ["intel-compiler/2021.1.1", "netcdf/4.7.4", "openmpi/4.1.0"], + "realisations": [ + {"name": None, "repo": {"svn": {"branch_path": "trunk"}}}, + { + "name": None, + "repo": {"svn": {"branch_path": "branches/Users/ccc561/v3.0-YP-changes"}}, + }, + ], + "science_configurations": [ + { + "cable": { + "cable_user": {"FWSOIL_SWITCH": "Haverd2013", "GS_SWITCH": "medlyn"} + } + }, + { + "cable": { + "cable_user": {"FWSOIL_SWITCH": "Haverd2013", "GS_SWITCH": "leuning"} + } + }, + {"cable": {"cable_user": {"FWSOIL_SWITCH": "standard", "GS_SWITCH": "medlyn"}}}, + { + "cable": { + "cable_user": {"FWSOIL_SWITCH": "standard", "GS_SWITCH": "leuning"} + } + }, + ], +} + +def test_read_config_file_pass(): + """Test read_config() reads an existing file.""" + existent_path = bu.get_installed_root() / "data" / "test" / "config-valid.yml" -def test_read_config_pass(): - """Test read_config() passes as expected.""" - existent_path = bu.get_installed_root() / 'data' / 'test' / 'config-valid.yml' - # Test for a path that exists - config = bc.read_config(existent_path) - assert config + config = bc.read_config_file(existent_path) + assert pformat(config) == pformat(no_optional_config) -def test_read_config_fail(): - """Test that read_config() fails as expected.""" - nonexistent_path = bu.get_installed_root() / 'data' / 'test' / 'config-missing.yml' +def test_read_config_file_fail(): + """Test that read_config() does not work for a non-existent file.""" + nonexistent_path = bu.get_installed_root() / "data" / "test" / "config-missing.yml" # Test for a path that does not exist. 
with pytest.raises(FileNotFoundError): - config = bc.read_config(nonexistent_path) + _ = bc.read_config_file(nonexistent_path) def test_validate_config_valid(): """Test validate_config() for a valid config file.""" - valid_config = bu.load_package_data('test/config-valid.yml') + valid_config = bu.load_package_data("test/config-valid.yml") assert bc.validate_config(valid_config) def test_validate_config_invalid(): """Test validate_config() for an invalid config file.""" - invalid_config = bu.load_package_data('test/config-invalid.yml') + invalid_config = bu.load_package_data("test/config-invalid.yml") with pytest.raises(bc.ConfigValidationException): bc.validate_config(invalid_config) + + +def test_read_optional_key_add_data(): + """Test default key-values are added if not provided by config.yaml.""" + # Config having no optional keys + config = deepcopy(no_optional_config) + bc.read_optional_key(config) + assert pformat(config) == pformat(all_optional_config) + + +def test_read_optional_key_same_data(): + """Test optional key-values are unchanged if provided by config.yaml.""" + # Config having all optional keys + config = deepcopy(all_optional_config) + bc.read_optional_key(config) + assert pformat(config) == pformat(all_optional_config) + + +def test_read_config(): + """Test overall behaviour of read_config.""" + expected_config = deepcopy(all_optional_config) + path = bu.get_installed_root() / "data" / "test" / "config-valid.yml" + assert pformat(bc.read_config(path)) == pformat(expected_config) From 1a8c4cbc5b637e3dd11405fb1fefd6b2e0577e39 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Wed, 24 Jan 2024 12:12:39 +1100 Subject: [PATCH 24/50] Add docstring for and minor refactoring --- benchcab/benchcab.py | 2 +- benchcab/config.py | 59 +++++++++++++++++++++++++++++++++++--------- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index dc0c25f7..aabfb762 100644 --- a/benchcab/benchcab.py +++ 
b/benchcab/benchcab.py @@ -108,7 +108,7 @@ def _get_models(self, config: dict) -> list[Model]: repo = create_repo( spec=sub_config.pop("repo"), path=internal.SRC_DIR - / (Path() if sub_config["name"] is None else sub_config["name"]), + / (sub_config["name"] if sub_config["name"] else Path()), ) self._models.append(Model(repo=repo, model_id=id, **sub_config)) return self._models diff --git a/benchcab/config.py b/benchcab/config.py index bfc4f475..e32dc536 100644 --- a/benchcab/config.py +++ b/benchcab/config.py @@ -3,12 +3,20 @@ """A module containing all *_config() functions.""" from pathlib import Path +from typing import TypedDict import yaml -from benchcab import internal from cerberus import Validator + import benchcab.utils as bu +from benchcab import internal + +class PBSConfig(TypedDict): + ncpus: int + mem: str + walltime: str + storage: str class ConfigValidationException(Exception): def __init__(self, validator: Validator): @@ -19,7 +27,6 @@ def __init__(self, validator: Validator): validator: cerberus.Validator A validation object that has been used and has the errors attribute. """ - # Nicely format the errors. errors = [f"{k} = {v}" for k, v in validator.errors.items()] @@ -49,7 +56,6 @@ def validate_config(config: dict) -> bool: ConfigValidationException Raised when the configuration file fails validation. """ - # Load the schema schema = bu.load_package_data("config-schema.yml") @@ -67,7 +73,19 @@ def validate_config(config: dict) -> bool: raise ConfigValidationException(v) -def read_optional_data(config: dict): +def read_optional_key(config: dict): + """Fills all optional keys in config if not already defined. + + The default values for most optional keys are loaded from `internal.py` + Note: We need to ensure that the `name` key for realisations exists for + other modules, but it doesn't have a default value. So we set it to + `None` by default. 
+ + Parameters + ---------- + config : dict + The configuration file with with/without optional keys + """ if "realisations" in config: for r in config["realisations"]: r["name"] = r.get("name") @@ -89,6 +107,26 @@ def read_optional_data(config: dict): ) +def read_config_file(config_path: str) -> dict: + """Load the config file in a dict. + + Parameters + ---------- + config_path : str + Path to the configuration file. + + Returns + ------- + dict + Configuration dict + """ + # Load the configuration file. + with Path.open(Path(config_path), "r", encoding="utf-8") as file: + config = yaml.safe_load(file) + + return config + + def read_config(config_path: str) -> dict: """Reads the config file and returns a dictionary containing the configurations. @@ -100,20 +138,17 @@ def read_config(config_path: str) -> dict: Returns ------- dict - Configuration dict. + Validated configuration dict, with default optional parameters if not specified in file. Raises ------ ConfigValidationError Raised when the configuration file fails validation. """ - - # Load the configuration file. - with open(Path(config_path), "r", encoding="utf-8") as file: - config = yaml.safe_load(file) - - read_optional_data(config) - + # Read configuration file + config = read_config_file(config_path) + # Populate configuration dict with optional keys + read_optional_key(config) # Validate and return. 
validate_config(config) return config From 7a364a6314feb59efeabc01ccaf3d33ecc0b6e29 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Wed, 24 Jan 2024 12:20:17 +1100 Subject: [PATCH 25/50] Apply ruff linting style in config module --- benchcab/config.py | 12 ++++++++---- tests/test_config.py | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/benchcab/config.py b/benchcab/config.py index e32dc536..fb06da2a 100644 --- a/benchcab/config.py +++ b/benchcab/config.py @@ -13,14 +13,18 @@ class PBSConfig(TypedDict): + """Default parameters for PBS runs via benchcab.""" + ncpus: int mem: str walltime: str storage: str -class ConfigValidationException(Exception): +class ConfigValidationError(Exception): + """When config doesn't match with the defined schema.""" + def __init__(self, validator: Validator): - """Config validation exception. + """. Parameters ---------- @@ -70,7 +74,7 @@ def validate_config(config: dict) -> bool: return True # Invalid - raise ConfigValidationException(v) + raise ConfigValidationError(v) def read_optional_key(config: dict): @@ -123,7 +127,7 @@ def read_config_file(config_path: str) -> dict: # Load the configuration file. 
with Path.open(Path(config_path), "r", encoding="utf-8") as file: config = yaml.safe_load(file) - + return config diff --git a/tests/test_config.py b/tests/test_config.py index ddc7831e..2727e568 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -81,7 +81,7 @@ def test_validate_config_valid(): def test_validate_config_invalid(): """Test validate_config() for an invalid config file.""" invalid_config = bu.load_package_data("test/config-invalid.yml") - with pytest.raises(bc.ConfigValidationException): + with pytest.raises(bc.ConfigValidationError): bc.validate_config(invalid_config) From 22d9477f823098ba36662c5dc1f9b85c5f1ea485 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Wed, 24 Jan 2024 12:50:13 +1100 Subject: [PATCH 26/50] Remove tests for partial pbs_config keys --- benchcab/internal.py | 5 +++-- benchcab/utils/pbs.py | 8 ++------ tests/test_pbs.py | 19 +------------------ 3 files changed, 6 insertions(+), 26 deletions(-) diff --git a/benchcab/internal.py b/benchcab/internal.py index 365f4481..31840669 100644 --- a/benchcab/internal.py +++ b/benchcab/internal.py @@ -5,7 +5,8 @@ import os from pathlib import Path -from typing import Any + +from benchcab.config import PBSConfig _, NODENAME, _, _, _ = os.uname() @@ -13,7 +14,7 @@ # Parameters for job script: QSUB_FNAME = "benchmark_cable_qsub.sh" -FLUXSITE_DEFAULT_PBS: Any = { +FLUXSITE_DEFAULT_PBS: PBSConfig = { "ncpus": 18, "mem": "30GB", "walltime": "6:00:00", diff --git a/benchcab/utils/pbs.py b/benchcab/utils/pbs.py index b85a7aff..20369c9c 100644 --- a/benchcab/utils/pbs.py +++ b/benchcab/utils/pbs.py @@ -4,6 +4,7 @@ """Contains helper functions for manipulating PBS job scripts.""" from benchcab import internal +from benchcab.config import PBSConfig def render_job_script( @@ -11,7 +12,7 @@ def render_job_script( config_path: str, modules: list, benchcab_path: str, - pbs_config: dict, + pbs_config: PBSConfig, verbose=False, skip_bitwise_cmp=False, ) -> str: @@ -20,11 +21,6 @@ def 
render_job_script( This includes things such as running CABLE and running bitwise comparison jobs between model output files. """ - pbs_missing_keys = internal.FLUXSITE_DEFAULT_PBS.keys() - pbs_config.keys() - if len(pbs_missing_keys) != 0: - msg = f"Default pbs parameters missing: {sorted(pbs_missing_keys)}" - raise ValueError(msg) - module_load_lines = "\n".join( f"module load {module_name}" for module_name in modules ) diff --git a/tests/test_pbs.py b/tests/test_pbs.py index 2eda43e9..4f836926 100644 --- a/tests/test_pbs.py +++ b/tests/test_pbs.py @@ -148,21 +148,4 @@ def test_pbs_config_parameters(self): /absolute/path/to/benchcab fluxsite-run-tasks --config=/path/to/config.yaml """ - ) - - def test_default_pbs_config(self): - """Failure case: if any key(s) of pbs_config is/are empty, fail the test.""" - pbs_missing_keys = ["mem", "ncpus", "storage"] - with pytest.raises( - ValueError, - match="Default pbs parameters missing: " - + re.escape(str(pbs_missing_keys)), - ): - render_job_script( - project="tm70", - config_path="/path/to/config.yaml", - modules=["foo", "bar", "baz"], - skip_bitwise_cmp=True, - benchcab_path="/absolute/path/to/benchcab", - pbs_config={"walltime": "48:00:00"}, - ) + ) \ No newline at end of file From b79a2614139a65342cf2acacf461fb45b8eb4afc Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Thu, 25 Jan 2024 02:09:33 +1100 Subject: [PATCH 27/50] Add parameterised-fixture tests --- tests/test_config.py | 212 ++++++++++++++++++++++++------------------- 1 file changed, 121 insertions(+), 91 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index 2727e568..41720e04 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,5 +1,6 @@ """`pytest` tests for config.py.""" -from copy import deepcopy +from contextlib import nullcontext as does_not_raise +from pathlib import Path from pprint import pformat import pytest @@ -7,102 +8,131 @@ import benchcab.config as bc import benchcab.utils as bu -no_optional_config = { - 
"project": "w97", - "modules": ["intel-compiler/2021.1.1", "netcdf/4.7.4", "openmpi/4.1.0"], - "realisations": [ - {"repo": {"svn": {"branch_path": "trunk"}}}, - { - "repo": {"svn": {"branch_path": "branches/Users/ccc561/v3.0-YP-changes"}}, - }, - ], -} - -all_optional_config = { - "project": "w97", - "fluxsite": { - "experiment": "forty-two-site-test", - "multiprocess": True, - "pbs": {"ncpus": 18, "mem": "30GB", "walltime": "6:00:00", "storage": []}, - }, - "modules": ["intel-compiler/2021.1.1", "netcdf/4.7.4", "openmpi/4.1.0"], - "realisations": [ - {"name": None, "repo": {"svn": {"branch_path": "trunk"}}}, - { - "name": None, - "repo": {"svn": {"branch_path": "branches/Users/ccc561/v3.0-YP-changes"}}, + +@pytest.fixture() +def config_str(request) -> str: + """Provide relative YAML path from data files.""" + return f"test/{request.param}" + + +@pytest.fixture() +def config_path(config_str: str) -> Path: + """Provide absolute YAML Path object from data files.""" + return bu.get_installed_root() / "data" / config_str + + +@pytest.fixture() +def empty_config() -> dict: + """Empty dict Configuration.""" + return {} + + +@pytest.fixture() +def no_optional_config() -> dict: + """Config with no optional parameters.""" + return { + "project": "w97", + "modules": ["intel-compiler/2021.1.1", "netcdf/4.7.4", "openmpi/4.1.0"], + "realisations": [ + {"repo": {"svn": {"branch_path": "trunk"}}}, + { + "repo": { + "svn": {"branch_path": "branches/Users/ccc561/v3.0-YP-changes"} + }, + }, + ], + } + + +@pytest.fixture() +def all_optional_config() -> dict: + """Config with all optional parameters.""" + return { + "project": "w97", + "fluxsite": { + "experiment": "forty-two-site-test", + "multiprocess": True, + "pbs": {"ncpus": 18, "mem": "30GB", "walltime": "6:00:00", "storage": []}, }, + "modules": ["intel-compiler/2021.1.1", "netcdf/4.7.4", "openmpi/4.1.0"], + "realisations": [ + {"name": None, "repo": {"svn": {"branch_path": "trunk"}}}, + { + "name": None, + "repo": { + "svn": 
{"branch_path": "branches/Users/ccc561/v3.0-YP-changes"} + }, + }, + ], + "science_configurations": [ + { + "cable": { + "cable_user": {"FWSOIL_SWITCH": "Haverd2013", "GS_SWITCH": "medlyn"} + } + }, + { + "cable": { + "cable_user": { + "FWSOIL_SWITCH": "Haverd2013", + "GS_SWITCH": "leuning", + } + } + }, + { + "cable": { + "cable_user": {"FWSOIL_SWITCH": "standard", "GS_SWITCH": "medlyn"} + } + }, + { + "cable": { + "cable_user": {"FWSOIL_SWITCH": "standard", "GS_SWITCH": "leuning"} + } + }, + ], + } + + +@pytest.mark.parametrize( + ("config_str", "output_config", "pytest_error"), + [ + ("config-valid.yml", "no_optional_config", does_not_raise()), + ("config-missing.yml", "empty_config", pytest.raises(FileNotFoundError)), ], - "science_configurations": [ - { - "cable": { - "cable_user": {"FWSOIL_SWITCH": "Haverd2013", "GS_SWITCH": "medlyn"} - } - }, - { - "cable": { - "cable_user": {"FWSOIL_SWITCH": "Haverd2013", "GS_SWITCH": "leuning"} - } - }, - {"cable": {"cable_user": {"FWSOIL_SWITCH": "standard", "GS_SWITCH": "medlyn"}}}, - { - "cable": { - "cable_user": {"FWSOIL_SWITCH": "standard", "GS_SWITCH": "leuning"} - } - }, + indirect=["config_str"], +) +def test_read_config_file(config_path, output_config, pytest_error, request): + """Test read_config_file() for a file that may/may not exist.""" + with pytest_error: + config = bc.read_config_file(config_path) + assert pformat(config) == pformat(request.getfixturevalue(output_config)) + + +@pytest.mark.parametrize( + ("config_str", "pytest_error"), + [ + ("config-valid.yml", does_not_raise()), + ("config-invalid.yml", pytest.raises(bc.ConfigValidationError)), ], -} - - -def test_read_config_file_pass(): - """Test read_config() reads an existing file.""" - existent_path = bu.get_installed_root() / "data" / "test" / "config-valid.yml" - - # Test for a path that exists - config = bc.read_config_file(existent_path) - assert pformat(config) == pformat(no_optional_config) - - -def test_read_config_file_fail(): - """Test 
that read_config() does not work for a non-existent file.""" - nonexistent_path = bu.get_installed_root() / "data" / "test" / "config-missing.yml" - - # Test for a path that does not exist. - with pytest.raises(FileNotFoundError): - _ = bc.read_config_file(nonexistent_path) - - -def test_validate_config_valid(): - """Test validate_config() for a valid config file.""" - valid_config = bu.load_package_data("test/config-valid.yml") - assert bc.validate_config(valid_config) - - -def test_validate_config_invalid(): - """Test validate_config() for an invalid config file.""" - invalid_config = bu.load_package_data("test/config-invalid.yml") - with pytest.raises(bc.ConfigValidationError): - bc.validate_config(invalid_config) - - -def test_read_optional_key_add_data(): - """Test default key-values are added if not provided by config.yaml.""" + indirect=["config_str"], +) +def test_validate_config(config_str, pytest_error): + """Test validate_config() for a valid/invalid config file.""" + with pytest_error: + config = bu.load_package_data(config_str) + assert bc.validate_config(config) + + +@pytest.mark.parametrize("input_config", ["no_optional_config", "all_optional_config"]) +def test_read_optional_key_add_data(input_config, all_optional_config, request): + """Test default key-values are added if not provided by config.yaml, and existing keys stay intact.""" # Config having no optional keys - config = deepcopy(no_optional_config) - bc.read_optional_key(config) - assert pformat(config) == pformat(all_optional_config) - - -def test_read_optional_key_same_data(): - """Test optional key-values are unchanged if provided by config.yaml.""" - # Config having all optional keys - config = deepcopy(all_optional_config) + config = request.getfixturevalue(input_config) bc.read_optional_key(config) assert pformat(config) == pformat(all_optional_config) -def test_read_config(): +@pytest.mark.parametrize("config_str", ["config-valid.yml"], indirect=["config_str"]) +def 
test_read_config(config_path, all_optional_config): """Test overall behaviour of read_config.""" - expected_config = deepcopy(all_optional_config) - path = bu.get_installed_root() / "data" / "test" / "config-valid.yml" - assert pformat(bc.read_config(path)) == pformat(expected_config) + output_config = bc.read_config(config_path) + assert pformat(output_config) == pformat(all_optional_config) From eb57eefd391a1c0c09d614d06452d3d2827adc4e Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Thu, 25 Jan 2024 11:25:04 +1100 Subject: [PATCH 28/50] Remove circular dependency for PBSConfig --- benchcab/config.py | 9 --------- benchcab/internal.py | 2 +- benchcab/utils/pbs.py | 13 ++++++++++--- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/benchcab/config.py b/benchcab/config.py index fb06da2a..34b945f1 100644 --- a/benchcab/config.py +++ b/benchcab/config.py @@ -3,7 +3,6 @@ """A module containing all *_config() functions.""" from pathlib import Path -from typing import TypedDict import yaml from cerberus import Validator @@ -12,14 +11,6 @@ from benchcab import internal -class PBSConfig(TypedDict): - """Default parameters for PBS runs via benchcab.""" - - ncpus: int - mem: str - walltime: str - storage: str - class ConfigValidationError(Exception): """When config doesn't match with the defined schema.""" diff --git a/benchcab/internal.py b/benchcab/internal.py index 819915c3..89056c5e 100644 --- a/benchcab/internal.py +++ b/benchcab/internal.py @@ -6,7 +6,7 @@ import os from pathlib import Path -from benchcab.config import PBSConfig +from benchcab.utils.pbs import PBSConfig _, NODENAME, _, _, _ = os.uname() diff --git a/benchcab/utils/pbs.py b/benchcab/utils/pbs.py index a1eae4af..c03505ab 100644 --- a/benchcab/utils/pbs.py +++ b/benchcab/utils/pbs.py @@ -3,8 +3,16 @@ """Contains helper functions for manipulating PBS job scripts.""" -from benchcab import internal -from benchcab.config import PBSConfig +from typing import TypedDict + + +class 
PBSConfig(TypedDict): + """Default parameters for PBS runs via benchcab.""" + + ncpus: int + mem: str + walltime: str + storage: str def render_job_script( @@ -25,7 +33,6 @@ def render_job_script( f"module load {module_name}" for module_name in modules ) verbose_flag = "-v" if verbose else "" - # wd9 is subgroup of gdata/ks32 storage_flags = ["gdata/ks32", "gdata/hh5", "gdata/wd9", *pbs_config["storage"]] return f"""#!/bin/bash #PBS -l wd From 5b6c6fa194cad5b2f7c5302722058befacc91c86 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Thu, 25 Jan 2024 11:26:02 +1100 Subject: [PATCH 29/50] Improve documentation for config tests/user guide --- docs/user_guide/index.md | 3 +++ tests/test_config.py | 7 +++---- tests/test_pbs.py | 5 +---- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/docs/user_guide/index.md b/docs/user_guide/index.md index e45e4f2c..7a379379 100644 --- a/docs/user_guide/index.md +++ b/docs/user_guide/index.md @@ -20,6 +20,7 @@ To use `benchcab`, you need to join the following projects at NCI: - [ks32][ks32_mynci] - [hh5][hh5_mynci] +- [wd9][wd9_mynci] if not part of the [cable][cable_mynci] ## Installation @@ -262,6 +263,8 @@ Alternatively, you can also access the ACCESS-NRI User support via [the ACCESS-H [hh5_mynci]: https://my.nci.org.au/mancini/project/hh5 [ks32_mynci]: https://my.nci.org.au/mancini/project/ks32 +[wd9_mynci]: https://my.nci.org.au/mancini/project/wd9 +[cable_mynci]: https://my.nci.org.au/mancini/project/cable [bench_example]: https://github.com/CABLE-LSM/bench_example.git [config_options]: config_options.md [forum-support]: https://forum.access-hive.org.au/t/access-help-and-support/908 diff --git a/tests/test_config.py b/tests/test_config.py index 41720e04..818da246 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -11,7 +11,7 @@ @pytest.fixture() def config_str(request) -> str: - """Provide relative YAML path from data files.""" + """Provide relative YAML path string from data files.""" return 
f"test/{request.param}" @@ -101,7 +101,7 @@ def all_optional_config() -> dict: indirect=["config_str"], ) def test_read_config_file(config_path, output_config, pytest_error, request): - """Test read_config_file() for a file that may/may not exist.""" + """Test reading config for a file that may/may not exist.""" with pytest_error: config = bc.read_config_file(config_path) assert pformat(config) == pformat(request.getfixturevalue(output_config)) @@ -116,7 +116,7 @@ def test_read_config_file(config_path, output_config, pytest_error, request): indirect=["config_str"], ) def test_validate_config(config_str, pytest_error): - """Test validate_config() for a valid/invalid config file.""" + """Test schema for a valid/invalid config file.""" with pytest_error: config = bu.load_package_data(config_str) assert bc.validate_config(config) @@ -125,7 +125,6 @@ def test_validate_config(config_str, pytest_error): @pytest.mark.parametrize("input_config", ["no_optional_config", "all_optional_config"]) def test_read_optional_key_add_data(input_config, all_optional_config, request): """Test default key-values are added if not provided by config.yaml, and existing keys stay intact.""" - # Config having no optional keys config = request.getfixturevalue(input_config) bc.read_optional_key(config) assert pformat(config) == pformat(all_optional_config) diff --git a/tests/test_pbs.py b/tests/test_pbs.py index a1470f5d..8c465208 100644 --- a/tests/test_pbs.py +++ b/tests/test_pbs.py @@ -3,9 +3,6 @@ from benchcab import internal from benchcab.utils.pbs import render_job_script -import pytest -import re - class TestRenderJobScript: """Tests for `render_job_script()`.""" @@ -148,4 +145,4 @@ def test_pbs_config_parameters(self): /absolute/path/to/benchcab fluxsite-run-tasks --config=/path/to/config.yaml """ - ) \ No newline at end of file + ) From 2efcd488a49eb135b486e050f33be7af00fa12f7 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Thu, 25 Jan 2024 12:36:18 +1100 Subject: [PATCH 30/50] 
Improve documentation for config --- docs/user_guide/index.md | 2 +- tests/test_config.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/user_guide/index.md b/docs/user_guide/index.md index 7a379379..ead57da0 100644 --- a/docs/user_guide/index.md +++ b/docs/user_guide/index.md @@ -20,7 +20,7 @@ To use `benchcab`, you need to join the following projects at NCI: - [ks32][ks32_mynci] - [hh5][hh5_mynci] -- [wd9][wd9_mynci] if not part of the [cable][cable_mynci] +- [wd9][wd9_mynci] if not part of the [cable][cable_mynci] project ## Installation diff --git a/tests/test_config.py b/tests/test_config.py index 818da246..a1319f2e 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -11,13 +11,13 @@ @pytest.fixture() def config_str(request) -> str: - """Provide relative YAML path string from data files.""" + """Provide relative YAML path string of data files.""" return f"test/{request.param}" @pytest.fixture() def config_path(config_str: str) -> Path: - """Provide absolute YAML Path object from data files.""" + """Provide absolute YAML Path object of data files.""" return bu.get_installed_root() / "data" / config_str From 78a9c86ff20bdfb8b0f9a2e1b29f9bbf3d57a4ed Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Tue, 30 Jan 2024 00:43:03 +1100 Subject: [PATCH 31/50] Add payu in benchcab conda environment --- .conda/benchcab-dev.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.conda/benchcab-dev.yaml b/.conda/benchcab-dev.yaml index 012fc6cb..058bb0c7 100644 --- a/.conda/benchcab-dev.yaml +++ b/.conda/benchcab-dev.yaml @@ -1,9 +1,13 @@ name: benchcab-dev channels: - conda-forge + - accessnri + - coecms - defaults + dependencies: - python=3.9 + - payu - f90nml - netcdf4 - pytest-cov From e3ab664b6032479c1185f8efd399372d2ba4e844 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Tue, 30 Jan 2024 12:24:23 +1100 Subject: [PATCH 32/50] Add dev dependencies in benchcab-dev.yml --- .conda/benchcab-dev.yaml | 13 +++++++++++-- 
.../mkdocs-requirements.txt | 0 .readthedocs.yaml | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) rename mkdocs-requirements.txt => .conda/mkdocs-requirements.txt (100%) diff --git a/.conda/benchcab-dev.yaml b/.conda/benchcab-dev.yaml index 012fc6cb..44fd9320 100644 --- a/.conda/benchcab-dev.yaml +++ b/.conda/benchcab-dev.yaml @@ -4,10 +4,19 @@ channels: - defaults dependencies: - python=3.9 + - pip - f90nml - netcdf4 - - pytest-cov - pyyaml - flatdict - cerberus>=1.3.5 - - gitpython \ No newline at end of file + - gitpython + # CI + - pytest-cov + # Dev Dependencies + - pytest + - mypy + - black + - ruff + - pip: + - -r mkdocs-requirements.txt \ No newline at end of file diff --git a/mkdocs-requirements.txt b/.conda/mkdocs-requirements.txt similarity index 100% rename from mkdocs-requirements.txt rename to .conda/mkdocs-requirements.txt diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 7e354bcb..495ffc0a 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -17,4 +17,4 @@ mkdocs: # Optionally declare the Python requirements required to build your docs python: install: - - requirements: mkdocs-requirements.txt \ No newline at end of file + - requirements: .conda/mkdocs-requirements.txt \ No newline at end of file From 3beb5e26da9c50df8fca7d2134868db69fd2a9ad Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Tue, 30 Jan 2024 11:56:23 +1100 Subject: [PATCH 33/50] Make payu compatible in CI/CD --- .conda/build_env.yaml | 2 ++ .github/workflows/ci.yaml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.conda/build_env.yaml b/.conda/build_env.yaml index 94fc1f21..adf3ba6f 100644 --- a/.conda/build_env.yaml +++ b/.conda/build_env.yaml @@ -2,6 +2,8 @@ channels: - conda-forge + - accessnri + - coecms - default dependencies: diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index eae22c34..0312026f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -44,4 +44,4 @@ jobs: - name: Test conda build 
run: | conda install conda-build - conda build -c conda-forge . \ No newline at end of file + conda build -c conda-forge -c accessnri -c coecms . \ No newline at end of file From 80ed34bfa7f2b64e680c4c8f626e9120f186e919 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Thu, 18 Jan 2024 11:06:35 +1100 Subject: [PATCH 34/50] Improve documentation and add tests for optional project key Make `project` keyword optional in `config.yml` Improve documentation for config Move checks for optional project keyword, add documentation and tests Add tests for optional project key Remove comment for adding validation checks for project and move to another issue --- benchcab/benchcab.py | 1 + benchcab/config.py | 10 ++ benchcab/data/config-schema.yml | 1 + .../{config-valid.yml => config-basic.yml} | 2 +- benchcab/data/test/config-invalid.yml | 1 - benchcab/data/test/config-optional.yml | 35 ++++ benchcab/internal.py | 2 +- docs/user_guide/config_options.md | 3 +- docs/user_guide/index.md | 2 +- tests/test_config.py | 152 ++++++++++++------ 10 files changed, 154 insertions(+), 55 deletions(-) rename benchcab/data/test/{config-valid.yml => config-basic.yml} (93%) create mode 100644 benchcab/data/test/config-optional.yml diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index 0583198a..3ff79325 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -133,6 +133,7 @@ def fluxsite_submit_job( ) -> None: """Submits the PBS job script step in the fluxsite test workflow.""" config = self._get_config(config_path) + self._validate_environment(project=config["project"], modules=config["modules"]) if self.benchcab_exe_path is None: msg = "Path to benchcab executable is undefined." 
diff --git a/benchcab/config.py b/benchcab/config.py index 34b945f1..7c693db4 100644 --- a/benchcab/config.py +++ b/benchcab/config.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 """A module containing all *_config() functions.""" +import os +import sys from pathlib import Path import yaml @@ -81,6 +83,14 @@ def read_optional_key(config: dict): config : dict The configuration file with with/without optional keys """ + if "project" not in config: + if "PROJECT" not in os.environ: + msg = """Couldn't resolve project: check 'project' in config.yaml + and/or $PROJECT set in ~/.config/gadi-login.conf + """ + raise ValueError(msg) + config["project"] = os.environ["PROJECT"] + if "realisations" in config: for r in config["realisations"]: r["name"] = r.get("name") diff --git a/benchcab/data/config-schema.yml b/benchcab/data/config-schema.yml index 87235f87..280c5601 100644 --- a/benchcab/data/config-schema.yml +++ b/benchcab/data/config-schema.yml @@ -1,5 +1,6 @@ project: type: "string" + required: false modules: type: "list" diff --git a/benchcab/data/test/config-valid.yml b/benchcab/data/test/config-basic.yml similarity index 93% rename from benchcab/data/test/config-valid.yml rename to benchcab/data/test/config-basic.yml index e379119f..846e64ff 100644 --- a/benchcab/data/test/config-valid.yml +++ b/benchcab/data/test/config-basic.yml @@ -15,7 +15,7 @@ # # Strings can be given with or without double or single quotes. -project: w97 +# Uses minimal set of parameters required to run benchcab realisations: - repo: diff --git a/benchcab/data/test/config-invalid.yml b/benchcab/data/test/config-invalid.yml index 7657de60..d0781ddc 100644 --- a/benchcab/data/test/config-invalid.yml +++ b/benchcab/data/test/config-invalid.yml @@ -1,5 +1,4 @@ # A sample configuration that should fail validation. -project: w97 fluxsite: experiment: NON EXISTENT EXPERIMENT!!! 
diff --git a/benchcab/data/test/config-optional.yml b/benchcab/data/test/config-optional.yml new file mode 100644 index 00000000..a8c51abd --- /dev/null +++ b/benchcab/data/test/config-optional.yml @@ -0,0 +1,35 @@ +# Config with optional data +project: optional + +fluxsite: + experiment: AU-Tum + multiprocess: False + pbs: + ncpus: 6 + mem: 10GB + walltime: "10:00:00" + storage: + - scratch/$PROJECT + +science_configurations: + - cable: + cable_user: + GS_SWITCH: "test_gs" + FWSOIL_SWITCH: "test_fw" + +realisations: + - repo: + svn: + branch_path: trunk + name: svn_trunk + - repo: + svn: + branch_path: branches/Users/ccc561/v3.0-YP-changes + name: git_branch + + +modules: [ + intel-compiler/2021.1.1, + netcdf/4.7.4, + openmpi/4.1.0 +] \ No newline at end of file diff --git a/benchcab/internal.py b/benchcab/internal.py index 89056c5e..7aa0e19a 100644 --- a/benchcab/internal.py +++ b/benchcab/internal.py @@ -10,7 +10,7 @@ _, NODENAME, _, _, _ = os.uname() -CONFIG_REQUIRED_KEYS = ["realisations", "project", "modules", "experiment"] +CONFIG_REQUIRED_KEYS = ["realisations", "modules", "experiment"] # Parameters for job script: QSUB_FNAME = "benchmark_cable_qsub.sh" diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index 1d22c314..b1ee911c 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -32,7 +32,8 @@ The different running modes of `benchcab` are solely dependent on the options us ## project -: **Default:** _required key, no default_. :octicons-dash-24: NCI project ID to charge the simulations to. +NCI project ID to charge the simulations to. +This key is _optional_. If ID is not provided, the current workspace project - i.e. the environment variable `$PROJECT` will be used. 
``` yaml diff --git a/docs/user_guide/index.md b/docs/user_guide/index.md index 7a379379..ead57da0 100644 --- a/docs/user_guide/index.md +++ b/docs/user_guide/index.md @@ -20,7 +20,7 @@ To use `benchcab`, you need to join the following projects at NCI: - [ks32][ks32_mynci] - [hh5][hh5_mynci] -- [wd9][wd9_mynci] if not part of the [cable][cable_mynci] +- [wd9][wd9_mynci] if not part of the [cable][cable_mynci] project ## Installation diff --git a/tests/test_config.py b/tests/test_config.py index 818da246..3d799f86 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,23 +1,38 @@ """`pytest` tests for config.py.""" +import os +import re from contextlib import nullcontext as does_not_raise from pathlib import Path from pprint import pformat +from unittest import mock import pytest import benchcab.config as bc +import benchcab.internal as bi import benchcab.utils as bu +# Temporarily set $PROJECT for testing module +OPTIONAL_CONFIG_PROJECT = "tt1" + + +@pytest.fixture(autouse=True) +def _set_project_env_variable(monkeypatch): + # Clear existing environment variables first + with mock.patch.dict(os.environ, clear=True): + monkeypatch.setenv("PROJECT", OPTIONAL_CONFIG_PROJECT) + yield + @pytest.fixture() def config_str(request) -> str: - """Provide relative YAML path string from data files.""" + """Provide relative YAML path string of data files.""" return f"test/{request.param}" @pytest.fixture() def config_path(config_str: str) -> Path: - """Provide absolute YAML Path object from data files.""" + """Provide absolute YAML Path object of data files.""" return bu.get_installed_root() / "data" / config_str @@ -28,10 +43,12 @@ def empty_config() -> dict: @pytest.fixture() -def no_optional_config() -> dict: - """Config with no optional parameters.""" +def default_only_config() -> dict: + """Config with no optional parameters. 
+ + Reads from config-basic.yml + """ return { - "project": "w97", "modules": ["intel-compiler/2021.1.1", "netcdf/4.7.4", "openmpi/4.1.0"], "realisations": [ {"repo": {"svn": {"branch_path": "trunk"}}}, @@ -45,57 +62,65 @@ def no_optional_config() -> dict: @pytest.fixture() -def all_optional_config() -> dict: - """Config with all optional parameters.""" - return { - "project": "w97", +def all_optional_default_config(default_only_config) -> dict: + """Config with all optional parameters set as default. + + Reads from config-basic.yml + """ + config = default_only_config | { + "project": OPTIONAL_CONFIG_PROJECT, "fluxsite": { - "experiment": "forty-two-site-test", - "multiprocess": True, - "pbs": {"ncpus": 18, "mem": "30GB", "walltime": "6:00:00", "storage": []}, + "experiment": bi.FLUXSITE_DEFAULT_EXPERIMENT, + "multiprocess": bi.FLUXSITE_DEFAULT_MULTIPROCESS, + "pbs": bi.FLUXSITE_DEFAULT_PBS, }, - "modules": ["intel-compiler/2021.1.1", "netcdf/4.7.4", "openmpi/4.1.0"], - "realisations": [ - {"name": None, "repo": {"svn": {"branch_path": "trunk"}}}, - { - "name": None, - "repo": { - "svn": {"branch_path": "branches/Users/ccc561/v3.0-YP-changes"} - }, + "science_configurations": bi.DEFAULT_SCIENCE_CONFIGURATIONS, + } + for c_r in config["realisations"]: + c_r["name"] = None + + return config + + +@pytest.fixture() +def all_optional_custom_config(default_only_config) -> dict: + """Config with custom optional parameters. 
+ + Reads from config-optional.yml + """ + config = default_only_config | { + "project": "optional", + "fluxsite": { + "experiment": "AU-Tum", + "multiprocess": False, + "pbs": { + "ncpus": 6, + "mem": "10GB", + "walltime": "10:00:00", + "storage": ["scratch/$PROJECT"], }, - ], + }, "science_configurations": [ { "cable": { - "cable_user": {"FWSOIL_SWITCH": "Haverd2013", "GS_SWITCH": "medlyn"} + "cable_user": {"FWSOIL_SWITCH": "test_fw", "GS_SWITCH": "test_gs"} } - }, - { - "cable": { - "cable_user": { - "FWSOIL_SWITCH": "Haverd2013", - "GS_SWITCH": "leuning", - } - } - }, - { - "cable": { - "cable_user": {"FWSOIL_SWITCH": "standard", "GS_SWITCH": "medlyn"} - } - }, - { - "cable": { - "cable_user": {"FWSOIL_SWITCH": "standard", "GS_SWITCH": "leuning"} - } - }, + } ], } + branch_names = ["svn_trunk", "git_branch"] + + for c_r, b_n in zip(config["realisations"], branch_names): + c_r["name"] = b_n + + return config @pytest.mark.parametrize( ("config_str", "output_config", "pytest_error"), [ - ("config-valid.yml", "no_optional_config", does_not_raise()), + ("config-basic.yml", "default_only_config", does_not_raise()), + ("config-optional.yml", "all_optional_custom_config", does_not_raise()), ("config-missing.yml", "empty_config", pytest.raises(FileNotFoundError)), ], indirect=["config_str"], @@ -110,7 +135,8 @@ def test_read_config_file(config_path, output_config, pytest_error, request): @pytest.mark.parametrize( ("config_str", "pytest_error"), [ - ("config-valid.yml", does_not_raise()), + ("config-basic.yml", does_not_raise()), + ("config-optional.yml", does_not_raise()), ("config-invalid.yml", pytest.raises(bc.ConfigValidationError)), ], indirect=["config_str"], @@ -122,16 +148,42 @@ def test_validate_config(config_str, pytest_error): assert bc.validate_config(config) -@pytest.mark.parametrize("input_config", ["no_optional_config", "all_optional_config"]) -def test_read_optional_key_add_data(input_config, all_optional_config, request): +@pytest.mark.parametrize( + 
("input_config", "output_config"), + [ + ("default_only_config", "all_optional_default_config"), + ("all_optional_default_config", "all_optional_default_config"), + ("all_optional_custom_config", "all_optional_custom_config"), + ], +) +def test_read_optional_key_add_data(input_config, output_config, request): """Test default key-values are added if not provided by config.yaml, and existing keys stay intact.""" config = request.getfixturevalue(input_config) bc.read_optional_key(config) - assert pformat(config) == pformat(all_optional_config) + assert pformat(config) == pformat(request.getfixturevalue(output_config)) + + +def test_no_project(default_only_config, monkeypatch): + """If project key and $PROJECT are not provided, then raise error.""" + monkeypatch.delenv("PROJECT") + error_msg = re.escape( + """Couldn't resolve project: check 'project' in config.yaml + and/or $PROJECT set in ~/.config/gadi-login.conf + """ + ) + with pytest.raises(ValueError, match=error_msg): + bc.read_optional_key(default_only_config) -@pytest.mark.parametrize("config_str", ["config-valid.yml"], indirect=["config_str"]) -def test_read_config(config_path, all_optional_config): +@pytest.mark.parametrize( + ("config_str", "output_config"), + [ + ("config-basic.yml", "all_optional_default_config"), + ("config-optional.yml", "all_optional_custom_config"), + ], + indirect=["config_str"], +) +def test_read_config(config_path, output_config, request): """Test overall behaviour of read_config.""" - output_config = bc.read_config(config_path) - assert pformat(output_config) == pformat(all_optional_config) + config = bc.read_config(config_path) + assert pformat(config) == pformat(request.getfixturevalue(output_config)) From 1f1f8271bcd045a4deda4b82a21fbb7adc0063e4 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Mon, 29 Jan 2024 21:31:50 +1100 Subject: [PATCH 35/50] Add functionality to validate project groups in config --- benchcab/config.py | 15 +++++++++++-- 
benchcab/data/test/config-optional.yml | 2 +- tests/test_config.py | 30 +++++++++++++++++++++----- 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/benchcab/config.py b/benchcab/config.py index 7c693db4..1602b919 100644 --- a/benchcab/config.py +++ b/benchcab/config.py @@ -91,6 +91,17 @@ def read_optional_key(config: dict): raise ValueError(msg) config["project"] = os.environ["PROJECT"] + # Directory List is obtained from Gadi Resources - https://opus.nci.org.au/display/Help/0.+Welcome+to+Gadi + data_dirs = ["/g/data", "/scratch"] + groups = list( + set([group for data_dir in data_dirs for group in os.listdir(data_dir)]) + ) + + if config["project"] not in groups: + msg = f"User is not a member of project [{config['project']}]: Check if project key is correct" + + raise ValueError(msg) + if "realisations" in config: for r in config["realisations"]: r["name"] = r.get("name") @@ -152,8 +163,8 @@ def read_config(config_path: str) -> dict: """ # Read configuration file config = read_config_file(config_path) - # Populate configuration dict with optional keys - read_optional_key(config) # Validate and return. 
validate_config(config) + # Populate configuration dict with optional keys + read_optional_key(config) return config diff --git a/benchcab/data/test/config-optional.yml b/benchcab/data/test/config-optional.yml index a8c51abd..79b83a42 100644 --- a/benchcab/data/test/config-optional.yml +++ b/benchcab/data/test/config-optional.yml @@ -1,5 +1,5 @@ # Config with optional data -project: optional +project: hh5 fluxsite: experiment: AU-Tum diff --git a/tests/test_config.py b/tests/test_config.py index 3d799f86..221129eb 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -13,7 +13,7 @@ import benchcab.utils as bu # Temporarily set $PROJECT for testing module -OPTIONAL_CONFIG_PROJECT = "tt1" +OPTIONAL_CONFIG_PROJECT = "ks32" @pytest.fixture(autouse=True) @@ -24,6 +24,13 @@ def _set_project_env_variable(monkeypatch): yield +@pytest.fixture(autouse=True) +def _set_project_validation_dirs(): + with mock.patch("os.listdir") as mocked_listdir: + mocked_listdir.return_value = ["hh5", OPTIONAL_CONFIG_PROJECT] + yield + + @pytest.fixture() def config_str(request) -> str: """Provide relative YAML path string of data files.""" @@ -89,7 +96,7 @@ def all_optional_custom_config(default_only_config) -> dict: Reads from config-optional.yml """ config = default_only_config | { - "project": "optional", + "project": "hh5", "fluxsite": { "experiment": "AU-Tum", "multiprocess": False, @@ -163,15 +170,28 @@ def test_read_optional_key_add_data(input_config, output_config, request): assert pformat(config) == pformat(request.getfixturevalue(output_config)) -def test_no_project(default_only_config, monkeypatch): +def test_no_project_name(default_only_config, monkeypatch): """If project key and $PROJECT are not provided, then raise error.""" monkeypatch.delenv("PROJECT") - error_msg = re.escape( + err_msg = re.escape( """Couldn't resolve project: check 'project' in config.yaml and/or $PROJECT set in ~/.config/gadi-login.conf """ ) - with pytest.raises(ValueError, match=error_msg): + 
with pytest.raises(ValueError, match=err_msg): + bc.read_optional_key(default_only_config) + + +def test_user_not_in_project(default_only_config): + """If user is not in viewable NCI projects, raise error.""" + default_only_config["project"] = "non_existing" + err_msg = re.escape( + "User is not a member of project [non_existing]: Check if project key is correct" + ) + with pytest.raises( + ValueError, + match=err_msg, + ): bc.read_optional_key(default_only_config) From b78294bacf9f2382194a154eba2a7ab2939ddff6 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Tue, 30 Jan 2024 11:25:14 +1100 Subject: [PATCH 36/50] Improve code, documentation and modularize test cases for config --- benchcab/benchcab.py | 1 - benchcab/config.py | 11 +++-- benchcab/internal.py | 6 ++- tests/test_config.py | 115 ++++++++++++++++++++++--------------------- 4 files changed, 72 insertions(+), 61 deletions(-) diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index 3ff79325..0583198a 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -133,7 +133,6 @@ def fluxsite_submit_job( ) -> None: """Submits the PBS job script step in the fluxsite test workflow.""" config = self._get_config(config_path) - self._validate_environment(project=config["project"], modules=config["modules"]) if self.benchcab_exe_path is None: msg = "Path to benchcab executable is undefined." 
diff --git a/benchcab/config.py b/benchcab/config.py index 1602b919..93566895 100644 --- a/benchcab/config.py +++ b/benchcab/config.py @@ -3,7 +3,6 @@ """A module containing all *_config() functions.""" import os -import sys from pathlib import Path import yaml @@ -91,10 +90,14 @@ def read_optional_key(config: dict): raise ValueError(msg) config["project"] = os.environ["PROJECT"] - # Directory List is obtained from Gadi Resources - https://opus.nci.org.au/display/Help/0.+Welcome+to+Gadi - data_dirs = ["/g/data", "/scratch"] groups = list( - set([group for data_dir in data_dirs for group in os.listdir(data_dir)]) + set( + [ + group + for data_dir in internal.USER_PROJECT_DIRS + for group in os.listdir(data_dir) + ] + ) ) if config["project"] not in groups: diff --git a/benchcab/internal.py b/benchcab/internal.py index 7aa0e19a..6ccabaa9 100644 --- a/benchcab/internal.py +++ b/benchcab/internal.py @@ -10,7 +10,7 @@ _, NODENAME, _, _, _ = os.uname() -CONFIG_REQUIRED_KEYS = ["realisations", "modules", "experiment"] +CONFIG_REQUIRED_KEYS = ["realisations", "modules"] # Parameters for job script: QSUB_FNAME = "benchmark_cable_qsub.sh" @@ -28,6 +28,10 @@ # Path to the user's current working directory CWD = Path.cwd() +# Directory List is obtained from Gadi User Guide in Section - Gadi Resources +# https://opus.nci.org.au/display/Help/0.+Welcome+to+Gadi +USER_PROJECT_DIRS = ["/g/data", "/scratch"] + # Path to the user's home directory HOME_DIR = Path(os.environ["HOME"]) diff --git a/tests/test_config.py b/tests/test_config.py index 221129eb..5165c25e 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -12,10 +12,21 @@ import benchcab.internal as bi import benchcab.utils as bu -# Temporarily set $PROJECT for testing module +NO_OPTIONAL_CONFIG_PROJECT = "hh5" OPTIONAL_CONFIG_PROJECT = "ks32" +# Temporarily set $PROJECT for testing module +@pytest.fixture(autouse=True, scope="module") +def _set_project_validation_dirs(): + with mock.patch("os.listdir") as 
mocked_listdir: + mocked_listdir.return_value = [ + NO_OPTIONAL_CONFIG_PROJECT, + OPTIONAL_CONFIG_PROJECT, + ] + yield + + @pytest.fixture(autouse=True) def _set_project_env_variable(monkeypatch): # Clear existing environment variables first @@ -24,13 +35,6 @@ def _set_project_env_variable(monkeypatch): yield -@pytest.fixture(autouse=True) -def _set_project_validation_dirs(): - with mock.patch("os.listdir") as mocked_listdir: - mocked_listdir.return_value = ["hh5", OPTIONAL_CONFIG_PROJECT] - yield - - @pytest.fixture() def config_str(request) -> str: """Provide relative YAML path string of data files.""" @@ -50,10 +54,10 @@ def empty_config() -> dict: @pytest.fixture() -def default_only_config() -> dict: +def no_optional_config() -> dict: """Config with no optional parameters. - Reads from config-basic.yml + Expected value after reading from config-basic.yml """ return { "modules": ["intel-compiler/2021.1.1", "netcdf/4.7.4", "openmpi/4.1.0"], @@ -69,12 +73,12 @@ def default_only_config() -> dict: @pytest.fixture() -def all_optional_default_config(default_only_config) -> dict: - """Config with all optional parameters set as default. +def all_optional_default_config(no_optional_config) -> dict: + """Populate all keys in config with default optional values. - Reads from config-basic.yml + Expected value after reading from config-basic.yml """ - config = default_only_config | { + config = no_optional_config | { "project": OPTIONAL_CONFIG_PROJECT, "fluxsite": { "experiment": bi.FLUXSITE_DEFAULT_EXPERIMENT, @@ -90,13 +94,13 @@ def all_optional_default_config(default_only_config) -> dict: @pytest.fixture() -def all_optional_custom_config(default_only_config) -> dict: - """Config with custom optional parameters. +def all_optional_custom_config(no_optional_config) -> dict: + """Populate all keys in config with custom optional values. 
- Reads from config-optional.yml + Expected value after reading from config-optional.yml """ - config = default_only_config | { - "project": "hh5", + config = no_optional_config | { + "project": NO_OPTIONAL_CONFIG_PROJECT, "fluxsite": { "experiment": "AU-Tum", "multiprocess": False, @@ -126,7 +130,7 @@ def all_optional_custom_config(default_only_config) -> dict: @pytest.mark.parametrize( ("config_str", "output_config", "pytest_error"), [ - ("config-basic.yml", "default_only_config", does_not_raise()), + ("config-basic.yml", "no_optional_config", does_not_raise()), ("config-optional.yml", "all_optional_custom_config", does_not_raise()), ("config-missing.yml", "empty_config", pytest.raises(FileNotFoundError)), ], @@ -155,44 +159,45 @@ def test_validate_config(config_str, pytest_error): assert bc.validate_config(config) -@pytest.mark.parametrize( - ("input_config", "output_config"), - [ - ("default_only_config", "all_optional_default_config"), - ("all_optional_default_config", "all_optional_default_config"), - ("all_optional_custom_config", "all_optional_custom_config"), - ], -) -def test_read_optional_key_add_data(input_config, output_config, request): - """Test default key-values are added if not provided by config.yaml, and existing keys stay intact.""" - config = request.getfixturevalue(input_config) - bc.read_optional_key(config) - assert pformat(config) == pformat(request.getfixturevalue(output_config)) - +class TestReadOptionalKey: + """Tests related to adding optional keys in config.""" -def test_no_project_name(default_only_config, monkeypatch): - """If project key and $PROJECT are not provided, then raise error.""" - monkeypatch.delenv("PROJECT") - err_msg = re.escape( - """Couldn't resolve project: check 'project' in config.yaml - and/or $PROJECT set in ~/.config/gadi-login.conf - """ + @pytest.mark.parametrize( + ("input_config", "output_config"), + [ + ("no_optional_config", "all_optional_default_config"), + ("all_optional_default_config", 
"all_optional_default_config"), + ("all_optional_custom_config", "all_optional_custom_config"), + ], ) - with pytest.raises(ValueError, match=err_msg): - bc.read_optional_key(default_only_config) - + def test_read_optional_key_add_data(self, input_config, output_config, request): + """Test default key-values are added if not provided by config.yaml, and existing keys stay intact.""" + config = request.getfixturevalue(input_config) + bc.read_optional_key(config) + assert pformat(config) == pformat(request.getfixturevalue(output_config)) -def test_user_not_in_project(default_only_config): - """If user is not in viewable NCI projects, raise error.""" - default_only_config["project"] = "non_existing" - err_msg = re.escape( - "User is not a member of project [non_existing]: Check if project key is correct" - ) - with pytest.raises( - ValueError, - match=err_msg, - ): - bc.read_optional_key(default_only_config) + def test_no_project_name(self, no_optional_config, monkeypatch): + """If project key and $PROJECT are not provided, then raise error.""" + monkeypatch.delenv("PROJECT") + err_msg = re.escape( + """Couldn't resolve project: check 'project' in config.yaml + and/or $PROJECT set in ~/.config/gadi-login.conf + """ + ) + with pytest.raises(ValueError, match=err_msg): + bc.read_optional_key(no_optional_config) + + def test_user_not_in_project(self, no_optional_config): + """If user is not in viewable NCI projects, raise error.""" + no_optional_config["project"] = "non_existing" + err_msg = re.escape( + "User is not a member of project [non_existing]: Check if project key is correct" + ) + with pytest.raises( + ValueError, + match=err_msg, + ): + bc.read_optional_key(no_optional_config) @pytest.mark.parametrize( From e3f1f9ebf15c25eba58c5bcc7cfb99cc99f91274 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Wed, 31 Jan 2024 15:36:51 +1100 Subject: [PATCH 37/50] Move user membership in project tests to test_benchcab --- benchcab/benchcab.py | 20 +++++---- 
benchcab/config.py | 26 ++---------- benchcab/internal.py | 4 -- docs/user_guide/config_options.md | 3 +- tests/test_benchcab.py | 67 +++++++++++++++++++++++++++++++ tests/test_config.py | 42 ++++++------------- 6 files changed, 97 insertions(+), 65 deletions(-) create mode 100644 tests/test_benchcab.py diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index 0583198a..ae2643f8 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -64,15 +64,21 @@ def _validate_environment(self, project: str, modules: list): ) sys.exit(1) - required_groups = [project, "ks32", "hh5"] + if project is None: + msg = """Couldn't resolve project: check 'project' in config.yaml + and/or $PROJECT set in ~/.config/gadi-login.conf + """ + raise AttributeError(msg) + + required_groups = set([project, "ks32", "hh5"]) groups = [grp.getgrgid(gid).gr_name for gid in os.getgroups()] - if not set(required_groups).issubset(groups): - print( - "Error: user does not have the required group permissions.", - "The required groups are:", - ", ".join(required_groups), + if not required_groups.issubset(groups): + msg = ( + f"""Error: user does not have the required group permissions., + The required groups are:, + {", ".join(required_groups)}""", ) - sys.exit(1) + raise PermissionError(msg) for modname in modules: if not self.modules_handler.module_is_avail(modname): diff --git a/benchcab/config.py b/benchcab/config.py index 93566895..519b62b9 100644 --- a/benchcab/config.py +++ b/benchcab/config.py @@ -83,27 +83,7 @@ def read_optional_key(config: dict): The configuration file with with/without optional keys """ if "project" not in config: - if "PROJECT" not in os.environ: - msg = """Couldn't resolve project: check 'project' in config.yaml - and/or $PROJECT set in ~/.config/gadi-login.conf - """ - raise ValueError(msg) - config["project"] = os.environ["PROJECT"] - - groups = list( - set( - [ - group - for data_dir in internal.USER_PROJECT_DIRS - for group in os.listdir(data_dir) - ] - ) - ) 
- - if config["project"] not in groups: - msg = f"User is not a member of project [{config['project']}]: Check if project key is correct" - - raise ValueError(msg) + config["project"] = os.environ.get("PROJECT", None) if "realisations" in config: for r in config["realisations"]: @@ -166,8 +146,8 @@ def read_config(config_path: str) -> dict: """ # Read configuration file config = read_config_file(config_path) - # Validate and return. - validate_config(config) # Populate configuration dict with optional keys read_optional_key(config) + # Validate and return. + validate_config(config) return config diff --git a/benchcab/internal.py b/benchcab/internal.py index 6ccabaa9..aeea3e2d 100644 --- a/benchcab/internal.py +++ b/benchcab/internal.py @@ -28,10 +28,6 @@ # Path to the user's current working directory CWD = Path.cwd() -# Directory List is obtained from Gadi User Guide in Section - Gadi Resources -# https://opus.nci.org.au/display/Help/0.+Welcome+to+Gadi -USER_PROJECT_DIRS = ["/g/data", "/scratch"] - # Path to the user's home directory HOME_DIR = Path(os.environ["HOME"]) diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index b1ee911c..2026a725 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -32,8 +32,7 @@ The different running modes of `benchcab` are solely dependent on the options us ## project -NCI project ID to charge the simulations to. -This key is _optional_. If ID is not provided, the current workspace project - i.e. the environment variable `$PROJECT` will be used. +: **Default:** user's default project, _optional key_. :octicons-dash-24: NCI project ID to charge the simulations to. The user's default project defined in the $PROJECT environment variable is used by default. 
``` yaml diff --git a/tests/test_benchcab.py b/tests/test_benchcab.py new file mode 100644 index 00000000..5fdc3708 --- /dev/null +++ b/tests/test_benchcab.py @@ -0,0 +1,67 @@ +"""`pytest` tests for `benchcab.py`.""" +import re +from contextlib import nullcontext as does_not_raise +from unittest import mock + +import pytest + +from benchcab.benchcab import Benchcab + + +@pytest.fixture(scope="module", autouse=True) +def _set_user_projects(): + with mock.patch("grp.getgrgid") as mocked_getgrid, mock.patch( + "os.getgroups" + ) as mocked_groups: + type(mocked_getgrid.return_value).gr_name = mock.PropertyMock( + return_value="hh5" + ) + mocked_groups.return_value = [1] + yield + + +@pytest.fixture(scope="module", params=["hh5", "invalid_project_name"]) +def config_project(request): + """Get config project name.""" + return request.param + + +# Error message if config project name cannot be resolved. +no_project_name_msg = re.escape( + """Couldn't resolve project: check 'project' in config.yaml + and/or $PROJECT set in ~/.config/gadi-login.conf + """ +) + +# For testing whether user is member of necessary projects to run benchcab, we need to simulate the environment of Gadi +# TODO: Simulate Gadi environment for running tests for validating environment + + +@pytest.mark.skip() +@pytest.mark.parametrize( + ("config_project", "pytest_error"), + [ + ("hh5", does_not_raise()), + (None, pytest.raises(AttributeError, match=no_project_name_msg)), + ], +) +def test_project_name(config_project, pytest_error): + """Tests whether config project name is suitable to run in Gadi environment.""" + app = Benchcab(benchcab_exe_path=None) + with pytest_error: + app._validate_environment(project=config_project, modules=[]) + + +@pytest.mark.skip() +@pytest.mark.parametrize( + ("config_project", "pytest_error"), + [ + ("hh5", does_not_raise()), + ("invalid_project_name", pytest.raises(PermissionError)), + ], +) +def test_user_project_group(config_project, pytest_error): + """Test 
_validate_environment for if current user's groups does not contain the project name.""" + app = Benchcab(benchcab_exe_path=None) + with pytest_error: + app._validate_environment(project=config_project, modules=[]) diff --git a/tests/test_config.py b/tests/test_config.py index 5165c25e..1e98eeb4 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -17,16 +17,6 @@ # Temporarily set $PROJECT for testing module -@pytest.fixture(autouse=True, scope="module") -def _set_project_validation_dirs(): - with mock.patch("os.listdir") as mocked_listdir: - mocked_listdir.return_value = [ - NO_OPTIONAL_CONFIG_PROJECT, - OPTIONAL_CONFIG_PROJECT, - ] - yield - - @pytest.fixture(autouse=True) def _set_project_env_variable(monkeypatch): # Clear existing environment variables first @@ -162,6 +152,13 @@ def test_validate_config(config_str, pytest_error): class TestReadOptionalKey: """Tests related to adding optional keys in config.""" + @pytest.fixture() + def all_optional_default_config_no_project( + self, all_optional_default_config + ) -> dict: + """Set project keyword to None.""" + return all_optional_default_config | {"project": None} + @pytest.mark.parametrize( ("input_config", "output_config"), [ @@ -176,28 +173,15 @@ def test_read_optional_key_add_data(self, input_config, output_config, request): bc.read_optional_key(config) assert pformat(config) == pformat(request.getfixturevalue(output_config)) - def test_no_project_name(self, no_optional_config, monkeypatch): + def test_no_project_name( + self, no_optional_config, all_optional_default_config_no_project, monkeypatch + ): """If project key and $PROJECT are not provided, then raise error.""" monkeypatch.delenv("PROJECT") - err_msg = re.escape( - """Couldn't resolve project: check 'project' in config.yaml - and/or $PROJECT set in ~/.config/gadi-login.conf - """ - ) - with pytest.raises(ValueError, match=err_msg): - bc.read_optional_key(no_optional_config) - - def test_user_not_in_project(self, no_optional_config): - 
"""If user is not in viewable NCI projects, raise error.""" - no_optional_config["project"] = "non_existing" - err_msg = re.escape( - "User is not a member of project [non_existing]: Check if project key is correct" + bc.read_optional_key(no_optional_config) + assert pformat(no_optional_config) == pformat( + all_optional_default_config_no_project ) - with pytest.raises( - ValueError, - match=err_msg, - ): - bc.read_optional_key(no_optional_config) @pytest.mark.parametrize( From d8551d74a3f7da2e64d7d55b7e1309cfa0c44a99 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Fri, 2 Feb 2024 13:02:02 +1100 Subject: [PATCH 38/50] Remove spatial configuration tests for hardcoded payu_config --- benchcab/spatial.py | 22 +--------------------- tests/test_spatial.py | 7 ------- 2 files changed, 1 insertion(+), 28 deletions(-) diff --git a/benchcab/spatial.py b/benchcab/spatial.py index f1c1f894..05d3e1dd 100644 --- a/benchcab/spatial.py +++ b/benchcab/spatial.py @@ -81,27 +81,7 @@ def configure_experiment(self, payu_config: Optional[dict] = None, verbose=False # Here we prepend inputs to the `input` list so that payu knows to use # our inputs over the pre-existing inputs in the config file: - config["input"] = [ - # Note: only necessary for CABLE v2 - str( - ( - internal.CABLE_AUX_DIR - / "core" - / "biogeophys" - / "def_veg_params_zr_clitt_albedo_fix.txt" - ).absolute() - ), - # Note: only necessary for CABLE v2 - str( - ( - internal.CABLE_AUX_DIR - / "core" - / "biogeophys" - / "def_soil_params.txt" - ).absolute() - ), - *config.get("input", []), - ] + config["input"] = config.get("input", []) config["laboratory"] = str(internal.PAYU_LABORATORY_DIR.absolute()) diff --git a/tests/test_spatial.py b/tests/test_spatial.py index 93df6235..34df757c 100644 --- a/tests/test_spatial.py +++ b/tests/test_spatial.py @@ -93,13 +93,6 @@ def test_payu_config_parameters(self, task): internal.SRC_DIR / "test-branch" / "offline" / internal.CABLE_MPI_EXE ).absolute() ) - assert config["input"] 
== [ - str( - internal.CABLE_AUX_DIR - / Path("core/biogeophys/def_veg_params_zr_clitt_albedo_fix.txt") - ), - str(internal.CABLE_AUX_DIR / Path("core/biogeophys/def_soil_params.txt")), - ] assert config["laboratory"] == str(internal.PAYU_LABORATORY_DIR.absolute()) assert config["some_parameter"] == "foo" From 1ba25b0249c784e63f5d994455937175ffba5aec Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Tue, 6 Feb 2024 11:42:46 +1100 Subject: [PATCH 39/50] Reverted to native logger singleton, less flexible but pickles. #103 --- benchcab/benchcab.py | 36 ++++---- benchcab/fluxsite.py | 15 ++-- benchcab/main.py | 3 + benchcab/model.py | 6 +- benchcab/utils/__init__.py | 40 +++++++-- benchcab/utils/singleton.py | 1 + benchcab/utils/singleton_logger.py | 129 ----------------------------- tests/test_utils.py | 10 ++- 8 files changed, 68 insertions(+), 172 deletions(-) delete mode 100644 benchcab/utils/singleton_logger.py diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index 9418597e..d109e2b5 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -159,11 +159,11 @@ def fluxsite_submit_job(self, config_path: str, skip: list[str]) -> None: job_script_path = Path(internal.QSUB_FNAME) self.logger.info( - [ - "Creating PBS job script to run fluxsite tasks on compute " - f"nodes: {job_script_path}" - ] + 'Creating PBS job script to run fluxsite tasks on compute nodes' ) + + self.logger.info(f'job_script_path = {job_script_path}') + with job_script_path.open("w", encoding="utf-8") as file: contents = render_job_script( project=config["project"], @@ -185,17 +185,13 @@ def fluxsite_submit_job(self, config_path: str, skip: list[str]) -> None: self.logger.error(exc.output) raise - self.logger.info( - [ - f"PBS job submitted: {proc.stdout.strip()}", - "The CABLE log file for each task is written to", - f"{internal.FLUXSITE_DIRS['LOG']}/_log.txt", - "The CABLE standard output for each task is written to", - f"{internal.FLUXSITE_DIRS['TASKS']}//out.txt", - "The NetCDF 
output for each task is written to", - f"{internal.FLUXSITE_DIRS['OUTPUT']}/_out.nc", - ] - ) + self.logger.info(f'PBS job submitted: {proc.stdout.strip()}') + self.logger.info('CABLE log file for each task is written to:') + self.logger.info(f"{internal.FLUXSITE_DIRS['LOG']}/_log.txt") + self.logger.info("The CABLE standard output for each task is written to:") + self.logger.info(f"{internal.FLUXSITE_DIRS['TASKS']}//out.txt") + self.logger.info("The NetCDF output for each task is written to:") + self.logger.info(f"{internal.FLUXSITE_DIRS['OUTPUT']}/_out.nc") def checkout(self, config_path: str): """Endpoint for `benchcab checkout`.""" @@ -230,13 +226,11 @@ def build(self, config_path: str): for repo in self._get_models(config): if repo.build_script: - self.logger.info( - [ - "Compiling CABLE using custom build script for ", - f"realisation {repo.name}...", - ] - ) + + self.logger.info("Compiling CABLE using custom build script for") + self.logger.info(f"realisation {repo.name}") repo.custom_build(modules=config["modules"]) + else: build_mode = "with MPI" if internal.MPI else "serially" self.logger.info( diff --git a/benchcab/fluxsite.py b/benchcab/fluxsite.py index 39900504..8462543d 100644 --- a/benchcab/fluxsite.py +++ b/benchcab/fluxsite.py @@ -18,9 +18,9 @@ from benchcab import __version__, internal from benchcab.comparison import ComparisonTask from benchcab.model import Model -from benchcab.utils import get_logger from benchcab.utils.fs import chdir, mkdir from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface +from benchcab.utils import get_logger # fmt: off # ====================================================== @@ -289,12 +289,9 @@ def run(self): """Runs a single fluxsite task.""" task_name = self.get_task_name() task_dir = internal.FLUXSITE_DIRS["TASKS"] / task_name - self.logger.debug( - [ - f"Running task {task_name}... 
CABLE standard output " - f"saved in {task_dir / internal.CABLE_STDOUT_FILENAME}" - ] - ) + self.logger.debug(f"Running task {task_name}... CABLE standard output ") + self.logger.debug(f"saved in {task_dir / internal.CABLE_STDOUT_FILENAME}") + try: self.run_cable() self.add_provenance_info() @@ -322,7 +319,7 @@ def run_cable(self): output_file=stdout_path.relative_to(task_dir), ) except CalledProcessError as exc: - self.logger.error(f"Error: CABLE returned an error for task {task_name}") + self.logger.debug(f"Error: CABLE returned an error for task {task_name}") raise CableError from exc def add_provenance_info(self): @@ -334,7 +331,7 @@ def add_provenance_info(self): nc_output_path = internal.FLUXSITE_DIRS["OUTPUT"] / self.get_output_filename() nml = f90nml.read( internal.FLUXSITE_DIRS["TASKS"] / self.get_task_name() / internal.CABLE_NML - ) + ) self.logger.debug(f"Adding attributes to output file: {nc_output_path}") with netCDF4.Dataset(nc_output_path, "r+") as nc_output: nc_output.setncatts( diff --git a/benchcab/main.py b/benchcab/main.py index 762ee0ee..87e56d53 100644 --- a/benchcab/main.py +++ b/benchcab/main.py @@ -21,6 +21,9 @@ def parse_and_dispatch(parser): # Intercept the verbosity flag to engage the logger log_level = "debug" if args.get("verbose", False) is True else "info" + # Remove the verbose argument + _ = args.pop('verbose') + # We just need to instantiate this with the desired level get_logger(level=log_level) diff --git a/benchcab/model.py b/benchcab/model.py index df78b6fc..71af42aa 100644 --- a/benchcab/model.py +++ b/benchcab/model.py @@ -103,11 +103,9 @@ def custom_build(self, modules: list[str]): tmp_script_path.chmod(tmp_script_path.stat().st_mode | stat.S_IEXEC) self.logger.debug( - [ - f"Modifying {tmp_script_path.name}: remove lines that call " - "environment modules" - ] + f"Modifying {tmp_script_path.name}: remove lines that call environment modules" ) + remove_module_lines(tmp_script_path) with chdir(build_script_path.parent), 
self.modules_handler.load(modules): diff --git a/benchcab/utils/__init__.py b/benchcab/utils/__init__.py index c771d96c..d8355d57 100644 --- a/benchcab/utils/__init__.py +++ b/benchcab/utils/__init__.py @@ -12,7 +12,9 @@ import yaml -from benchcab.utils.singleton_logger import SingletonLogger +# from benchcab.utils.singleton_logger import SingletonLogger +import logging +import sys # List of one-argument decoding functions. PACKAGE_DATA_DECODERS = dict(json=json.loads, yml=yaml.safe_load) @@ -50,19 +52,41 @@ def load_package_data(filename: str) -> dict: return PACKAGE_DATA_DECODERS[ext](raw) -def get_logger(name="benchcab", level="debug"): - """Get a singleton logger object. +def get_logger(name='benchcab', level='debug'): + """Get a logger instance. Parameters ---------- name : str, optional - Name of the logger, by default 'benchcab' + Name, by default 'benchcab' level : str, optional - Level of logging, by default 'debug' + Level, by default 'debug' Returns ------- - benchcab.utils.SingletonLogger - Logger instance. + logging.Logger + A logger instance guaranteed to be singleton if called with the same params. + """ - return SingletonLogger(name=name, level=level) + # Get or create a logger + logger = logging.getLogger(name) + + # Workaround for native singleton property. + # NOTE: This will ignore the provided level and give you whatever was first set. 
+ if logger.level != logging.NOTSET: + return logger + + # Set the level + level = getattr(logging, level.upper()) + logger.setLevel(level) + + # Create the formatter + log_format = "%(asctime)s - %(levelname)s - %(module)s.%(filename)s:%(lineno)s - %(message)s" + formatter = logging.Formatter(log_format) + + # Create/set the handler to point to stdout + handler = logging.StreamHandler(sys.stdout) + handler.setFormatter(formatter) + logger.addHandler(handler) + + return logger \ No newline at end of file diff --git a/benchcab/utils/singleton.py b/benchcab/utils/singleton.py index c3318337..445b5797 100644 --- a/benchcab/utils/singleton.py +++ b/benchcab/utils/singleton.py @@ -13,6 +13,7 @@ def __call__(cls, *args, **kwargs): ------- object The object that metaclasses this base class. + """ if cls not in cls._instances: cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) diff --git a/benchcab/utils/singleton_logger.py b/benchcab/utils/singleton_logger.py deleted file mode 100644 index ed7ad01f..00000000 --- a/benchcab/utils/singleton_logger.py +++ /dev/null @@ -1,129 +0,0 @@ -"""Singleton Logging Object.""" -import logging -import sys -from typing import Union - -from benchcab.utils.singleton import Singleton - - -class SingletonLogger(logging.Logger, metaclass=Singleton): - """A singleton logging interface.""" - - def __init__(self, name: str = "benchcab", level: str = "debug"): - """Singleton logging instance. 
- - Parameters - ---------- - name : str, optional - Name of the logger, by default 'benchcab' (this ensures singleton-ness) - level : str, optional - Log level, by default 'debug' - """ - # Initialise the logger - super(SingletonLogger, self).__init__(name=name) - - # Set level - level = getattr(logging, level.upper()) - self.setLevel(level) - - # Create the formatter - log_format = "%(asctime)s - %(levelname)s - %(module)s.%(filename)s:%(lineno)s - %(message)s" - formatter = logging.Formatter(log_format) - - # Create/set the handler to point to stdout - handler = logging.StreamHandler(sys.stdout) - handler.setFormatter(formatter) - self.addHandler(handler) - - def _check_multiline(self, msg: Union[str, list, tuple]) -> str: - """Automatically join multiline output. - - Parameters - ---------- - msg : str, list or tuple - Message or message fragments. - - Returns - ------- - str - Original string or fragments joined with newlines. - """ - if type(msg) in [list, tuple]: - return "\n".join([str(m) for m in msg]) - - return msg - - def debug(self, msg, *args, **kwargs): - """Emit a debug line, with multiline support. - - Parameters - ---------- - msg : str or list - Message or message fragments for additional detail. - *args : - Passed to super().debug() - **kwargs : - Passed to super().debug() - """ - msg = self._check_multiline(msg) - super().debug(msg, *args, **kwargs) - - def info(self, msg, *args, **kwargs): - """Emit a info line, with multiline support. - - Parameters - ---------- - msg : str or list - Message or message fragments for additional detail. - *args : - Passed to super().debug() - **kwargs : - Passed to super().debug() - """ - msg = self._check_multiline(msg) - super().info(msg, *args, **kwargs) - - def warn(self, msg, *args, **kwargs): - """Emit a warn line, with multiline support. - - Parameters - ---------- - msg : str or list - Message or message fragments for additional detail. 
- *args : - Passed to super().debug() - **kwargs : - Passed to super().debug() - """ - msg = self._check_multiline(msg) - super().warn(msg, *args, **kwargs) - - def error(self, msg, *args, **kwargs): - """Emit a error line, with multiline support. - - Parameters - ---------- - msg : str or list - Message or message fragments for additional detail. - *args : - Passed to super().debug() - **kwargs : - Passed to super().debug() - """ - msg = self._check_multiline(msg) - super().error(msg, *args, **kwargs) - - def critical(self, msg, *args, **kwargs): - """Emit a critical line, with multiline support. - - Parameters - ---------- - msg : str or list - Message or message fragments for additional detail. - *args : - Passed to super().debug() - **kwargs : - Passed to super().debug() - """ - msg = self._check_multiline(msg) - super().critical(msg, *args, **kwargs) diff --git a/tests/test_utils.py b/tests/test_utils.py index 61514e4a..fda578b2 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -21,9 +21,17 @@ def test_load_package_data_fail(): bu.load_package_data("config-missing.yml") -def test_get_logger_singleton(): +def test_get_logger_singleton_pass(): """Test get_logger() returns a singleton object...""" logger1 = bu.get_logger(name="benchcab") logger2 = bu.get_logger(name="benchcab") assert logger1 is logger2 + + +def test_get_logger_singleton_fail(): + """Test get_logger() returns a singleton object...""" + logger1 = bu.get_logger(name="benchcab") + logger2 = bu.get_logger(name="benchcab2") + + assert logger1 is not logger2 \ No newline at end of file From 7d845320cc24820f70e62945f5d9e27664879da9 Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Tue, 6 Feb 2024 11:58:40 +1100 Subject: [PATCH 40/50] Applied black/ruff checks, ran integration tests successfully. 
Fixes #103 --- benchcab/benchcab.py | 14 +++++++------- benchcab/fluxsite.py | 2 +- benchcab/main.py | 2 +- benchcab/model.py | 2 +- benchcab/utils/__init__.py | 10 ++++++---- benchcab/utils/singleton.py | 2 +- tests/test_config.py | 1 + tests/test_utils.py | 3 ++- 8 files changed, 20 insertions(+), 16 deletions(-) diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index d109e2b5..11d55438 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -159,11 +159,11 @@ def fluxsite_submit_job(self, config_path: str, skip: list[str]) -> None: job_script_path = Path(internal.QSUB_FNAME) self.logger.info( - 'Creating PBS job script to run fluxsite tasks on compute nodes' + "Creating PBS job script to run fluxsite tasks on compute nodes" ) - self.logger.info(f'job_script_path = {job_script_path}') - + self.logger.info(f"job_script_path = {job_script_path}") + with job_script_path.open("w", encoding="utf-8") as file: contents = render_job_script( project=config["project"], @@ -185,8 +185,8 @@ def fluxsite_submit_job(self, config_path: str, skip: list[str]) -> None: self.logger.error(exc.output) raise - self.logger.info(f'PBS job submitted: {proc.stdout.strip()}') - self.logger.info('CABLE log file for each task is written to:') + self.logger.info(f"PBS job submitted: {proc.stdout.strip()}") + self.logger.info("CABLE log file for each task is written to:") self.logger.info(f"{internal.FLUXSITE_DIRS['LOG']}/_log.txt") self.logger.info("The CABLE standard output for each task is written to:") self.logger.info(f"{internal.FLUXSITE_DIRS['TASKS']}//out.txt") @@ -226,11 +226,11 @@ def build(self, config_path: str): for repo in self._get_models(config): if repo.build_script: - + self.logger.info("Compiling CABLE using custom build script for") self.logger.info(f"realisation {repo.name}") repo.custom_build(modules=config["modules"]) - + else: build_mode = "with MPI" if internal.MPI else "serially" self.logger.info( diff --git a/benchcab/fluxsite.py b/benchcab/fluxsite.py 
index 8462543d..c2b45bd1 100644 --- a/benchcab/fluxsite.py +++ b/benchcab/fluxsite.py @@ -331,7 +331,7 @@ def add_provenance_info(self): nc_output_path = internal.FLUXSITE_DIRS["OUTPUT"] / self.get_output_filename() nml = f90nml.read( internal.FLUXSITE_DIRS["TASKS"] / self.get_task_name() / internal.CABLE_NML - ) + ) self.logger.debug(f"Adding attributes to output file: {nc_output_path}") with netCDF4.Dataset(nc_output_path, "r+") as nc_output: nc_output.setncatts( diff --git a/benchcab/main.py b/benchcab/main.py index 87e56d53..c935ebf7 100644 --- a/benchcab/main.py +++ b/benchcab/main.py @@ -22,7 +22,7 @@ def parse_and_dispatch(parser): log_level = "debug" if args.get("verbose", False) is True else "info" # Remove the verbose argument - _ = args.pop('verbose') + _ = args.pop("verbose") # We just need to instantiate this with the desired level get_logger(level=log_level) diff --git a/benchcab/model.py b/benchcab/model.py index 71af42aa..426e9dd2 100644 --- a/benchcab/model.py +++ b/benchcab/model.py @@ -105,7 +105,7 @@ def custom_build(self, modules: list[str]): self.logger.debug( f"Modifying {tmp_script_path.name}: remove lines that call environment modules" ) - + remove_module_lines(tmp_script_path) with chdir(build_script_path.parent), self.modules_handler.load(modules): diff --git a/benchcab/utils/__init__.py b/benchcab/utils/__init__.py index d8355d57..d103f4e8 100644 --- a/benchcab/utils/__init__.py +++ b/benchcab/utils/__init__.py @@ -52,7 +52,7 @@ def load_package_data(filename: str) -> dict: return PACKAGE_DATA_DECODERS[ext](raw) -def get_logger(name='benchcab', level='debug'): +def get_logger(name="benchcab", level="debug"): """Get a logger instance. Parameters @@ -66,7 +66,7 @@ def get_logger(name='benchcab', level='debug'): ------- logging.Logger A logger instance guaranteed to be singleton if called with the same params. 
- + """ # Get or create a logger logger = logging.getLogger(name) @@ -81,7 +81,9 @@ def get_logger(name='benchcab', level='debug'): logger.setLevel(level) # Create the formatter - log_format = "%(asctime)s - %(levelname)s - %(module)s.%(filename)s:%(lineno)s - %(message)s" + log_format = ( + "%(asctime)s - %(levelname)s - %(module)s.%(filename)s:%(lineno)s - %(message)s" + ) formatter = logging.Formatter(log_format) # Create/set the handler to point to stdout @@ -89,4 +91,4 @@ def get_logger(name='benchcab', level='debug'): handler.setFormatter(formatter) logger.addHandler(handler) - return logger \ No newline at end of file + return logger diff --git a/benchcab/utils/singleton.py b/benchcab/utils/singleton.py index 445b5797..bdf5f427 100644 --- a/benchcab/utils/singleton.py +++ b/benchcab/utils/singleton.py @@ -13,7 +13,7 @@ def __call__(cls, *args, **kwargs): ------- object The object that metaclasses this base class. - + """ if cls not in cls._instances: cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) diff --git a/tests/test_config.py b/tests/test_config.py index 6f2d3d6a..0a0cbf36 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,4 +1,5 @@ """`pytest` tests for config.py.""" + import pytest import benchcab.config as bc diff --git a/tests/test_utils.py b/tests/test_utils.py index fda578b2..a18dd7e5 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,4 +1,5 @@ """Tests for utilities.""" + import pytest import benchcab.utils as bu @@ -34,4 +35,4 @@ def test_get_logger_singleton_fail(): logger1 = bu.get_logger(name="benchcab") logger2 = bu.get_logger(name="benchcab2") - assert logger1 is not logger2 \ No newline at end of file + assert logger1 is not logger2 From 2387d1fb00bf3e0ea667b8dcf61556d7f19123ce Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Thu, 8 Feb 2024 12:30:22 +1100 Subject: [PATCH 41/50] Added template interpolation routines. #251. 
--- benchcab/data/test/template.j2 | 1 + benchcab/utils/__init__.py | 44 +++++++++++++++++++++++++++++++++- tests/test_utils.py | 26 +++++++++++++++++++- 3 files changed, 69 insertions(+), 2 deletions(-) create mode 100644 benchcab/data/test/template.j2 diff --git a/benchcab/data/test/template.j2 b/benchcab/data/test/template.j2 new file mode 100644 index 00000000..356d8daa --- /dev/null +++ b/benchcab/data/test/template.j2 @@ -0,0 +1 @@ +This is a template. {{myarg}} \ No newline at end of file diff --git a/benchcab/utils/__init__.py b/benchcab/utils/__init__.py index 9ac1e955..a8e830d0 100644 --- a/benchcab/utils/__init__.py +++ b/benchcab/utils/__init__.py @@ -8,10 +8,14 @@ import os from importlib import resources from pathlib import Path +from jinja2 import Environment, BaseLoader # List of one-argument decoding functions. -PACKAGE_DATA_DECODERS = dict(json=json.loads, yml=yaml.safe_load) +PACKAGE_DATA_DECODERS = dict( + json=json.loads, + yml=yaml.safe_load +) def get_installed_root() -> Path: @@ -42,5 +46,43 @@ def load_package_data(filename: str) -> dict: # Extract from the installations data directory. raw = pkgutil.get_data("benchcab", os.path.join("data", filename)).decode("utf-8") + # If there is no explicit decoder, just return the raw text + if ext not in PACKAGE_DATA_DECODERS.keys(): + return raw + # Decode and return. return PACKAGE_DATA_DECODERS[ext](raw) + + +def interpolate_string_template(template, **kwargs): + """Interpolate a string template with kwargs. + + Parameters + ---------- + template : str + Template string to interpolate over. + + Returns + ------- + str + Interpolated string. + """ + _template = Environment(loader=BaseLoader()).from_string(template) + return _template.render(**kwargs) + + +def interpolate_file_template(template_file, **kwargs): + """Interpolate kwargs directly into a j2 template file from the data directory. + + Parameters + ---------- + template_file : str + Filepath slug in the benchcab data directory. 
+ + Returns + ------- + str + Interpolated template string. + """ + template = load_package_data(template_file) + return interpolate_string_template(template, **kwargs) \ No newline at end of file diff --git a/tests/test_utils.py b/tests/test_utils.py index 848161db..483ef6d0 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -20,4 +20,28 @@ def test_load_package_data_fail(): """Test load_package_data() fails as expected.""" with pytest.raises(FileNotFoundError): - missing = bu.load_package_data('config-missing.yml') \ No newline at end of file + missing = bu.load_package_data('config-missing.yml') + + +def test_interpolate_string_template_pass(): + """Test interpolate_string_template() passes as expected.""" + result = bu.interpolate_string_template('I should {{status}}', status='pass') + assert result == 'I should pass' + + +def test_interpolate_string_template_fail(): + """Test interpolate_string_template() fails as expected.""" + result = bu.interpolate_string_template('I should {{status}}', status='fail') + assert result != 'I should not pass' + + +def test_interpolate_file_template_pass(): + """Test interpolate_file_template() passes as expected.""" + result = bu.interpolate_file_template('test/template.j2', myarg='PASS') + assert result == 'This is a template. PASS' + + +def test_interpolate_file_template_fail(): + """Test interpolate_file_template() fails as expected.""" + result = bu.interpolate_file_template('test/template.j2', notmyarg='PASS') + assert result != 'This is a template. PASS' \ No newline at end of file From 222d16dd5d05068fae8765367ab9971b3efd5ebc Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Tue, 13 Feb 2024 12:00:32 +1100 Subject: [PATCH 42/50] Ruff checks, pytest passes. #103. 
--- benchcab/benchcab.py | 14 ++++---------- benchcab/fluxsite.py | 2 +- benchcab/model.py | 2 +- benchcab/utils/__init__.py | 2 -- 4 files changed, 6 insertions(+), 14 deletions(-) diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index db7746da..dc9e401c 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -169,6 +169,7 @@ def fluxsite_submit_job(self, config_path: str, skip: list[str]) -> None: project=config["project"], config_path=config_path, modules=config["modules"], + pbs_config=config["fluxsite"]["pbs"], skip_bitwise_cmp="fluxsite-bitwise-cmp" in skip, benchcab_path=str(self.benchcab_exe_path), ) @@ -201,18 +202,11 @@ def checkout(self, config_path: str): self.logger.info("Checking out repositories...") rev_number_log = "" + for model in self._get_models(config): model.repo.checkout() rev_number_log += f"{model.name}: {model.repo.get_revision()}\n" - # TODO(Sean) we should archive revision numbers for CABLE-AUX - cable_aux_repo = SVNRepo( - svn_root=internal.CABLE_SVN_ROOT, - branch_path=internal.CABLE_AUX_RELATIVE_SVN_PATH, - path=internal.SRC_DIR / "CABLE-AUX", - ) - cable_aux_repo.checkout(verbose=verbose) - rev_number_log_path = next_path("rev_number-*.log") self.logger.info(f"Writing revision number info to {rev_number_log_path}") with rev_number_log_path.open("w", encoding="utf-8") as file: @@ -259,7 +253,7 @@ def fluxsite_run_tasks(self, config_path: str): self._validate_environment(project=config["project"], modules=config["modules"]) tasks = self.tasks if self.tasks else self._initialise_tasks(config) - print("Running fluxsite tasks...") + self.logger.debug("Running fluxsite tasks...") try: multiprocess = config["fluxsite"]["multiprocess"] except KeyError: @@ -286,7 +280,7 @@ def fluxsite_bitwise_cmp(self, config_path: str): tasks = self.tasks if self.tasks else self._initialise_tasks(config) comparisons = get_fluxsite_comparisons(tasks) - print("Running comparison tasks...") + self.logger.debug("Running comparison tasks...") 
try: multiprocess = config["fluxsite"]["multiprocess"] except KeyError: diff --git a/benchcab/fluxsite.py b/benchcab/fluxsite.py index 70a0dcd2..bd3c41a8 100644 --- a/benchcab/fluxsite.py +++ b/benchcab/fluxsite.py @@ -124,7 +124,7 @@ def __init__( model : Model Model. met_forcing_file : str - Met forcinf file. + Met forcing file. sci_conf_id : int Science configuration ID. sci_config : dict diff --git a/benchcab/model.py b/benchcab/model.py index 426e9dd2..5642eecf 100644 --- a/benchcab/model.py +++ b/benchcab/model.py @@ -38,7 +38,7 @@ def __init__( Parameters ---------- repo : Repo - Respository. + Repository. name : Optional[str], optional Name, by default None patch : Optional[dict], optional diff --git a/benchcab/utils/__init__.py b/benchcab/utils/__init__.py index d103f4e8..9493886e 100644 --- a/benchcab/utils/__init__.py +++ b/benchcab/utils/__init__.py @@ -11,8 +11,6 @@ from pathlib import Path import yaml - -# from benchcab.utils.singleton_logger import SingletonLogger import logging import sys From 9b49c13d4e6918b515a182eef705431281209ad3 Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Tue, 13 Feb 2024 14:03:28 +1100 Subject: [PATCH 43/50] Black reapplied. 
#103 --- benchcab/benchcab.py | 14 +++++++------- benchcab/comparison.py | 9 +++------ benchcab/config.py | 5 +++++ benchcab/environment_modules.py | 4 ++++ benchcab/fluxsite.py | 4 +++- benchcab/main.py | 1 + benchcab/model.py | 1 + benchcab/utils/__init__.py | 6 ++++-- benchcab/utils/fs.py | 1 + benchcab/utils/repo.py | 11 +++++++++++ benchcab/utils/subprocess.py | 1 + tests/test_benchcab.py | 1 + tests/test_config.py | 2 +- 13 files changed, 43 insertions(+), 17 deletions(-) diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index dc9e401c..76ce38fe 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -26,7 +26,7 @@ from benchcab.utils import get_logger from benchcab.utils.fs import mkdir, next_path from benchcab.utils.pbs import render_job_script -from benchcab.utils.repo import SVNRepo, create_repo +from benchcab.utils.repo import create_repo from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface from benchcab.workdir import setup_fluxsite_directory_tree @@ -50,6 +50,7 @@ def __init__( Path to the executable. validate_env : bool, optional Validate the environment, by default True + """ self.benchcab_exe_path = benchcab_exe_path self.validate_env = validate_env @@ -106,11 +107,10 @@ def _validate_environment(self, project: str, modules: list): paths = list(internal.MET_DIR.glob(f"{site_id}*")) if not paths: self.logger.error( - [ - f"Failed to infer met file for site id '{site_id}' in " - f"{internal.MET_DIR}." 
- ] + f"Failed to infer met file for site id '{site_id}' in " ) + self.logger.error(f"{internal.MET_DIR}.") + sys.exit(1) if len(paths) > 1: self.logger.error( @@ -262,7 +262,7 @@ def fluxsite_run_tasks(self, config_path: str): ncpus = config.get("pbs", {}).get( "ncpus", internal.FLUXSITE_DEFAULT_PBS["ncpus"] ) - run_tasks_in_parallel(tasks, n_processes=ncpus, verbose=verbose) + run_tasks_in_parallel(tasks, n_processes=ncpus) else: run_tasks(tasks) self.logger.info("Successfully ran fluxsite tasks") @@ -290,7 +290,7 @@ def fluxsite_bitwise_cmp(self, config_path: str): ncpus = config["fluxsite"]["pbs"]["ncpus"] except KeyError: ncpus = internal.FLUXSITE_DEFAULT_PBS["ncpus"] - run_comparisons_in_parallel(comparisons, n_processes=ncpus, verbose=verbose) + run_comparisons_in_parallel(comparisons, n_processes=ncpus) else: run_comparisons(comparisons) self.logger.info("Successfully ran comparison tasks") diff --git a/benchcab/comparison.py b/benchcab/comparison.py index 173ca024..495b9d7c 100644 --- a/benchcab/comparison.py +++ b/benchcab/comparison.py @@ -32,6 +32,7 @@ def __init__( Files. task_name : str Name of the task. + """ self.files = files self.task_name = task_name @@ -57,12 +58,8 @@ def run(self) -> None: with output_file.open("w", encoding="utf-8") as file: file.write(exc.stdout) - self.logger.error( - [ - f"Failure: files {file_a.name} {file_b.name} differ. ", - f"Results of diff have been written to {output_file}", - ] - ) + self.logger.error(f"Failure: files {file_a.name} {file_b.name} differ. ") + self.logger.error(f"Results of diff have been written to {output_file}") sys.stdout.flush() diff --git a/benchcab/config.py b/benchcab/config.py index 25547d03..2cc51153 100644 --- a/benchcab/config.py +++ b/benchcab/config.py @@ -22,6 +22,7 @@ def __init__(self, validator: Validator): ---------- validator: cerberus.Validator A validation object that has been used and has the errors attribute. + """ # Nicely format the errors. 
errors = [f"{k} = {v}" for k, v in validator.errors.items()] @@ -51,6 +52,7 @@ def validate_config(config: dict) -> bool: ------ ConfigValidationError Raised when the configuration file fails validation. + """ # Load the schema schema = bu.load_package_data("config-schema.yml") @@ -81,6 +83,7 @@ def read_optional_key(config: dict): ---------- config : dict The configuration file with with/without optional keys + """ if "project" not in config: config["project"] = os.environ.get("PROJECT", None) @@ -118,6 +121,7 @@ def read_config_file(config_path: str) -> dict: ------- dict Configuration dict + """ # Load the configuration file. with Path.open(Path(config_path), "r", encoding="utf-8") as file: @@ -143,6 +147,7 @@ def read_config(config_path: str) -> dict: ------ ConfigValidationError Raised when the configuration file fails validation. + """ # Read configuration file config = read_config_file(config_path) diff --git a/benchcab/environment_modules.py b/benchcab/environment_modules.py index 70546a5a..fe845e80 100644 --- a/benchcab/environment_modules.py +++ b/benchcab/environment_modules.py @@ -72,6 +72,7 @@ def module_is_avail(self, *args: str) -> bool: ------- bool True if available, False otherwise. + """ return module("is-avail", *args) @@ -82,6 +83,7 @@ def module_is_loaded(self, *args: str) -> bool: ------- bool True if loaded, False otherwise. + """ return module("is-loaded", *args) @@ -92,6 +94,7 @@ def module_load(self, *args: str) -> None: ------ EnvironmentModulesError Raised when module fails to load. + """ if not module("load", *args): raise EnvironmentModulesError("Failed to load modules: " + " ".join(args)) @@ -103,6 +106,7 @@ def module_unload(self, *args: str) -> None: ------ EnvironmentModulesError Raised when module fails to unload. 
+ """ if not module("unload", *args): raise EnvironmentModulesError("Failed to unload modules: " + " ".join(args)) diff --git a/benchcab/fluxsite.py b/benchcab/fluxsite.py index bd3c41a8..14ea70a1 100644 --- a/benchcab/fluxsite.py +++ b/benchcab/fluxsite.py @@ -18,9 +18,9 @@ from benchcab import __version__, internal from benchcab.comparison import ComparisonTask from benchcab.model import Model +from benchcab.utils import get_logger from benchcab.utils.fs import chdir, mkdir from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface -from benchcab.utils import get_logger # fmt: off # ====================================================== @@ -44,6 +44,7 @@ def deep_update(mapping: Dict[KeyType, Any], *updating_mappings: Dict[KeyType, A ------- Dict[KeyType, Any] Updated mapping. + """ updated_mapping = mapping.copy() for updating_mapping in updating_mappings: @@ -129,6 +130,7 @@ def __init__( Science configuration ID. sci_config : dict Science configuration. + """ self.model = model self.met_forcing_file = met_forcing_file diff --git a/benchcab/main.py b/benchcab/main.py index c935ebf7..f7dc7157 100644 --- a/benchcab/main.py +++ b/benchcab/main.py @@ -15,6 +15,7 @@ def parse_and_dispatch(parser): ---- parser : argparse.ArgumentParser Parser object. 
+ """ args = vars(parser.parse_args(sys.argv[1:] if sys.argv[1:] else ["-h"])) diff --git a/benchcab/model.py b/benchcab/model.py index 5642eecf..1bbf014b 100644 --- a/benchcab/model.py +++ b/benchcab/model.py @@ -49,6 +49,7 @@ def __init__( Build script, by default None model_id : Optional[int], optional Model ID, by default None + """ self.repo = repo self.name = name if name else repo.get_branch_name() diff --git a/benchcab/utils/__init__.py b/benchcab/utils/__init__.py index 9493886e..aa09428a 100644 --- a/benchcab/utils/__init__.py +++ b/benchcab/utils/__init__.py @@ -5,14 +5,14 @@ """Top-level utilities.""" import json +import logging import os import pkgutil +import sys from importlib import resources from pathlib import Path import yaml -import logging -import sys # List of one-argument decoding functions. PACKAGE_DATA_DECODERS = dict(json=json.loads, yml=yaml.safe_load) @@ -25,6 +25,7 @@ def get_installed_root() -> Path: ------- Path Path to the installed root. + """ return Path(resources.files("benchcab")) @@ -36,6 +37,7 @@ def load_package_data(filename: str) -> dict: ---------- filename : str Filename of the file to load out of the data directory. + """ # Work out the encoding of requested file. ext = filename.split(".")[-1] diff --git a/benchcab/utils/fs.py b/benchcab/utils/fs.py index 78bc63c0..2be838f8 100644 --- a/benchcab/utils/fs.py +++ b/benchcab/utils/fs.py @@ -67,6 +67,7 @@ def mkdir(new_path: Path, **kwargs): Path to the directory to be created. **kwargs : dict, optional Additional options for `pathlib.Path.mkdir()` + """ get_logger().debug(f"Creating {new_path} directory") new_path.mkdir(**kwargs) diff --git a/benchcab/utils/repo.py b/benchcab/utils/repo.py index f7025be5..2f891725 100644 --- a/benchcab/utils/repo.py +++ b/benchcab/utils/repo.py @@ -31,6 +31,7 @@ def get_revision(self) -> str: ------- str Human readable string describing the latest revision. 
+ """ @abstractmethod @@ -41,6 +42,7 @@ def get_branch_name(self) -> str: ------- str Branch name of the source code. + """ @@ -51,6 +53,7 @@ class GitRepo(Repo): ---------- subprocess_handler: SubprocessWrapper Object for handling subprocess calls. + """ subprocess_handler = SubprocessWrapper() @@ -73,6 +76,7 @@ def __init__( commit: str, optional Commit hash (long). When specified the repository will reset to this commit when cloning. + """ self.url = url self.branch = branch @@ -103,6 +107,7 @@ def get_revision(self) -> str: ------- str Human readable string describing the latest revision. + """ repo = git.Repo(self.path) return f"commit {repo.head.commit.hexsha}" @@ -114,6 +119,7 @@ def get_branch_name(self) -> str: ------- str Branch name of the source code. + """ return self.branch @@ -125,6 +131,7 @@ class SVNRepo(Repo): ---------- subprocess_handler: SubprocessWrapper Object for handling subprocess calls. + """ subprocess_handler: SubprocessWrapperInterface = SubprocessWrapper() @@ -152,6 +159,7 @@ def __init__( revision: int, optional SVN revision number. When specified the branch will be set to this revision on checkout. + """ self.svn_root = svn_root self.branch_path = branch_path @@ -181,6 +189,7 @@ def get_revision(self) -> str: ------- str Human readable string describing the latest revision. + """ proc = self.subprocess_handler.run_cmd( f"svn info --show-item last-changed-revision {self.path}", @@ -195,6 +204,7 @@ def get_branch_name(self) -> str: ------- str Branch name of the source code. + """ return Path(self.branch_path).name @@ -218,6 +228,7 @@ def create_repo(spec: dict, path: Path) -> Repo: ------- Repo A subclass instance of `Repo`. 
+ """ if "git" in spec: if "url" not in spec["git"]: diff --git a/benchcab/utils/subprocess.py b/benchcab/utils/subprocess.py index 77341411..8951c19a 100644 --- a/benchcab/utils/subprocess.py +++ b/benchcab/utils/subprocess.py @@ -60,6 +60,7 @@ def run_cmd( ------- subprocess.CompletedProcess _description_ + """ # Use the logging level (10 = Debug) to determine verbosity. verbose = get_logger().getEffectiveLevel() == DEBUG_LEVEL diff --git a/tests/test_benchcab.py b/tests/test_benchcab.py index 5fdc3708..12b5c69c 100644 --- a/tests/test_benchcab.py +++ b/tests/test_benchcab.py @@ -1,4 +1,5 @@ """`pytest` tests for `benchcab.py`.""" + import re from contextlib import nullcontext as does_not_raise from unittest import mock diff --git a/tests/test_config.py b/tests/test_config.py index 1e98eeb4..dcf306b2 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,6 +1,6 @@ """`pytest` tests for config.py.""" + import os -import re from contextlib import nullcontext as does_not_raise from pathlib import Path from pprint import pformat From 6706a7361fece0ce640db3358e727c9ed3e8b996 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Tue, 13 Feb 2024 23:16:29 +1100 Subject: [PATCH 44/50] Make build step work for hh5 environment --- benchcab/benchcab.py | 15 +++++++++++++++ benchcab/internal.py | 3 +++ 2 files changed, 18 insertions(+) diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index ae2643f8..9d48e347 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -48,6 +48,13 @@ def __init__( self._models: list[Model] = [] self.tasks: list[Task] = [] # initialise fluxsite tasks lazily + self._set_environment() + + def _set_environment(self): + """Sets environment variables on current user environment.""" + # Prioritize system binaries over externally set $PATHs (#220) + os.environ["PATH"] = f"{':'.join(internal.SYSTEM_PATHS)}:{os.environ['PATH']}" + def _validate_environment(self, project: str, modules: list): """Performs checks on current user 
environment.""" if not self.validate_env: @@ -85,6 +92,14 @@ def _validate_environment(self, project: str, modules: list): print(f"Error: module ({modname}) is not available.") sys.exit(1) + system_paths = os.getenv("PATH").split(":")[: len(internal.SYSTEM_PATHS)] + if set(system_paths) != set(internal.SYSTEM_PATHS): + msg = f"""Error: System paths are not prioritized over user-defined paths + Currently set as: {system_paths} + The required system paths are: {internal.SYSTEM_PATHS} + """ + raise EnvironmentError(msg) + all_site_ids = set( internal.MEORG_EXPERIMENTS["five-site-test"] + internal.MEORG_EXPERIMENTS["forty-two-site-test"] diff --git a/benchcab/internal.py b/benchcab/internal.py index aeea3e2d..4f2ebc39 100644 --- a/benchcab/internal.py +++ b/benchcab/internal.py @@ -28,6 +28,9 @@ # Path to the user's current working directory CWD = Path.cwd() +# Default system paths in Unix +SYSTEM_PATHS = ["/bin", "/usr/bin", "/usr/local/bin"] + # Path to the user's home directory HOME_DIR = Path(os.environ["HOME"]) From b6bca5b25545d2abcb96169813fe97ae1e2135d3 Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Thu, 15 Feb 2024 10:46:44 +1100 Subject: [PATCH 45/50] Remaining clobbered code fix. #103. 
--- benchcab/benchcab.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index 76ce38fe..3d175dd2 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -281,19 +281,12 @@ def fluxsite_bitwise_cmp(self, config_path: str): comparisons = get_fluxsite_comparisons(tasks) self.logger.debug("Running comparison tasks...") - try: - multiprocess = config["fluxsite"]["multiprocess"] - except KeyError: - multiprocess = internal.FLUXSITE_DEFAULT_MULTIPROCESS - if multiprocess: - try: - ncpus = config["fluxsite"]["pbs"]["ncpus"] - except KeyError: - ncpus = internal.FLUXSITE_DEFAULT_PBS["ncpus"] + if config["fluxsite"]["multiprocess"]: + ncpus = config["fluxsite"]["pbs"]["ncpus"] run_comparisons_in_parallel(comparisons, n_processes=ncpus) else: run_comparisons(comparisons) - self.logger.info("Successfully ran comparison tasks") + self.logger.debug("Successfully ran comparison tasks") def fluxsite(self, config_path: str, no_submit: bool, skip: list[str]): """Endpoint for `benchcab fluxsite`.""" From b65ddbb67bd1d187108c4f4e3b15d45486cc8629 Mon Sep 17 00:00:00 2001 From: "C. Carouge" Date: Thu, 15 Feb 2024 15:57:37 +1100 Subject: [PATCH 46/50] Last clobbered merge changes. 
Fixes #103 --- benchcab/benchcab.py | 16 +++++----------- benchcab/utils/singleton.py | 21 --------------------- 2 files changed, 5 insertions(+), 32 deletions(-) delete mode 100644 benchcab/utils/singleton.py diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index c57d68aa..141595da 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -267,15 +267,9 @@ def fluxsite_run_tasks(self, config_path: str): self._validate_environment(project=config["project"], modules=config["modules"]) tasks = self.tasks if self.tasks else self._initialise_tasks(config) - self.logger.debug("Running fluxsite tasks...") - try: - multiprocess = config["fluxsite"]["multiprocess"] - except KeyError: - multiprocess = internal.FLUXSITE_DEFAULT_MULTIPROCESS - if multiprocess: - ncpus = config.get("pbs", {}).get( - "ncpus", internal.FLUXSITE_DEFAULT_PBS["ncpus"] - ) + self.logger.info("Running fluxsite tasks...") + if config["fluxsite"]["multiprocess"]: + ncpus = config["fluxsite"]["pbs"]["ncpus"] run_tasks_in_parallel(tasks, n_processes=ncpus) else: run_tasks(tasks) @@ -294,13 +288,13 @@ def fluxsite_bitwise_cmp(self, config_path: str): tasks = self.tasks if self.tasks else self._initialise_tasks(config) comparisons = get_fluxsite_comparisons(tasks) - self.logger.debug("Running comparison tasks...") + self.logger.info("Running comparison tasks...") if config["fluxsite"]["multiprocess"]: ncpus = config["fluxsite"]["pbs"]["ncpus"] run_comparisons_in_parallel(comparisons, n_processes=ncpus) else: run_comparisons(comparisons) - self.logger.debug("Successfully ran comparison tasks") + self.logger.info("Successfully ran comparison tasks") def fluxsite(self, config_path: str, no_submit: bool, skip: list[str]): """Endpoint for `benchcab fluxsite`.""" diff --git a/benchcab/utils/singleton.py b/benchcab/utils/singleton.py deleted file mode 100644 index bdf5f427..00000000 --- a/benchcab/utils/singleton.py +++ /dev/null @@ -1,21 +0,0 @@ -"""Singleton Object.""" - - -class Singleton(type): 
- """Singleton base (meta) class.""" - - _instances = {} - - def __call__(cls, *args, **kwargs): - """Create the object on first call, return otherwise. - - Returns - ------- - object - The object that metaclasses this base class. - - """ - if cls not in cls._instances: - cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) - - return cls._instances[cls] From 1ee5870c20175a4e270d22e446d970a0638d9630 Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Fri, 16 Feb 2024 11:45:23 +1100 Subject: [PATCH 47/50] Updated source to pass ruff. #103 --- benchcab/utils/__init__.py | 14 ++++++++------ benchcab/utils/pbs.py | 16 +++++++--------- ruff.toml | 1 + tests/test_pbs.py | 6 +++--- tests/test_utils.py | 20 ++++++++++---------- 5 files changed, 29 insertions(+), 28 deletions(-) diff --git a/benchcab/utils/__init__.py b/benchcab/utils/__init__.py index 58492ebf..11cbfd01 100644 --- a/benchcab/utils/__init__.py +++ b/benchcab/utils/__init__.py @@ -12,14 +12,10 @@ from importlib import resources from pathlib import Path from jinja2 import Environment, BaseLoader - import yaml # List of one-argument decoding functions. -PACKAGE_DATA_DECODERS = dict( - json=json.loads, - yml=yaml.safe_load -) +PACKAGE_DATA_DECODERS = dict(json=json.loads, yml=yaml.safe_load) def get_installed_root() -> Path: @@ -67,11 +63,14 @@ def interpolate_string_template(template, **kwargs): ---------- template : str Template string to interpolate over. + **kwargs : + Keyword arguments to interpolate into the string. Returns ------- str Interpolated string. + """ _template = Environment(loader=BaseLoader()).from_string(template) return _template.render(**kwargs) @@ -84,11 +83,14 @@ def interpolate_file_template(template_file, **kwargs): ---------- template_file : str Filepath slug in the benchcab data directory. + **kwargs : + Keyword arguments to interpolate into the file. Returns ------- str Interpolated template string. 
+ """ template = load_package_data(template_file) return interpolate_string_template(template, **kwargs) @@ -133,4 +135,4 @@ def get_logger(name="benchcab", level="debug"): handler.setFormatter(formatter) logger.addHandler(handler) - return logger \ No newline at end of file + return logger diff --git a/benchcab/utils/pbs.py b/benchcab/utils/pbs.py index 892c5d50..ed767413 100644 --- a/benchcab/utils/pbs.py +++ b/benchcab/utils/pbs.py @@ -30,26 +30,24 @@ def render_job_script( This includes things such as running CABLE and running bitwise comparison jobs between model output files. """ - module_load_lines = "\n".join( - f"module load {module_name}" for module_name in modules - ) verbose_flag = " -v" if verbose else "" storage_flags = ["gdata/ks32", "gdata/hh5", "gdata/wd9", *pbs_config["storage"]] context = dict( modules=modules, verbose_flag=verbose_flag, - ncpus=pbs_config['ncpus'], - mem=pbs_config['mem'], - walltime=pbs_config['walltime'], + ncpus=pbs_config["ncpus"], + mem=pbs_config["mem"], + walltime=pbs_config["walltime"], project=project, - storage='+'.join(storage_flags), + storage="+".join(storage_flags), benchcab_path=benchcab_path, config_path=config_path, - skip_bitwise_cmp=skip_bitwise_cmp + skip_bitwise_cmp=skip_bitwise_cmp, ) - return interpolate_file_template('pbs_jobscript.j2', **context) + return interpolate_file_template("pbs_jobscript.j2", **context) + # return f"""#!/bin/bash # #PBS -l wd diff --git a/ruff.toml b/ruff.toml index 733b6572..5c742507 100644 --- a/ruff.toml +++ b/ruff.toml @@ -6,4 +6,5 @@ ignore = [ "D104", # D104 Missing docstring in public package "PLR0913", # PLR0913 Too many arguments in function definition "D100", # D100 Missing docstring in public module + "I001", # Import block is un-sorted or un-formatted ] \ No newline at end of file diff --git a/tests/test_pbs.py b/tests/test_pbs.py index 78f205fa..90c9a506 100644 --- a/tests/test_pbs.py +++ b/tests/test_pbs.py @@ -16,7 +16,7 @@ def test_default_job_script(self): 
modules=["foo", "bar", "baz"], pbs_config=internal.FLUXSITE_DEFAULT_PBS, benchcab_path="/absolute/path/to/benchcab", - ) == load_package_data('test/pbs_jobscript_default.sh') + ) == load_package_data("test/pbs_jobscript_default.sh") def test_verbose_flag_added_to_command_line_arguments(self): """Success case: test verbose flag is added to command line arguments.""" @@ -27,7 +27,7 @@ def test_verbose_flag_added_to_command_line_arguments(self): pbs_config=internal.FLUXSITE_DEFAULT_PBS, verbose=True, benchcab_path="/absolute/path/to/benchcab", - ) == load_package_data('test/pbs_jobscript_verbose.sh') + ) == load_package_data("test/pbs_jobscript_verbose.sh") def test_skip_bitwise_comparison_step(self): """Success case: skip fluxsite-bitwise-cmp step.""" @@ -38,4 +38,4 @@ def test_skip_bitwise_comparison_step(self): pbs_config=internal.FLUXSITE_DEFAULT_PBS, skip_bitwise_cmp=True, benchcab_path="/absolute/path/to/benchcab", - ) == load_package_data('test/pbs_jobscript_skip_bitwise.sh') \ No newline at end of file + ) == load_package_data("test/pbs_jobscript_skip_bitwise.sh") diff --git a/tests/test_utils.py b/tests/test_utils.py index fedc4768..a99fadd0 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -19,31 +19,31 @@ def test_load_package_data_pass(): def test_load_package_data_fail(): """Test load_package_data() fails as expected.""" with pytest.raises(FileNotFoundError): - missing = bu.load_package_data('config-missing.yml') + _ = bu.load_package_data("config-missing.yml") def test_interpolate_string_template_pass(): """Test interpolate_string_template() passes as expected.""" - result = bu.interpolate_string_template('I should {{status}}', status='pass') - assert result == 'I should pass' + result = bu.interpolate_string_template("I should {{status}}", status="pass") + assert result == "I should pass" def test_interpolate_string_template_fail(): """Test interpolate_string_template() fails as expected.""" - result = bu.interpolate_string_template('I 
should {{status}}', status='fail') - assert result != 'I should not pass' + result = bu.interpolate_string_template("I should {{status}}", status="fail") + assert result != "I should not pass" def test_interpolate_file_template_pass(): """Test interpolate_file_template() passes as expected.""" - result = bu.interpolate_file_template('test/template.j2', myarg='PASS') - assert result == 'This is a template. PASS' + result = bu.interpolate_file_template("test/template.j2", myarg="PASS") + assert result == "This is a template. PASS" def test_interpolate_file_template_fail(): """Test interpolate_file_template() fails as expected.""" - result = bu.interpolate_file_template('test/template.j2', notmyarg='PASS') - assert result != 'This is a template. PASS' + result = bu.interpolate_file_template("test/template.j2", notmyarg="PASS") + assert result != "This is a template. PASS" def test_get_logger_singleton_pass(): @@ -59,4 +59,4 @@ def test_get_logger_singleton_fail(): logger1 = bu.get_logger(name="benchcab") logger2 = bu.get_logger(name="benchcab2") - assert logger1 is not logger2 \ No newline at end of file + assert logger1 is not logger2 From c9f14277421999ebb3122a5d4c4e65a18ca71458 Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Fri, 16 Feb 2024 12:34:56 +1100 Subject: [PATCH 48/50] Removed commented code. 
#251 --- benchcab/utils/pbs.py | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/benchcab/utils/pbs.py b/benchcab/utils/pbs.py index ed767413..56e9d4a1 100644 --- a/benchcab/utils/pbs.py +++ b/benchcab/utils/pbs.py @@ -46,27 +46,4 @@ def render_job_script( skip_bitwise_cmp=skip_bitwise_cmp, ) - return interpolate_file_template("pbs_jobscript.j2", **context) - - -# return f"""#!/bin/bash -# #PBS -l wd -# #PBS -l ncpus={pbs_config["ncpus"]} -# #PBS -l mem={pbs_config["mem"]} -# #PBS -l walltime={pbs_config["walltime"]} -# #PBS -q normal -# #PBS -P {project} -# #PBS -j oe -# #PBS -m e -# #PBS -l storage={'+'.join(storage_flags)} - -# module purge -# {module_load_lines} - -# set -ev - -# {benchcab_path} fluxsite-run-tasks --config={config_path} {verbose_flag} -# {'' if skip_bitwise_cmp else f''' -# {benchcab_path} fluxsite-bitwise-cmp --config={config_path} {verbose_flag} -# ''' } -# """ + return interpolate_file_template("pbs_jobscript.j2", **context) \ No newline at end of file From 9bd29102d8edd692e50c37463397e50dd057b121 Mon Sep 17 00:00:00 2001 From: "C. Carouge" Date: Tue, 20 Feb 2024 10:56:28 +1100 Subject: [PATCH 49/50] Ruff linting fixes --- benchcab/fluxsite.py | 2 +- benchcab/utils/__init__.py | 3 ++- benchcab/utils/pbs.py | 3 ++- ruff.toml | 1 - 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/benchcab/fluxsite.py b/benchcab/fluxsite.py index 14ea70a1..1bf62764 100644 --- a/benchcab/fluxsite.py +++ b/benchcab/fluxsite.py @@ -44,7 +44,7 @@ def deep_update(mapping: Dict[KeyType, Any], *updating_mappings: Dict[KeyType, A ------- Dict[KeyType, Any] Updated mapping. 
- + """ updated_mapping = mapping.copy() for updating_mapping in updating_mappings: diff --git a/benchcab/utils/__init__.py b/benchcab/utils/__init__.py index 11cbfd01..e0d7ed1d 100644 --- a/benchcab/utils/__init__.py +++ b/benchcab/utils/__init__.py @@ -11,8 +11,9 @@ import sys from importlib import resources from pathlib import Path -from jinja2 import Environment, BaseLoader + import yaml +from jinja2 import BaseLoader, Environment # List of one-argument decoding functions. PACKAGE_DATA_DECODERS = dict(json=json.loads, yml=yaml.safe_load) diff --git a/benchcab/utils/pbs.py b/benchcab/utils/pbs.py index 56e9d4a1..2ae8a4fe 100644 --- a/benchcab/utils/pbs.py +++ b/benchcab/utils/pbs.py @@ -4,6 +4,7 @@ """Contains helper functions for manipulating PBS job scripts.""" from typing import TypedDict + from benchcab.utils import interpolate_file_template @@ -46,4 +47,4 @@ def render_job_script( skip_bitwise_cmp=skip_bitwise_cmp, ) - return interpolate_file_template("pbs_jobscript.j2", **context) \ No newline at end of file + return interpolate_file_template("pbs_jobscript.j2", **context) diff --git a/ruff.toml b/ruff.toml index 5c742507..733b6572 100644 --- a/ruff.toml +++ b/ruff.toml @@ -6,5 +6,4 @@ ignore = [ "D104", # D104 Missing docstring in public package "PLR0913", # PLR0913 Too many arguments in function definition "D100", # D100 Missing docstring in public module - "I001", # Import block is un-sorted or un-formatted ] \ No newline at end of file From 623c0a3697f9425752cf8919154dac25d4d2e950 Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Wed, 21 Feb 2024 10:19:04 +1100 Subject: [PATCH 50/50] Fixed typing, another ruff complaint. 
Fixes #251 --- benchcab/utils/__init__.py | 8 +++++++- tests/test_pbs.py | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/benchcab/utils/__init__.py b/benchcab/utils/__init__.py index e0d7ed1d..5d67dc7f 100644 --- a/benchcab/utils/__init__.py +++ b/benchcab/utils/__init__.py @@ -11,6 +11,7 @@ import sys from importlib import resources from pathlib import Path +from typing import Union import yaml from jinja2 import BaseLoader, Environment @@ -31,7 +32,7 @@ def get_installed_root() -> Path: return Path(resources.files("benchcab")) -def load_package_data(filename: str) -> dict: +def load_package_data(filename: str) -> Union[str, dict]: """Load data out of the installed package data directory. Parameters @@ -39,6 +40,11 @@ def load_package_data(filename: str) -> dict: filename : str Filename of the file to load out of the data directory. + Returns + ------- + str or dict + String or dictionary, depending on format of data read. + """ # Work out the encoding of requested file. ext = filename.split(".")[-1] diff --git a/tests/test_pbs.py b/tests/test_pbs.py index 90c9a506..fb84edd3 100644 --- a/tests/test_pbs.py +++ b/tests/test_pbs.py @@ -1,8 +1,8 @@ """`pytest` tests for `utils/pbs.py`.""" from benchcab import internal -from benchcab.utils.pbs import render_job_script from benchcab.utils import load_package_data +from benchcab.utils.pbs import render_job_script class TestRenderJobScript: