From e143205f1f9073b197502ca0e09dad74feebf0a7 Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Fri, 6 Oct 2023 11:22:26 +1100 Subject: [PATCH 1/6] Config validation (Fixes #143). I have also implemented the data installation part of #151. --- .conda/benchcab-dev.yaml | 2 +- .conda/meta.yaml | 1 + benchcab/config.py | 213 ++++++---------- benchcab/data/config-schema.json | 89 +++++++ benchcab/data/config-schema.yml | 67 ++++++ benchcab/data/test/config-invalid.yml | 19 ++ benchcab/data/test/config-valid.yml | 35 +++ benchcab/utils/__init__.py | 50 ++++ setup.cfg | 5 +- tests/test_config.py | 334 ++------------------------ tests/test_utils.py | 23 ++ 11 files changed, 389 insertions(+), 449 deletions(-) create mode 100644 benchcab/data/config-schema.json create mode 100644 benchcab/data/config-schema.yml create mode 100644 benchcab/data/test/config-invalid.yml create mode 100644 benchcab/data/test/config-valid.yml create mode 100644 tests/test_utils.py diff --git a/.conda/benchcab-dev.yaml b/.conda/benchcab-dev.yaml index 6be15d1e..2652f852 100644 --- a/.conda/benchcab-dev.yaml +++ b/.conda/benchcab-dev.yaml @@ -6,7 +6,7 @@ dependencies: - python=3.9 - f90nml - netcdf4 - - numpy - pytest-cov - pyyaml - flatdict + - cerberus>=1.3.5 \ No newline at end of file diff --git a/.conda/meta.yaml b/.conda/meta.yaml index 0ba08259..6745ab42 100644 --- a/.conda/meta.yaml +++ b/.conda/meta.yaml @@ -26,3 +26,4 @@ requirements: - PyYAML - f90nml - flatdict + - cerberus >= 1.3.5 diff --git a/benchcab/config.py b/benchcab/config.py index eccf1605..ab8b3a17 100644 --- a/benchcab/config.py +++ b/benchcab/config.py @@ -1,153 +1,92 @@ """A module containing all *_config() functions.""" - from pathlib import Path import yaml - from benchcab import internal +from cerberus import Validator +import benchcab.utils as bu + + +class ConfigValidationException(Exception): + + def __init__(self, validator: Validator): + """Config validation exception. + + Parameters + ---------- + validator: cerberus.Validator + A validation object that has been used and has the errors attribute. + """ + + # Nicely format the errors. + errors = [f'{k} = {v}' for k, v in validator.errors.items()] + + # Assemble the error message and + msg = '\n\nThe following errors were raised when validating the config file.\n' + msg += '\n'.join(errors) + '\n' + # Raise to super. + super().__init__(msg) -def check_config(config: dict): - """Performs input validation on config file. - If the config is invalid, an exception is raised. Otherwise, do nothing. +def validate_config(config: dict) -> bool: + """Validate the configuration dictionary. + + Parameters + ---------- + config : dict + Dictionary of configuration loaded from the yaml file. + + Returns + ------- + bool + True if valid, exception raised otherwise. + + Raises + ------ + ConfigValidationException + Raised when the configuration file fails validation. """ + + # Load the schema + schema = bu.load_package_data('config-schema.yml') + + # Create a validator + v = Validator(schema) - if any(key not in config for key in internal.CONFIG_REQUIRED_KEYS): - raise ValueError( - "Keys are missing from the config file: " - + ", ".join( - key for key in internal.CONFIG_REQUIRED_KEYS if key not in config - ) - ) - - if not isinstance(config["project"], str): - raise TypeError("The 'project' key must be a string.") - - if not isinstance(config["modules"], list): - raise TypeError("The 'modules' key must be a list.") - - if not isinstance(config["experiment"], str): - raise TypeError("The 'experiment' key must be a string.") - - # the "science_configurations" key is optional - if "science_configurations" in config: - if not isinstance(config["science_configurations"], list): - raise TypeError("The 'science_configurations' key must be a list.") - if config["science_configurations"] == []: - raise ValueError("The 'science_configurations' key cannot be empty.") - if not all( - isinstance(value, dict) for value in config["science_configurations"] - ): - raise TypeError( - "Science config settings must be specified using a dictionary " - "that is compatible with the f90nml python package." - ) - - # the "fluxsite" key is optional - if "fluxsite" in config: - if not isinstance(config["fluxsite"], dict): - raise TypeError("The 'fluxsite' key must be a dictionary.") - # the "pbs" key is optional - if "pbs" in config["fluxsite"]: - if not isinstance(config["fluxsite"]["pbs"], dict): - raise TypeError("The 'pbs' key must be a dictionary.") - # the "ncpus" key is optional - if "ncpus" in config["fluxsite"]["pbs"] and not isinstance( - config["fluxsite"]["pbs"]["ncpus"], int - ): - raise TypeError("The 'ncpus' key must be an integer.") - # the "mem" key is optional - if "mem" in config["fluxsite"]["pbs"] and not isinstance( - config["fluxsite"]["pbs"]["mem"], str - ): - raise TypeError("The 'mem' key must be a string.") - # the "walltime" key is optional - if "walltime" in config["fluxsite"]["pbs"] and not isinstance( - config["fluxsite"]["pbs"]["walltime"], str - ): - raise TypeError("The 'walltime' key must be a string.") - # the "storage" key is optional - if "storage" in config["fluxsite"]["pbs"]: - if not isinstance(config["fluxsite"]["pbs"]["storage"], list) or any( - not isinstance(val, str) - for val in config["fluxsite"]["pbs"]["storage"] - ): - raise TypeError("The 'storage' key must be a list of strings.") - # the "multiprocessing" key is optional - if "multiprocessing" in config["fluxsite"] and not isinstance( - config["fluxsite"]["multiprocessing"], bool - ): - raise TypeError("The 'multiprocessing' key must be a boolean.") - - valid_experiments = ( - list(internal.MEORG_EXPERIMENTS) + internal.MEORG_EXPERIMENTS["five-site-test"] - ) - if config["experiment"] not in valid_experiments: - raise ValueError( - "The 'experiment' key is invalid.\n" - "Valid experiments are: " + ", ".join(valid_experiments) - ) - - if not isinstance(config["realisations"], list): - raise TypeError("The 'realisations' key must be a list.") - - if config["realisations"] == []: - raise ValueError("The 'realisations' key cannot be empty.") - - for branch_id, branch_config in enumerate(config["realisations"]): - if not isinstance(branch_config, dict): - raise TypeError(f"Realisation '{branch_id}' must be a dictionary object.") - if "path" not in branch_config: - raise ValueError( - f"Realisation '{branch_id}' must specify the `path` field." - ) - if not isinstance(branch_config["path"], str): - raise TypeError( - f"The 'path' field in realisation '{branch_id}' must be a string." - ) - # the "name" key is optional - if "name" in branch_config and not isinstance(branch_config["name"], str): - raise TypeError( - f"The 'name' field in realisation '{branch_id}' must be a string." - ) - # the "revision" key is optional - if "revision" in branch_config and not isinstance( - branch_config["revision"], int - ): - raise TypeError( - f"The 'revision' field in realisation '{branch_id}' must be an " - "integer." - ) - # the "patch" key is optional - if "patch" in branch_config and not isinstance(branch_config["patch"], dict): - raise TypeError( - f"The 'patch' field in realisation '{branch_id}' must be a " - "dictionary that is compatible with the f90nml python package." - ) - # the "patch_remove" key is optional - if "patch_remove" in branch_config and not isinstance( - branch_config["patch_remove"], dict - ): - raise TypeError( - f"The 'patch_remove' field in realisation '{branch_id}' must be a " - "dictionary that is compatible with the f90nml python package." - ) - # the "build_script" key is optional - if "build_script" in branch_config and not isinstance( - branch_config["build_script"], str - ): - raise TypeError( - f"The 'build_script' field in realisation '{branch_id}' must be a " - "string." - ) + # Validate + is_valid = v.validate(config) + + # Valid + if is_valid: + return True + + # Invalid + raise ConfigValidationException(v) def read_config(config_path: str) -> dict: - """Reads the config file and returns a dictionary containing the configurations.""" + """Reads the config file and returns a dictionary containing the configurations. + + Parameters + ---------- + config_path : str + Path to the configuration file. + + Returns + ------- + dict + Configuration dict. + + Raises + ------ + ConfigValidationError + Raised when the configuration file fails validation. + """ + # Load the configuration file. with open(Path(config_path), "r", encoding="utf-8") as file: config = yaml.safe_load(file) - check_config(config) - - return config + # Validate and return. + validate_config(config) + return config \ No newline at end of file diff --git a/benchcab/data/config-schema.json b/benchcab/data/config-schema.json new file mode 100644 index 00000000..1c4b468f --- /dev/null +++ b/benchcab/data/config-schema.json @@ -0,0 +1,89 @@ +{ + "project": { + "type": "integer" + }, + "modules": { + "type": "list", + "schema": { + "type": "string" + } + }, + "experiment": { + "type": "string", + "allowed": [ + "five-site-test", + "forty-two-site-test", + "AU-Tum", + "AU-How", + "FI-Hyy", + "US-Var", + "US-Whs" + ] + }, + "science_configurations": { + "type": "list", + "schema": { + "type": "dict" + } + }, + "realisations": { + "type": "list", + "required": true, + "schema": { + "type": "dict", + "schema": { + "path": { + "type": "string" + }, + "name": { + "type": "string", + "required": false + }, + "build_script": { + "type": "string", + "required": false + }, + "revision": { + "type": "string", + "required": false + }, + "patch": { + "type": "dict", + "required": false + } + } + } + }, + "fluxsite": { + "type": "dict", + "required": false, + "schema": { + "multiprocessing": { + "type": "boolean" + }, + "pbs": { + "type": "dict", + "schema": { + "ncpus": { + "type": "integer", + "required": false + }, + "mem": { + "type": "integer", + "required": false + }, + "walltime": { + "type": "string", + "required": false + }, + "storage": { + "type": "list", + "schema": { + "type": "string" + } + } + } + } + } + } +} diff --git a/benchcab/data/config-schema.yml b/benchcab/data/config-schema.yml new file mode 100644 index 00000000..37ff37d5 --- /dev/null +++ b/benchcab/data/config-schema.yml @@ -0,0 +1,67 @@ +project: + type: "string" + +modules: + type: "list" + schema: + type: "string" + +experiment: + type: "string" + allowed: [ + "five-site-test", + "forty-two-site-test", + "AU-Tum", + "AU-How", + "FI-Hyy", + "US-Var", + "US-Whs" + ] + +science_configuration: + type: "list" + schema: + type: "dict" + +realisations: + type: "list" + required: true + schema: + type: "dict" + schema: + path: + type: "string" + name: + type: "string" + required: false + build_script: + type: "string" + required: false + revision: + type: "string" + required: false + patch: + type: "dict" + required: false + +fluxsite: + type: "dict" + required: false + schema: + multiprocessing: + type: "boolean" + pbs: + type: "dict" + schema: + ncpus: + type: "integer" + required: false + mem: + type: "integer" + required: false + walltime: + type: "integer" + required: false + storage: + type: "integer" + required: false \ No newline at end of file diff --git a/benchcab/data/test/config-invalid.yml b/benchcab/data/test/config-invalid.yml new file mode 100644 index 00000000..0e2f6e6b --- /dev/null +++ b/benchcab/data/test/config-invalid.yml @@ -0,0 +1,19 @@ +# A sample configuration that should fail validation. +project: w97 + +experiment: NON EXISTENT EXPERIMENT!!! + +realisations: [ + { + path: "trunk", + }, + { + path: "branches/Users/ccc561/v3.0-YP-changes", + } +] + +modules: [ + intel-compiler/2021.1.1, + netcdf/4.7.4, + openmpi/4.1.0 +] \ No newline at end of file diff --git a/benchcab/data/test/config-valid.yml b/benchcab/data/test/config-valid.yml new file mode 100644 index 00000000..902a3249 --- /dev/null +++ b/benchcab/data/test/config-valid.yml @@ -0,0 +1,35 @@ +# Example configuration file for running the CABLE benchmarking. +# +# Note, optional keys are available in this config file. See +# https://benchcab.readthedocs.io/en/stable/user_guide/config_options/ +# for documentation on all the available keys. +# +# This file is in YAML format. You can get information on the syntax here: +# https://yaml.org/spec/1.2.2/#chapter-2-language-overview +# Quick tips: +# You need a space after the ":" +# +# It uses the same syntax as Python for: +# - lists (aka sequences) +# - dictionaries (aka mappings with key/value pairs) +# +# Strings can be given with or without double or single quotes. + +project: w97 + +experiment: five-site-test + +realisations: [ + { + path: "trunk", + }, + { + path: "branches/Users/ccc561/v3.0-YP-changes", + } +] + +modules: [ + intel-compiler/2021.1.1, + netcdf/4.7.4, + openmpi/4.1.0 +] \ No newline at end of file diff --git a/benchcab/utils/__init__.py b/benchcab/utils/__init__.py index e69de29b..d8b84f32 100644 --- a/benchcab/utils/__init__.py +++ b/benchcab/utils/__init__.py @@ -0,0 +1,50 @@ +"""Top-level utilities.""" +import pkgutil +import json +import yaml +import os +import importlib +from pathlib import Path + + +# List of one-argument decoding functions. +PACKAGE_DATA_DECODERS = dict( + json=json.loads, + yml=yaml.safe_load +) + + +def get_installed_root() -> Path: + """Get the installed root of the benchcab installation. + + Returns + ------- + Path + Path to the installed root. + """ + return Path(importlib.resources.files('benchcab')) + + + +def load_package_data(filename: str) -> dict: + """Load data out of the installed package data directory. + + Parameters + ---------- + filename : str + Filename of the file to load out of the data directory. + """ + # Work out the encoding of requested file. + ext = filename.split('.')[-1] + + # Alias yaml and yml. + ext = ext if ext != 'yaml' else 'yml' + + # Make sure it is one of the supported types. + assert ext in PACKAGE_DATA_DECODERS.keys() + + # Extract from the installations data directory. + raw = pkgutil.get_data('benchcab', os.path.join('data', filename)).decode('utf-8') + + # Decode and return. + return PACKAGE_DATA_DECODERS[ext](raw) \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 7b5ebde0..2a878959 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,7 +1,7 @@ [metadata] name=benchcab summary= Software to run a benchmarking suite for CABLE LSM -version=2.0.0 +# version=2.0.0 description=To benchmark CABLE simulations url=https://github.com/CABLE-LSM/benchcab license=Apache 2.0 @@ -21,3 +21,6 @@ console_scripts = [tool:pytest] addopts = --doctest-modules --doctest-glob='*.rst' --ignore setup.py --ignore conftest.py --ignore docs/conf.py + +[options] +include_package_data = True \ No newline at end of file diff --git a/tests/test_config.py b/tests/test_config.py index 19ae4b54..b76de27c 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,321 +1,35 @@ """`pytest` tests for config.py""" - -import os import pytest -import yaml - -from tests.common import TMP_DIR -from tests.common import get_mock_config -from benchcab.config import check_config, read_config -from benchcab import internal - - -def test_check_config(): - """Tests for `check_config()`.""" - - # Success case: test barebones config is valid - config = get_mock_config() - check_config(config) - - # Success case: branch configuration with missing name key - config = get_mock_config() - config["realisations"][0].pop("name") - check_config(config) - - # Success case: branch configuration with missing revision key - config = get_mock_config() - config["realisations"][0].pop("revision") - check_config(config) - - # Success case: branch configuration with missing patch key - config = get_mock_config() - config["realisations"][0].pop("patch") - check_config(config) - - # Success case: branch configuration with missing patch_remove key - config = get_mock_config() - config["realisations"][0].pop("patch_remove") - check_config(config) - - # Success case: test config when realisations contains more than two keys - config = get_mock_config() - config["realisations"].append({"path": "path/to/my_new_branch"}) - assert len(config["realisations"]) > 2 - check_config(config) - - # Success case: test config when realisations contains less than two keys - config = get_mock_config() - config["realisations"].pop() - assert len(config["realisations"]) < 2 - check_config(config) - - # Success case: test experiment with site id from the - # five-site-test is valid - config = get_mock_config() - config["experiment"] = "AU-Tum" - check_config(config) - - # Success case: test config without science_configurations is valid - config = get_mock_config() - config.pop("science_configurations") - check_config(config) - - # Success case: test config without fluxsite key is valid - config = get_mock_config() - config.pop("fluxsite") - check_config(config) - - # Success case: test config without multiprocessing key is valid - config = get_mock_config() - config["fluxsite"].pop("multiprocessing") - check_config(config) - - # Success case: test config without pbs key is valid - config = get_mock_config() - config["fluxsite"].pop("pbs") - check_config(config) - - # Success case: test config without ncpus key is valid - config = get_mock_config() - config["fluxsite"]["pbs"].pop("ncpus") - check_config(config) - - # Success case: test config without mem key is valid - config = get_mock_config() - config["fluxsite"]["pbs"].pop("mem") - check_config(config) - - # Success case: test config without walltime key is valid - config = get_mock_config() - config["fluxsite"]["pbs"].pop("walltime") - check_config(config) - - # Success case: test config without storage key is valid - config = get_mock_config() - config["fluxsite"]["pbs"].pop("storage") - check_config(config) - - # Failure case: test missing required keys raises an exception - with pytest.raises( - ValueError, - match="Keys are missing from the config file: project, experiment", - ): - config = get_mock_config() - config.pop("project") - config.pop("experiment") - check_config(config) - - # Failure case: test config with empty realisations key raises an exception - with pytest.raises(ValueError, match="The 'realisations' key cannot be empty."): - config = get_mock_config() - config["realisations"] = [] - check_config(config) - - # Failure case: test config with invalid experiment key raises an exception - with pytest.raises( - ValueError, - match="The 'experiment' key is invalid.\n" - "Valid experiments are: " - + ", ".join( - list(internal.MEORG_EXPERIMENTS) - + internal.MEORG_EXPERIMENTS["five-site-test"] - ), - ): - config = get_mock_config() - config["experiment"] = "foo" - check_config(config) - - # Failure case: test config with invalid experiment key (not a subset of - # five-site-test) raises an exception - with pytest.raises( - ValueError, - match="The 'experiment' key is invalid.\n" - "Valid experiments are: " - + ", ".join( - list(internal.MEORG_EXPERIMENTS) - + internal.MEORG_EXPERIMENTS["five-site-test"] - ), - ): - config = get_mock_config() - config["experiment"] = "CH-Dav" - check_config(config) - - # Failure case: 'path' key is missing in branch configuration - with pytest.raises( - ValueError, match="Realisation '1' must specify the `path` field." - ): - config = get_mock_config() - config["realisations"][1].pop("path") - check_config(config) - - # Failure case: test config with empty science_configurations key - # raises an exception - with pytest.raises( - ValueError, match="The 'science_configurations' key cannot be empty." - ): - config = get_mock_config() - config["science_configurations"] = [] - check_config(config) - - # Failure case: project key is not a string - with pytest.raises(TypeError, match="The 'project' key must be a string."): - config = get_mock_config() - config["project"] = 123 - check_config(config) - - # Failure case: realisations key is not a list - with pytest.raises(TypeError, match="The 'realisations' key must be a list."): - config = get_mock_config() - config["realisations"] = {"foo": "bar"} - check_config(config) - - # Failure case: realisations key is not a list of dict - with pytest.raises(TypeError, match="Realisation '0' must be a dictionary object."): - config = get_mock_config() - config["realisations"] = ["foo"] - check_config(config) - - # Failure case: type of name is not a string - with pytest.raises( - TypeError, match="The 'name' field in realisation '1' must be a string." - ): - config = get_mock_config() - config["realisations"][1]["name"] = 1234 - check_config(config) - - # Failure case: type of path is not a string - with pytest.raises( - TypeError, match="The 'path' field in realisation '1' must be a string." - ): - config = get_mock_config() - config["realisations"][1]["path"] = 1234 - check_config(config) - - # Failure case: type of revision key is not an integer - with pytest.raises( - TypeError, match="The 'revision' field in realisation '1' must be an integer." - ): - config = get_mock_config() - config["realisations"][1]["revision"] = "-1" - check_config(config) - - # Failure case: type of patch key is not a dictionary - with pytest.raises( - TypeError, - match="The 'patch' field in realisation '1' must be a dictionary that is " - "compatible with the f90nml python package.", - ): - config = get_mock_config() - config["realisations"][1]["patch"] = r"cable_user%ENABLE_SOME_FEATURE = .FALSE." - check_config(config) - - # Failure case: type of patch_remove key is not a dictionary - with pytest.raises( - TypeError, - match="The 'patch_remove' field in realisation '1' must be a dictionary that is " - "compatible with the f90nml python package.", - ): - config = get_mock_config() - config["realisations"][1]["patch_remove"] = r"cable_user%ENABLE_SOME_FEATURE" - check_config(config) - - # Failure case: type of build_script key is not a string - with pytest.raises( - TypeError, match="The 'build_script' field in realisation '1' must be a string." - ): - config = get_mock_config() - config["realisations"][1]["build_script"] = ["echo", "hello"] - check_config(config) - - # Failure case: modules key is not a list - with pytest.raises(TypeError, match="The 'modules' key must be a list."): - config = get_mock_config() - config["modules"] = "netcdf" - check_config(config) - - # Failure case: experiment key is not a string - with pytest.raises(TypeError, match="The 'experiment' key must be a string."): - config = get_mock_config() - config["experiment"] = 0 - check_config(config) - - # Failure case: type of config["science_configurations"] is not a list - with pytest.raises( - TypeError, match="The 'science_configurations' key must be a list." - ): - config = get_mock_config() - config["science_configurations"] = r"cable_user%GS_SWITCH = 'medlyn'" - check_config(config) - - # Failure case: type of config["science_configurations"] is not a list of dict - with pytest.raises( - TypeError, - match="Science config settings must be specified using a dictionary " - "that is compatible with the f90nml python package.", - ): - config = get_mock_config() - config["science_configurations"] = [r"cable_user%GS_SWITCH = 'medlyn'"] - check_config(config) - - # Failure case: type of config["fluxsite"] is not a dict - with pytest.raises(TypeError, match="The 'fluxsite' key must be a dictionary."): - config = get_mock_config() - config["fluxsite"] = ["ncpus: 16\nmem: 64GB\n"] - check_config(config) - - # Failure case: type of config["pbs"] is not a dict - with pytest.raises(TypeError, match="The 'pbs' key must be a dictionary."): - config = get_mock_config() - config["fluxsite"]["pbs"] = "-l ncpus=16" - check_config(config) - - # Failure case: type of config["pbs"]["ncpus"] is not an int - with pytest.raises(TypeError, match="The 'ncpus' key must be an integer."): - config = get_mock_config() - config["fluxsite"]["pbs"]["ncpus"] = "16" - check_config(config) - - # Failure case: type of config["pbs"]["mem"] is not a string - with pytest.raises(TypeError, match="The 'mem' key must be a string."): - config = get_mock_config() - config["fluxsite"]["pbs"]["mem"] = 64 - check_config(config) +import benchcab.utils as bu +import benchcab.config as bc - # Failure case: type of config["pbs"]["walltime"] is not a string - with pytest.raises(TypeError, match="The 'walltime' key must be a string."): - config = get_mock_config() - config["fluxsite"]["pbs"]["walltime"] = 60 - check_config(config) - # Failure case: type of config["pbs"]["storage"] is not a list - with pytest.raises(TypeError, match="The 'storage' key must be a list of strings."): - config = get_mock_config() - config["fluxsite"]["pbs"]["storage"] = "gdata/foo+gdata/bar" - check_config(config) +def test_read_config_pass(): + """Test read_config() passes as expected.""" + existent_path = bu.get_installed_root() / 'data' / 'test' / 'config-valid.yml' + + # Test for a path that exists + config = bc.read_config(existent_path) + assert config - # Failure case: type of config["pbs"]["storage"] is not a list of strings - with pytest.raises(TypeError, match="The 'storage' key must be a list of strings."): - config = get_mock_config() - config["fluxsite"]["pbs"]["storage"] = [1, 2, 3] - check_config(config) - # Failure case: type of config["multiprocessing"] is not a bool - with pytest.raises(TypeError, match="The 'multiprocessing' key must be a boolean."): - config = get_mock_config() - config["fluxsite"]["multiprocessing"] = 1 - check_config(config) +def test_read_config_fail(): + """Test that read_config() fails as expected.""" + nonexistent_path = bu.get_installed_root() / 'data' / 'test' / 'config-missing.yml' + # Test for a path that does not exist. + with pytest.raises(FileNotFoundError): + config = bc.read_config(nonexistent_path) -def test_read_config(): - """Tests for `read_config()`.""" - # Success case: write config to file, then read config from file - config = get_mock_config() - filename = TMP_DIR / "config-barebones.yaml" +def test_validate_config_valid(): + """Test validate_config() for a valid confiog file.""" + valid_config = bu.load_package_data('test/config-valid.yml') + assert bc.validate_config(valid_config) - with open(filename, "w", encoding="utf-8") as file: - yaml.dump(config, file) - res = read_config(filename) - os.remove(filename) - assert config == res +def test_validate_config_invalid(): + """Test validate_config() for an invalid config file.""" + invalid_config = bu.load_package_data('test/config-invalid.yml') + with pytest.raises(bc.ConfigValidationException): + bc.validate_config(invalid_config) \ No newline at end of file diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..af8d9554 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,23 @@ +"""Tests for utilities.""" +import pytest +import benchcab.utils as bu + + +def test_get_installed_root(): + """Test get_installed_root().""" + + # Test it actually returns something. We should be able to mock this. + assert bu.get_installed_root() + + +def test_load_package_data_pass(): + """Test load_package_data() passes as expected.""" + + assert isinstance(bu.load_package_data('config-schema.yml'), dict) + + +def test_load_package_data_fail(): + """Test load_package_data() fails as expected.""" + + with pytest.raises(FileNotFoundError): + missing = bu.load_package_data('config-missing.yml'), dict() \ No newline at end of file From d0379a800739a9473b499253f9ae7b3d4b0dbe6c Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Fri, 6 Oct 2023 11:23:54 +1100 Subject: [PATCH 2/6] Removing JSON version of schema, YML is cleaner. --- benchcab/data/config-schema.json | 89 -------------------------------- 1 file changed, 89 deletions(-) delete mode 100644 benchcab/data/config-schema.json diff --git a/benchcab/data/config-schema.json b/benchcab/data/config-schema.json deleted file mode 100644 index 1c4b468f..00000000 --- a/benchcab/data/config-schema.json +++ /dev/null @@ -1,89 +0,0 @@ -{ - "project": { - "type": "integer" - }, - "modules": { - "type": "list", - "schema": { - "type": "string" - } - }, - "experiment": { - "type": "string", - "allowed": [ - "five-site-test", - "forty-two-site-test", - "AU-Tum", - "AU-How", - "FI-Hyy", - "US-Var", - "US-Whs" - ] - }, - "science_configurations": { - "type": "list", - "schema": { - "type": "dict" - } - }, - "realisations": { - "type": "list", - "required": true, - "schema": { - "type": "dict", - "schema": { - "path": { - "type": "string" - }, - "name": { - "type": "string", - "required": false - }, - "build_script": { - "type": "string", - "required": false - }, - "revision": { - "type": "string", - "required": false - }, - "patch": { - "type": "dict", - "required": false - } - } - } - }, - "fluxsite": { - "type": "dict", - "required": false, - "schema": { - "multiprocessing": { - "type": "boolean" - }, - "pbs": { - "type": "dict", - "schema": { - "ncpus": { - "type": "integer", - "required": false - }, - "mem": { - "type": "integer", - "required": false - }, - "walltime": { - "type": "string", - "required": false - }, - "storage": { - "type": "list", - "schema": { - "type": "string" - } - } - } - } - } - } -} From d71de8168991fbb72e76909e044ace62140e2117 Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Mon, 9 Oct 2023 09:53:10 +1100 Subject: [PATCH 3/6] Pinned cerberus version in meta now that it has been published. Fixes #143. --- .conda/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.conda/meta.yaml b/.conda/meta.yaml index 6745ab42..21cd03c7 100644 --- a/.conda/meta.yaml +++ b/.conda/meta.yaml @@ -26,4 +26,4 @@ requirements: - PyYAML - f90nml - flatdict - - cerberus >= 1.3.5 + - cerberus >=1.3.5 From 0b36493f81fbd75c89829c8dc4d65ac6d7968609 Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Wed, 18 Oct 2023 12:24:02 +1100 Subject: [PATCH 4/6] Added fixes for PR. Fixes #143 --- benchcab/data/config-schema.yml | 20 +++++++++++++++----- benchcab/utils/__init__.py | 3 --- setup.cfg | 2 +- tests/test_config.py | 2 +- tests/test_utils.py | 2 +- 5 files changed, 18 insertions(+), 11 deletions(-) diff --git a/benchcab/data/config-schema.yml b/benchcab/data/config-schema.yml index 37ff37d5..a9325ac5 100644 --- a/benchcab/data/config-schema.yml +++ b/benchcab/data/config-schema.yml @@ -18,7 +18,7 @@ experiment: "US-Whs" ] -science_configuration: +science_configurations: type: "list" schema: type: "dict" @@ -43,6 +43,9 @@ realisations: patch: type: "dict" required: false + patch_remove: + type: "dict" + required: false fluxsite: type: "dict" @@ -50,6 +53,7 @@ fluxsite: schema: multiprocessing: type: "boolean" + required: false pbs: type: "dict" schema: @@ -57,11 +61,17 @@ fluxsite: type: "integer" required: false mem: - type: "integer" + type: "string" + regex: "^[0-9]+(?i)(mb|gb)$" required: false walltime: - type: "integer" + type: "string" + regex: "^[0-4][0-9]:[0-5][0-9]:[0-5][0-9]$" required: false storage: - type: "integer" - required: false \ No newline at end of file + type: list + required: false + schema: + type: "string" + required: false + \ No newline at end of file diff --git a/benchcab/utils/__init__.py b/benchcab/utils/__init__.py index 01510edb..59095572 100644 --- a/benchcab/utils/__init__.py +++ b/benchcab/utils/__init__.py @@ -39,9 +39,6 @@ def load_package_data(filename: str) -> dict: # Alias yaml and yml. ext = ext if ext != 'yaml' else 'yml' - # Make sure it is one of the supported types. - assert ext in PACKAGE_DATA_DECODERS.keys() - # Extract from the installations data directory. raw = pkgutil.get_data('benchcab', os.path.join('data', filename)).decode('utf-8') diff --git a/setup.cfg b/setup.cfg index 2a878959..13bc5ab3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,7 +1,7 @@ [metadata] name=benchcab summary= Software to run a benchmarking suite for CABLE LSM -# version=2.0.0 +version=2.0.0 description=To benchmark CABLE simulations url=https://github.com/CABLE-LSM/benchcab license=Apache 2.0 diff --git a/tests/test_config.py b/tests/test_config.py index 95076507..b6e0d8a8 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -23,7 +23,7 @@ def test_read_config_fail(): def test_validate_config_valid(): - """Test validate_config() for a valid confiog file.""" + """Test validate_config() for a valid config file.""" valid_config = bu.load_package_data('test/config-valid.yml') assert bc.validate_config(valid_config) diff --git a/tests/test_utils.py b/tests/test_utils.py index af8d9554..848161db 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -20,4 +20,4 @@ def test_load_package_data_fail(): """Test load_package_data() fails as expected.""" with pytest.raises(FileNotFoundError): - missing = bu.load_package_data('config-missing.yml'), dict() \ No newline at end of file + missing = bu.load_package_data('config-missing.yml') \ No newline at end of file From 4368e222be0c05caa0b97c3d305b29ccb8358c6a Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Tue, 24 Oct 2023 12:06:53 +1100 Subject: [PATCH 5/6] Added integration tests. Fixes #143. --- benchcab/data/test/integration.sh | 38 +++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 benchcab/data/test/integration.sh diff --git a/benchcab/data/test/integration.sh b/benchcab/data/test/integration.sh new file mode 100644 index 00000000..f261217f --- /dev/null +++ b/benchcab/data/test/integration.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +set -ex + +TEST_DIR=/scratch/rp23/$USER/benchcab/integration +EXAMPLE_REPO="git@github.com:CABLE-LSM/bench_example.git" + +# Remove the test work space, then recreate +rm -rf $TEST_DIR +mkdir -p $TEST_DIR + +# Clone the example repo +git clone $EXAMPLE_REPO $TEST_DIR +cd $TEST_DIR +git reset --hard 6287539e96fc8ef36dc578201fbf9847314147fb + +cat > config.yaml << EOL +project: rp23 + +experiment: AU-Tum + +realisations: + - path: trunk + - path: branches/Users/sb8430/test-branch + +modules: [ + intel-compiler/2021.1.1, + netcdf/4.7.4, + openmpi/4.1.0 +] + +fluxsite: + pbs: + storage: + - scratch/rp23 +EOL + +benchcab run -v \ No newline at end of file From b8a92f935fad7b479acd0385e1505a9891bb45dd Mon Sep 17 00:00:00 2001 From: Ben Schroeter Date: Tue, 24 Oct 2023 12:51:26 +1100 Subject: [PATCH 6/6] Apply suggestions from code review Co-authored-by: Sean Bryan <39685865+SeanBryan51@users.noreply.github.com> --- benchcab/data/test/integration.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchcab/data/test/integration.sh b/benchcab/data/test/integration.sh index f261217f..f3440cbe 100644 --- a/benchcab/data/test/integration.sh +++ b/benchcab/data/test/integration.sh @@ -2,7 +2,7 @@ set -ex -TEST_DIR=/scratch/rp23/$USER/benchcab/integration +TEST_DIR=/scratch/$PROJECT/$USER/benchcab/integration EXAMPLE_REPO="git@github.com:CABLE-LSM/bench_example.git" # Remove the test work space, then recreate @@ -15,7 +15,7 @@ cd $TEST_DIR git reset --hard 6287539e96fc8ef36dc578201fbf9847314147fb cat > config.yaml << EOL -project: rp23 +project: $PROJECT experiment: AU-Tum @@ -32,7 +32,7 @@ modules: [ fluxsite: pbs: storage: - - scratch/rp23 + - scratch/$PROJECT EOL benchcab run -v \ No newline at end of file