Skip to content

Commit

Permalink
Merge pull request #185 from CABLE-LSM/143-configyaml-input-validation
Browse files Browse the repository at this point in the history
  • Loading branch information
bschroeter authored Oct 25, 2023
2 parents c5f6dc9 + b8a92f9 commit 92181f8
Show file tree
Hide file tree
Showing 11 changed files with 350 additions and 479 deletions.
1 change: 1 addition & 0 deletions .conda/benchcab-dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ dependencies:
- pytest-cov
- pyyaml
- flatdict
- cerberus>=1.3.5
1 change: 1 addition & 0 deletions .conda/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ requirements:
- PyYAML
- f90nml
- flatdict
- cerberus >=1.3.5
241 changes: 82 additions & 159 deletions benchcab/config.py
Original file line number Diff line number Diff line change
@@ -1,170 +1,93 @@
"""A module containing all *_config() functions."""

from pathlib import Path

import yaml

from benchcab import internal
from cerberus import Validator
import benchcab.utils as bu


class ConfigValidationException(Exception):

def __init__(self, validator: Validator):
"""Config validation exception.
Parameters
----------
validator: cerberus.Validator
A validation object that has been used and has the errors attribute.
"""

# Nicely format the errors.
errors = [f'{k} = {v}' for k, v in validator.errors.items()]

# Assemble the error message.
msg = '\n\nThe following errors were raised when validating the config file.\n'
msg += '\n'.join(errors) + '\n'

def check_config(config: dict):
"""Performs input validation on config file.
# Raise to super.
super().__init__(msg)

If the config is invalid, an exception is raised. Otherwise, do nothing.

def validate_config(config: dict) -> bool:
    """Check a configuration dictionary against the packaged schema.

    Parameters
    ----------
    config : dict
        Configuration dictionary as loaded from the yaml file.

    Returns
    -------
    bool
        True when the configuration passes validation; a failing
        configuration never returns, it raises instead.

    Raises
    ------
    ConfigValidationException
        If the configuration does not satisfy the schema. The exception is
        built from the validator so its message carries the errors found.
    """
    # Build the validator straight from the schema shipped as package data.
    validator = Validator(bu.load_package_data('config-schema.yml'))

    # Guard clause: hand the used validator to the exception on failure.
    if not validator.validate(config):
        raise ConfigValidationException(validator)

    return True


def read_config(config_path: str) -> dict:
"""Reads the config file and returns a dictionary containing the configurations.
Parameters
----------
config_path : str
Path to the configuration file.
Returns
-------
dict
Configuration dict.
Raises
------
ConfigValidationException
Raised when the configuration file fails validation.
"""
if any(key not in config for key in internal.CONFIG_REQUIRED_KEYS):
raise ValueError(
"Keys are missing from the config file: "
+ ", ".join(
key for key in internal.CONFIG_REQUIRED_KEYS if key not in config
)
)

if not isinstance(config["project"], str):
msg = "The 'project' key must be a string."
raise TypeError(msg)

if not isinstance(config["modules"], list):
msg = "The 'modules' key must be a list."
raise TypeError(msg)

if not isinstance(config["experiment"], str):
msg = "The 'experiment' key must be a string."
raise TypeError(msg)

# the "science_configurations" key is optional
if "science_configurations" in config:
if not isinstance(config["science_configurations"], list):
msg = "The 'science_configurations' key must be a list."
raise TypeError(msg)
if config["science_configurations"] == []:
msg = "The 'science_configurations' key cannot be empty."
raise ValueError(msg)
if not all(
isinstance(value, dict) for value in config["science_configurations"]
):
msg = (
"Science config settings must be specified using a dictionary "
"that is compatible with the f90nml python package."
)
raise TypeError(msg)

# the "fluxsite" key is optional
if "fluxsite" in config:
if not isinstance(config["fluxsite"], dict):
msg = "The 'fluxsite' key must be a dictionary."
raise TypeError(msg)
# the "pbs" key is optional
if "pbs" in config["fluxsite"]:
if not isinstance(config["fluxsite"]["pbs"], dict):
msg = "The 'pbs' key must be a dictionary."
raise TypeError(msg)
# the "ncpus" key is optional
if "ncpus" in config["fluxsite"]["pbs"] and not isinstance(
config["fluxsite"]["pbs"]["ncpus"], int
):
msg = "The 'ncpus' key must be an integer."
raise TypeError(msg)
# the "mem" key is optional
if "mem" in config["fluxsite"]["pbs"] and not isinstance(
config["fluxsite"]["pbs"]["mem"], str
):
msg = "The 'mem' key must be a string."
raise TypeError(msg)
# the "walltime" key is optional
if "walltime" in config["fluxsite"]["pbs"] and not isinstance(
config["fluxsite"]["pbs"]["walltime"], str
):
msg = "The 'walltime' key must be a string."
raise TypeError(msg)
# the "storage" key is optional
if "storage" in config["fluxsite"]["pbs"]:
if not isinstance(config["fluxsite"]["pbs"]["storage"], list) or any(
not isinstance(val, str)
for val in config["fluxsite"]["pbs"]["storage"]
):
msg = "The 'storage' key must be a list of strings."
raise TypeError(msg)
# the "multiprocessing" key is optional
if "multiprocessing" in config["fluxsite"] and not isinstance(
config["fluxsite"]["multiprocessing"], bool
):
msg = "The 'multiprocessing' key must be a boolean."
raise TypeError(msg)

valid_experiments = (
list(internal.MEORG_EXPERIMENTS) + internal.MEORG_EXPERIMENTS["five-site-test"]
)
if config["experiment"] not in valid_experiments:
msg = (
"The 'experiment' key is invalid.\n"
"Valid experiments are: " + ", ".join(valid_experiments)
)
raise ValueError(msg)

if not isinstance(config["realisations"], list):
msg = "The 'realisations' key must be a list."
raise TypeError(msg)

if config["realisations"] == []:
msg = "The 'realisations' key cannot be empty."
raise ValueError(msg)

for branch_id, branch_config in enumerate(config["realisations"]):
if not isinstance(branch_config, dict):
msg = f"Realisation '{branch_id}' must be a dictionary object."
raise TypeError(msg)
if "path" not in branch_config:
msg = f"Realisation '{branch_id}' must specify the `path` field."
raise ValueError(msg)
if not isinstance(branch_config["path"], str):
msg = f"The 'path' field in realisation '{branch_id}' must be a string."
raise TypeError(msg)
# the "name" key is optional
if "name" in branch_config and not isinstance(branch_config["name"], str):
msg = f"The 'name' field in realisation '{branch_id}' must be a string."
raise TypeError(msg)
# the "revision" key is optional
if "revision" in branch_config and not isinstance(
branch_config["revision"], int
):
msg = (
f"The 'revision' field in realisation '{branch_id}' must be an "
"integer."
)
raise TypeError(msg)
# the "patch" key is optional
if "patch" in branch_config and not isinstance(branch_config["patch"], dict):
msg = (
f"The 'patch' field in realisation '{branch_id}' must be a "
"dictionary that is compatible with the f90nml python package."
)
raise TypeError(msg)
# the "patch_remove" key is optional
if "patch_remove" in branch_config and not isinstance(
branch_config["patch_remove"], dict
):
msg = (
f"The 'patch_remove' field in realisation '{branch_id}' must be a "
"dictionary that is compatible with the f90nml python package."
)
raise TypeError(msg)
# the "build_script" key is optional
if "build_script" in branch_config and not isinstance(
branch_config["build_script"], str
):
msg = (
f"The 'build_script' field in realisation '{branch_id}' must be a "
"string."
)
raise TypeError(msg)


def read_config(config_path: Path) -> dict:
"""Reads the config file and returns a dictionary containing the configurations."""
with config_path.open("r", encoding="utf-8") as file:
config = yaml.safe_load(file)

check_config(config)
# Load the configuration file.
with open(Path(config_path), "r", encoding="utf-8") as file:
config = yaml.safe_load(file)

return config
# Validate and return.
validate_config(config)
return config
77 changes: 77 additions & 0 deletions benchcab/data/config-schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
project:
type: "string"

modules:
type: "list"
schema:
type: "string"

experiment:
type: "string"
allowed: [
"five-site-test",
"forty-two-site-test",
"AU-Tum",
"AU-How",
"FI-Hyy",
"US-Var",
"US-Whs"
]

science_configurations:
type: "list"
schema:
type: "dict"

# Required list of realisations; every entry must be a dictionary.
realisations:
  type: "list"
  required: true
  schema:
    type: "dict"
    schema:
      # Cerberus treats a field as optional unless `required: true` is
      # given, so spell it out: a realisation without a path is invalid.
      path:
        type: "string"
        required: true
      name:
        type: "string"
        required: false
      build_script:
        type: "string"
        required: false
      # An unquoted revision number is loaded by YAML as an integer, so
      # integers are accepted alongside the quoted-string form.
      revision:
        type: ["integer", "string"]
        required: false
      patch:
        type: "dict"
        required: false
      patch_remove:
        type: "dict"
        required: false

# Optional `fluxsite` dictionary.
fluxsite:
  type: "dict"
  required: false
  schema:
    multiprocessing:
      type: "boolean"
      required: false
    # Optional `pbs` sub-dictionary; each of its fields is optional too.
    pbs:
      type: "dict"
      schema:
        ncpus:
          type: "integer"
          required: false
        mem:
          type: "string"
          # One or more digits followed by a case-insensitive MB/GB unit.
          # The inline flag must lead the pattern: Python 3.11+ refuses to
          # compile a global flag such as (?i) placed mid-expression.
          regex: "(?i)^[0-9]+(mb|gb)$"
          required: false
        walltime:
          type: "string"
          # HH:MM:SS, with the hours field limited to 00-49.
          regex: "^[0-4][0-9]:[0-5][0-9]:[0-5][0-9]$"
          required: false
        storage:
          type: list
          required: false
          # Every item of the list must be a string.
          schema:
            type: "string"
            required: false

19 changes: 19 additions & 0 deletions benchcab/data/test/config-invalid.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# A sample configuration that should fail validation.
project: w97

experiment: NON EXISTENT EXPERIMENT!!!

realisations: [
{
path: "trunk",
},
{
path: "branches/Users/ccc561/v3.0-YP-changes",
}
]

modules: [
intel-compiler/2021.1.1,
netcdf/4.7.4,
openmpi/4.1.0
]
Loading

0 comments on commit 92181f8

Please sign in to comment.