forked from mdekauwe/CABLE_benchmarking
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #185 from CABLE-LSM/143-configyaml-input-validation
- Loading branch information
Showing
11 changed files
with
350 additions
and
479 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,3 +9,4 @@ dependencies: | |
- pytest-cov | ||
- pyyaml | ||
- flatdict | ||
- cerberus>=1.3.5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,3 +26,4 @@ requirements: | |
- PyYAML | ||
- f90nml | ||
- flatdict | ||
- cerberus >=1.3.5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,170 +1,93 @@ | ||
"""A module containing all *_config() functions.""" | ||
|
||
from pathlib import Path | ||
|
||
import yaml | ||
|
||
from benchcab import internal | ||
from cerberus import Validator | ||
import benchcab.utils as bu | ||
|
||
|
||
class ConfigValidationException(Exception): | ||
|
||
def __init__(self, validator: Validator): | ||
"""Config validation exception. | ||
Parameters | ||
---------- | ||
validator: cerberus.Validator | ||
A validation object that has been used and has the errors attribute. | ||
""" | ||
|
||
# Nicely format the errors. | ||
errors = [f'{k} = {v}' for k, v in validator.errors.items()] | ||
|
||
# Assemble the error message. | ||
msg = '\n\nThe following errors were raised when validating the config file.\n' | ||
msg += '\n'.join(errors) + '\n' | ||
|
||
def check_config(config: dict): | ||
"""Performs input validation on config file. | ||
# Raise to super. | ||
super().__init__(msg) | ||
|
||
If the config is invalid, an exception is raised. Otherwise, do nothing. | ||
|
||
def validate_config(config: dict) -> bool:
    """Check a configuration dictionary against the packaged schema.

    Parameters
    ----------
    config : dict
        Configuration dictionary, as loaded from the yaml file.

    Returns
    -------
    bool
        Always True; an invalid configuration raises instead of returning.

    Raises
    ------
    ConfigValidationException
        Raised when the configuration does not satisfy the schema.

    """
    # Build a validator from the schema file shipped as package data.
    validator = Validator(bu.load_package_data('config-schema.yml'))

    # On failure, hand the used validator to the exception so that its
    # collected errors can be reported to the user.
    if not validator.validate(config):
        raise ConfigValidationException(validator)

    return True
|
||
|
||
def read_config(config_path: str) -> dict: | ||
"""Reads the config file and returns a dictionary containing the configurations. | ||
Parameters | ||
---------- | ||
config_path : str | ||
Path to the configuration file. | ||
Returns | ||
------- | ||
dict | ||
Configuration dict. | ||
Raises | ||
------ | ||
ConfigValidationException | ||
Raised when the configuration file fails validation. | ||
""" | ||
if any(key not in config for key in internal.CONFIG_REQUIRED_KEYS): | ||
raise ValueError( | ||
"Keys are missing from the config file: " | ||
+ ", ".join( | ||
key for key in internal.CONFIG_REQUIRED_KEYS if key not in config | ||
) | ||
) | ||
|
||
if not isinstance(config["project"], str): | ||
msg = "The 'project' key must be a string." | ||
raise TypeError(msg) | ||
|
||
if not isinstance(config["modules"], list): | ||
msg = "The 'modules' key must be a list." | ||
raise TypeError(msg) | ||
|
||
if not isinstance(config["experiment"], str): | ||
msg = "The 'experiment' key must be a string." | ||
raise TypeError(msg) | ||
|
||
# the "science_configurations" key is optional | ||
if "science_configurations" in config: | ||
if not isinstance(config["science_configurations"], list): | ||
msg = "The 'science_configurations' key must be a list." | ||
raise TypeError(msg) | ||
if config["science_configurations"] == []: | ||
msg = "The 'science_configurations' key cannot be empty." | ||
raise ValueError(msg) | ||
if not all( | ||
isinstance(value, dict) for value in config["science_configurations"] | ||
): | ||
msg = ( | ||
"Science config settings must be specified using a dictionary " | ||
"that is compatible with the f90nml python package." | ||
) | ||
raise TypeError(msg) | ||
|
||
# the "fluxsite" key is optional | ||
if "fluxsite" in config: | ||
if not isinstance(config["fluxsite"], dict): | ||
msg = "The 'fluxsite' key must be a dictionary." | ||
raise TypeError(msg) | ||
# the "pbs" key is optional | ||
if "pbs" in config["fluxsite"]: | ||
if not isinstance(config["fluxsite"]["pbs"], dict): | ||
msg = "The 'pbs' key must be a dictionary." | ||
raise TypeError(msg) | ||
# the "ncpus" key is optional | ||
if "ncpus" in config["fluxsite"]["pbs"] and not isinstance( | ||
config["fluxsite"]["pbs"]["ncpus"], int | ||
): | ||
msg = "The 'ncpus' key must be an integer." | ||
raise TypeError(msg) | ||
# the "mem" key is optional | ||
if "mem" in config["fluxsite"]["pbs"] and not isinstance( | ||
config["fluxsite"]["pbs"]["mem"], str | ||
): | ||
msg = "The 'mem' key must be a string." | ||
raise TypeError(msg) | ||
# the "walltime" key is optional | ||
if "walltime" in config["fluxsite"]["pbs"] and not isinstance( | ||
config["fluxsite"]["pbs"]["walltime"], str | ||
): | ||
msg = "The 'walltime' key must be a string." | ||
raise TypeError(msg) | ||
# the "storage" key is optional | ||
if "storage" in config["fluxsite"]["pbs"]: | ||
if not isinstance(config["fluxsite"]["pbs"]["storage"], list) or any( | ||
not isinstance(val, str) | ||
for val in config["fluxsite"]["pbs"]["storage"] | ||
): | ||
msg = "The 'storage' key must be a list of strings." | ||
raise TypeError(msg) | ||
# the "multiprocessing" key is optional | ||
if "multiprocessing" in config["fluxsite"] and not isinstance( | ||
config["fluxsite"]["multiprocessing"], bool | ||
): | ||
msg = "The 'multiprocessing' key must be a boolean." | ||
raise TypeError(msg) | ||
|
||
valid_experiments = ( | ||
list(internal.MEORG_EXPERIMENTS) + internal.MEORG_EXPERIMENTS["five-site-test"] | ||
) | ||
if config["experiment"] not in valid_experiments: | ||
msg = ( | ||
"The 'experiment' key is invalid.\n" | ||
"Valid experiments are: " + ", ".join(valid_experiments) | ||
) | ||
raise ValueError(msg) | ||
|
||
if not isinstance(config["realisations"], list): | ||
msg = "The 'realisations' key must be a list." | ||
raise TypeError(msg) | ||
|
||
if config["realisations"] == []: | ||
msg = "The 'realisations' key cannot be empty." | ||
raise ValueError(msg) | ||
|
||
for branch_id, branch_config in enumerate(config["realisations"]): | ||
if not isinstance(branch_config, dict): | ||
msg = f"Realisation '{branch_id}' must be a dictionary object." | ||
raise TypeError(msg) | ||
if "path" not in branch_config: | ||
msg = f"Realisation '{branch_id}' must specify the `path` field." | ||
raise ValueError(msg) | ||
if not isinstance(branch_config["path"], str): | ||
msg = f"The 'path' field in realisation '{branch_id}' must be a string." | ||
raise TypeError(msg) | ||
# the "name" key is optional | ||
if "name" in branch_config and not isinstance(branch_config["name"], str): | ||
msg = f"The 'name' field in realisation '{branch_id}' must be a string." | ||
raise TypeError(msg) | ||
# the "revision" key is optional | ||
if "revision" in branch_config and not isinstance( | ||
branch_config["revision"], int | ||
): | ||
msg = ( | ||
f"The 'revision' field in realisation '{branch_id}' must be an " | ||
"integer." | ||
) | ||
raise TypeError(msg) | ||
# the "patch" key is optional | ||
if "patch" in branch_config and not isinstance(branch_config["patch"], dict): | ||
msg = ( | ||
f"The 'patch' field in realisation '{branch_id}' must be a " | ||
"dictionary that is compatible with the f90nml python package." | ||
) | ||
raise TypeError(msg) | ||
# the "patch_remove" key is optional | ||
if "patch_remove" in branch_config and not isinstance( | ||
branch_config["patch_remove"], dict | ||
): | ||
msg = ( | ||
f"The 'patch_remove' field in realisation '{branch_id}' must be a " | ||
"dictionary that is compatible with the f90nml python package." | ||
) | ||
raise TypeError(msg) | ||
# the "build_script" key is optional | ||
if "build_script" in branch_config and not isinstance( | ||
branch_config["build_script"], str | ||
): | ||
msg = ( | ||
f"The 'build_script' field in realisation '{branch_id}' must be a " | ||
"string." | ||
) | ||
raise TypeError(msg) | ||
|
||
|
||
def read_config(config_path: Path) -> dict: | ||
"""Reads the config file and returns a dictionary containing the configurations.""" | ||
with config_path.open("r", encoding="utf-8") as file: | ||
config = yaml.safe_load(file) | ||
|
||
check_config(config) | ||
# Load the configuration file. | ||
with open(Path(config_path), "r", encoding="utf-8") as file: | ||
config = yaml.safe_load(file) | ||
|
||
return config | ||
# Validate and return. | ||
validate_config(config) | ||
return config |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
# Cerberus schema used to validate the benchcab configuration file.
# Keys are optional unless marked `required: true`.

project:
  type: "string"

modules:
  type: "list"
  schema:
    type: "string"

experiment:
  type: "string"
  allowed: [
    "five-site-test",
    "forty-two-site-test",
    "AU-Tum",
    "AU-How",
    "FI-Hyy",
    "US-Var",
    "US-Whs"
  ]

science_configurations:
  type: "list"
  schema:
    type: "dict"

realisations:
  type: "list"
  required: true
  schema:
    type: "dict"
    schema:
      # Every realisation must say where its source lives.
      path:
        type: "string"
        required: true
      name:
        type: "string"
        required: false
      build_script:
        type: "string"
        required: false
      # A revision is a number, not text.
      revision:
        type: "integer"
        required: false
      patch:
        type: "dict"
        required: false
      patch_remove:
        type: "dict"
        required: false

fluxsite:
  type: "dict"
  required: false
  schema:
    multiprocessing:
      type: "boolean"
      required: false
    pbs:
      type: "dict"
      schema:
        ncpus:
          type: "integer"
          required: false
        # Digits followed by a case-insensitive unit, e.g. "4GB" or "500mb".
        # The flag is scoped with (?i:...) because a global (?i) that is not
        # at the very start of the pattern is an error on Python 3.11+.
        mem:
          type: "string"
          regex: "^[0-9]+(?i:mb|gb)$"
          required: false
        # HH:MM:SS
        walltime:
          type: "string"
          regex: "^[0-4][0-9]:[0-5][0-9]:[0-5][0-9]$"
          required: false
        storage:
          type: "list"
          required: false
          schema:
            type: "string"
            required: false
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Sample configuration that is expected to fail validation:
# the experiment name below is not one of the recognised experiments.
project: w97

experiment: NON EXISTENT EXPERIMENT!!!

realisations:
  - path: "trunk"
  - path: "branches/Users/ccc561/v3.0-YP-changes"

modules:
  - intel-compiler/2021.1.1
  - netcdf/4.7.4
  - openmpi/4.1.0
Oops, something went wrong.