Skip to content

Commit

Permalink
Add requested changes from code review
Browse files Browse the repository at this point in the history
Co-authored-by: Claire Carouge <[email protected]>
  • Loading branch information
SeanBryan51 and ccarouge committed Aug 7, 2023
1 parent da2233a commit 03a724c
Show file tree
Hide file tree
Showing 10 changed files with 124 additions and 85 deletions.
58 changes: 34 additions & 24 deletions benchcab/bench_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,31 +43,41 @@ def check_config(config: dict):
"that is compatible with the f90nml python package."
)

# the "pbs" key is optional
if "pbs" in config:
if not isinstance(config["pbs"], dict):
raise TypeError("The 'pbs' key must be a dictionary.")
# the "ncpus" key is optional
if "ncpus" in config["pbs"] and not isinstance(config["pbs"]["ncpus"], int):
raise TypeError("The 'ncpus' key must be an integer.")
# the "mem" key is optional
if "mem" in config["pbs"] and not isinstance(config["pbs"]["mem"], str):
raise TypeError("The 'mem' key must be a string.")
# the "walltime" key is optional
if "walltime" in config["pbs"] and not isinstance(
config["pbs"]["walltime"], str
):
raise TypeError("The 'walltime' key must be a string.")
# the "storage" key is optional
if "storage" in config["pbs"]:
if not isinstance(config["pbs"]["storage"], list) or any(
not isinstance(val, str) for val in config["pbs"]["storage"]
# the "fluxsite" key is optional
if "fluxsite" in config:
if not isinstance(config["fluxsite"], dict):
raise TypeError("The 'fluxsite' key must be a dictionary.")
# the "pbs" key is optional
if "pbs" in config["fluxsite"]:
if not isinstance(config["fluxsite"]["pbs"], dict):
raise TypeError("The 'pbs' key must be a dictionary.")
# the "ncpus" key is optional
if "ncpus" in config["fluxsite"]["pbs"] and not isinstance(
config["fluxsite"]["pbs"]["ncpus"], int
):
raise TypeError("The 'storage' key must be a list of strings.")

# the "multiprocessing" key is optional
if "multiprocessing" in config and not isinstance(config["multiprocessing"], bool):
raise TypeError("The 'multiprocessing' key must be a boolean.")
raise TypeError("The 'ncpus' key must be an integer.")
# the "mem" key is optional
if "mem" in config["fluxsite"]["pbs"] and not isinstance(
config["fluxsite"]["pbs"]["mem"], str
):
raise TypeError("The 'mem' key must be a string.")
# the "walltime" key is optional
if "walltime" in config["fluxsite"]["pbs"] and not isinstance(
config["fluxsite"]["pbs"]["walltime"], str
):
raise TypeError("The 'walltime' key must be a string.")
# the "storage" key is optional
if "storage" in config["fluxsite"]["pbs"]:
if not isinstance(config["fluxsite"]["pbs"]["storage"], list) or any(
not isinstance(val, str)
for val in config["fluxsite"]["pbs"]["storage"]
):
raise TypeError("The 'storage' key must be a list of strings.")
# the "multiprocessing" key is optional
if "multiprocessing" in config["fluxsite"] and not isinstance(
config["fluxsite"]["multiprocessing"], bool
):
raise TypeError("The 'multiprocessing' key must be a boolean.")

valid_experiments = (
list(internal.MEORG_EXPERIMENTS) + internal.MEORG_EXPERIMENTS["five-site-test"]
Expand Down
19 changes: 13 additions & 6 deletions benchcab/benchcab.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,10 +211,13 @@ def fluxsite_run_tasks(self):
"""Endpoint for `benchcab fluxsite-run-tasks`."""
tasks = self.tasks if self.tasks else self._initialise_tasks()
print("Running fluxsite tasks...")
multiprocess = self.config.get("multiprocessing", internal.DEFAULT_MULTIPROCESS)
try:
multiprocess = self.config["fluxsite"]["multiprocess"]
except KeyError:
multiprocess = internal.FLUXSITE_DEFAULT_MULTIPROCESS
if multiprocess:
ncpus = self.config.get("pbs", {}).get(

Check warning on line 219 in benchcab/benchcab.py

View check run for this annotation

Codecov / codecov/patch

benchcab/benchcab.py#L214-L219

Added lines #L214 - L219 were not covered by tests
"ncpus", internal.DEFAULT_PBS["ncpus"]
"ncpus", internal.FLUXSITE_DEFAULT_PBS["ncpus"]
)
run_tasks_in_parallel(tasks, n_processes=ncpus, verbose=self.args.verbose)

Check warning on line 222 in benchcab/benchcab.py

View check run for this annotation

Codecov / codecov/patch

benchcab/benchcab.py#L222

Added line #L222 was not covered by tests
else:
Expand All @@ -234,11 +237,15 @@ def fluxsite_bitwise_cmp(self):
comparisons = get_fluxsite_comparisons(tasks)

print("Running comparison tasks...")
multiprocess = self.config.get("multiprocessing", internal.DEFAULT_MULTIPROCESS)
try:
multiprocess = self.config["fluxsite"]["multiprocess"]
except KeyError:
multiprocess = internal.FLUXSITE_DEFAULT_MULTIPROCESS
if multiprocess:
ncpus = self.config.get("pbs", {}).get(
"ncpus", internal.DEFAULT_PBS["ncpus"]
)
try:
ncpus = self.config["fluxsite"]["pbs"]["ncpus"]
except KeyError:
ncpus = internal.FLUXSITE_DEFAULT_PBS["ncpus"]
run_comparisons_in_parallel(

Check warning on line 249 in benchcab/benchcab.py

View check run for this annotation

Codecov / codecov/patch

benchcab/benchcab.py#L240-L249

Added lines #L240 - L249 were not covered by tests
comparisons, n_processes=ncpus, verbose=self.args.verbose
)
Expand Down
2 changes: 1 addition & 1 deletion benchcab/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def run_comparisons(comparison_tasks: list[ComparisonTask], verbose=False) -> No

def run_comparisons_in_parallel(
comparison_tasks: list[ComparisonTask],
n_processes=internal.DEFAULT_PBS["ncpus"],
n_processes=internal.FLUXSITE_DEFAULT_PBS["ncpus"],
verbose=False,
) -> None:
"""Runs bitwise comparison tasks in parallel across multiple processes."""
Expand Down
9 changes: 7 additions & 2 deletions benchcab/internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,14 @@

# Parameters for job script:
QSUB_FNAME = "benchmark_cable_qsub.sh"
DEFAULT_PBS: Any = {"ncpus": 18, "mem": "30GB", "walltime": "6:00:00", "storage": []}
FLUXSITE_DEFAULT_PBS: Any = {
"ncpus": 18,
"mem": "30GB",
"walltime": "6:00:00",
"storage": [],
}
MPI = False
DEFAULT_MULTIPROCESS = True
FLUXSITE_DEFAULT_MULTIPROCESS = True

# DIRECTORY PATHS/STRUCTURE:

Expand Down
2 changes: 1 addition & 1 deletion benchcab/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ def run_tasks(tasks: list[Task], verbose=False):


def run_tasks_in_parallel(
tasks: list[Task], n_processes=internal.DEFAULT_PBS["ncpus"], verbose=False
tasks: list[Task], n_processes=internal.FLUXSITE_DEFAULT_PBS["ncpus"], verbose=False
):
"""Runs tasks in `tasks` in parallel across multiple processes."""

Expand Down
10 changes: 5 additions & 5 deletions benchcab/utils/pbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,20 @@ def render_job_script(
"""

if pbs_config is None:
pbs_config = internal.DEFAULT_PBS
pbs_config = internal.FLUXSITE_DEFAULT_PBS

module_load_lines = "\n".join(
f"module load {module_name}" for module_name in modules
)
verbose_flag = "-v" if verbose else ""
ncpus = pbs_config.get("ncpus", internal.DEFAULT_PBS["ncpus"])
mem = pbs_config.get("mem", internal.DEFAULT_PBS["mem"])
walltime = pbs_config.get("walltime", internal.DEFAULT_PBS["walltime"])
ncpus = pbs_config.get("ncpus", internal.FLUXSITE_DEFAULT_PBS["ncpus"])
mem = pbs_config.get("mem", internal.FLUXSITE_DEFAULT_PBS["mem"])
walltime = pbs_config.get("walltime", internal.FLUXSITE_DEFAULT_PBS["walltime"])
storage_flags = [
"gdata/ks32",
"gdata/hh5",
f"gdata/{project}",
*pbs_config.get("storage", internal.DEFAULT_PBS["storage"]),
*pbs_config.get("storage", internal.FLUXSITE_DEFAULT_PBS["storage"]),
]
return f"""#!/bin/bash
#PBS -l wd
Expand Down
34 changes: 19 additions & 15 deletions docs/user_guide/config_options.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,44 +43,48 @@ The different running modes of `benchcab` are solely dependent on the options us

: NCI modules to use for compiling CABLE

### `pbs`
### `fluxsite`
: Contains settings specific to fluxsite tests.

#### `pbs`

: Contains settings specific to the PBS scheduler at NCI for the PBS script running the CABLE simulations at FLUXNET sites and the bitwise comparison for these simulations.

#### `ncpus`
##### `ncpus`

: The number of CPU cores to allocate for the PBS job, i.e. the `-l ncpus=<4>` PBS flag (see [PBS Directives Explained][nci-pbs-directives]).
: This key is **optional** and can be omitted from the config file. By default `ncpus` is set to `18`.

#### `mem`
##### `mem`

: The total memory limit for the PBS job, i.e. the `-l mem=<10GB>` PBS flag (see [PBS Directives Explained][nci-pbs-directives]).
: This key is **optional** and can be omitted from the config file. By default `mem` is set to `30GB`.

#### `walltime`
##### `walltime`

: The wall clock time limit for the PBS job, i.e. `-l walltime=<HH:MM:SS>` PBS flag (see [PBS Directives Explained][nci-pbs-directives]).
: This key is **optional** and can be omitted from the config file. By default `walltime` is set to `6:00:00`.

#### `storage`
##### `storage`

: A list of extra storage flags required for the PBS job, i.e. `-l storage=<scratch/a00>` (see [PBS Directives Explained][nci-pbs-directives]).
: This key is **optional** and can be omitted from the config file. By default `storage` is set to `[]`.

Example:
```yaml
pbs:
ncpus: 16
mem: 64GB
walltime: 00:01:00
storage: [scratch/a00, gdata/xy11]
```
### `multiprocessing`
#### `multiprocessing`

: Enables or disables multiprocessing for executing embarrassingly parallel tasks.
: This key is **optional** and can be omitted from the config file. By default `multiprocessing` is set to `True`.

Example:
```yaml
fluxsite:
pbs:
ncpus: 16
mem: 64GB
walltime: 00:01:00
storage: [scratch/a00, gdata/xy11]
multiprocessing: True
```
## Simulations options
Expand Down
14 changes: 8 additions & 6 deletions tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,15 @@ def get_mock_config() -> dict:
}
},
],
"pbs": {
"ncpus": 16,
"mem": "64G",
"walltime": "01:00:00",
"storage": ["gdata/foo123"],
"fluxsite": {
"pbs": {
"ncpus": 16,
"mem": "64G",
"walltime": "01:00:00",
"storage": ["gdata/foo123"],
},
"multiprocessing": True,
},
"multiprocessing": True,
}
return config

Expand Down
37 changes: 24 additions & 13 deletions tests/test_bench_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,34 +55,39 @@ def test_check_config():
config.pop("science_configurations")
check_config(config)

# Success case: test config without fluxsite key is valid
config = get_mock_config()
config.pop("fluxsite")
check_config(config)

# Success case: test config without multiprocessing key is valid
config = get_mock_config()
config.pop("multiprocessing")
config["fluxsite"].pop("multiprocessing")
check_config(config)

# Success case: test config without pbs key is valid
config = get_mock_config()
config.pop("pbs")
config["fluxsite"].pop("pbs")
check_config(config)

# Success case: test config without ncpus key is valid
config = get_mock_config()
config["pbs"].pop("ncpus")
config["fluxsite"]["pbs"].pop("ncpus")
check_config(config)

# Success case: test config without mem key is valid
config = get_mock_config()
config["pbs"].pop("mem")
config["fluxsite"]["pbs"].pop("mem")
check_config(config)

# Success case: test config without walltime key is valid
config = get_mock_config()
config["pbs"].pop("walltime")
config["fluxsite"]["pbs"].pop("walltime")
check_config(config)

# Success case: test config without storage key is valid
config = get_mock_config()
config["pbs"].pop("storage")
config["fluxsite"]["pbs"].pop("storage")
check_config(config)

# Failure case: test missing required keys raises an exception
Expand Down Expand Up @@ -237,46 +242,52 @@ def test_check_config():
config["science_configurations"] = [r"cable_user%GS_SWITCH = 'medlyn'"]
check_config(config)

# Failure case: type of config["fluxsite"] is not a dict
with pytest.raises(TypeError, match="The 'fluxsite' key must be a dictionary."):
config = get_mock_config()
config["fluxsite"] = ["ncpus: 16\nmem: 64GB\n"]
check_config(config)

# Failure case: type of config["pbs"] is not a dict
with pytest.raises(TypeError, match="The 'pbs' key must be a dictionary."):
config = get_mock_config()
config["pbs"] = "-l ncpus=16"
config["fluxsite"]["pbs"] = "-l ncpus=16"
check_config(config)

# Failure case: type of config["pbs"]["ncpus"] is not an int
with pytest.raises(TypeError, match="The 'ncpus' key must be an integer."):
config = get_mock_config()
config["pbs"]["ncpus"] = "16"
config["fluxsite"]["pbs"]["ncpus"] = "16"
check_config(config)

# Failure case: type of config["pbs"]["mem"] is not a string
with pytest.raises(TypeError, match="The 'mem' key must be a string."):
config = get_mock_config()
config["pbs"]["mem"] = 64
config["fluxsite"]["pbs"]["mem"] = 64
check_config(config)

# Failure case: type of config["pbs"]["walltime"] is not a string
with pytest.raises(TypeError, match="The 'walltime' key must be a string."):
config = get_mock_config()
config["pbs"]["walltime"] = 60
config["fluxsite"]["pbs"]["walltime"] = 60
check_config(config)

# Failure case: type of config["pbs"]["storage"] is not a list
with pytest.raises(TypeError, match="The 'storage' key must be a list of strings."):
config = get_mock_config()
config["pbs"]["storage"] = "gdata/foo+gdata/bar"
config["fluxsite"]["pbs"]["storage"] = "gdata/foo+gdata/bar"
check_config(config)

# Failure case: type of config["pbs"]["storage"] is not a list of strings
with pytest.raises(TypeError, match="The 'storage' key must be a list of strings."):
config = get_mock_config()
config["pbs"]["storage"] = [1, 2, 3]
config["fluxsite"]["pbs"]["storage"] = [1, 2, 3]
check_config(config)

# Failure case: type of config["multiprocessing"] is not a bool
with pytest.raises(TypeError, match="The 'multiprocessing' key must be a boolean."):
config = get_mock_config()
config["multiprocessing"] = 1
config["fluxsite"]["multiprocessing"] = 1
check_config(config)


Expand Down
Loading

0 comments on commit 03a724c

Please sign in to comment.