From 78ad39dddd555cf2ec9e5ac9d759064dcde760ce Mon Sep 17 00:00:00 2001 From: Sean Bryan Date: Fri, 28 Jul 2023 13:20:28 +1000 Subject: [PATCH 01/10] Promote PBS constants to optional parameters This change promotes PBS related constants to optional parameters in the configuration file so that PBS flags can be set at runtime. This is useful in running multiple benchcab instances with different job parameters such as memory and the number of CPUs. This will also allow us to easily find an optimal number of CPUs to use to maximise performance. This change also adds the ability to switch on and off multiprocessing at runtime via an optional parameter in the config file. Fixes #104 --- benchcab/bench_config.py | 26 +++++++++++ benchcab/benchcab.py | 20 ++++++-- benchcab/comparison.py | 6 ++- benchcab/internal.py | 7 ++- benchcab/task.py | 6 ++- benchcab/utils/pbs.py | 23 ++++++++-- docs/user_guide/config_options.md | 42 ++++++++++++++++- tests/common.py | 7 +++ tests/test_bench_config.py | 72 +++++++++++++++++++++++++++++ tests/test_pbs.py | 76 +++++++++++++++++++++++-------- 10 files changed, 246 insertions(+), 39 deletions(-) diff --git a/benchcab/bench_config.py b/benchcab/bench_config.py index d902ee2a..a85852e7 100644 --- a/benchcab/bench_config.py +++ b/benchcab/bench_config.py @@ -43,6 +43,32 @@ def check_config(config: dict): "that is compatible with the f90nml python package." ) + # the "pbs" key is optional + if "pbs" in config: + if not isinstance(config["pbs"], dict): + raise TypeError("The 'pbs' key must be a dictionary.") + # the "ncpus" key is optional + if "ncpus" in config["pbs"] and not isinstance(config["pbs"]["ncpus"], int): + raise TypeError("The 'ncpus' key must be an integer.") + # the "mem" key is optional + if "mem" in config["pbs"] and not isinstance(config["pbs"]["mem"], str): + raise TypeError("The 'mem' key must be a string.") + # the "walltime" key is optional + if "walltime" in config["pbs"] and not isinstance( + config["pbs"]["walltime"], str + ): + raise TypeError("The 'walltime' key must be a string.") + # the "storage" key is optional + if "storage" in config["pbs"]: + if not isinstance(config["pbs"]["storage"], list) or any( + not isinstance(val, str) for val in config["pbs"]["storage"] + ): + raise TypeError("The 'storage' key must be a list of strings.") + + # the "multiprocessing" key is optional + if "multiprocessing" in config and not isinstance(config["multiprocessing"], bool): + raise TypeError("The 'multiprocessing' key must be a boolean.") + valid_experiments = ( list(internal.MEORG_EXPERIMENTS) + internal.MEORG_EXPERIMENTS["five-site-test"] ) diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index 6b04fc11..7bb47d40 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -132,10 +132,10 @@ def fluxsite_submit_job(self) -> None: project=self.config["project"], config_path=self.args.config, modules=self.config["modules"], - storage_flags=[], # TODO(Sean) add storage flags option to config verbose=self.args.verbose, skip_bitwise_cmp="fluxsite-bitwise-cmp" in self.args.skip, benchcab_path=str(self.benchcab_exe_path), + pbs_config=self.config.get("pbs"), ) file.write(contents) @@ -211,8 +211,12 @@ def fluxsite_run_tasks(self): """Endpoint for `benchcab fluxsite-run-tasks`.""" tasks = self.tasks if self.tasks else self._initialise_tasks() print("Running fluxsite tasks...") - if internal.MULTIPROCESS: - run_tasks_in_parallel(tasks, verbose=self.args.verbose) + multiprocess = self.config.get("multiprocessing", internal.DEFAULT_MULTIPROCESS) + if multiprocess: + ncpus = self.config.get("pbs", {}).get( + "ncpus", internal.DEFAULT_PBS["ncpus"] + ) + run_tasks_in_parallel(tasks, n_processes=ncpus, verbose=self.args.verbose) else: run_tasks(tasks, verbose=self.args.verbose) print("Successfully ran fluxsite tasks") @@ -230,8 +234,14 @@ def fluxsite_bitwise_cmp(self): comparisons = get_fluxsite_comparisons(tasks) print("Running comparison tasks...") - if internal.MULTIPROCESS: - run_comparisons_in_parallel(comparisons, verbose=self.args.verbose) + multiprocess = self.config.get("multiprocessing", internal.DEFAULT_MULTIPROCESS) + if multiprocess: + ncpus = self.config.get("pbs", {}).get( + "ncpus", internal.DEFAULT_PBS["ncpus"] + ) + run_comparisons_in_parallel( + comparisons, n_processes=ncpus, verbose=self.args.verbose + ) else: run_comparisons(comparisons, verbose=self.args.verbose) print("Successfully ran comparison tasks") diff --git a/benchcab/comparison.py b/benchcab/comparison.py index 4a2b08f8..db89f545 100644 --- a/benchcab/comparison.py +++ b/benchcab/comparison.py @@ -59,7 +59,9 @@ def run_comparisons(comparison_tasks: list[ComparisonTask], verbose=False) -> No def run_comparisons_in_parallel( - comparison_tasks: list[ComparisonTask], verbose=False + comparison_tasks: list[ComparisonTask], + n_processes=internal.DEFAULT_PBS["ncpus"], + verbose=False, ) -> None: """Runs bitwise comparison tasks in parallel across multiple processes.""" @@ -68,7 +70,7 @@ def run_comparisons_in_parallel( task_queue.put(task) processes = [] - for _ in range(internal.NCPUS): + for _ in range(n_processes): proc = multiprocessing.Process( target=worker_comparison, args=[task_queue, verbose] ) diff --git a/benchcab/internal.py b/benchcab/internal.py index f24e061d..816da2ad 100644 --- a/benchcab/internal.py +++ b/benchcab/internal.py @@ -2,6 +2,7 @@ import os from pathlib import Path +from typing import Any _, NODENAME, _, _, _ = os.uname() @@ -10,11 +11,9 @@ # Parameters for job script: QSUB_FNAME = "benchmark_cable_qsub.sh" -NCPUS = 18 -MEM = "30GB" -WALL_TIME = "6:00:00" +DEFAULT_PBS: Any = {"ncpus": 18, "mem": "30GB", "walltime": "6:00:00", "storage": []} MPI = False -MULTIPROCESS = True +DEFAULT_MULTIPROCESS = True # DIRECTORY PATHS/STRUCTURE: diff --git a/benchcab/task.py b/benchcab/task.py index b91d3731..07c615b4 100644 --- a/benchcab/task.py +++ b/benchcab/task.py @@ -334,7 +334,9 @@ def run_tasks(tasks: list[Task], verbose=False): task.run(verbose=verbose) -def run_tasks_in_parallel(tasks: list[Task], verbose=False): +def run_tasks_in_parallel( + tasks: list[Task], n_processes=internal.DEFAULT_PBS["ncpus"], verbose=False +): """Runs tasks in `tasks` in parallel across multiple processes.""" task_queue: multiprocessing.Queue = multiprocessing.Queue() @@ -342,7 +344,7 @@ def run_tasks_in_parallel(tasks: list[Task], verbose=False): task_queue.put(task) processes = [] - for _ in range(internal.NCPUS): + for _ in range(n_processes): proc = multiprocessing.Process(target=worker_run, args=[task_queue, verbose]) proc.start() processes.append(proc) diff --git a/benchcab/utils/pbs.py b/benchcab/utils/pbs.py index 607efe78..289168d4 100644 --- a/benchcab/utils/pbs.py +++ b/benchcab/utils/pbs.py @@ -1,5 +1,7 @@ """Contains helper functions for manipulating PBS job scripts.""" +from typing import Optional + from benchcab import internal @@ -7,10 +9,10 @@ def render_job_script( project: str, config_path: str, modules: list, - storage_flags: list, benchcab_path: str, verbose=False, skip_bitwise_cmp=False, + pbs_config: Optional[dict] = None, ) -> str: """Returns the text for a PBS job script that executes all computationally expensive commands. @@ -18,16 +20,27 @@ def render_job_script( between model output files. """ + if pbs_config is None: + pbs_config = internal.DEFAULT_PBS + module_load_lines = "\n".join( f"module load {module_name}" for module_name in modules ) verbose_flag = "-v" if verbose else "" - storage_flags = ["gdata/ks32", "gdata/hh5", f"gdata/{project}", *storage_flags] + ncpus = pbs_config.get("ncpus", internal.DEFAULT_PBS["ncpus"]) + mem = pbs_config.get("mem", internal.DEFAULT_PBS["mem"]) + walltime = pbs_config.get("walltime", internal.DEFAULT_PBS["walltime"]) + storage_flags = [ + "gdata/ks32", + "gdata/hh5", + f"gdata/{project}", + *pbs_config.get("storage", internal.DEFAULT_PBS["storage"]), + ] return f"""#!/bin/bash #PBS -l wd -#PBS -l ncpus={internal.NCPUS} -#PBS -l mem={internal.MEM} -#PBS -l walltime={internal.WALL_TIME} +#PBS -l ncpus={ncpus} +#PBS -l mem={mem} +#PBS -l walltime={walltime} #PBS -q normal #PBS -P {project} #PBS -j oe diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index 0bfaeb6f..70fb0ecb 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -43,6 +43,45 @@ The different running modes of `benchcab` are solely dependent on the options us : NCI modules to use for compiling CABLE +### `pbs` + +: Contains sub-keys which specify settings specific to the PBS scheduler at NCI. + +#### `ncpus` + +: The number of CPU cores to allocate for the main job script, i.e. the `-l ncpus=<4>` PBS flag (see [PBS Directives Explained][nci-pbs-directives]). +: This key is **optional** and can be omitted from the config file. By default `ncpus` is set to `18`. + +#### `mem` + +: The total memory limit for the main job script, i.e. the `-l mem=<10GB>` PBS flag (see [PBS Directives Explained][nci-pbs-directives]). +: This key is **optional** and can be omitted from the config file. By default `mem` is set to `30GB`. + +#### `walltime` + +: The wall clock time limit for the main job script, i.e. `-l walltime=` PBS flag (see [PBS Directives Explained][nci-pbs-directives]). +: This key is **optional** and can be omitted from the config file. By default `walltime` is set to `6:00:00`. + +#### `storage` + +: A list of extra storage flags required for the main job script, i.e. `-l storage=` (see [PBS Directives Explained][nci-pbs-directives]). +: This key is **optional** and can be omitted from the config file. By default `storage` is set to `[]`. + +Example: +```yaml +pbs: + ncpus: 16 + mem: 64GB + walltime: 00:01:00 + storage: [scratch/a00, gdata/xy11] +``` + +### `multiprocessing` + +: Enables or disables multiprocessing for executing embarrassingly parallel tasks. +: This key is **optional** and can be omitted from the config file. By default `multiprocessing` is set to `True`. + + ## Simulations options ### `realisations` @@ -138,4 +177,5 @@ science_configurations: [ [forty-two-me]: https://modelevaluation.org/experiment/display/urTKSXEsojdvEPwdR [five-me]: https://modelevaluation.org/experiment/display/xNZx2hSvn4PMKAa9R [f90nml-github]: https://github.com/marshallward/f90nml -[environment-modules]: https://modules.sourceforge.net/ \ No newline at end of file +[environment-modules]: https://modules.sourceforge.net/ +[nci-pbs-directives]: https://opus.nci.org.au/display/Help/PBS+Directives+Explained \ No newline at end of file diff --git a/tests/common.py b/tests/common.py index 73bc5a61..66ff7a39 100644 --- a/tests/common.py +++ b/tests/common.py @@ -55,6 +55,13 @@ def get_mock_config() -> dict: } }, ], + "pbs": { + "ncpus": 16, + "mem": "64G", + "walltime": "01:00:00", + "storage": ["gdata/foo123"], + }, + "multiprocessing": True, } return config diff --git a/tests/test_bench_config.py b/tests/test_bench_config.py index 505e7103..a51b94ee 100644 --- a/tests/test_bench_config.py +++ b/tests/test_bench_config.py @@ -55,6 +55,36 @@ def test_check_config(): config.pop("science_configurations") check_config(config) + # Success case: test config without multiprocessing key is valid + config = get_mock_config() + config.pop("multiprocessing") + check_config(config) + + # Success case: test config without pbs key is valid + config = get_mock_config() + config.pop("pbs") + check_config(config) + + # Success case: test config without ncpus key is valid + config = get_mock_config() + config["pbs"].pop("ncpus") + check_config(config) + + # Success case: test config without mem key is valid + config = get_mock_config() + config["pbs"].pop("mem") + check_config(config) + + # Success case: test config without walltime key is valid + config = get_mock_config() + config["pbs"].pop("walltime") + check_config(config) + + # Success case: test config without storage key is valid + config = get_mock_config() + config["pbs"].pop("storage") + check_config(config) + # Failure case: test missing required keys raises an exception with pytest.raises( ValueError, @@ -207,6 +237,48 @@ def test_check_config(): config["science_configurations"] = [r"cable_user%GS_SWITCH = 'medlyn'"] check_config(config) + # Failure case: type of config["pbs"] is not a dict + with pytest.raises(TypeError, match="The 'pbs' key must be a dictionary."): + config = get_mock_config() + config["pbs"] = "-l ncpus=16" + check_config(config) + + # Failure case: type of config["pbs"]["ncpus"] is not an int + with pytest.raises(TypeError, match="The 'ncpus' key must be an integer."): + config = get_mock_config() + config["pbs"]["ncpus"] = "16" + check_config(config) + + # Failure case: type of config["pbs"]["mem"] is not a string + with pytest.raises(TypeError, match="The 'mem' key must be a string."): + config = get_mock_config() + config["pbs"]["mem"] = 64 + check_config(config) + + # Failure case: type of config["pbs"]["walltime"] is not a string + with pytest.raises(TypeError, match="The 'walltime' key must be a string."): + config = get_mock_config() + config["pbs"]["walltime"] = 60 + check_config(config) + + # Failure case: type of config["pbs"]["storage"] is not a list + with pytest.raises(TypeError, match="The 'storage' key must be a list of strings."): + config = get_mock_config() + config["pbs"]["storage"] = "gdata/foo+gdata/bar" + check_config(config) + + # Failure case: type of config["pbs"]["storage"] is not a list of strings + with pytest.raises(TypeError, match="The 'storage' key must be a list of strings."): + config = get_mock_config() + config["pbs"]["storage"] = [1, 2, 3] + check_config(config) + + # Failure case: type of config["multiprocessing"] is not a bool + with pytest.raises(TypeError, match="The 'multiprocessing' key must be a boolean."): + config = get_mock_config() + config["multiprocessing"] = 1 + check_config(config) + def test_read_config(): """Tests for `read_config()`.""" diff --git a/tests/test_pbs.py b/tests/test_pbs.py index c3561d0f..2c5e5e2d 100644 --- a/tests/test_pbs.py +++ b/tests/test_pbs.py @@ -12,19 +12,18 @@ def test_render_job_script(): project="tm70", config_path="/path/to/config.yaml", modules=["foo", "bar", "baz"], - storage_flags=["scratch/tm70"], benchcab_path="/absolute/path/to/benchcab", ) == ( f"""#!/bin/bash #PBS -l wd -#PBS -l ncpus={internal.NCPUS} -#PBS -l mem={internal.MEM} -#PBS -l walltime={internal.WALL_TIME} +#PBS -l ncpus={internal.DEFAULT_PBS["ncpus"]} +#PBS -l mem={internal.DEFAULT_PBS["mem"]} +#PBS -l walltime={internal.DEFAULT_PBS["walltime"]} #PBS -q normal #PBS -P tm70 #PBS -j oe #PBS -m e -#PBS -l storage=gdata/ks32+gdata/hh5+gdata/tm70+scratch/tm70 +#PBS -l storage=gdata/ks32+gdata/hh5+gdata/tm70 module purge module load foo @@ -50,20 +49,19 @@ def test_render_job_script(): project="tm70", config_path="/path/to/config.yaml", modules=["foo", "bar", "baz"], - storage_flags=["scratch/tm70"], verbose=True, benchcab_path="/absolute/path/to/benchcab", ) == ( f"""#!/bin/bash #PBS -l wd -#PBS -l ncpus={internal.NCPUS} -#PBS -l mem={internal.MEM} -#PBS -l walltime={internal.WALL_TIME} +#PBS -l ncpus={internal.DEFAULT_PBS["ncpus"]} +#PBS -l mem={internal.DEFAULT_PBS["mem"]} +#PBS -l walltime={internal.DEFAULT_PBS["walltime"]} #PBS -q normal #PBS -P tm70 #PBS -j oe #PBS -m e -#PBS -l storage=gdata/ks32+gdata/hh5+gdata/tm70+scratch/tm70 +#PBS -l storage=gdata/ks32+gdata/hh5+gdata/tm70 module purge module load foo @@ -89,20 +87,58 @@ def test_render_job_script(): project="tm70", config_path="/path/to/config.yaml", modules=["foo", "bar", "baz"], - storage_flags=["scratch/tm70"], skip_bitwise_cmp=True, benchcab_path="/absolute/path/to/benchcab", ) == ( f"""#!/bin/bash #PBS -l wd -#PBS -l ncpus={internal.NCPUS} -#PBS -l mem={internal.MEM} -#PBS -l walltime={internal.WALL_TIME} +#PBS -l ncpus={internal.DEFAULT_PBS["ncpus"]} +#PBS -l mem={internal.DEFAULT_PBS["mem"]} +#PBS -l walltime={internal.DEFAULT_PBS["walltime"]} +#PBS -q normal +#PBS -P tm70 +#PBS -j oe +#PBS -m e +#PBS -l storage=gdata/ks32+gdata/hh5+gdata/tm70 + +module purge +module load foo +module load bar +module load baz + +/absolute/path/to/benchcab fluxsite-run-tasks --config=/path/to/config.yaml +if [ $? -ne 0 ]; then + echo 'Error: benchcab fluxsite-run-tasks failed. Exiting...' + exit 1 +fi + +""" + ) + + # Success case: specify parameters in pbs_config + assert render_job_script( + project="tm70", + config_path="/path/to/config.yaml", + modules=["foo", "bar", "baz"], + skip_bitwise_cmp=True, + benchcab_path="/absolute/path/to/benchcab", + pbs_config={ + "ncpus": 4, + "mem": "16GB", + "walltime": "00:00:30", + "storage": ["gdata/foo"], + }, + ) == ( + """#!/bin/bash +#PBS -l wd +#PBS -l ncpus=4 +#PBS -l mem=16GB +#PBS -l walltime=00:00:30 #PBS -q normal #PBS -P tm70 #PBS -j oe #PBS -m e -#PBS -l storage=gdata/ks32+gdata/hh5+gdata/tm70+scratch/tm70 +#PBS -l storage=gdata/ks32+gdata/hh5+gdata/tm70+gdata/foo module purge module load foo @@ -118,20 +154,20 @@ def test_render_job_script(): """ ) - # Success case: test with storage_flags set to [] + # Success case: if the pbs_config is empty, use the default values assert render_job_script( project="tm70", config_path="/path/to/config.yaml", modules=["foo", "bar", "baz"], - storage_flags=[], skip_bitwise_cmp=True, benchcab_path="/absolute/path/to/benchcab", + pbs_config={}, ) == ( f"""#!/bin/bash #PBS -l wd -#PBS -l ncpus={internal.NCPUS} -#PBS -l mem={internal.MEM} -#PBS -l walltime={internal.WALL_TIME} +#PBS -l ncpus={internal.DEFAULT_PBS["ncpus"]} +#PBS -l mem={internal.DEFAULT_PBS["mem"]} +#PBS -l walltime={internal.DEFAULT_PBS["walltime"]} #PBS -q normal #PBS -P tm70 #PBS -j oe From e47fb64df2e1f8ad39ec9cfde6ece85586244e55 Mon Sep 17 00:00:00 2001 From: Sean Bryan <39685865+SeanBryan51@users.noreply.github.com> Date: Thu, 3 Aug 2023 09:34:59 +1000 Subject: [PATCH 02/10] Update docs/user_guide/config_options.md Co-authored-by: Claire Carouge --- docs/user_guide/config_options.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index 70fb0ecb..36effe89 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -45,7 +45,7 @@ The different running modes of `benchcab` are solely dependent on the options us ### `pbs` -: Contains sub-keys which specify settings specific to the PBS scheduler at NCI. +: Contains settings specific to the PBS scheduler at NCI for the PBS script running the CABLE simulations at FLUXNET sites and the bitwise comparison for these simulations. #### `ncpus` From f0b82bbb602ce45ed26b2a16ee803dfe53f998ec Mon Sep 17 00:00:00 2001 From: Sean Bryan <39685865+SeanBryan51@users.noreply.github.com> Date: Thu, 3 Aug 2023 09:35:09 +1000 Subject: [PATCH 03/10] Update docs/user_guide/config_options.md Co-authored-by: Claire Carouge --- docs/user_guide/config_options.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index 36effe89..b77f9a50 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -49,7 +49,7 @@ The different running modes of `benchcab` are solely dependent on the options us #### `ncpus` -: The number of CPU cores to allocate for the main job script, i.e. the `-l ncpus=<4>` PBS flag (see [PBS Directives Explained][nci-pbs-directives]). +: The number of CPU cores to allocate for the PBS job, i.e. the `-l ncpus=<4>` PBS flag (see [PBS Directives Explained][nci-pbs-directives]). : This key is **optional** and can be omitted from the config file. By default `ncpus` is set to `18`. #### `mem` From f89374c2132c1770971b0acdb548e68f7124da43 Mon Sep 17 00:00:00 2001 From: Sean Bryan <39685865+SeanBryan51@users.noreply.github.com> Date: Thu, 3 Aug 2023 09:35:20 +1000 Subject: [PATCH 04/10] Update docs/user_guide/config_options.md Co-authored-by: Claire Carouge --- docs/user_guide/config_options.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index b77f9a50..cd3e496a 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -54,7 +54,7 @@ The different running modes of `benchcab` are solely dependent on the options us #### `mem` -: The total memory limit for the main job script, i.e. the `-l mem=<10GB>` PBS flag (see [PBS Directives Explained][nci-pbs-directives]). +: The total memory limit for the PBS job, i.e. the `-l mem=<10GB>` PBS flag (see [PBS Directives Explained][nci-pbs-directives]). : This key is **optional** and can be omitted from the config file. By default `mem` is set to `30GB`. #### `walltime` From 197b3c3c374e8bc783698cfc3ee89caf60f43fc1 Mon Sep 17 00:00:00 2001 From: Sean Bryan <39685865+SeanBryan51@users.noreply.github.com> Date: Thu, 3 Aug 2023 09:35:34 +1000 Subject: [PATCH 05/10] Update docs/user_guide/config_options.md Co-authored-by: Claire Carouge --- docs/user_guide/config_options.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index cd3e496a..9034f9e4 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -59,7 +59,7 @@ The different running modes of `benchcab` are solely dependent on the options us #### `walltime` -: The wall clock time limit for the main job script, i.e. `-l walltime=` PBS flag (see [PBS Directives Explained][nci-pbs-directives]). +: The wall clock time limit for the PBS job, i.e. `-l walltime=` PBS flag (see [PBS Directives Explained][nci-pbs-directives]). : This key is **optional** and can be omitted from the config file. By default `walltime` is set to `6:00:00`. #### `storage` From da2233a6a006c62f193664657a6836abb2679908 Mon Sep 17 00:00:00 2001 From: Sean Bryan <39685865+SeanBryan51@users.noreply.github.com> Date: Thu, 3 Aug 2023 09:35:57 +1000 Subject: [PATCH 06/10] Update docs/user_guide/config_options.md Co-authored-by: Claire Carouge --- docs/user_guide/config_options.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index 9034f9e4..ee2a52af 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -64,7 +64,7 @@ The different running modes of `benchcab` are solely dependent on the options us #### `storage` -: A list of extra storage flags required for the main job script, i.e. `-l storage=` (see [PBS Directives Explained][nci-pbs-directives]). +: A list of extra storage flags required for the PBS job, i.e. `-l storage=` (see [PBS Directives Explained][nci-pbs-directives]). : This key is **optional** and can be omitted from the config file. By default `storage` is set to `[]`. Example: From 03a724c2d80ea21ed8fce2ac7fc3be102c014904 Mon Sep 17 00:00:00 2001 From: Sean Bryan Date: Mon, 7 Aug 2023 16:18:40 +1000 Subject: [PATCH 07/10] Add requested changes from code review Co-authored-by: Claire Carouge --- benchcab/bench_config.py | 58 ++++++++++++++++++------------- benchcab/benchcab.py | 19 ++++++---- benchcab/comparison.py | 2 +- benchcab/internal.py | 9 +++-- benchcab/task.py | 2 +- benchcab/utils/pbs.py | 10 +++--- docs/user_guide/config_options.md | 34 ++++++++++-------- tests/common.py | 14 ++++---- tests/test_bench_config.py | 37 +++++++++++++------- tests/test_pbs.py | 24 ++++++------- 10 files changed, 124 insertions(+), 85 deletions(-) diff --git a/benchcab/bench_config.py b/benchcab/bench_config.py index a85852e7..509f551e 100644 --- a/benchcab/bench_config.py +++ b/benchcab/bench_config.py @@ -43,31 +43,41 @@ def check_config(config: dict): "that is compatible with the f90nml python package." ) - # the "pbs" key is optional - if "pbs" in config: - if not isinstance(config["pbs"], dict): - raise TypeError("The 'pbs' key must be a dictionary.") - # the "ncpus" key is optional - if "ncpus" in config["pbs"] and not isinstance(config["pbs"]["ncpus"], int): - raise TypeError("The 'ncpus' key must be an integer.") - # the "mem" key is optional - if "mem" in config["pbs"] and not isinstance(config["pbs"]["mem"], str): - raise TypeError("The 'mem' key must be a string.") - # the "walltime" key is optional - if "walltime" in config["pbs"] and not isinstance( - config["pbs"]["walltime"], str - ): - raise TypeError("The 'walltime' key must be a string.") - # the "storage" key is optional - if "storage" in config["pbs"]: - if not isinstance(config["pbs"]["storage"], list) or any( - not isinstance(val, str) for val in config["pbs"]["storage"] + # the "fluxsite" key is optional + if "fluxsite" in config: + if not isinstance(config["fluxsite"], dict): + raise TypeError("The 'fluxsite' key must be a dictionary.") + # the "pbs" key is optional + if "pbs" in config["fluxsite"]: + if not isinstance(config["fluxsite"]["pbs"], dict): + raise TypeError("The 'pbs' key must be a dictionary.") + # the "ncpus" key is optional + if "ncpus" in config["fluxsite"]["pbs"] and not isinstance( + config["fluxsite"]["pbs"]["ncpus"], int ): - raise TypeError("The 'storage' key must be a list of strings.") - - # the "multiprocessing" key is optional - if "multiprocessing" in config and not isinstance(config["multiprocessing"], bool): - raise TypeError("The 'multiprocessing' key must be a boolean.") + raise TypeError("The 'ncpus' key must be an integer.") + # the "mem" key is optional + if "mem" in config["fluxsite"]["pbs"] and not isinstance( + config["fluxsite"]["pbs"]["mem"], str + ): + raise TypeError("The 'mem' key must be a string.") + # the "walltime" key is optional + if "walltime" in config["fluxsite"]["pbs"] and not isinstance( + config["fluxsite"]["pbs"]["walltime"], str + ): + raise TypeError("The 'walltime' key must be a string.") + # the "storage" key is optional + if "storage" in config["fluxsite"]["pbs"]: + if not isinstance(config["fluxsite"]["pbs"]["storage"], list) or any( + not isinstance(val, str) + for val in config["fluxsite"]["pbs"]["storage"] + ): + raise TypeError("The 'storage' key must be a list of strings.") + # the "multiprocessing" key is optional + if "multiprocessing" in config["fluxsite"] and not isinstance( + config["fluxsite"]["multiprocessing"], bool + ): + raise TypeError("The 'multiprocessing' key must be a boolean.") valid_experiments = ( list(internal.MEORG_EXPERIMENTS) + internal.MEORG_EXPERIMENTS["five-site-test"] diff --git a/benchcab/benchcab.py b/benchcab/benchcab.py index 7bb47d40..b6850f66 100644 --- a/benchcab/benchcab.py +++ b/benchcab/benchcab.py @@ -211,10 +211,13 @@ def fluxsite_run_tasks(self): """Endpoint for `benchcab fluxsite-run-tasks`.""" tasks = self.tasks if self.tasks else self._initialise_tasks() print("Running fluxsite tasks...") - multiprocess = self.config.get("multiprocessing", internal.DEFAULT_MULTIPROCESS) + try: + multiprocess = self.config["fluxsite"]["multiprocess"] + except KeyError: + multiprocess = internal.FLUXSITE_DEFAULT_MULTIPROCESS if multiprocess: ncpus = self.config.get("pbs", {}).get( - "ncpus", internal.DEFAULT_PBS["ncpus"] + "ncpus", internal.FLUXSITE_DEFAULT_PBS["ncpus"] ) run_tasks_in_parallel(tasks, n_processes=ncpus, verbose=self.args.verbose) else: @@ -234,11 +237,15 @@ def fluxsite_bitwise_cmp(self): comparisons = get_fluxsite_comparisons(tasks) print("Running comparison tasks...") - multiprocess = self.config.get("multiprocessing", internal.DEFAULT_MULTIPROCESS) + try: + multiprocess = self.config["fluxsite"]["multiprocess"] + except KeyError: + multiprocess = internal.FLUXSITE_DEFAULT_MULTIPROCESS if multiprocess: - ncpus = self.config.get("pbs", {}).get( - "ncpus", internal.DEFAULT_PBS["ncpus"] - ) + try: + ncpus = self.config["fluxsite"]["pbs"]["ncpus"] + except KeyError: + ncpus = internal.FLUXSITE_DEFAULT_PBS["ncpus"] run_comparisons_in_parallel( comparisons, n_processes=ncpus, verbose=self.args.verbose ) diff --git a/benchcab/comparison.py b/benchcab/comparison.py index db89f545..627bbe3f 100644 --- a/benchcab/comparison.py +++ b/benchcab/comparison.py @@ -60,7 +60,7 @@ def run_comparisons(comparison_tasks: list[ComparisonTask], verbose=False) -> No def run_comparisons_in_parallel( comparison_tasks: list[ComparisonTask], - n_processes=internal.DEFAULT_PBS["ncpus"], + n_processes=internal.FLUXSITE_DEFAULT_PBS["ncpus"], verbose=False, ) -> None: """Runs bitwise comparison tasks in parallel across multiple processes.""" diff --git a/benchcab/internal.py b/benchcab/internal.py index 816da2ad..daf0de37 100644 --- a/benchcab/internal.py +++ b/benchcab/internal.py @@ -11,9 +11,14 @@ # Parameters for job script: QSUB_FNAME = "benchmark_cable_qsub.sh" -DEFAULT_PBS: Any = {"ncpus": 18, "mem": "30GB", "walltime": "6:00:00", "storage": []} +FLUXSITE_DEFAULT_PBS: Any = { + "ncpus": 18, + "mem": "30GB", + "walltime": "6:00:00", + "storage": [], +} MPI = False -DEFAULT_MULTIPROCESS = True +FLUXSITE_DEFAULT_MULTIPROCESS = True # DIRECTORY PATHS/STRUCTURE: diff --git a/benchcab/task.py b/benchcab/task.py index 07c615b4..30395d50 100644 --- a/benchcab/task.py +++ b/benchcab/task.py @@ -335,7 +335,7 @@ def run_tasks(tasks: list[Task], verbose=False): def run_tasks_in_parallel( - tasks: list[Task], n_processes=internal.DEFAULT_PBS["ncpus"], verbose=False + tasks: list[Task], n_processes=internal.FLUXSITE_DEFAULT_PBS["ncpus"], verbose=False ): """Runs tasks in `tasks` in parallel across multiple processes.""" diff --git a/benchcab/utils/pbs.py b/benchcab/utils/pbs.py index 289168d4..c02abb25 100644 --- a/benchcab/utils/pbs.py +++ b/benchcab/utils/pbs.py @@ -21,20 +21,20 @@ def render_job_script( """ if pbs_config is None: - pbs_config = internal.DEFAULT_PBS + pbs_config = internal.FLUXSITE_DEFAULT_PBS module_load_lines = "\n".join( f"module load {module_name}" for module_name in modules ) verbose_flag = "-v" if verbose else "" - ncpus = pbs_config.get("ncpus", internal.DEFAULT_PBS["ncpus"]) - mem = pbs_config.get("mem", internal.DEFAULT_PBS["mem"]) - walltime = pbs_config.get("walltime", internal.DEFAULT_PBS["walltime"]) + ncpus = pbs_config.get("ncpus", internal.FLUXSITE_DEFAULT_PBS["ncpus"]) + mem = pbs_config.get("mem", internal.FLUXSITE_DEFAULT_PBS["mem"]) + walltime = pbs_config.get("walltime", internal.FLUXSITE_DEFAULT_PBS["walltime"]) storage_flags = [ "gdata/ks32", "gdata/hh5", f"gdata/{project}", - *pbs_config.get("storage", internal.DEFAULT_PBS["storage"]), + *pbs_config.get("storage", internal.FLUXSITE_DEFAULT_PBS["storage"]), ] return f"""#!/bin/bash #PBS -l wd diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index ee2a52af..63d7e610 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -43,44 +43,48 @@ The different running modes of `benchcab` are solely dependent on the options us : NCI modules to use for compiling CABLE -### `pbs` +### `fluxsite` +: Contains settings specific to fluxsite tests. + +#### `pbs` : Contains settings specific to the PBS scheduler at NCI for the PBS script running the CABLE simulations at FLUXNET sites and the bitwise comparison for these simulations. -#### `ncpus` +##### `ncpus` : The number of CPU cores to allocate for the PBS job, i.e. the `-l ncpus=<4>` PBS flag (see [PBS Directives Explained][nci-pbs-directives]). : This key is **optional** and can be omitted from the config file. By default `ncpus` is set to `18`. -#### `mem` +##### `mem` : The total memory limit for the PBS job, i.e. the `-l mem=<10GB>` PBS flag (see [PBS Directives Explained][nci-pbs-directives]). : This key is **optional** and can be omitted from the config file. By default `mem` is set to `30GB`. -#### `walltime` +##### `walltime` : The wall clock time limit for the PBS job, i.e. `-l walltime=` PBS flag (see [PBS Directives Explained][nci-pbs-directives]). : This key is **optional** and can be omitted from the config file. By default `walltime` is set to `6:00:00`. -#### `storage` +##### `storage` : A list of extra storage flags required for the PBS job, i.e. `-l storage=` (see [PBS Directives Explained][nci-pbs-directives]). : This key is **optional** and can be omitted from the config file. By default `storage` is set to `[]`. -Example: -```yaml -pbs: - ncpus: 16 - mem: 64GB - walltime: 00:01:00 - storage: [scratch/a00, gdata/xy11] -``` - -### `multiprocessing` +#### `multiprocessing` : Enables or disables multiprocessing for executing embarrassingly parallel tasks. : This key is **optional** and can be omitted from the config file. By default `multiprocessing` is set to `True`. +Example: +```yaml +fluxsite: + pbs: + ncpus: 16 + mem: 64GB + walltime: 00:01:00 + storage: [scratch/a00, gdata/xy11] + multiprocessing: True +``` ## Simulations options diff --git a/tests/common.py b/tests/common.py index 66ff7a39..a135249b 100644 --- a/tests/common.py +++ b/tests/common.py @@ -55,13 +55,15 @@ def get_mock_config() -> dict: } }, ], - "pbs": { - "ncpus": 16, - "mem": "64G", - "walltime": "01:00:00", - "storage": ["gdata/foo123"], + "fluxsite": { + "pbs": { + "ncpus": 16, + "mem": "64G", + "walltime": "01:00:00", + "storage": ["gdata/foo123"], + }, + "multiprocessing": True, }, - "multiprocessing": True, } return config diff --git a/tests/test_bench_config.py b/tests/test_bench_config.py index a51b94ee..02a7adf9 100644 --- a/tests/test_bench_config.py +++ b/tests/test_bench_config.py @@ -55,34 +55,39 @@ def test_check_config(): config.pop("science_configurations") check_config(config) + # Success case: test config without fluxsite key is valid + config = get_mock_config() + config.pop("fluxsite") + check_config(config) + # Success case: test config without multiprocessing key is valid config = get_mock_config() - config.pop("multiprocessing") + config["fluxsite"].pop("multiprocessing") check_config(config) # Success case: test config without pbs key is valid config = get_mock_config() - config.pop("pbs") + config["fluxsite"].pop("pbs") check_config(config) # Success case: test config without ncpus key is valid config = get_mock_config() - config["pbs"].pop("ncpus") + config["fluxsite"]["pbs"].pop("ncpus") check_config(config) # Success case: test config without mem key is valid config = get_mock_config() - config["pbs"].pop("mem") + config["fluxsite"]["pbs"].pop("mem") check_config(config) # Success case: test config without walltime key is valid config = get_mock_config() - config["pbs"].pop("walltime") + config["fluxsite"]["pbs"].pop("walltime") check_config(config) # Success case: test config without storage key is valid config = get_mock_config() - config["pbs"].pop("storage") + config["fluxsite"]["pbs"].pop("storage") check_config(config) # Failure case: test missing required keys raises an exception @@ -237,46 +242,52 @@ def test_check_config(): config["science_configurations"] = [r"cable_user%GS_SWITCH = 'medlyn'"] check_config(config) + # Failure case: type of config["fluxsite"] is not a dict + with pytest.raises(TypeError, match="The 'fluxsite' key must be a dictionary."): + config = get_mock_config() + config["fluxsite"] = ["ncpus: 16\nmem: 64GB\n"] + check_config(config) + # Failure case: type of config["pbs"] is not a dict with pytest.raises(TypeError, match="The 'pbs' key must be a dictionary."): config = get_mock_config() - config["pbs"] = "-l ncpus=16" + config["fluxsite"]["pbs"] = "-l ncpus=16" check_config(config) # Failure case: type of config["pbs"]["ncpus"] is not an int with pytest.raises(TypeError, match="The 'ncpus' key must be an integer."): config = get_mock_config() - config["pbs"]["ncpus"] = "16" + config["fluxsite"]["pbs"]["ncpus"] = "16" check_config(config) # Failure case: type of config["pbs"]["mem"] is not a string with pytest.raises(TypeError, match="The 'mem' key must be a string."): config = get_mock_config() - config["pbs"]["mem"] = 64 + config["fluxsite"]["pbs"]["mem"] = 64 check_config(config) # Failure case: type of config["pbs"]["walltime"] is not a string with pytest.raises(TypeError, match="The 'walltime' key must be a string."): config = get_mock_config() - config["pbs"]["walltime"] = 60 + config["fluxsite"]["pbs"]["walltime"] = 60 check_config(config) # Failure case: type of config["pbs"]["storage"] is not a list with pytest.raises(TypeError, match="The 'storage' key must be a list of strings."): config = get_mock_config() - config["pbs"]["storage"] = "gdata/foo+gdata/bar" + config["fluxsite"]["pbs"]["storage"] = "gdata/foo+gdata/bar" check_config(config) # Failure case: type of config["pbs"]["storage"] is not a list of strings with pytest.raises(TypeError, match="The 'storage' key must be a list of strings."): config = get_mock_config() - config["pbs"]["storage"] = [1, 2, 3] + config["fluxsite"]["pbs"]["storage"] = [1, 2, 3] check_config(config) # Failure case: type of config["multiprocessing"] is not a bool with pytest.raises(TypeError, match="The 'multiprocessing' key must be a boolean."): config = get_mock_config() - config["multiprocessing"] = 1 + config["fluxsite"]["multiprocessing"] = 1 check_config(config) diff --git a/tests/test_pbs.py b/tests/test_pbs.py index 2c5e5e2d..cb678b17 100644 --- a/tests/test_pbs.py +++ b/tests/test_pbs.py @@ -16,9 +16,9 @@ def test_render_job_script(): ) == ( f"""#!/bin/bash #PBS -l wd -#PBS -l ncpus={internal.DEFAULT_PBS["ncpus"]} -#PBS -l mem={internal.DEFAULT_PBS["mem"]} -#PBS -l walltime={internal.DEFAULT_PBS["walltime"]} +#PBS -l ncpus={internal.FLUXSITE_DEFAULT_PBS["ncpus"]} +#PBS -l mem={internal.FLUXSITE_DEFAULT_PBS["mem"]} +#PBS -l walltime={internal.FLUXSITE_DEFAULT_PBS["walltime"]} #PBS -q normal #PBS -P tm70 #PBS -j oe @@ -54,9 +54,9 @@ def test_render_job_script(): ) == ( f"""#!/bin/bash #PBS -l wd -#PBS -l ncpus={internal.DEFAULT_PBS["ncpus"]} -#PBS -l mem={internal.DEFAULT_PBS["mem"]} -#PBS -l walltime={internal.DEFAULT_PBS["walltime"]} +#PBS -l ncpus={internal.FLUXSITE_DEFAULT_PBS["ncpus"]} +#PBS -l mem={internal.FLUXSITE_DEFAULT_PBS["mem"]} +#PBS -l walltime={internal.FLUXSITE_DEFAULT_PBS["walltime"]} #PBS -q normal #PBS -P tm70 #PBS -j oe @@ -92,9 +92,9 @@ def test_render_job_script(): ) == ( f"""#!/bin/bash #PBS -l wd -#PBS -l ncpus={internal.DEFAULT_PBS["ncpus"]} -#PBS -l mem={internal.DEFAULT_PBS["mem"]} -#PBS -l walltime={internal.DEFAULT_PBS["walltime"]} +#PBS -l ncpus={internal.FLUXSITE_DEFAULT_PBS["ncpus"]} +#PBS -l mem={internal.FLUXSITE_DEFAULT_PBS["mem"]} +#PBS -l walltime={internal.FLUXSITE_DEFAULT_PBS["walltime"]} #PBS -q normal #PBS -P tm70 #PBS -j oe @@ -165,9 +165,9 @@ def test_render_job_script(): ) == ( f"""#!/bin/bash #PBS -l wd -#PBS -l ncpus={internal.DEFAULT_PBS["ncpus"]} -#PBS -l mem={internal.DEFAULT_PBS["mem"]} -#PBS -l walltime={internal.DEFAULT_PBS["walltime"]} +#PBS -l ncpus={internal.FLUXSITE_DEFAULT_PBS["ncpus"]} +#PBS -l mem={internal.FLUXSITE_DEFAULT_PBS["mem"]} +#PBS -l walltime={internal.FLUXSITE_DEFAULT_PBS["walltime"]} #PBS -q normal #PBS -P tm70 #PBS -j oe From 68bea1b40b5e465dcb96c4191f22f7107eb9e0d1 Mon Sep 17 00:00:00 2001 From: Sean Bryan Date: Tue, 8 Aug 2023 14:21:36 +1000 Subject: [PATCH 08/10] Fix typo in documentation In config.yaml options, 'multiprocessing' should be 'multiprocess'. --- docs/user_guide/config_options.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index 63d7e610..48d9cbb4 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -70,10 +70,10 @@ The different running modes of `benchcab` are solely dependent on the options us : A list of extra storage flags required for the PBS job, i.e. `-l storage=` (see [PBS Directives Explained][nci-pbs-directives]). : This key is **optional** and can be omitted from the config file. By default `storage` is set to `[]`. -#### `multiprocessing` +#### `multiprocess` : Enables or disables multiprocessing for executing embarrassingly parallel tasks. -: This key is **optional** and can be omitted from the config file. By default `multiprocessing` is set to `True`. +: This key is **optional** and can be omitted from the config file. By default `multiprocess` is set to `True`. Example: ```yaml @@ -83,7 +83,7 @@ fluxsite: mem: 64GB walltime: 00:01:00 storage: [scratch/a00, gdata/xy11] - multiprocessing: True + multiprocess: True ``` ## Simulations options From 8c4c799acbb1366fad25c57512a5a86c07f8af50 Mon Sep 17 00:00:00 2001 From: Sean Bryan <39685865+SeanBryan51@users.noreply.github.com> Date: Thu, 10 Aug 2023 17:28:16 +1000 Subject: [PATCH 09/10] Update docs/user_guide/config_options.md Co-authored-by: Claire Carouge --- docs/user_guide/config_options.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index 48d9cbb4..e5dd814f 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -44,7 +44,8 @@ The different running modes of `benchcab` are solely dependent on the options us : NCI modules to use for compiling CABLE ### `fluxsite` -: Contains settings specific to fluxsite tests. +: Contains settings specific to fluxsite tests. +: This key is **optional**. Default settings for the fluxsite tests will be used if it is not present #### `pbs` From 309adca20b90ecc6406c363d2707bdfa5ef69391 Mon Sep 17 00:00:00 2001 From: Sean Bryan <39685865+SeanBryan51@users.noreply.github.com> Date: Thu, 10 Aug 2023 17:28:29 +1000 Subject: [PATCH 10/10] Update docs/user_guide/config_options.md Co-authored-by: Claire Carouge --- docs/user_guide/config_options.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index e5dd814f..3e2f4ec8 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -49,7 +49,8 @@ The different running modes of `benchcab` are solely dependent on the options us #### `pbs` -: Contains settings specific to the PBS scheduler at NCI for the PBS script running the CABLE simulations at FLUXNET sites and the bitwise comparison for these simulations. +: Contains settings specific to the PBS scheduler at NCI for the PBS script running the CABLE simulations at FLUXNET sites and the bitwise comparison for these simulations. +: This key is **optional**. Default values for the PBS settings will apply if it is not specified. ##### `ncpus`