From 22b5695e8c3b43f52be35a3505701278e8790ba2 Mon Sep 17 00:00:00 2001
From: Michael Levy
Date: Wed, 6 Mar 2024 15:57:27 -0700
Subject: [PATCH 1/5] Use click to allow command line arguments

Click seems better suited to this task than argparse since pyproject.toml
wants to call run() from cupid/run.py rather than running run.py as a
command line script. First pass added --serial and --time-series flags;
the former can be updated to skip creating the LocalCluster object in
notebooks, the latter can be updated to run time series generation directly.
---
 cupid/run.py                     | 16 ++++++++--------
 environments/dev-environment.yml |  1 +
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/cupid/run.py b/cupid/run.py
index b263cd5..38943a6 100755
--- a/cupid/run.py
+++ b/cupid/run.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 
+import click
 import os
 from glob import glob
 import papermill as pm
@@ -11,15 +12,14 @@
 import time
 import ploomber
 
-def run():
+CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
+@click.command(context_settings=CONTEXT_SETTINGS)
+@click.option("--serial", "-s", is_flag=True, help="Do not use LocalCluster objects")
+@click.option("--time-series", "-ts", is_flag=True,
+              help="Run time series generation scripts prior to diagnostics")
+def run(serial=False, time_series=False):
     """
-    Main engine to set up running all the notebooks. Called by `cupid-run`.
-
-    Args:
-        none
-    Returns:
-        None
-
+    Main engine to set up running all the notebooks.
     """
 
     # Get control structure
diff --git a/environments/dev-environment.yml b/environments/dev-environment.yml
index 58f940d..5abc2e8 100644
--- a/environments/dev-environment.yml
+++ b/environments/dev-environment.yml
@@ -2,6 +2,7 @@ name: cupid-dev
 dependencies:
   - python=3.11.4
   - black
+  - click
   - dask
   - dask-jobqueue
   - intake

From 03735ae94534e82e7ceefdda278b60413ef4ba6a Mon Sep 17 00:00:00 2001
From: Michael Levy
Date: Wed, 6 Mar 2024 16:29:03 -0700
Subject: [PATCH 2/5] Add config_path argument

Also make use of serial in ocean and ice notebooks
---
 cupid/run.py                           | 6 +++---
 examples/nblibrary/ocean_surface.ipynb | 9 ++++++---
 examples/nblibrary/seaice.ipynb        | 9 ++++++---
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/cupid/run.py b/cupid/run.py
index 38943a6..a161252 100755
--- a/cupid/run.py
+++ b/cupid/run.py
@@ -6,7 +6,6 @@
 import papermill as pm
 import intake
 import cupid.util
-import sys
 from dask.distributed import Client
 import dask
 import time
@@ -17,13 +16,13 @@
 @click.option("--serial", "-s", is_flag=True, help="Do not use LocalCluster objects")
 @click.option("--time-series", "-ts", is_flag=True,
               help="Run time series generation scripts prior to diagnostics")
-def run(serial=False, time_series=False):
+@click.argument("config_path")
+def run(config_path, serial=False, time_series=False):
     """
     Main engine to set up running all the notebooks.
""" # Get control structure - config_path = str(sys.argv[1]) control = cupid.util.get_control_dict(config_path) cupid.util.setup_book(config_path) @@ -87,6 +86,7 @@ def run(serial=False, time_series=False): for nb, info in all_nbs.items(): + global_params['serial'] = serial if "dependency" in info: cupid.util.create_ploomber_nb_task(nb, info, cat_path, nb_path_root, output_dir, global_params, dag, dependency = info["dependency"]) diff --git a/examples/nblibrary/ocean_surface.ipynb b/examples/nblibrary/ocean_surface.ipynb index 3ba6ca2..4a96320 100644 --- a/examples/nblibrary/ocean_surface.ipynb +++ b/examples/nblibrary/ocean_surface.ipynb @@ -65,6 +65,7 @@ "outputs": [], "source": [ "CESM_output_dir = \"/glade/campaign/cesm/development/cross-wg/diagnostic_framework/CESM_output_for_testing\"\n", + "serial = False # use dask LocalCluster\n", "Case = \"b.e23_alpha16b.BLT1850.ne30_t232.054\"\n", "savefigs = False\n", "mom6_tools_config = {}\n", @@ -130,9 +131,11 @@ "metadata": {}, "outputs": [], "source": [ - "# Spin up cluster\n", - "cluster = LocalCluster(**lc_kwargs)\n", - "client = Client(cluster)\n", + "# Spin up cluster (if running in parallel)\n", + "client=None\n", + "if not serial:\n", + " cluster = LocalCluster(**lc_kwargs)\n", + " client = Client(cluster)\n", "\n", "client" ] diff --git a/examples/nblibrary/seaice.ipynb b/examples/nblibrary/seaice.ipynb index 79322c0..99ab287 100644 --- a/examples/nblibrary/seaice.ipynb +++ b/examples/nblibrary/seaice.ipynb @@ -50,6 +50,7 @@ "outputs": [], "source": [ "CESM_output_dir = \"/glade/campaign/cesm/development/cross-wg/diagnostic_framework/CESM_output_for_testing\"\n", + "serial = False # use dask LocalCluster\n", "cases = [\"b.e23_alpha16g.BLT1850.ne30_t232.075\",\"b.e23_alpha16g.BLT1850.ne30_t232.078\"]\n", "lc_kwargs = {}\n", "\n", @@ -67,9 +68,11 @@ "metadata": {}, "outputs": [], "source": [ - "# Spin up cluster\n", - "cluster = LocalCluster(**lc_kwargs)\n", - "client = Client(cluster)\n", + "# Spin up cluster (if running in parallel)\n", + "client=None\n", + "if not serial:\n", + " cluster = LocalCluster(**lc_kwargs)\n", + " client = Client(cluster)\n", "\n", "client" ] From 9f8908948a3fca800211dcb7fe08732a56bb9ca8 Mon Sep 17 00:00:00 2001 From: Michael Levy Date: Thu, 7 Mar 2024 12:34:37 -0700 Subject: [PATCH 3/5] Explicitly list click as dependency click is already in the cupid-dev environment, but it was not listed as a dependency in the pyproject.toml file --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 1ef665a..8120bdb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ classifiers = [ ] dependencies = [ "black", + "click", "dask", "dask-jobqueue", "intake", From b6d22ac31bdde2be8403bbec50d385fa1a994825 Mon Sep 17 00:00:00 2001 From: Michael Levy Date: Fri, 8 Mar 2024 14:14:19 -0700 Subject: [PATCH 4/5] Abort if --time-series flag is used For now, this option raises a NotImplementedError exception. The branch that incorporates time series should remove that block (as well as the "import sys" line) --- cupid/run.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cupid/run.py b/cupid/run.py index a161252..7aa2e5b 100755 --- a/cupid/run.py +++ b/cupid/run.py @@ -2,6 +2,7 @@ import click import os +import sys from glob import glob import papermill as pm import intake @@ -22,6 +23,11 @@ def run(config_path, serial=False, time_series=False): Main engine to set up running all the notebooks. 
""" + # Abort if run with --time-series (until feature is added) + if time_series: + sys.tracebacklimit = 0 + raise NotImplementedError("--time-series option not implemented yet") + # Get control structure control = cupid.util.get_control_dict(config_path) cupid.util.setup_book(config_path) From 63c09e2ef59647806b1ce5783673cfe2f1432a5b Mon Sep 17 00:00:00 2001 From: Michael Levy Date: Fri, 8 Mar 2024 15:06:00 -0700 Subject: [PATCH 5/5] Mention command line arguments in README file --- README.md | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1fd65b1..3db01ba 100644 --- a/README.md +++ b/README.md @@ -65,4 +65,33 @@ $ cupid-build config.yml # Will build HTML from Jupyter Book ``` After the last step is finished, you can use Jupyter to view generated notebooks in `${CUPID_ROOT}/examples/coupled-model/computed_notebooks/quick-run` -or you can view `${CUPID_ROOT}/examples/coupled-model/computed_notebooks/quick-run/_build/html/index.html` in a web browser. \ No newline at end of file +or you can view `${CUPID_ROOT}/examples/coupled-model/computed_notebooks/quick-run/_build/html/index.html` in a web browser. + +### CUPiD Options + +Most of CUPiD's configuration is done via the `config.yml` file, but there are a few command line options as well: + +```bash +(cupid-dev) $ cupid-run -h +Usage: cupid-run [OPTIONS] CONFIG_PATH + + Main engine to set up running all the notebooks. + +Options: + -s, --serial Do not use LocalCluster objects + -ts, --time-series Run time series generation scripts prior to diagnostics + -h, --help Show this message and exit. +``` + +By default, several of the example notebooks provided use a dask `LocalCluster` object to run in parallel. +However, the `--serial` option will pass a logical flag to each notebook that can be used to skip starting the cluster. + +```py3 +# Spin up cluster (if running in parallel) +client=None +if not serial: + cluster = LocalCluster(**lc_kwargs) + client = Client(cluster) + +client +``` \ No newline at end of file