Merge pull request #94 from TeaganKing/docstrings_formatting

Some updated pylint and black formatting, added docstrings to improve clarity when adding to repo
NCAR · May 8, 2024 · 7134f3a · 7134f3a
2 parents d14ee72 + 0eec99b
commit 7134f3a
Show file tree

Hide file tree

Showing 10 changed files with 438 additions and 286 deletions.
diff --git a/README.md b/README.md
@@ -54,8 +54,8 @@ To test the package out, try to run `examples/coupled-model`:
 $ conda activate cupid-dev
 $ cd examples/coupled_model
 $ # machine-dependent: request multiple compute cores
-$ cupid-run config.yml
-$ cupid-build config.yml # Will build HTML from Jupyter Book
+$ cupid-run
+$ cupid-build  # Will build HTML from Jupyter Book
 ```
 
 After the last step is finished, you can use Jupyter to view generated notebooks in `${CUPID_ROOT}/examples/coupled-model/computed_notebooks/quick-run`
@@ -64,7 +64,7 @@ or you can view `${CUPID_ROOT}/examples/coupled-model/computed_notebooks/quick-r
 Furthermore, to clear the `computed_notebooks` folder which was generated by the `cupid-run` and `cupid-build` commands, you can run the following command:
 
 ``` bash
-$ cupid-clear config.yml 
+$ cupid-clear 
 ```
 
 This will clear the `computed_notebooks` folder which is at the location pointed to by the `run_dir` variable in the `config.yml` file. 
@@ -87,6 +87,7 @@ Options:
   -lnd, --land        Run land component diagnostics
   -ice, --seaice      Run sea ice component diagnostics
   -glc, --landice     Run land ice component diagnostics
+  --config_path       Path to the YAML configuration file containing specifications for notebooks (default config.yml)
   -h, --help          Show this message and exit.
 ```
 
@@ -107,8 +108,8 @@ client
 
 #### Specifying components
 
-If no component flags are provided, all component diagnostics listed in `config.yml` will be executed by default. Multiple flags can be used together to select a group of components, for example: `cupid-run -ocn -ice config.yml`.
+If no component flags are provided, all component diagnostics listed in `config.yml` will be executed by default. Multiple flags can be used together to select a group of components, for example: `cupid-run -ocn -ice`.
 
 
 ### Timeseries File Generation
-CUPiD also has the capability to generate single variable timeseries files from history files for all components. To run timeseries, edit the `config.yml` file's timeseries section to fit your preferences, and then run `cupid-run config.yml -ts`.
+CUPiD also has the capability to generate single variable timeseries files from history files for all components. To run timeseries, edit the `config.yml` file's timeseries section to fit your preferences, and then run `cupid-run -ts`.
diff --git a/cupid/build.py b/cupid/build.py
@@ -1,41 +1,60 @@
 #!/usr/bin/env python
+"""
+This script provides functionality to build a Jupyter book based on
+the configuration specified in a YAML file.
 
+The main function `build()` reads the configuration file (default config.yml),
+extracts the necessary information such as the name of the book and the
+directory containing computed notebooks, and then proceeds to clean and build the
+Jupyter book using the `jupyter-book` command-line tool.
+
+Args:
+    CONFIG_PATH: str, path to configuration file (default config.yml)
+
+Returns:
+    None
+"""
+
+import click
 import subprocess
 import sys
-import os
 import yaml
 
-def build():
+
+@click.command()
+@click.argument("config_path", default="config.yml")
+def build(config_path):
     """
-    Build a Jupyter book based on the TOC in config.yml. Called by `cupid-build`.
-    
+    Build a Jupyter book based on the TOC in CONFIG_PATH. Called by `cupid-build`.
+
     Args:
-        none
+        CONFIG_PATH: str, path to configuration file (default config.yml)
+
     Returns:
         None
     """
-
-    config_path = str(sys.argv[1])
-
+
     with open(config_path, "r") as fid:
         control = yaml.safe_load(fid)
-    
+
     sname = control["data_sources"]["sname"]
     run_dir = control["data_sources"]["run_dir"]
 
-    subprocess.run(["jupyter-book", "clean" , f"{run_dir}/computed_notebooks/{sname}"])
-    subprocess.run(["jupyter-book",  "build" , f"{run_dir}/computed_notebooks/{sname}",  "--all"])
+    subprocess.run(["jupyter-book", "clean", f"{run_dir}/computed_notebooks/{sname}"])
+    subprocess.run(
+        ["jupyter-book", "build", f"{run_dir}/computed_notebooks/{sname}", "--all"]
+    )
 
-### Originally used this code to copy jupyter book HTML to a location to host it online
+    # Originally used this code to copy jupyter book HTML to a location to host it online
 
-#     if 'publish_location' in control:
-
-#         user = os.environ.get('USER')
-#         remote_mach = control["publish_location"]["remote_mach"]
-#         remote_dir = control["publish_location"]["remote_dir"]
-# this seems more complicated than expected...people have mentioned paramiko library?
-        # subprocess.run(["mkdir", "-p", remote_dir])
-        # subprocess.run(["scp", "-r", f"{run_dir}/computed_notebooks/{sname}/_build/html/*", f"{user}@{remote_mach}:{remote_dir}"])
-
-    return None
+    #     if 'publish_location' in control:
 
+    #         user = os.environ.get('USER')
+    #         remote_mach = control["publish_location"]["remote_mach"]
+    #         remote_dir = control["publish_location"]["remote_dir"]
+    # this seems more complicated than expected...people have mentioned paramiko library?
+    # subprocess.run(["mkdir", "-p", remote_dir])
+    # subprocess.run(["scp", "-r", f"{run_dir}/computed_notebooks/{sname}/_build/html/*",
+    #                 f"{user}@{remote_mach}:{remote_dir}"])
+
+    return None
diff --git a/cupid/clear.py b/cupid/clear.py
@@ -1,36 +1,56 @@
 #!/usr/bin/env python
+"""
+This script provides functionality to clear the contents of the 'computed_notebooks' folder
+at the location specified by the 'run_dir' variable in the CONFIG_PATH.
+
+The main function `clear()` takes the path to the configuration file as input, reads the config file
+to obtain the 'run_dir' variable, and then deletes the contents of the 'computed_notebooks' folder
+at that location.
+
+"""
+
 import os
+import shutil
 import click
 import cupid.util
-import shutil
 
-def readConfigFile(config_path):
-    #Given the file path to config.yml, this function reads the config file content and 
-    #returns the val of the run_dir string with '/computed_notebooks' appended to it 
-
-    #Obtain the contents of the config.yml file and extract the run_dir variable
+
+def read_config_file(config_path):
+    """
+    Given the file path to the configuration file, this function reads the config file content and
+    returns the value of the run_dir string with '/computed_notebooks' appended to it
+
+    Args:
+        config_path: str, path to configuration file
+
+    Returns:
+        str, path to the 'computed_notebooks' folder inside run_dir
+    """
+    # Obtain the contents of the configuration file and extract the run_dir variable
     control = cupid.util.get_control_dict(config_path)
-    run_dir = control['data_sources'].get('run_dir', None)
-    
+    run_dir = control["data_sources"].get("run_dir", None)
+
     if run_dir:
-        #Append '/computed_notebooks' to the run_dir value if it is not empty
-        fullPath = os.path.join(run_dir, 'computed_notebooks')
-        return fullPath
-
-    else: #run_dir is empty/wasn't found in config file so return error
-        raise ValueError("'run_dir' was empty/not found in the config file.")
+        # Append '/computed_notebooks' to the run_dir value if it is not empty
+        full_path = os.path.join(run_dir, "computed_notebooks")
+        return full_path
+
+    # run_dir is empty or wasn't found in the config file, so raise an error
+    raise ValueError("'run_dir' was empty/not found in the config file.")
+
 
 @click.command()
-@click.argument('config_path')
-#Entry point to this script
+@click.argument("config_path", default="config.yml")
+# Entry point to this script
 def clear(config_path):
-    """Clears the contents of the 'computed_notebooks' folder at the location specified by the 'run_dir' variable in the 'config.yml' file.
-    
-    Args: config_path - The path to the config.yml file.
+    """Clears the contents of the 'computed_notebooks' folder at the location
+    specified by the 'run_dir' variable in the CONFIG_PATH.
+
+    Args: CONFIG_PATH - The path to the configuration file.
 
     """
-    
-    run_dir = readConfigFile(config_path)
-    #Delete the 'computed_notebooks' folder and all the contents inside of it
+
+    run_dir = read_config_file(config_path)
+    # Delete the 'computed_notebooks' folder and all the contents inside of it
     shutil.rmtree(run_dir)
-    print(f"All contents in {run_dir} have been cleared.")
+    print(f"All contents in {run_dir} have been cleared.")
diff --git a/cupid/quickstart.py b/cupid/quickstart.py
@@ -1,2 +1,3 @@
-### To be created: a script (maybe called through a command line entry point) that sets up a directory with a config.yml file and
-### basics necessary to set up a notebook collection
+### To be created: a script, maybe called through a command line entry point,
+### that sets up a directory with a config.yml file and
+### basics necessary to set up a notebook collection
diff --git a/cupid/read.py b/cupid/read.py
@@ -1,32 +1,44 @@
+"""
+This module provides functions for reading YAML files and working with intake catalogs.
+
+Functions:
+    - read_yaml(path_to_yaml): Read a YAML file and return its content as a dictionary.
+    - get_collection(path_to_catalog, **kwargs): Get a collection of datasets from an
+                     intake catalog based on specified criteria.
+"""
+
 import intake
 import yaml
 
+
 def read_yaml(path_to_yaml):
-    with open(path_to_yaml) as f:
-        data = yaml.load(f, Loader=yaml.FullLoader) 
+    """Read yaml file and return data from loaded yaml file"""
+    with open(path_to_yaml) as file:
+        data = yaml.load(file, Loader=yaml.FullLoader)
     return data
 
 
 def get_collection(path_to_catalog, **kwargs):
+    """Get collection of datasets from intake catalog"""
     cat = intake.open_esm_datastore(path_to_catalog)
     ### note that the json file points to the csv, so the path that the
     ### yaml file contains doesn't actually get used. this can cause issues
-    
+
     cat_subset = cat.search(**kwargs)
-        
+
     if "variable" in kwargs.keys():
-
+        # pylint: disable=invalid-name
         def preprocess(ds):
             ## the double brackets return a Dataset rather than a DataArray
-            ## this is fragile and could cause issues, i'm not totally sure what subsetting on time_bound does
-            return ds[[kwargs["variable"], 'time_bound']]
-    
+            ## this is fragile and could cause issues, not sure what subsetting on time_bound does
+            return ds[[kwargs["variable"], "time_bound"]]
+
         ## not sure what the chunking kwarg is doing here either
-        dsets = cat_subset.to_dataset_dict(xarray_open_kwargs={'chunks': {'time': -1}}, preprocess=preprocess)
-
+        dsets = cat_subset.to_dataset_dict(
+            xarray_open_kwargs={"chunks": {"time": -1}}, preprocess=preprocess
+        )
+
     else:
         dsets = cat_subset.to_dataset_dict()
-
-    return dsets
-
 
+    return dsets