Skip to content

Commit

Permalink
Merge pull request #94 from TeaganKing/docstrings_formatting
Browse files Browse the repository at this point in the history
Some updated pylint and black formatting, added docstrings to improve clarity when adding to repo
  • Loading branch information
mnlevy1981 authored May 8, 2024
2 parents d14ee72 + 0eec99b commit 7134f3a
Show file tree
Hide file tree
Showing 10 changed files with 438 additions and 286 deletions.
11 changes: 6 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ To test the package out, try to run `examples/coupled-model`:
$ conda activate cupid-dev
$ cd examples/coupled_model
$ # machine-dependent: request multiple compute cores
$ cupid-run config.yml
$ cupid-build config.yml # Will build HTML from Jupyter Book
$ cupid-run
$ cupid-build # Will build HTML from Jupyter Book
```

After the last step is finished, you can use Jupyter to view generated notebooks in `${CUPID_ROOT}/examples/coupled-model/computed_notebooks/quick-run`
Expand All @@ -64,7 +64,7 @@ or you can view `${CUPID_ROOT}/examples/coupled-model/computed_notebooks/quick-r
Furthermore, to clear the `computed_notebooks` folder which was generated by the `cupid-run` and `cupid-build` commands, you can run the following command:

``` bash
$ cupid-clear config.yml
$ cupid-clear
```

This will clear the `computed_notebooks` folder at the location given by the `run_dir` variable in the configuration file (`config.yml` by default).
Expand All @@ -87,6 +87,7 @@ Options:
-lnd, --land Run land component diagnostics
-ice, --seaice Run sea ice component diagnostics
-glc, --landice Run land ice component diagnostics
--config_path Path to the YAML configuration file containing specifications for notebooks (default config.yml)
-h, --help Show this message and exit.
```

Expand All @@ -107,8 +108,8 @@ client

#### Specifying components

If no component flags are provided, all component diagnostics listed in `config.yml` will be executed by default. Multiple flags can be used together to select a group of components, for example: `cupid-run -ocn -ice config.yml`.
If no component flags are provided, all component diagnostics listed in `config.yml` will be executed by default. Multiple flags can be used together to select a group of components, for example: `cupid-run -ocn -ice`.


### Timeseries File Generation
CUPiD also has the capability to generate single variable timeseries files from history files for all components. To run timeseries, edit the `config.yml` file's timeseries section to fit your preferences, and then run `cupid-run config.yml -ts`.
CUPiD also has the capability to generate single variable timeseries files from history files for all components. To run timeseries, edit the `config.yml` file's timeseries section to fit your preferences, and then run `cupid-run -ts`.
63 changes: 41 additions & 22 deletions cupid/build.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,60 @@
#!/usr/bin/env python
"""
This script provides functionality to build a Jupyter book based on
the configuration specified in a YAML file.
The main function `build()` reads the configuration file (default config.yml),
extracts the necessary information such as the name of the book and the
directory containing computed notebooks, and then proceeds to clean and build the
Jupyter book using the `jupyter-book` command-line tool.
Args:
CONFIG_PATH: str, path to configuration file (default config.yml)
Returns:
None
"""

import click
import subprocess
import sys
import os
import yaml

@click.command()
@click.argument("config_path", default="config.yml")
def build(config_path):
    """
    Build a Jupyter book based on the TOC in CONFIG_PATH. Called by `cupid-build`.

    Args:
        CONFIG_PATH: str, path to configuration file (default config.yml)

    Returns:
        None

    Raises:
        click.ClickException: if the `jupyter-book build` step exits non-zero
    """
    with open(config_path, "r", encoding="utf-8") as fid:
        control = yaml.safe_load(fid)

    sname = control["data_sources"]["sname"]
    run_dir = control["data_sources"]["run_dir"]
    book_dir = f"{run_dir}/computed_notebooks/{sname}"

    # Cleaning is best-effort: its exit status is intentionally not checked.
    subprocess.run(["jupyter-book", "clean", book_dir])

    # Surface a failed build instead of letting `cupid-build` exit successfully.
    result = subprocess.run(["jupyter-book", "build", book_dir, "--all"])
    if result.returncode != 0:
        raise click.ClickException(
            f"jupyter-book build failed with exit code {result.returncode}"
        )

    # Originally used this code to copy jupyter book HTML to a location to host it online

    # if 'publish_location' in control:

    #    user = os.environ.get('USER')
    #    remote_mach = control["publish_location"]["remote_mach"]
    #    remote_dir = control["publish_location"]["remote_dir"]
    # this seems more complicated than expected...people have mentioned paramiko library?
    #    subprocess.run(["mkdir", "-p", remote_dir])
    #    subprocess.run(["scp", "-r", f"{run_dir}/computed_notebooks/{sname}/_build/html/*",
    #                    f"{user}@{remote_mach}:{remote_dir}"])
66 changes: 43 additions & 23 deletions cupid/clear.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,56 @@
#!/usr/bin/env python
"""
This script provides functionality to clear the contents of the 'computed_notebooks' folder
at the location specified by the 'run_dir' variable in the CONFIG_PATH.
The main function `clear()` takes the path to the configuration file as input, reads the config file
to obtain the 'run_dir' variable, and then deletes the contents of the 'computed_notebooks' folder
at that location.
"""

import os
import shutil
import click
import cupid.util
import shutil

def read_config_file(config_path):
    """
    Read the configuration file and return the path of its 'computed_notebooks' folder.

    Args:
        config_path: str, path to configuration file

    Returns:
        str, the 'run_dir' value from the 'data_sources' section of the
        configuration with 'computed_notebooks' appended to it

    Raises:
        ValueError: if 'run_dir' is empty or not found in the configuration
    """
    # Obtain the contents of the configuration file and extract the run_dir variable
    control = cupid.util.get_control_dict(config_path)
    run_dir = control["data_sources"].get("run_dir")

    # Guard clause: without a run_dir there is no folder to point at
    if not run_dir:
        raise ValueError("'run_dir' was empty/not found in the config file.")

    return os.path.join(run_dir, "computed_notebooks")


@click.command()
@click.argument("config_path", default="config.yml")
def clear(config_path):
    """Clears the contents of the 'computed_notebooks' folder at the location
    specified by the 'run_dir' variable in the CONFIG_PATH.

    Args: CONFIG_PATH - The path to the configuration file (default config.yml).
    """
    # Entry point to this script: resolve <run_dir>/computed_notebooks from the config
    notebooks_dir = read_config_file(config_path)

    # Delete the 'computed_notebooks' folder and all the contents inside of it.
    # A folder that is already gone is reported rather than raised, so the
    # command can be run repeatedly without a traceback.
    try:
        shutil.rmtree(notebooks_dir)
    except FileNotFoundError:
        print(f"Nothing to clear: {notebooks_dir} does not exist.")
        return

    print(f"All contents in {notebooks_dir} have been cleared.")
5 changes: 3 additions & 2 deletions cupid/quickstart.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
### To be created: a script (maybe called through a command line entry point) that sets up a directory with a config.yml file and
### basics necessary to set up a notebook collection
### To be created: a script, maybe called through a command line entry point,
### that sets up a directory with a config.yml file and
### basics necessary to set up a notebook collection
38 changes: 25 additions & 13 deletions cupid/read.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,44 @@
"""
This module provides functions for reading YAML files and working with intake catalogs.
Functions:
- read_yaml(path_to_yaml): Read a YAML file and return its content as a dictionary.
- get_collection(path_to_catalog, **kwargs): Get a collection of datasets from an
intake catalog based on specified criteria.
"""

import intake
import yaml


def read_yaml(path_to_yaml):
    """Read yaml file and return data from loaded yaml file

    Args:
        path_to_yaml: path of the YAML file to open

    Returns:
        the object produced by yaml.load for the file's content
    """
    with open(path_to_yaml, encoding="utf-8") as file:
        # NOTE(review): FullLoader is not intended for untrusted input; consider
        # yaml.safe_load if these files can come from outside the project.
        data = yaml.load(file, Loader=yaml.FullLoader)
    return data


def get_collection(path_to_catalog, **kwargs):
    """Get collection of datasets from intake catalog

    Args:
        path_to_catalog: path handed to intake.open_esm_datastore
        **kwargs: search criteria forwarded unchanged to the catalog's search();
            when "variable" is among them, each dataset is reduced to that
            variable plus "time_bound" while it is loaded

    Returns:
        the result of to_dataset_dict() on the searched catalog
    """
    cat = intake.open_esm_datastore(path_to_catalog)
    ### note that the json file points to the csv, so the path that the
    ### yaml file contains doesn't actually get used. this can cause issues

    cat_subset = cat.search(**kwargs)

    # Without a variable filter there is nothing to subset on load
    if "variable" not in kwargs:
        return cat_subset.to_dataset_dict()

    # pylint: disable=invalid-name
    def preprocess(ds):
        ## the double brackets return a Dataset rather than a DataArray
        ## this is fragile and could cause issues, not sure what subsetting on time_bound does
        return ds[[kwargs["variable"], "time_bound"]]

    ## not sure what the chunking kwarg is doing here either
    return cat_subset.to_dataset_dict(
        xarray_open_kwargs={"chunks": {"time": -1}}, preprocess=preprocess
    )
Loading

0 comments on commit 7134f3a

Please sign in to comment.