From 9d42e09094cd1c55b451ab3adba7eeea07e42be3 Mon Sep 17 00:00:00 2001 From: rmshkv Date: Mon, 24 Apr 2023 17:09:53 -0600 Subject: [PATCH] Improved installation process --- README.md | 22 +++++++++------------- environment1.yml | 42 ------------------------------------------ environment2.yml | 42 ------------------------------------------ nbscuid/build.py | 15 ++++++++------- nbscuid/run.py | 24 +++++++++++++----------- pyproject.toml | 17 +++++++++++++++-- 6 files changed, 45 insertions(+), 117 deletions(-) delete mode 100644 environment1.yml delete mode 100644 environment2.yml diff --git a/README.md b/README.md index 706d46d..5035482 100644 --- a/README.md +++ b/README.md @@ -18,19 +18,14 @@ This is a package to enable running notebook-based diagnostic workflows. Based o 1. Clone this repo -2. Create two environments: -``` -mamba env create -f environment1.yml -mamba env create -f environment2.yml -``` - -(It's also possible to use conda with the same command, but mamba is a lot faster.) - -3. Activate the environment you want to install `nbscuid` in (not one of the two created above). Within the cloned `nbscuid` directory, run: -``` -pip install . -``` - +2. Activate the environment you want to install `nbscuid` in. Within the cloned `nbscuid` directory, run: + ``` + pip install . + ``` + Alternatively, to install the commands `nbscuid-run` and `nbscuid-build` without installing all of nbscuid's dependencies, first install `pipx` with `pip install pipx`, then run: + ``` + pipx install . + ``` ## Running a notebook collection @@ -50,3 +45,4 @@ nbscuid-build path/to/config.yml ``` + diff --git a/environment1.yml b/environment1.yml deleted file mode 100644 index b7feb25..0000000 --- a/environment1.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: nbscuid-papermill -channels: - - conda-forge - - nodefaults -dependencies: - - dask - - intake - - intake-esm - - jinja2 - - jupyter - - jupyter-server-proxy - - jupyter_client==7.3.4 - - jupyter_contrib_nbextensions - - jupyterlab - - nb_conda_kernels - - nbformat - - nbsphinx - - nc-time-axis - - ncar-jobqueue - - netcdf4 - - numba - - numpy - - numpydoc - - pandas - - papermill - - pint - - pip - - pre-commit - - pytest - - pytest-cov - - python==3.9 - - scipy - - statsmodels - - tqdm - - watermark - - xarray - - xcollection - - xesmf - - xgcm - - xhistogram - - xrft - - zarr diff --git a/environment2.yml b/environment2.yml deleted file mode 100644 index 52f9e41..0000000 --- a/environment2.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: nbscuid-jupyter-book -channels: - - conda-forge - - nodefaults -dependencies: - - dask - - intake - - intake-esm - - jinja2 - - jupyter - - jupyter-book - - jupyter-server-proxy - - jupyter_client==7.3.4 - - jupyter_contrib_nbextensions - - jupyterlab - - nb_conda_kernels - - nbformat - - nbsphinx - - nc-time-axis - - ncar-jobqueue - - netcdf4 - - numba - - numpy - - numpydoc - - pandas - - pint - - pip - - pre-commit - - pytest - - pytest-cov - - python==3.9 - - scipy - - statsmodels - - tqdm - - watermark - - xarray - - xcollection - - xesmf - - xgcm - - xhistogram - - xrft - - zarr diff --git a/nbscuid/build.py b/nbscuid/build.py index b583dfe..c9e7881 100755 --- a/nbscuid/build.py +++ b/nbscuid/build.py @@ -5,9 +5,8 @@ import os import yaml -if __name__ == '__main__': - - # is it possible to carry this over from the previous call to run.py? +def build(): + config_path = str(sys.argv[1]) with open(config_path, "r") as fid: @@ -19,12 +18,14 @@ subprocess.run(["jupyter-book", "clean" , f"{run_dir}/computed_notebooks/{casename}"]) subprocess.run(["jupyter-book", "build" , f"{run_dir}/computed_notebooks/{casename}", "--all"]) - if 'publish_location' in control: +# if 'publish_location' in control: - user = os.environ.get('USER') - remote_mach = control["publish_location"]["remote_mach"] - remote_dir = control["publish_location"]["remote_dir"] +# user = os.environ.get('USER') +# remote_mach = control["publish_location"]["remote_mach"] +# remote_dir = control["publish_location"]["remote_dir"] # this seems more complicated than expected...people have mentioned paramiko library? # subprocess.run(["mkdir", "-p", remote_dir]) # subprocess.run(["scp", "-r", f"{run_dir}/computed_notebooks/{casename}/_build/html/*", f"{user}@{remote_mach}:{remote_dir}"]) + + return None diff --git a/nbscuid/run.py b/nbscuid/run.py index 827bd75..de73feb 100755 --- a/nbscuid/run.py +++ b/nbscuid/run.py @@ -4,20 +4,20 @@ from glob import glob import papermill as pm import intake -import util -import cache +import nbscuid.util +import nbscuid.cache import sys -if __name__ == '__main__': +def run(): # Get control structure config_path = str(sys.argv[1]) - control = util.get_control_dict(config_path) - util.setup_book(config_path) + control = nbscuid.util.get_control_dict(config_path) + nbscuid.util.setup_book(config_path) # Cluster management # Notebooks are configured to connect to this cluster - cluster = util.get_Cluster(account=control['account']) + cluster = nbscuid.util.get_Cluster(account=control['account']) cluster.scale(32) # Should this be user modifiable? # Grab paths @@ -110,7 +110,7 @@ ) - result_df = cache.gen_df_query(cache_metadata_path, input_path, + result_df = nbscuid.cache.gen_df_query(cache_metadata_path, input_path, full_cat_path, first_subset=first_subset_kwargs, second_subset=subset_kwargs, params=parms) @@ -125,7 +125,7 @@ nb_api = pm.inspect_notebook(input_path) - asset_path = cache.make_filename(cache_data_path, input_path, full_cat_path) + ".nc" + asset_path = nbscuid.cache.make_filename(cache_data_path, input_path, full_cat_path) + ".nc" if nb_api: parms_in = dict(**default_params) @@ -150,7 +150,7 @@ cwd=nb_path_root ) - cache.make_sidecar_entry(cache_metadata_path, + nbscuid.cache.make_sidecar_entry(cache_metadata_path, input_path, full_cat_path, asset_path=asset_path, @@ -165,7 +165,7 @@ for nb, info in regular_nbs.items(): - util.run_notebook(nb, info, cluster, cat_path, nb_path_root, output_dir) + nbscuid.util.run_notebook(nb, info, cluster, cat_path, nb_path_root, output_dir) # Calculating notebooks with dependencies @@ -174,8 +174,10 @@ ### getting necessary asset: dependent_asset_path = precompute_nbs[info['dependency']]["asset_path"] - util.run_notebook(nb, info, cluster, cat_path, nb_path_root, output_dir, dependent_asset_path) + nbscuid.util.run_notebook(nb, info, cluster, cat_path, nb_path_root, output_dir, dependent_asset_path) # Closing cluster cluster.close() + + return None \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index b793828..3a3e51a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,11 +15,24 @@ classifiers = [ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License" ] +dependencies = [ + "black", + "dask", + "dask-jobqueue", + "intake", + "intake-esm", + "jinja2", + "jupyter-book", + "pandas", + "papermill", + "xarray", + "pyyaml" +] [project.urls] source = "https://github.com/rmshkv/nbscuid" [project.scripts] -nbscuid-run = "nbscuid.run_wrapper:run" -nbscuid-build = "nbscuid.build_wrapper:build" \ No newline at end of file +nbscuid-run = "nbscuid.run:run" +nbscuid-build = "nbscuid.build:build"