Skip to content

Commit

Permalink
Improved installation process
Browse files Browse the repository at this point in the history
  • Loading branch information
rmshkv committed Apr 24, 2023
1 parent 939b768 commit 9d42e09
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 117 deletions.
22 changes: 9 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,14 @@ This is a package to enable running notebook-based diagnostic workflows. Based o

1. Clone this repo

2. Create two environments:
```
mamba env create -f environment1.yml
mamba env create -f environment2.yml
```

(It's also possible to use conda with the same command, but mamba is a lot faster.)

3. Activate the environment you want to install `nbscuid` in (not one of the two created above). Within the cloned `nbscuid` directory, run:
```
pip install .
```

2. Activate the environment you want to install `nbscuid` in. Within the cloned `nbscuid` directory, run:
```
pip install .
```
Alternatively, to install just the commands `nbscuid-run` and `nbscuid-build` in an isolated environment (keeping nbscuid's dependencies out of your current environment), first install `pipx` with `pip install pipx`, then run:
```
pipx install .
```
## Running a notebook collection
Expand All @@ -50,3 +45,4 @@ nbscuid-build path/to/config.yml
```
42 changes: 0 additions & 42 deletions environment1.yml

This file was deleted.

42 changes: 0 additions & 42 deletions environment2.yml

This file was deleted.

15 changes: 8 additions & 7 deletions nbscuid/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
import os
import yaml

if __name__ == '__main__':

# is it possible to carry this over from the previous call to run.py?
def build():

config_path = str(sys.argv[1])

with open(config_path, "r") as fid:
Expand All @@ -19,12 +18,14 @@
subprocess.run(["jupyter-book", "clean" , f"{run_dir}/computed_notebooks/{casename}"])
subprocess.run(["jupyter-book", "build" , f"{run_dir}/computed_notebooks/{casename}", "--all"])

if 'publish_location' in control:
# if 'publish_location' in control:

user = os.environ.get('USER')
remote_mach = control["publish_location"]["remote_mach"]
remote_dir = control["publish_location"]["remote_dir"]
# user = os.environ.get('USER')
# remote_mach = control["publish_location"]["remote_mach"]
# remote_dir = control["publish_location"]["remote_dir"]
# this seems more complicated than expected...people have mentioned paramiko library?
# subprocess.run(["mkdir", "-p", remote_dir])
# subprocess.run(["scp", "-r", f"{run_dir}/computed_notebooks/{casename}/_build/html/*", f"{user}@{remote_mach}:{remote_dir}"])

return None

24 changes: 13 additions & 11 deletions nbscuid/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,20 @@
from glob import glob
import papermill as pm
import intake
import util
import cache
import nbscuid.util
import nbscuid.cache
import sys

if __name__ == '__main__':
def run():

# Get control structure
config_path = str(sys.argv[1])
control = util.get_control_dict(config_path)
util.setup_book(config_path)
control = nbscuid.util.get_control_dict(config_path)
nbscuid.util.setup_book(config_path)

# Cluster management
# Notebooks are configured to connect to this cluster
cluster = util.get_Cluster(account=control['account'])
cluster = nbscuid.util.get_Cluster(account=control['account'])
cluster.scale(32) # Should this be user modifiable?

# Grab paths
Expand Down Expand Up @@ -110,7 +110,7 @@
)


result_df = cache.gen_df_query(cache_metadata_path, input_path,
result_df = nbscuid.cache.gen_df_query(cache_metadata_path, input_path,
full_cat_path, first_subset=first_subset_kwargs,
second_subset=subset_kwargs,
params=parms)
Expand All @@ -125,7 +125,7 @@

nb_api = pm.inspect_notebook(input_path)

asset_path = cache.make_filename(cache_data_path, input_path, full_cat_path) + ".nc"
asset_path = nbscuid.cache.make_filename(cache_data_path, input_path, full_cat_path) + ".nc"

if nb_api:
parms_in = dict(**default_params)
Expand All @@ -150,7 +150,7 @@
cwd=nb_path_root
)

cache.make_sidecar_entry(cache_metadata_path,
nbscuid.cache.make_sidecar_entry(cache_metadata_path,
input_path,
full_cat_path,
asset_path=asset_path,
Expand All @@ -165,7 +165,7 @@

for nb, info in regular_nbs.items():

util.run_notebook(nb, info, cluster, cat_path, nb_path_root, output_dir)
nbscuid.util.run_notebook(nb, info, cluster, cat_path, nb_path_root, output_dir)

# Calculating notebooks with dependencies

Expand All @@ -174,8 +174,10 @@
### getting necessary asset:
dependent_asset_path = precompute_nbs[info['dependency']]["asset_path"]

util.run_notebook(nb, info, cluster, cat_path, nb_path_root, output_dir, dependent_asset_path)
nbscuid.util.run_notebook(nb, info, cluster, cat_path, nb_path_root, output_dir, dependent_asset_path)

# Closing cluster
cluster.close()

return None

17 changes: 15 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,24 @@ classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License"
]
dependencies = [
"black",
"dask",
"dask-jobqueue",
"intake",
"intake-esm",
"jinja2",
"jupyter-book",
"pandas",
"papermill",
"xarray",
"pyyaml"
]

[project.urls]
source = "https://github.com/rmshkv/nbscuid"


[project.scripts]
nbscuid-run = "nbscuid.run_wrapper:run"
nbscuid-build = "nbscuid.build_wrapper:build"
nbscuid-run = "nbscuid.run:run"
nbscuid-build = "nbscuid.build:build"

0 comments on commit 9d42e09

Please sign in to comment.