From e1a5783fad5b9e1381bea37435b87c5d9e61f71c Mon Sep 17 00:00:00 2001
From: Matt Drozt <drozt@hpe.com>
Date: Thu, 14 Sep 2023 16:50:43 -0700
Subject: [PATCH] Torch install back to build (#365)

Move the torch install back to `smart build`, revert the CI
build to use `[ml]`, and update the docs with the proper build steps

[ committed by @MattToast ]
[ reviewed by @al-rigazzi @ashao @mellis13 ]
---
 .github/workflows/run_tests.yml               |   2 +-
 doc/changelog.rst                             |   3 +-
 doc/installation_instructions/basic.rst       |  15 +-
 .../site-install.rst                          |   3 +-
 smartsim/_core/_cli/build.py                  | 108 +++++++----
 smartsim/_core/_cli/utils.py                  |  13 ++
 smartsim/_core/_cli/validate.py               |   2 +-
 smartsim/_core/_install/buildenv.py           |  64 +------
 .../ml_inference/Inference-in-SmartSim.ipynb  | 181 +++++++++---------
 9 files changed, 189 insertions(+), 202 deletions(-)

diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
index 6c09e5724..09b680374 100644
--- a/.github/workflows/run_tests.yml
+++ b/.github/workflows/run_tests.yml
@@ -108,7 +108,7 @@ jobs:
       - name: Install SmartSim (with ML backends)
         run: |
           python -m pip install git+https://github.com/CrayLabs/SmartRedis.git@develop#egg=smartredis
-          python -m pip install .[dev,ml-cpu]
+          python -m pip install .[dev,ml]
 
       - name: Install ML Runtimes with Smart (with pt, tf, and onnx support)
         if: (matrix.py_v != '3.10')
diff --git a/doc/changelog.rst b/doc/changelog.rst
index c4ee55d81..375e1f17e 100644
--- a/doc/changelog.rst
+++ b/doc/changelog.rst
@@ -15,7 +15,7 @@ SmartSim
 0.5.1
 -----
 
-Released on 13 September, 2023
+Released on 14 September, 2023
 
 Description
 
@@ -45,7 +45,6 @@ Detailed Notes
 - Create public properties where appropriate to mitigate `protected-access` errors. (PR341_)
 - Fix a failure to execute `_prep_colocated_db` due to incorrect named attr check. (PR339_)
 - Enabled and mitigated mypy `disallow_any_generics` and `warn_return_any`. (PR338_)
-- Move installation of all optional SmartSim Python ML dependencies to `pip install` time. (PR336_)
 - Add a `smart validate` target to provide a simple smoke test to assess a SmartSim build. (PR336_, PR351_)
 - Add typehints to `smartsim._core.launcher.step.*`. (PR334_)
 - Log errors reported from slurm WLM when attempts to retrieve status fail. (PR331_, PR332_)
diff --git a/doc/installation_instructions/basic.rst b/doc/installation_instructions/basic.rst
index f65d76dac..f18cc3ec6 100644
--- a/doc/installation_instructions/basic.rst
+++ b/doc/installation_instructions/basic.rst
@@ -160,15 +160,15 @@ and install SmartSim from PyPI with the following command:
 
 If you would like SmartSim to also install python machine learning libraries
 that can be used outside SmartSim to build SmartSim-compatible models, you
-can request their installation through the ``ml-*`` optional dependencies,
+can request their installation through the ``[ml]`` optional dependencies,
 as follows:
 
 .. code-block:: bash
 
-    # For CPU based models
-    pip install smartsim[ml-cpu]
-    # For CPU and CUDA based models
-    pip install smartsim[ml-cuda]
+    # For bash
+    pip install smartsim[ml]
+    # For zsh (the square brackets must be escaped)
+    pip install smartsim\[ml\]
 
 At this point, SmartSim is installed and can be used for more basic features.
 If you want to use the machine learning features of SmartSim, you will need
@@ -287,9 +287,8 @@ source remains at the site of the clone instead of in site-packages.
 .. code-block:: bash
 
   cd smartsim
-  pip install -e .[dev,ml-cpu]   # for CPU only
-  # OR
-  pip install -e .[dev,ml-cuda]  # for CUDA support
+  pip install -e .[dev,ml]    # for bash users
+  pip install -e .\[dev,ml\]  # for zsh users
 
 Use the now installed ``smart`` cli to install the machine learning runtimes.
 
diff --git a/doc/installation_instructions/site-install.rst b/doc/installation_instructions/site-install.rst
index 1a0db3d09..26ecd6c13 100644
--- a/doc/installation_instructions/site-install.rst
+++ b/doc/installation_instructions/site-install.rst
@@ -11,4 +11,5 @@ from source with the following steps replacing ``COMPILER_VERSION`` and
 
     module use -a /lus/scratch/smartsim/local/modulefiles
     module load cudatoolkit/11.8 cudnn smartsim-deps/COMPILER_VERSION/SMARTSIM_VERSION
-    pip install smartsim[ml-cuda]
+    pip install smartsim[ml]
+    smart build --only_python_packages --device gpu [--onnx]
diff --git a/smartsim/_core/_cli/build.py b/smartsim/_core/_cli/build.py
index 092fb8f8d..4218d79db 100644
--- a/smartsim/_core/_cli/build.py
+++ b/smartsim/_core/_cli/build.py
@@ -32,15 +32,15 @@
 
 from tabulate import tabulate
 
-from smartsim._core._cli.utils import color_bool, SMART_LOGGER_FORMAT
+from smartsim._core._cli.utils import SMART_LOGGER_FORMAT, color_bool, pip
 from smartsim._core._install import builder
 from smartsim._core._install.buildenv import (
     BuildEnv,
+    DbEngine,
     SetupError,
     Version_,
-    Versioner,
-    DbEngine,
     VersionConflictError,
+    Versioner,
 )
 from smartsim._core._install.builder import BuildError
 from smartsim._core.config import CONFIG
@@ -244,16 +244,34 @@ def check_py_torch_version(versions: Versioner, device: _TDeviceStr = "cpu") ->
         else:
             raise BuildError("Unrecognized device requested")
 
-    _check_packages_in_python_env(
-        {
-            "torch": Version_(f"{versions.TORCH}{device_suffix}"),
-            "torchvision": Version_(f"{versions.TORCHVISION}{device_suffix}"),
-        },
+    torch_deps = {
+        "torch": Version_(f"{versions.TORCH}{device_suffix}"),
+        "torchvision": Version_(f"{versions.TORCHVISION}{device_suffix}"),
+    }
+    missing, conflicts = _assess_python_env(
+        torch_deps,
+        package_pinning="==",
         validate_installed_version=_create_torch_version_validator(
             with_suffix=device_suffix
         ),
     )
 
+    if len(missing) == len(torch_deps) and not conflicts:
+        # None of the PyTorch deps are installed and there are no conflicting
+        # python packages. We can try to install torch deps into the current env.
+        logger.info(
+            "Torch version not found in python environment. "
+            "Attempting to install via `pip`"
+        )
+        pip(
+            "install",
+            "-f",
+            "https://download.pytorch.org/whl/torch_stable.html",
+            *(f"{package}=={version}" for package, version in torch_deps.items()),
+        )
+    elif missing or conflicts:
+        logger.warning(_format_incompatible_python_env_message(missing, conflicts))
+
 
 def _create_torch_version_validator(
     with_suffix: str,
@@ -297,20 +315,7 @@ def _check_packages_in_python_env(
     )
 
     if missing or conflicts:
-        indent = "\n\t"
-        fmt_list: t.Callable[[str, t.List[str]], str] = (
-            lambda n, l: f"{n}:{indent}{indent.join(l)}" if l else ""
-        )
-        missing_str = fmt_list("Missing", missing)
-        conflict_str = fmt_list("Conflicting", conflicts)
-        sep = "\n" if missing_str and conflict_str else ""
-        logger.warning(
-            "Python Env Status Warning!\n"
-            "Requested Packages are Missing or Conflicting:\n\n"
-            f"{missing_str}{sep}{conflict_str}"
-            "\n\nConsider installing packages at the requested versions via "
-            "`pip` or installing SmartSim with optional ML dependencies"
-        )
+        logger.warning(_format_incompatible_python_env_message(missing, conflicts))
 
 
 def _assess_python_env(
@@ -334,6 +339,26 @@ def _assess_python_env(
     return missing, conflicts
 
 
+def _format_incompatible_python_env_message(
+    missing: t.Iterable[str], conflicting: t.Iterable[str]
+) -> str:
+    indent = "\n\t"
+    fmt_list: t.Callable[[str, t.Iterable[str]], str] = (
+        lambda n, l: f"{n}:{indent}{indent.join(l)}" if l else ""
+    )
+    missing_str = fmt_list("Missing", missing)
+    conflict_str = fmt_list("Conflicting", conflicting)
+    sep = "\n" if missing_str and conflict_str else ""
+    return (
+        "Python Env Status Warning!\n"
+        "Requested Packages are Missing or Conflicting:\n\n"
+        f"{missing_str}{sep}{conflict_str}\n\n"
+        "Consider installing packages at the requested versions via `pip` or "
+        "uninstalling them, installing SmartSim with optional ML dependencies "
+        "(`pip install smartsim[ml]`), and running `smart clean && smart build ...`"
+    )
+
+
 def execute(args: argparse.Namespace) -> int:
     verbose = args.v
     keydb = args.keydb
@@ -376,21 +401,22 @@ def execute(args: argparse.Namespace) -> int:
         print(tabulate(vers, headers=version_names, tablefmt="github"), "\n")
 
     try:
-        # REDIS/KeyDB
-        build_database(build_env, versions, keydb, verbose)
-
-        # REDISAI
-        build_redis_ai(
-            build_env,
-            versions,
-            device,
-            pt,
-            tf,
-            onnx,
-            args.torch_dir,
-            args.libtensorflow_dir,
-            verbose=verbose,
-        )
+        if not args.only_python_packages:
+            # REDIS/KeyDB
+            build_database(build_env, versions, keydb, verbose)
+
+            # REDISAI
+            build_redis_ai(
+                build_env,
+                versions,
+                device,
+                pt,
+                tf,
+                onnx,
+                args.torch_dir,
+                args.libtensorflow_dir,
+                verbose=verbose,
+            )
     except (SetupError, BuildError) as e:
         logger.error(str(e))
         return 1
@@ -406,7 +432,7 @@ def execute(args: argparse.Namespace) -> int:
             check_py_tf_version(versions)
         if "onnxruntime" in backends:
             check_py_onnx_version(versions)
-    except SetupError as e:
+    except (SetupError, BuildError) as e:
         logger.error(str(e))
         return 1
 
@@ -430,6 +456,12 @@ def configure_parser(parser: argparse.ArgumentParser) -> None:
         choices=["cpu", "gpu"],
         help="Device to build ML runtimes for",
     )
+    parser.add_argument(
+        "--only_python_packages",
+        action="store_true",
+        default=False,
+        help="Only evaluate the python packages (i.e. skip building backends)",
+    )
     parser.add_argument(
         "--no_pt",
         action="store_true",
diff --git a/smartsim/_core/_cli/utils.py b/smartsim/_core/_cli/utils.py
index 21fba623a..d8fd4b68c 100644
--- a/smartsim/_core/_cli/utils.py
+++ b/smartsim/_core/_cli/utils.py
@@ -26,11 +26,14 @@
 
 import importlib
 import shutil
+import subprocess as sp
+import sys
 import typing as t
 from argparse import ArgumentParser, Namespace
 from pathlib import Path
 
 from smartsim._core._install.buildenv import SetupError
+from smartsim._core._install.builder import BuildError
 from smartsim._core.utils import colorize
 from smartsim.log import get_logger
 
@@ -60,6 +63,16 @@ def color_bool(trigger: bool = True) -> str:
     return colorize(str(trigger), color=_color)
 
 
+def pip(*args: str) -> None:
+    cmd = (sys.executable, "-m", "pip") + args
+    with sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE) as proc:
+        _, err = proc.communicate()
+        if int(proc.returncode) != 0:
+            raise BuildError(
+                f"`pip` returned with a non-zero exit code:\n{err.decode('utf-8')}"
+            )
+
+
 def clean(core_path: Path, _all: bool = False) -> int:
     """Remove pre existing installations of ML runtimes
 
diff --git a/smartsim/_core/_cli/validate.py b/smartsim/_core/_cli/validate.py
index 310130ed4..df1d331e0 100644
--- a/smartsim/_core/_cli/validate.py
+++ b/smartsim/_core/_cli/validate.py
@@ -194,7 +194,7 @@ def _test_tf_install(client: Client, tmp_dir: str, device: _TCapitalDeviceStr) -
         model_path, inputs, outputs = recv_conn.recv()
     except EOFError as e:
         raise Exception(
-            "Failed to recieve serialized model from subprocess. "
+            "Failed to receive serialized model from subprocess. "
             "Is the `tensorflow` python package installed?"
         ) from e
 
diff --git a/smartsim/_core/_install/buildenv.py b/smartsim/_core/_install/buildenv.py
index 49d35272b..c6a050b4d 100644
--- a/smartsim/_core/_install/buildenv.py
+++ b/smartsim/_core/_install/buildenv.py
@@ -26,7 +26,6 @@
 
 # pylint: disable=invalid-name
 
-import itertools
 import importlib.metadata
 import os
 import platform
@@ -262,30 +261,6 @@ def get_defaults(self) -> t.Dict[str, str]:
         return self.defaults[self.version].copy()
 
 
-def _format_linux_torch_py_package_req(
-    arch: str, python_version: str, torch_version: str
-) -> str:
-    pyv_no_dot = python_version.replace(".", "")
-    return (
-        "torch"
-        # pylint: disable-next=line-too-long
-        f"  @ https://download.pytorch.org/whl/{arch}/torch-{torch_version}%2B{arch}-cp{pyv_no_dot}-cp{pyv_no_dot}-linux_x86_64.whl"
-        f'  ; python_version == "{python_version}" and sys_platform != "darwin"'
-    )
-
-
-def _format_linux_torchvision_py_package_req(
-    arch: str, python_version: str, torchvision_version: str
-) -> str:
-    pyv_no_dot = python_version.replace(".", "")
-    return (
-        "torchvision"
-        # pylint: disable-next=line-too-long
-        f"  @ https://download.pytorch.org/whl/{arch}/torchvision-{torchvision_version}%2B{arch}-cp{pyv_no_dot}-cp{pyv_no_dot}-linux_x86_64.whl"
-        f'  ; python_version == "{python_version}" and sys_platform != "darwin"'
-    )
-
-
 class Versioner:
     """Versioner is responsible for managing all the versions
     within SmartSim including SmartSim itself.
@@ -376,26 +351,8 @@ def ml_extras_required(self) -> t.Dict[str, t.List[str]]:
         """
         ml_defaults = self.REDISAI.get_defaults()
 
-        def _format_custom_linux_torch_deps(
-            torchv: str, torchvisionv: str, arch: str
-        ) -> t.Tuple[str, ...]:
-            # The correct versions and suffixes were scraped from
-            # https://pytorch.org/get-started/previous-versions/
-            supported_py_versions = ("3.8", "3.9", "3.10")
-            return tuple(
-                itertools.chain.from_iterable(
-                    (
-                        _format_linux_torch_py_package_req(arch, pyv, torchv),
-                        _format_linux_torchvision_py_package_req(
-                            arch, pyv, torchvisionv
-                        ),
-                    )
-                    for pyv in supported_py_versions
-                )
-            )
-
         # remove torch-related fields as they are subject to change
-        # by having the user set env vars
+        # depending on the user's target hardware (cpu/gpu)
         _torch_fields = [
             "torch",
             "torchvision",
@@ -405,25 +362,8 @@ def _format_custom_linux_torch_deps(
         for field in _torch_fields:
             ml_defaults.pop(field)
 
-        common = tuple(f"{lib}=={vers}" for lib, vers in ml_defaults.items())
         return {
-            "ml-cpu": [
-                *common,
-                # osx
-                f'torch=={self.TORCH} ; sys_platform == "darwin"',
-                f'torchvision=={self.TORCHVISION} ; sys_platform == "darwin"',
-                # linux
-                *_format_custom_linux_torch_deps(
-                    self.TORCH, self.TORCHVISION, self.TORCH_CPU_SUFFIX.lstrip("+")
-                ),
-            ],
-            "ml-cuda": [
-                *common,
-                # linux
-                *_format_custom_linux_torch_deps(
-                    self.TORCH, self.TORCHVISION, self.TORCH_CUDA_SUFFIX.lstrip("+")
-                ),
-            ],
+            "ml": [f"{lib}=={vers}" for lib, vers in ml_defaults.items()]
         }
 
     @staticmethod
diff --git a/tutorials/ml_inference/Inference-in-SmartSim.ipynb b/tutorials/ml_inference/Inference-in-SmartSim.ipynb
index 4e1c53d75..384c46d69 100644
--- a/tutorials/ml_inference/Inference-in-SmartSim.ipynb
+++ b/tutorials/ml_inference/Inference-in-SmartSim.ipynb
@@ -68,8 +68,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "usage: smart build [-h] [-v] [--device {cpu,gpu}] [--no_pt] [--no_tf] [--onnx]\n",
-      "                   [--torch_dir TORCH_DIR]\n",
+      "usage: smart build [-h] [-v] [--device {cpu,gpu}] [--only_python_packages]\n",
+      "                   [--no_pt] [--no_tf] [--onnx] [--torch_dir TORCH_DIR]\n",
       "                   [--libtensorflow_dir LIBTENSORFLOW_DIR] [--keydb]\n",
       "\n",
       "Build SmartSim dependencies (Redis, RedisAI, ML runtimes)\n",
@@ -78,6 +78,9 @@
       "  -h, --help            show this help message and exit\n",
       "  -v                    Enable verbose build process\n",
       "  --device {cpu,gpu}    Device to build ML runtimes for\n",
+      "  --only_python_packages\n",
+      "                        Only evaluate the python packages (i.e. skip building\n",
+      "                        backends)\n",
       "  --no_pt               Do not build PyTorch backend\n",
       "  --no_tf               Do not build TensorFlow backend\n",
       "  --onnx                Build ONNX backend (off by default)\n",
@@ -348,48 +351,48 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "SmartRedis Library@16-54-13:WARNING: Environment variable SR_LOG_FILE is not set. Defaulting to stdout\n",
-      "SmartRedis Library@16-54-13:WARNING: Environment variable SR_LOG_LEVEL is not set. Defaulting to INFO\n",
-      "Prediction: [[-2.2478018 -2.395631  -2.2669961 -2.400009  -2.2728784 -2.27591\n",
-      "  -2.2906797 -2.3545692 -2.2261484 -2.3114305]\n",
-      " [-2.2480786 -2.3842168 -2.2628386 -2.403884  -2.2832446 -2.2860255\n",
-      "  -2.2994099 -2.375429  -2.2036932 -2.2975397]\n",
-      " [-2.245448  -2.389964  -2.268176  -2.4029608 -2.280573  -2.2878091\n",
-      "  -2.2937536 -2.3574357 -2.2161047 -2.3001926]\n",
-      " [-2.2418973 -2.3858    -2.2578824 -2.3933666 -2.2973359 -2.2910569\n",
-      "  -2.2984695 -2.3671184 -2.2083244 -2.301574 ]\n",
-      " [-2.2591636 -2.3932264 -2.2583983 -2.3987677 -2.2713947 -2.2846944\n",
-      "  -2.3082988 -2.3607285 -2.2154033 -2.2924812]\n",
-      " [-2.2580602 -2.3792613 -2.272383  -2.4011228 -2.272388  -2.2694292\n",
-      "  -2.3032012 -2.365895  -2.2177567 -2.3020668]\n",
-      " [-2.245951  -2.3920639 -2.2689075 -2.407091  -2.267692  -2.2859006\n",
-      "  -2.3037941 -2.3703804 -2.2084582 -2.29459  ]\n",
-      " [-2.253354  -2.3911812 -2.2643569 -2.4074526 -2.285964  -2.2871246\n",
-      "  -2.2955043 -2.375829  -2.2028425 -2.2816033]\n",
-      " [-2.2477372 -2.380954  -2.2610688 -2.392686  -2.282587  -2.2853549\n",
-      "  -2.296441  -2.3648849 -2.2207007 -2.3085098]\n",
-      " [-2.2393162 -2.3921392 -2.2663367 -2.4068065 -2.2845395 -2.290668\n",
-      "  -2.284157  -2.3662543 -2.2121382 -2.3019514]\n",
-      " [-2.2408657 -2.384463  -2.2651308 -2.4005342 -2.2722073 -2.287395\n",
-      "  -2.3110955 -2.3697157 -2.2028828 -2.3101122]\n",
-      " [-2.2389345 -2.390637  -2.2625332 -2.4023867 -2.2838216 -2.289138\n",
-      "  -2.3047955 -2.3756099 -2.2034433 -2.2940078]\n",
-      " [-2.2369158 -2.3936262 -2.2521482 -2.4033504 -2.2768025 -2.2861629\n",
-      "  -2.316517  -2.3730042 -2.210685  -2.296432 ]\n",
-      " [-2.2388594 -2.3964899 -2.26673   -2.4088354 -2.2696297 -2.287068\n",
-      "  -2.3139803 -2.3763204 -2.1987703 -2.290656 ]\n",
-      " [-2.237903  -2.3903239 -2.2650957 -2.4093916 -2.27383   -2.2950318\n",
-      "  -2.299792  -2.3679557 -2.2050626 -2.3010159]\n",
-      " [-2.2392526 -2.4035661 -2.2502809 -2.4051485 -2.279354  -2.2831054\n",
-      "  -2.3016942 -2.3732626 -2.2127666 -2.297947 ]\n",
-      " [-2.2445319 -2.3949418 -2.2569892 -2.414155  -2.268012  -2.2846045\n",
-      "  -2.3016362 -2.3772373 -2.2142422 -2.289995 ]\n",
-      " [-2.2507832 -2.3926392 -2.2597246 -2.40416   -2.2814076 -2.2869318\n",
-      "  -2.2988138 -2.37261   -2.2073905 -2.29012  ]\n",
-      " [-2.249173  -2.3990614 -2.257255  -2.3995957 -2.2756734 -2.2913516\n",
-      "  -2.3009257 -2.3622444 -2.2111323 -2.2974997]\n",
-      " [-2.2522578 -2.3874125 -2.2524507 -2.411776  -2.2802238 -2.2815466\n",
-      "  -2.2947848 -2.3739164 -2.2161126 -2.2940607]]\n"
+      "SmartRedis Library@23-56-41:WARNING: Environment variable SR_LOG_FILE is not set. Defaulting to stdout\n",
+      "SmartRedis Library@23-56-41:WARNING: Environment variable SR_LOG_LEVEL is not set. Defaulting to INFO\n",
+      "Prediction: [[-2.3274555 -2.3253717 -2.354757  -2.3729622 -2.3431003 -2.1907542\n",
+      "  -2.3514638 -2.1824958 -2.3210742 -2.2772176]\n",
+      " [-2.319342  -2.3146112 -2.370425  -2.372699  -2.3437245 -2.1988375\n",
+      "  -2.354674  -2.1797025 -2.3205185 -2.2724082]\n",
+      " [-2.316474  -2.3222082 -2.354598  -2.3659394 -2.3442194 -2.203955\n",
+      "  -2.3561926 -2.1938426 -2.3158035 -2.2702417]\n",
+      " [-2.3319743 -2.311106  -2.356003  -2.3770962 -2.333499  -2.1953351\n",
+      "  -2.3548756 -2.195049  -2.310809  -2.2787712]\n",
+      " [-2.3205962 -2.3178282 -2.3519592 -2.3816493 -2.3516834 -2.1981795\n",
+      "  -2.3636622 -2.1777525 -2.3139138 -2.2705152]\n",
+      " [-2.3096914 -2.3222034 -2.3647196 -2.3790689 -2.3540542 -2.206103\n",
+      "  -2.350227  -2.1878397 -2.3078933 -2.2638521]\n",
+      " [-2.3328648 -2.3219166 -2.3527567 -2.3824098 -2.3419397 -2.1949291\n",
+      "  -2.3534136 -2.1831408 -2.31838   -2.2653728]\n",
+      " [-2.3125417 -2.324307  -2.3541815 -2.379772  -2.348383  -2.2018006\n",
+      "  -2.3614779 -2.1773078 -2.322288  -2.2653532]\n",
+      " [-2.3261974 -2.3169107 -2.3658333 -2.372918  -2.3417373 -2.1894612\n",
+      "  -2.3535395 -2.2018242 -2.308719  -2.268019 ]\n",
+      " [-2.316616  -2.3056076 -2.355318  -2.3717446 -2.346278  -2.1928883\n",
+      "  -2.3632033 -2.2028553 -2.3090112 -2.2805274]\n",
+      " [-2.3209507 -2.3127859 -2.358682  -2.3774037 -2.3558414 -2.2000623\n",
+      "  -2.3439143 -2.1920927 -2.3196788 -2.2638488]\n",
+      " [-2.3159695 -2.3109243 -2.356306  -2.374135  -2.3412004 -2.1999855\n",
+      "  -2.3728766 -2.1851294 -2.3103416 -2.2791054]\n",
+      " [-2.320004  -2.3205712 -2.3569424 -2.3752837 -2.3463457 -2.1887283\n",
+      "  -2.3645942 -2.1946917 -2.3067377 -2.272361 ]\n",
+      " [-2.310819  -2.3274822 -2.356091  -2.3715394 -2.3474889 -2.200722\n",
+      "  -2.3434677 -2.1957805 -2.3201551 -2.2701602]\n",
+      " [-2.3143158 -2.31956   -2.358585  -2.362682  -2.3464782 -2.196579\n",
+      "  -2.3578608 -2.2015376 -2.3066673 -2.2789493]\n",
+      " [-2.318907  -2.3225117 -2.3634868 -2.3806338 -2.344084  -2.1920872\n",
+      "  -2.3534818 -2.1955805 -2.3039575 -2.2711294]\n",
+      " [-2.3084583 -2.3254113 -2.3642344 -2.3710778 -2.3496058 -2.192245\n",
+      "  -2.3604536 -2.1796546 -2.310007  -2.286219 ]\n",
+      " [-2.3140576 -2.3124697 -2.359347  -2.379842  -2.3481016 -2.1948602\n",
+      "  -2.3681424 -2.1851056 -2.3161757 -2.2693238]\n",
+      " [-2.3162746 -2.3137376 -2.3598473 -2.3751001 -2.3536685 -2.1899457\n",
+      "  -2.3560162 -2.1918488 -2.3077402 -2.2818694]\n",
+      " [-2.3138344 -2.3119657 -2.3552136 -2.3767023 -2.3556495 -2.187487\n",
+      "  -2.3484402 -2.1922355 -2.3236399 -2.2809098]]\n"
      ]
     }
    ],
@@ -406,7 +409,7 @@
     "\n",
     "# get the output\n",
     "output = client.get_tensor(\"output\")\n",
-    "print(f\"Prediction: {output}\")\n"
+    "print(f\"Prediction: {output}\")"
    ]
   },
   {
@@ -465,46 +468,46 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "U: [[[-0.6023696   0.07311916]\n",
-      "  [-0.49197382 -0.8181777 ]\n",
-      "  [-0.62857985  0.5702971 ]]\n",
+      "U: [[[-0.550159    0.8065786 ]\n",
+      "  [-0.52288723 -0.5346357 ]\n",
+      "  [-0.6510868  -0.2521817 ]]\n",
       "\n",
-      " [[-0.36903515  0.92125744]\n",
-      "  [-0.43777385 -0.28891826]\n",
-      "  [-0.8198579  -0.2604056 ]]\n",
+      " [[-0.17983183 -0.20003092]\n",
+      "  [-0.5534476  -0.7888692 ]\n",
+      "  [-0.81323797  0.58109635]]\n",
       "\n",
-      " [[-0.5127858   0.851067  ]\n",
-      "  [-0.56601447 -0.43397826]\n",
-      "  [-0.64550614 -0.2955465 ]]\n",
+      " [[-0.20800859  0.42269117]\n",
+      "  [-0.65485084 -0.7300564 ]\n",
+      "  [-0.7265692   0.53698224]]\n",
       "\n",
-      " [[-0.6031817   0.7681344 ]\n",
-      "  [-0.735206   -0.63987494]\n",
-      "  [-0.3092629   0.02300668]]\n",
+      " [[-0.336111    0.77894354]\n",
+      "  [-0.31149226  0.43854192]\n",
+      "  [-0.8888205  -0.44825   ]]\n",
       "\n",
-      " [[-0.6514613   0.26648495]\n",
-      "  [-0.4583032  -0.88437456]\n",
-      "  [-0.6046124   0.3832325 ]]]\n",
+      " [[-0.6365824   0.7635661 ]\n",
+      "  [-0.2663487  -0.08588188]\n",
+      "  [-0.723755   -0.639993  ]]]\n",
       "\n",
-      ", S: [[111.77634   46.54085 ]\n",
-      " [114.562614  78.1435  ]\n",
-      " [186.19078   33.83777 ]\n",
-      " [132.62187   57.779224]\n",
-      " [145.16234   20.540234]]\n",
+      ", S: [[137.34267   54.616768]\n",
+      " [142.89323   35.937744]\n",
+      " [ 90.98083   48.821   ]\n",
+      " [ 86.74378   31.835794]\n",
+      " [146.14839   36.327038]]\n",
       "\n",
-      ", V: [[[-0.91532665  0.40271226]\n",
-      "  [-0.40271226 -0.91532665]]\n",
+      ", V: [[[-0.48165366  0.8763617 ]\n",
+      "  [-0.8763617  -0.48165366]]\n",
       "\n",
-      " [[-0.9118641  -0.4104923 ]\n",
-      "  [-0.4104923   0.9118641 ]]\n",
+      " [[-0.47905296  0.8777859 ]\n",
+      "  [-0.8777859  -0.47905296]]\n",
       "\n",
-      " [[-0.8434532   0.53720254]\n",
-      "  [-0.53720254 -0.8434532 ]]\n",
+      " [[-0.737007   -0.67588514]\n",
+      "  [-0.67588514  0.737007  ]]\n",
       "\n",
-      " [[-0.70564145 -0.70856905]\n",
-      "  [-0.70856905  0.70564145]]\n",
+      " [[-0.28137407  0.9595981 ]\n",
+      "  [-0.9595981  -0.28137407]]\n",
       "\n",
-      " [[-0.9691591   0.24643579]\n",
-      "  [-0.24643579 -0.9691591 ]]]\n",
+      " [[-0.5767642  -0.8169106 ]\n",
+      "  [-0.8169106   0.5767642 ]]]\n",
       "\n"
      ]
     }
@@ -521,7 +524,7 @@
     "U = client.get_tensor(\"U\")\n",
     "S = client.get_tensor(\"S\")\n",
     "V = client.get_tensor(\"V\")\n",
-    "print(f\"U: {U}\\n\\n, S: {S}\\n\\n, V: {V}\\n\")\n"
+    "print(f\"U: {U}\\n\\n, S: {S}\\n\\n, V: {V}\\n\")"
    ]
   },
   {
@@ -591,8 +594,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[[0.19671488 0.03442806 0.05503438 0.05018509 0.01337491 0.03888546\n",
-      "  0.3835799  0.06959172 0.09724794 0.06095758]]\n"
+      "[[0.03525018 0.04472604 0.02831913 0.1114466  0.25944078 0.11165252\n",
+      "  0.2983908  0.04830809 0.02390536 0.03856055]]\n"
      ]
     }
    ],
@@ -705,7 +708,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[0 0 0 0 0 1 1 1 1 1]\n"
+      "[1 1 1 1 1 0 0 0 0 0]\n"
      ]
     }
    ],
@@ -727,7 +730,7 @@
     "client.set_model(\"kmeans\", model, \"ONNX\", device=\"CPU\")\n",
     "client.run_model(\"kmeans\", inputs=\"input\", outputs=[\"labels\", \"transform\"])\n",
     "\n",
-    "print(client.get_tensor(\"labels\"))\n"
+    "print(client.get_tensor(\"labels\"))"
    ]
   },
   {
@@ -788,7 +791,7 @@
     "client.put_tensor(\"input\", sample)\n",
     "client.set_model(\"rf_regressor\", model, \"ONNX\", device=\"CPU\")\n",
     "client.run_model(\"rf_regressor\", inputs=\"input\", outputs=\"output\")\n",
-    "print(client.get_tensor(\"output\"))\n"
+    "print(client.get_tensor(\"output\"))"
    ]
   },
   {
@@ -803,7 +806,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "16:54:20 C02G13RYMD6N SmartSim[63271] INFO Stopping model orchestrator_0 with job name orchestrator_0-CVD39CHUID8O\n"
+      "23:56:50 C02G13RYMD6N SmartSim[33744] INFO Stopping model orchestrator_0 with job name orchestrator_0-CVIG02IVGHO0\n"
      ]
     }
    ],
@@ -827,12 +830,12 @@
        "<tr><th>  </th><th>Name          </th><th>Entity-Type  </th><th>JobID  </th><th>RunID  </th><th>Time   </th><th>Status   </th><th>Returncode  </th></tr>\n",
        "</thead>\n",
        "<tbody>\n",
-       "<tr><td>0 </td><td>orchestrator_0</td><td>DBNode       </td><td>65343  </td><td>0      </td><td>27.1124</td><td>Cancelled</td><td>-9          </td></tr>\n",
+       "<tr><td>0 </td><td>orchestrator_0</td><td>DBNode       </td><td>35628  </td><td>0      </td><td>29.7008</td><td>Cancelled</td><td>-9          </td></tr>\n",
        "</tbody>\n",
        "</table>"
       ],
       "text/plain": [
-       "'<table>\\n<thead>\\n<tr><th>  </th><th>Name          </th><th>Entity-Type  </th><th>JobID  </th><th>RunID  </th><th>Time   </th><th>Status   </th><th>Returncode  </th></tr>\\n</thead>\\n<tbody>\\n<tr><td>0 </td><td>orchestrator_0</td><td>DBNode       </td><td>65343  </td><td>0      </td><td>27.1124</td><td>Cancelled</td><td>-9          </td></tr>\\n</tbody>\\n</table>'"
+       "'<table>\\n<thead>\\n<tr><th>  </th><th>Name          </th><th>Entity-Type  </th><th>JobID  </th><th>RunID  </th><th>Time   </th><th>Status   </th><th>Returncode  </th></tr>\\n</thead>\\n<tbody>\\n<tr><td>0 </td><td>orchestrator_0</td><td>DBNode       </td><td>35628  </td><td>0      </td><td>29.7008</td><td>Cancelled</td><td>-9          </td></tr>\\n</tbody>\\n</table>'"
       ]
      },
      "execution_count": 19,
@@ -898,7 +901,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "16:54:20 C02G13RYMD6N SmartSim[63271] INFO \n",
+      "23:56:50 C02G13RYMD6N SmartSim[33744] INFO \n",
       "\n",
       "=== Launch Summary ===\n",
       "Experiment: Inference-Tutorial\n",
@@ -915,7 +918,7 @@
       "\n",
       "\n",
       "\n",
-      "16:54:23 C02G13RYMD6N SmartSim[63271] INFO colocated_model(65381): Completed\n"
+      "23:56:52 C02G13RYMD6N SmartSim[33744] INFO colocated_model(35666): Completed\n"
      ]
     }
    ],
@@ -939,13 +942,13 @@
        "<tr><th>  </th><th>Name           </th><th>Entity-Type  </th><th>JobID  </th><th>RunID  </th><th>Time   </th><th>Status   </th><th>Returncode  </th></tr>\n",
        "</thead>\n",
        "<tbody>\n",
-       "<tr><td>0 </td><td>orchestrator_0 </td><td>DBNode       </td><td>65343  </td><td>0      </td><td>27.1124</td><td>Cancelled</td><td>-9          </td></tr>\n",
-       "<tr><td>1 </td><td>colocated_model</td><td>Model        </td><td>65381  </td><td>0      </td><td>3.1831 </td><td>Completed</td><td>0           </td></tr>\n",
+       "<tr><td>0 </td><td>orchestrator_0 </td><td>DBNode       </td><td>35628  </td><td>0      </td><td>29.7008</td><td>Cancelled</td><td>-9          </td></tr>\n",
+       "<tr><td>1 </td><td>colocated_model</td><td>Model        </td><td>35666  </td><td>0      </td><td>2.1590 </td><td>Completed</td><td>0           </td></tr>\n",
        "</tbody>\n",
        "</table>"
       ],
       "text/plain": [
-       "'<table>\\n<thead>\\n<tr><th>  </th><th>Name           </th><th>Entity-Type  </th><th>JobID  </th><th>RunID  </th><th>Time   </th><th>Status   </th><th>Returncode  </th></tr>\\n</thead>\\n<tbody>\\n<tr><td>0 </td><td>orchestrator_0 </td><td>DBNode       </td><td>65343  </td><td>0      </td><td>27.1124</td><td>Cancelled</td><td>-9          </td></tr>\\n<tr><td>1 </td><td>colocated_model</td><td>Model        </td><td>65381  </td><td>0      </td><td>3.1831 </td><td>Completed</td><td>0           </td></tr>\\n</tbody>\\n</table>'"
+       "'<table>\\n<thead>\\n<tr><th>  </th><th>Name           </th><th>Entity-Type  </th><th>JobID  </th><th>RunID  </th><th>Time   </th><th>Status   </th><th>Returncode  </th></tr>\\n</thead>\\n<tbody>\\n<tr><td>0 </td><td>orchestrator_0 </td><td>DBNode       </td><td>35628  </td><td>0      </td><td>29.7008</td><td>Cancelled</td><td>-9          </td></tr>\\n<tr><td>1 </td><td>colocated_model</td><td>Model        </td><td>35666  </td><td>0      </td><td>2.1590 </td><td>Completed</td><td>0           </td></tr>\\n</tbody>\\n</table>'"
       ]
      },
      "execution_count": 22,