From d8fba1b7a18f1408a3e1e13dfff424ad4335fd8f Mon Sep 17 00:00:00 2001 From: Chris McBride <3595025+ankona@users.noreply.github.com> Date: Wed, 6 Dec 2023 16:35:54 -0500 Subject: [PATCH] Add support for producing dashboard outputs (#426) Add support for producing & consuming telemetry outputs. - Adds telemetry monitor to check for updates and produce events for the dashboard - Updates controller to conditionally start telemetry monitor - Updates controller to produce a runtime manifest to trigger telemetry collection - Adds indirect proxy to produce events for the dashboard for unmanaged tasks - Adds CLI capability to launch dashboard [ committed by @ankona, @MattToast, @AlyssaCote ] [ reviewed by @al-rigazzi, @ashao ] --------- Co-authored-by: Matt Drozt Co-authored-by: Alyssa Cote <46540273+AlyssaCote@users.noreply.github.com> --- .gitignore | 5 + conftest.py | 13 +- doc/index.rst | 6 + doc/smartdashboard.rst | 7 + docker/docs/dev/Dockerfile | 6 + pyproject.toml | 1 + setup.py | 1 + smartsim/_core/_cli/__main__.py | 19 +- smartsim/_core/_cli/build.py | 10 +- smartsim/_core/_cli/clean.py | 9 +- smartsim/_core/_cli/cli.py | 57 +- smartsim/_core/_cli/dbcli.py | 10 +- smartsim/_core/_cli/info.py | 7 +- smartsim/_core/_cli/plugin.py | 55 + smartsim/_core/_cli/site.py | 6 +- smartsim/_core/_cli/utils.py | 7 +- smartsim/_core/_cli/validate.py | 11 +- smartsim/_core/config/config.py | 11 + smartsim/_core/control/controller.py | 226 +++- smartsim/_core/control/job.py | 38 +- smartsim/_core/control/jobmanager.py | 22 +- smartsim/_core/control/manifest.py | 145 ++- smartsim/_core/entrypoints/indirect.py | 242 ++++ .../_core/entrypoints/telemetrymonitor.py | 691 ++++++++++ .../_core/launcher/cobalt/cobaltLauncher.py | 5 +- smartsim/_core/launcher/launcher.py | 12 +- smartsim/_core/launcher/local/local.py | 43 +- smartsim/_core/launcher/lsf/lsfLauncher.py | 9 +- smartsim/_core/launcher/pbs/pbsLauncher.py | 5 +- .../_core/launcher/slurm/slurmLauncher.py | 5 +- smartsim/_core/launcher/step/alpsStep.py | 6 +- smartsim/_core/launcher/step/localStep.py | 9 +- smartsim/_core/launcher/step/lsfStep.py | 3 +- smartsim/_core/launcher/step/mpiStep.py | 8 +- smartsim/_core/launcher/step/slurmStep.py | 10 +- smartsim/_core/launcher/step/step.py | 66 +- smartsim/_core/utils/helpers.py | 45 +- smartsim/_core/utils/serialize.py | 246 ++++ smartsim/entity/dbnode.py | 14 +- smartsim/error/errors.py | 30 +- smartsim/experiment.py | 35 + smartsim/log.py | 10 +- smartsim/wlm/slurm.py | 3 +- tests/backends/test_dbmodel.py | 9 +- tests/backends/test_dbscript.py | 8 +- tests/full_wlm/test_generic_batch_launch.py | 12 +- .../full_wlm/test_generic_orc_launch_batch.py | 8 +- tests/full_wlm/test_mpmd.py | 4 +- tests/on_wlm/test_base_settings_on_wlm.py | 8 +- tests/on_wlm/test_colocated_model.py | 38 +- tests/on_wlm/test_generic_orc_launch.py | 6 +- tests/on_wlm/test_launch_errors.py | 6 +- tests/on_wlm/test_launch_ompi_lsf.py | 2 +- tests/on_wlm/test_restart.py | 4 +- .../test_simple_base_settings_on_wlm.py | 8 +- tests/on_wlm/test_simple_entity_launch.py | 12 +- tests/on_wlm/test_stop.py | 8 +- tests/test_cli.py | 130 +- tests/test_colo_model_local.py | 38 +- tests/test_config.py | 54 + tests/test_configs/echo.py | 42 + tests/test_configs/printing_model.py | 18 + .../telemetry/colocatedmodel.json | 69 + .../test_configs/telemetry/db_and_model.json | 86 ++ .../telemetry/db_and_model_1run.json | 79 ++ tests/test_configs/telemetry/ensembles.json | 329 +++++ .../test_configs/telemetry/serialmodels.json | 186 +++ 
tests/test_configs/telemetry/telemetry.json | 946 ++++++++++++++ tests/test_controller.py | 68 + tests/test_controller_errors.py | 2 +- tests/test_dbnode.py | 2 +- tests/test_experiment.py | 23 +- tests/test_generator.py | 6 +- tests/test_helpers.py | 15 + tests/test_indirect.py | 195 +++ tests/test_launch_errors.py | 4 +- tests/test_local_launch.py | 4 +- tests/test_local_multi_run.py | 2 +- tests/test_local_restart.py | 4 +- tests/test_manifest.py | 73 +- tests/test_model.py | 7 +- tests/test_multidb.py | 15 +- tests/test_orchestrator.py | 4 +- tests/test_pals_settings.py | 13 + tests/test_reconnect_orchestrator.py | 7 +- tests/test_serialize.py | 175 +++ tests/test_telemetry_monitor.py | 1139 +++++++++++++++++ 87 files changed, 5735 insertions(+), 302 deletions(-) create mode 100644 doc/smartdashboard.rst create mode 100644 smartsim/_core/_cli/plugin.py create mode 100644 smartsim/_core/entrypoints/indirect.py create mode 100644 smartsim/_core/entrypoints/telemetrymonitor.py create mode 100644 smartsim/_core/utils/serialize.py create mode 100644 tests/test_configs/echo.py create mode 100644 tests/test_configs/printing_model.py create mode 100644 tests/test_configs/telemetry/colocatedmodel.json create mode 100644 tests/test_configs/telemetry/db_and_model.json create mode 100644 tests/test_configs/telemetry/db_and_model_1run.json create mode 100644 tests/test_configs/telemetry/ensembles.json create mode 100644 tests/test_configs/telemetry/serialmodels.json create mode 100644 tests/test_configs/telemetry/telemetry.json create mode 100644 tests/test_controller.py create mode 100644 tests/test_indirect.py create mode 100644 tests/test_serialize.py create mode 100644 tests/test_telemetry_monitor.py diff --git a/.gitignore b/.gitignore index 3c1f7db48..428e439b3 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,8 @@ smartsim/_core/bin/*-cli # created upon install smartsim/_core/lib + +**/manifest/ +**/*.err +**/*.out +**/.smartsim/* diff --git a/conftest.py b/conftest.py index 69f712d6a..2aab72cd1 100644 --- a/conftest.py +++ b/conftest.py @@ -380,10 +380,10 @@ def local_db( """Yield fixture for startup and teardown of an local orchestrator""" exp_name = request.function.__name__ - exp = Experiment(exp_name, launcher="local") test_dir = fileutils.make_test_dir( caller_function=exp_name, caller_fspath=request.fspath ) + exp = Experiment(exp_name, launcher="local", exp_path=test_dir) db = Orchestrator(port=wlmutils.get_test_port(), interface="lo") db.set_path(test_dir) exp.start(db) @@ -402,10 +402,10 @@ def db( launcher = wlmutils.get_test_launcher() exp_name = request.function.__name__ - exp = Experiment(exp_name, launcher=launcher) test_dir = fileutils.make_test_dir( caller_function=exp_name, caller_fspath=request.fspath ) + exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) db = wlmutils.get_orchestrator() db.set_path(test_dir) exp.start(db) @@ -427,10 +427,10 @@ def db_cluster( launcher = wlmutils.get_test_launcher() exp_name = request.function.__name__ - exp = Experiment(exp_name, launcher=launcher) test_dir = fileutils.make_test_dir( caller_function=exp_name, caller_fspath=request.fspath ) + exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) db = wlmutils.get_orchestrator(nodes=3) db.set_path(test_dir) exp.start(db) @@ -630,7 +630,7 @@ def get_test_dir_path(dirname: str) -> str: return dir_path @staticmethod - def make_test_file(file_name: str, file_dir: t.Optional[str] = None) -> str: + def make_test_file(file_name: str, file_dir: t.Optional[str] = None, 
file_content: t.Optional[str] = None) -> str: """Create a dummy file in the test output directory. :param file_name: name of file to create, e.g. "file.txt" @@ -644,7 +644,10 @@ def make_test_file(file_name: str, file_dir: t.Optional[str] = None) -> str: file_path = os.path.join(test_dir, file_name) with open(file_path, "w+", encoding="utf-8") as dummy_file: - dummy_file.write("dummy\n") + if not file_content: + dummy_file.write("dummy\n") + else: + dummy_file.write(file_content) return file_path diff --git a/doc/index.rst b/doc/index.rst index d61fdb1ce..13d509257 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -48,6 +48,12 @@ sr_runtime api/smartredis_api +.. toctree:: + :maxdepth: 2 + :caption: SmartDashboard + + smartdashboard + .. toctree:: :maxdepth: 2 :caption: Reference diff --git a/doc/smartdashboard.rst b/doc/smartdashboard.rst new file mode 100644 index 000000000..532fa6db0 --- /dev/null +++ b/doc/smartdashboard.rst @@ -0,0 +1,7 @@ + +************** +SmartDashboard +************** + +.. include:: ../smartdashboard/doc/overview.rst + :start-line: 4 \ No newline at end of file diff --git a/docker/docs/dev/Dockerfile b/docker/docs/dev/Dockerfile index a27ae03c1..57fee67c9 100644 --- a/docker/docs/dev/Dockerfile +++ b/docker/docs/dev/Dockerfile @@ -52,6 +52,12 @@ RUN git clone https://github.com/CrayLabs/SmartRedis.git --branch develop --dept && python -m pip install . \ && rm -rf ~/.cache/pip +# Install smartdashboard +RUN git clone https://github.com/CrayLabs/SmartDashboard.git --branch develop --depth=1 smartdashboard \ + && cd smartdashboard \ + && python -m pip install . \ + && rm -rf ~/.cache/pip + RUN cd doc/tutorials/ && \ ln -s ../../tutorials/* . diff --git a/pyproject.toml b/pyproject.toml index 24c12d8b6..cd517abb5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -107,6 +107,7 @@ module = [ "keras", "torch", "smartsim.ml.torch.*", # must solve/ignore inheritance issues + "watchdog", ] ignore_missing_imports = true ignore_errors = true diff --git a/setup.py b/setup.py index 66cc7f879..d38918f68 100644 --- a/setup.py +++ b/setup.py @@ -167,6 +167,7 @@ def has_ext_modules(_placeholder): "tqdm>=4.50.2", "filelock>=3.4.2", "protobuf~=3.20", + "watchdog>=3.0.0", ] # Add SmartRedis at specific version diff --git a/smartsim/_core/_cli/__main__.py b/smartsim/_core/_cli/__main__.py index 68d22d14f..399ca3b03 100644 --- a/smartsim/_core/_cli/__main__.py +++ b/smartsim/_core/_cli/__main__.py @@ -24,14 +24,31 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import os import sys from smartsim._core._cli.cli import default_cli +from smartsim._core._cli.utils import SMART_LOGGER_FORMAT +from smartsim.error.errors import SmartSimCLIActionCancelled +from smartsim.log import get_logger + + +logger = get_logger("Smart", fmt=SMART_LOGGER_FORMAT) def main() -> int: smart_cli = default_cli() - return smart_cli.execute(sys.argv) + exception_trace_back_msg = "SmartSim exited with the following exception info:" + + try: + return smart_cli.execute(sys.argv) + except SmartSimCLIActionCancelled as ssi: + logger.info(str(ssi)) + logger.debug(exception_trace_back_msg, exc_info=ssi) + except KeyboardInterrupt as e: + logger.info("SmartSim was terminated by user") + logger.debug(exception_trace_back_msg, exc_info=e) + return os.EX_OK if __name__ == "__main__": diff --git a/smartsim/_core/_cli/build.py b/smartsim/_core/_cli/build.py index b2df26412..e3ba444ad 100644 --- a/smartsim/_core/_cli/build.py +++ b/smartsim/_core/_cli/build.py @@ -356,7 +356,9 @@ def _format_incompatible_python_env_message( ) -def execute(args: argparse.Namespace) -> int: +def execute( + args: argparse.Namespace, _unparsed_args: t.Optional[t.List[str]] = None, / +) -> int: verbose = args.v keydb = args.keydb device: _TDeviceStr = args.device @@ -416,7 +418,7 @@ def execute(args: argparse.Namespace) -> int: ) except (SetupError, BuildError) as e: logger.error(str(e)) - return 1 + return os.EX_SOFTWARE backends = installed_redisai_backends() backends_str = ", ".join(s.capitalize() for s in backends) if backends else "No" @@ -431,10 +433,10 @@ def execute(args: argparse.Namespace) -> int: check_py_onnx_version(versions) except (SetupError, BuildError) as e: logger.error(str(e)) - return 1 + return os.EX_SOFTWARE logger.info("SmartSim build complete!") - return 0 + return os.EX_OK def configure_parser(parser: argparse.ArgumentParser) -> None: diff --git a/smartsim/_core/_cli/clean.py b/smartsim/_core/_cli/clean.py index fcf051f0c..d8a85f8a9 100644 --- a/smartsim/_core/_cli/clean.py +++ b/smartsim/_core/_cli/clean.py @@ -25,6 +25,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import argparse +import typing as t from smartsim._core._cli.utils import clean, get_install_path @@ -39,10 +40,14 @@ def configure_parser(parser: argparse.ArgumentParser) -> None: ) -def execute(args: argparse.Namespace) -> int: +def execute( + args: argparse.Namespace, _unparsed_args: t.Optional[t.List[str]] = None, / +) -> int: return clean(get_install_path() / "_core", _all=args.clobber) -def execute_all(args: argparse.Namespace) -> int: +def execute_all( + args: argparse.Namespace, _unparsed_args: t.Optional[t.List[str]] = None, / +) -> int: args.clobber = True return execute(args) diff --git a/smartsim/_core/_cli/cli.py b/smartsim/_core/_cli/cli.py index ce2376c15..3d50765fb 100644 --- a/smartsim/_core/_cli/cli.py +++ b/smartsim/_core/_cli/cli.py @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import argparse +import os import typing as t from smartsim._core._cli.build import configure_parser as build_parser @@ -41,46 +42,68 @@ execute as validate_execute, configure_parser as validate_parser, ) +from smartsim._core._cli.plugin import plugins from smartsim._core._cli.utils import MenuItemConfig class SmartCli: def __init__(self, menu: t.List[MenuItemConfig]) -> None: - self.menu: t.Dict[str, MenuItemConfig] = {item.command: item for item in menu} - parser = argparse.ArgumentParser( + self.menu: t.Dict[str, MenuItemConfig] = {} + self.parser = argparse.ArgumentParser( prog="smart", description="SmartSim command line interface", ) - self.parser = parser - self.args: t.Optional[argparse.Namespace] = None - subparsers = parser.add_subparsers( + self.subparsers = self.parser.add_subparsers( dest="command", required=True, metavar="", help="Available commands", ) - for cmd, item in self.menu.items(): - parser = subparsers.add_parser( - cmd, description=item.description, help=item.description - ) - if item.configurator: - item.configurator(parser) + self.register_menu_items(menu) + self.register_menu_items([plugin() for plugin in plugins]) def execute(self, cli_args: t.List[str]) -> int: if len(cli_args) < 2: self.parser.print_help() - return 0 + return os.EX_USAGE - app_args = cli_args[1:] - self.args = self.parser.parse_args(app_args) + app_args = cli_args[1:] # exclude the path to executable + subcommand = cli_args[1] # first positional arg is the subcommand - if not (menu_item := self.menu.get(app_args[0], None)): + menu_item = self.menu.get(subcommand, None) + if not menu_item: self.parser.print_help() - return 0 + return os.EX_USAGE + + args = argparse.Namespace() + unparsed_args = [] + + if menu_item.is_plugin: + unparsed_args = app_args[1:] + else: + args = self.parser.parse_args(app_args) + + return menu_item.handler(args, unparsed_args) + + def _register_menu_item(self, item: MenuItemConfig) -> None: + parser = self.subparsers.add_parser( + item.command, description=item.description, help=item.description + ) + if item.configurator: + item.configurator(parser) + + if item.command in self.menu: + raise ValueError( + f"{item.command} cannot overwrite existing CLI command" + ) + + self.menu[item.command] = item - return menu_item.handler(self.args) + def register_menu_items(self, menu_items: t.List[MenuItemConfig]) -> None: + for item in menu_items: + self._register_menu_item(item) def default_cli() -> SmartCli: diff --git a/smartsim/_core/_cli/dbcli.py b/smartsim/_core/_cli/dbcli.py index 22a376588..ce0975bc4 100644 --- a/smartsim/_core/_cli/dbcli.py +++ b/smartsim/_core/_cli/dbcli.py @@ -25,13 +25,17 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 import argparse
+import os
+import typing as t
 
 from smartsim._core._cli.utils import get_db_path
 
 
-def execute(_args: argparse.Namespace) -> int:
+def execute(
+    _args: argparse.Namespace, _unparsed_args: t.Optional[t.List[str]] = None, /
+) -> int:
     if db_path := get_db_path():
         print(db_path)
-        return 0
+        return os.EX_OK
     print("Database (Redis or KeyDB) dependencies not found")
-    return 1
+    return os.EX_SOFTWARE
diff --git a/smartsim/_core/_cli/info.py b/smartsim/_core/_cli/info.py
index 35ee9b9ec..c08fcb1a3 100644
--- a/smartsim/_core/_cli/info.py
+++ b/smartsim/_core/_cli/info.py
@@ -1,5 +1,6 @@
 import argparse
 import importlib.metadata
+import os
 import pathlib
 import typing as t
 
@@ -12,7 +13,9 @@
 _MISSING_DEP = _helpers.colorize("Not Installed", "red")
 
 
-def execute(_args: argparse.Namespace, /) -> int:
+def execute(
+    _args: argparse.Namespace, _unparsed_args: t.Optional[t.List[str]] = None, /
+) -> int:
     print("\nSmart Python Packages:")
     print(
         tabulate(
@@ -66,7 +69,7 @@ def execute(_args: argparse.Namespace, /) -> int:
         ),
         end="\n\n",
     )
-    return 0
+    return os.EX_OK
 
 
 def _fmt_installed_db(db_path: t.Optional[pathlib.Path]) -> str:
diff --git a/smartsim/_core/_cli/plugin.py b/smartsim/_core/_cli/plugin.py
new file mode 100644
index 000000000..b263fe8b2
--- /dev/null
+++ b/smartsim/_core/_cli/plugin.py
@@ -0,0 +1,55 @@
+import argparse
+import importlib.util
+import os
+import sys
+import subprocess as sp
+import typing as t
+
+import smartsim.log
+from smartsim._core._cli.utils import MenuItemConfig, SMART_LOGGER_FORMAT
+from smartsim.error.errors import SmartSimCLIActionCancelled
+
+_LOGGER = smartsim.log.get_logger("Smart", fmt=SMART_LOGGER_FORMAT)
+
+
+def dynamic_execute(
+    cmd: str, plugin_name: str
+) -> t.Callable[[argparse.Namespace, t.List[str]], int]:
+    def process_execute(
+        _args: argparse.Namespace, unparsed_args: t.List[str], /
+    ) -> int:
+        try:
+            spec = importlib.util.find_spec(cmd)
+            if spec is None:
+                raise AttributeError
+        except (ModuleNotFoundError, AttributeError):
+            _LOGGER.error(f"{cmd} plugin not found. Please ensure it is installed")
+            return os.EX_CONFIG
+
+        combined_cmd = [sys.executable, "-m", cmd] + unparsed_args
+
+        try:
+            completed_proc = sp.run(combined_cmd, check=False)
+        except KeyboardInterrupt as ex:
+            msg = f"{plugin_name} terminated by user"
+            raise SmartSimCLIActionCancelled(msg) from ex
+        return completed_proc.returncode
+
+    return process_execute
+
+
+def dashboard() -> MenuItemConfig:
+    return MenuItemConfig(
+        "dashboard",
+        (
+            "Start the SmartSim dashboard to monitor experiment output from a "
+            "graphical user interface. This requires that the SmartSim Dashboard "
+            "Package be installed. For more information please visit "
+            "https://github.com/CrayLabs/SmartDashboard"
+        ),
+        dynamic_execute("smartdashboard", "Dashboard"),
+        is_plugin=True,
+    )
+
+
+plugins = (dashboard,)
diff --git a/smartsim/_core/_cli/site.py b/smartsim/_core/_cli/site.py
index 5fe667cde..c86e0341b 100644
--- a/smartsim/_core/_cli/site.py
+++ b/smartsim/_core/_cli/site.py
@@ -25,10 +25,12 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse +import os +import typing as t from smartsim._core._cli.utils import get_install_path -def execute(_args: argparse.Namespace) -> int: +def execute(_args: argparse.Namespace, _unparsed_args: t.List[str], /) -> int: print(get_install_path()) - return 0 + return os.EX_OK diff --git a/smartsim/_core/_cli/utils.py b/smartsim/_core/_cli/utils.py index 0be1b6ac9..d7b0f410d 100644 --- a/smartsim/_core/_cli/utils.py +++ b/smartsim/_core/_cli/utils.py @@ -25,6 +25,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import importlib.util +import os import shutil import subprocess as sp import sys @@ -110,7 +111,7 @@ def clean(core_path: Path, _all: bool = False) -> int: if removed: logger.info("Successfully removed SmartSim database installation") - return 0 + return os.EX_OK def get_db_path() -> t.Optional[Path]: @@ -121,7 +122,7 @@ def get_db_path() -> t.Optional[Path]: return None -_CliHandler = t.Callable[[Namespace], int] +_CliHandler = t.Callable[[Namespace, t.List[str]], int] _CliParseConfigurator = t.Callable[[ArgumentParser], None] @@ -132,8 +133,10 @@ def __init__( description: str, handler: _CliHandler, configurator: t.Optional[_CliParseConfigurator] = None, + is_plugin: bool = False ): self.command = cmd self.description = description self.handler = handler self.configurator = configurator + self.is_plugin = is_plugin diff --git a/smartsim/_core/_cli/validate.py b/smartsim/_core/_cli/validate.py index 78db15516..c796fc616 100644 --- a/smartsim/_core/_cli/validate.py +++ b/smartsim/_core/_cli/validate.py @@ -82,7 +82,9 @@ def __exit__( self._finalizer.detach() # type: ignore[attr-defined] -def execute(args: argparse.Namespace, /) -> int: +def execute( + args: argparse.Namespace, _unparsed_args: t.Optional[t.List[str]] = None, / +) -> int: """Validate the SmartSim installation works as expected given a simple experiment """ @@ -101,10 +103,10 @@ def execute(args: argparse.Namespace, /) -> int: logger.error( "SmartSim failed to run a simple experiment!\n" f"Experiment failed due to the following exception:\n{e}\n\n" - f"Output files are available at `{temp_dir}`" + f"Output files are available at `{temp_dir}`", exc_info=True ) - return 2 - return 0 + return os.EX_SOFTWARE + return os.EX_OK def configure_parser(parser: argparse.ArgumentParser) -> None: @@ -138,6 +140,7 @@ def test_install( with_onnx: bool, ) -> None: exp = Experiment("ValidationExperiment", exp_path=location, launcher="local") + exp.disable_telemetry() port = _find_free_port() if port is None else port with _make_managed_local_orc(exp, port) as client: logger.info("Verifying Tensor Transfer") diff --git a/smartsim/_core/config/config.py b/smartsim/_core/config/config.py index a7b1471bf..2fcee90f5 100644 --- a/smartsim/_core/config/config.py +++ b/smartsim/_core/config/config.py @@ -204,6 +204,17 @@ def test_account(self) -> t.Optional[str]: # pragma: no cover # no account by default return os.environ.get("SMARTSIM_TEST_ACCOUNT", None) + @property + def telemetry_frequency(self) -> int: + return int(os.environ.get("SMARTSIM_TELEMETRY_FREQUENCY", 5)) + + @property + def telemetry_enabled(self) -> bool: + return int(os.environ.get("SMARTSIM_FLAG_TELEMETRY", "0")) > 0 + + @property + def telemetry_cooldown(self) -> int: + return int(os.environ.get("SMARTSIM_TELEMETRY_COOLDOWN", 90)) @lru_cache(maxsize=128, typed=False) def get_config() -> Config: diff --git a/smartsim/_core/control/controller.py b/smartsim/_core/control/controller.py index 08c9f2bd8..62c5a155e 100644 --- 
a/smartsim/_core/control/controller.py +++ b/smartsim/_core/control/controller.py @@ -27,34 +27,39 @@ from __future__ import annotations import os.path as osp -from os import environ +import pathlib import pickle import signal +import subprocess +import sys import threading import time import typing as t +from os import environ from smartredis import Client, ConfigOptions from ..._core.launcher.step import Step +from ..._core.utils.helpers import unpack_colo_db_identifier, unpack_db_identifier from ..._core.utils.redis import db_is_active, set_ml_model, set_script, shutdown_db -from ..._core.utils.helpers import ( - unpack_db_identifier, - unpack_colo_db_identifier, -) from ...database import Orchestrator -from ...entity import Ensemble, EntityList, EntitySequence, Model, SmartSimEntity +from ...entity import ( + Ensemble, + EntityList, + EntitySequence, + Model, + SmartSimEntity, +) from ...error import ( LauncherError, SmartSimError, + SSDBIDConflictError, SSInternalError, SSUnsupportedError, - SSDBIDConflictError, ) from ...log import get_logger -from ...settings.base import BatchSettings +from ...servertype import CLUSTERED, STANDALONE from ...status import STATUS_CANCELLED, STATUS_RUNNING, TERMINAL_STATUSES -from ...servertype import STANDALONE, CLUSTERED from ..config import CONFIG from ..launcher import ( CobaltLauncher, @@ -64,10 +69,14 @@ SlurmLauncher, ) from ..launcher.launcher import Launcher -from ..utils import check_cluster_status, create_cluster +from ..utils import check_cluster_status, create_cluster, serialize from .job import Job from .jobmanager import JobManager -from .manifest import Manifest +from .manifest import LaunchedManifest, LaunchedManifestBuilder, Manifest + +if t.TYPE_CHECKING: + from ..utils.serialize import TStepLaunchMetaData + logger = get_logger(__name__) @@ -89,9 +98,15 @@ def __init__(self, launcher: str = "local") -> None: """ self._jobs = JobManager(JM_LOCK) self.init_launcher(launcher) + self._telemetry_monitor: t.Optional[subprocess.Popen[bytes]] = None def start( - self, manifest: Manifest, block: bool = True, kill_on_interrupt: bool = True + self, + exp_name: str, + exp_path: str, + manifest: Manifest, + block: bool = True, + kill_on_interrupt: bool = True, ) -> None: """Start the passed SmartSim entities @@ -104,12 +119,20 @@ def start( self._jobs.kill_on_interrupt = kill_on_interrupt # register custom signal handler for ^C (SIGINT) signal.signal(signal.SIGINT, self._jobs.signal_interrupt) - self._launch(manifest) + launched = self._launch(exp_name, exp_path, manifest) # start the job manager thread if not already started if not self._jobs.actively_monitoring: self._jobs.start() + serialize.save_launch_manifest( + launched.map(_look_up_launched_data(self._launcher)) + ) + + # launch a telemetry monitor to track job progress + if CONFIG.telemetry_enabled: + self._start_telemetry_monitor(exp_path) + # block until all non-database jobs are complete if block: # poll handles its own keyboard interrupt as @@ -312,16 +335,25 @@ def init_launcher(self, launcher: str) -> None: else: raise TypeError("Must provide a 'launcher' argument") - def _launch(self, manifest: Manifest) -> None: + def _launch( + self, exp_name: str, exp_path: str, manifest: Manifest + ) -> LaunchedManifest[t.Tuple[str, Step]]: """Main launching function of the controller Orchestrators are always launched first so that the address of the database can be given to following entities + :param exp_name: The name of the launching experiment + :type exp_name: str + :param exp_path: path 
to location of ``Experiment`` directory if generated + :type exp_path: str :param manifest: Manifest of deployables to launch :type manifest: Manifest """ + manifest_builder = LaunchedManifestBuilder[t.Tuple[str, Step]]( + exp_name=exp_name, exp_path=exp_path, launcher_name=str(self._launcher) + ) # Loop over deployables to launch and launch multiple orchestrators for orchestrator in manifest.dbs: for key in self._jobs.get_db_host_addresses(): @@ -339,7 +371,7 @@ def _launch(self, manifest: Manifest) -> None: raise SmartSimError( "Local launcher does not support multi-host orchestrators" ) - self._launch_orchestrator(orchestrator) + self._launch_orchestrator(orchestrator, manifest_builder) if self.orchestrator_active: self._set_dbobjects(manifest) @@ -348,33 +380,51 @@ def _launch(self, manifest: Manifest) -> None: steps: t.List[ t.Tuple[Step, t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]]] ] = [] - all_entity_lists = manifest.ensembles - for elist in all_entity_lists: + for elist in manifest.ensembles: + ens_telem_dir = manifest_builder.run_telemetry_subdirectory / "ensemble" if elist.batch: - batch_step = self._create_batch_job_step(elist) + batch_step, substeps = self._create_batch_job_step(elist, ens_telem_dir) + manifest_builder.add_ensemble( + elist, [(batch_step.name, step) for step in substeps] + ) steps.append((batch_step, elist)) else: - job_steps = [(self._create_job_step(e), e) for e in elist.entities] + # if ensemble is to be run as separate job steps, aka not in a batch + job_steps = [ + (self._create_job_step(e, ens_telem_dir / elist.name), e) + for e in elist.entities + ] + manifest_builder.add_ensemble( + elist, [(step.name, step) for step, _ in job_steps] + ) steps.extend(job_steps) # models themselves cannot be batch steps. 
If batch settings are
        # attached, wrap them in an anonymous batch job step
        for model in manifest.models:
+            model_telem_dir = manifest_builder.run_telemetry_subdirectory / "model"
             if model.batch_settings:
-                anon_entity_list = _AnonymousBatchJob(
-                    model.name, model.path, model.batch_settings
+                anon_entity_list = _AnonymousBatchJob(model)
+                batch_step, _ = self._create_batch_job_step(
+                    anon_entity_list, model_telem_dir
                 )
-                anon_entity_list.entities.append(model)
-                batch_step = self._create_batch_job_step(anon_entity_list)
+                manifest_builder.add_model(model, (batch_step.name, batch_step))
                 steps.append((batch_step, model))
             else:
-                job_step = self._create_job_step(model)
+                job_step = self._create_job_step(model, model_telem_dir)
+                manifest_builder.add_model(model, (job_step.name, job_step))
                 steps.append((job_step, model))
 
         # launch steps
         for step, entity in steps:
             self._launch_step(step, entity)
 
-    def _launch_orchestrator(self, orchestrator: Orchestrator) -> None:
+        return manifest_builder.finalize()
+
+    def _launch_orchestrator(
+        self,
+        orchestrator: Orchestrator,
+        manifest_builder: LaunchedManifestBuilder[t.Tuple[str, Step]],
+    ) -> None:
         """Launch an Orchestrator instance
 
         This function will launch the Orchestrator instance and
@@ -383,16 +433,32 @@ def _launch_orchestrator(self, orchestrator: Orchestrator) -> None:
         :param orchestrator: orchestrator to launch
         :type orchestrator: Orchestrator
+        :param manifest_builder: A `LaunchedManifestBuilder` to record the
+                                 names and `Step`s of the launched orchestrator
+        :type manifest_builder: LaunchedManifestBuilder[tuple[str, Step]]
         """
         orchestrator.remove_stale_files()
+        orc_telem_dir = manifest_builder.run_telemetry_subdirectory / "database"
+
         # if the orchestrator was launched as a batch workload
         if orchestrator.batch:
-            orc_batch_step = self._create_batch_job_step(orchestrator)
+            orc_batch_step, substeps = self._create_batch_job_step(
+                orchestrator, orc_telem_dir
+            )
+            manifest_builder.add_database(
+                orchestrator, [(orc_batch_step.name, step) for step in substeps]
+            )
             self._launch_step(orc_batch_step, orchestrator)
 
         # if orchestrator was run on existing allocation, locally, or in allocation
         else:
-            db_steps = [(self._create_job_step(db), db) for db in orchestrator.entities]
+            db_steps = [
+                (self._create_job_step(db, orc_telem_dir / orchestrator.name), db)
+                for db in orchestrator.entities
+            ]
+            manifest_builder.add_database(
+                orchestrator, [(step.name, step) for step, _ in db_steps]
+            )
             for db_step in db_steps:
                 self._launch_step(*db_step)
 
@@ -462,35 +528,52 @@ def _launch_step(
         self._jobs.add_job(job_step.name, job_id, entity, is_task)
 
     def _create_batch_job_step(
-        self, entity_list: t.Union[Orchestrator, Ensemble, _AnonymousBatchJob]
-    ) -> Step:
+        self,
+        entity_list: t.Union[Orchestrator, Ensemble, _AnonymousBatchJob],
+        telemetry_dir: pathlib.Path,
+    ) -> t.Tuple[Step, t.List[Step]]:
         """Use launcher to create batch job step
 
         :param entity_list: EntityList to launch as batch
         :type entity_list: EntityList
+        :param telemetry_dir: Path to a directory in which the batch job step
+                              may write telemetry events
+        :type telemetry_dir: pathlib.Path
+        :return: batch job step instance and a list of run steps to be
+                 executed within the batch job
+        :rtype: tuple[Step, list[Step]]
         """
         if not entity_list.batch_settings:
             raise ValueError(
                 "EntityList must have batch settings to be launched as batch"
             )
 
+        telemetry_dir = telemetry_dir / entity_list.name
         batch_step = self._launcher.create_step(
             entity_list.name, entity_list.path, entity_list.batch_settings
         )
+        batch_step.meta["entity_type"] = str(type(entity_list).__name__).lower()
+        batch_step.meta["status_dir"] = str(telemetry_dir / entity_list.name)
+
+        substeps = []
         for entity in entity_list.entities:
             # tells step creation not to look for an allocation
             entity.run_settings.in_batch = True
-            step = self._create_job_step(entity)
+            step = self._create_job_step(entity, telemetry_dir)
+            substeps.append(step)
             batch_step.add_to_batch(step)
-        return batch_step
+        return batch_step, substeps
 
-    def _create_job_step(self, entity: SmartSimEntity) -> Step:
+    def _create_job_step(
+        self, entity: SmartSimEntity, telemetry_dir: pathlib.Path
+    ) -> Step:
         """Create job steps for all entities with the launcher
 
         :param entity: an entity to create a step for
         :type entity: SmartSimEntity
+        :param telemetry_dir: Path to a directory in which the job step
+                              may write telemetry events
+        :type telemetry_dir: pathlib.Path
         :return: the job step
         :rtype: Step
         """
@@ -499,6 +582,10 @@ def _create_job_step(self, entity: SmartSimEntity) -> Step:
             self._prep_entity_client_env(entity)
 
         step = self._launcher.create_step(entity.name, entity.path, entity.run_settings)
+
+        step.meta["entity_type"] = str(type(entity).__name__).lower()
+        step.meta["status_dir"] = str(telemetry_dir / entity.name)
+
         return step
 
     def _prep_entity_client_env(self, entity: Model) -> None:
@@ -739,13 +826,74 @@ def _set_dbobjects(self, manifest: Manifest) -> None:
                     if db_script not in ensemble.db_scripts:
                         set_script(db_script, client)
 
+    def _start_telemetry_monitor(self, exp_dir: str) -> None:
+        """Spawns a telemetry monitor process to keep track of the lifetimes
+        of the processes launched through this controller.
+
+        :param exp_dir: An experiment directory
+        :type exp_dir: str
+        """
+        logger.debug("Starting telemetry monitor process")
+        if (
+            self._telemetry_monitor is None
+            or self._telemetry_monitor.returncode is not None
+        ):
+            cmd = [
+                sys.executable,
+                "-m",
+                "smartsim._core.entrypoints.telemetrymonitor",
+                "-exp_dir",
+                exp_dir,
+                "-frequency",
+                str(CONFIG.telemetry_frequency),
+                "-cooldown",
+                str(CONFIG.telemetry_cooldown),
+            ]
+            # pylint: disable-next=consider-using-with
+            self._telemetry_monitor = subprocess.Popen(
+                cmd,
+                stderr=sys.stderr,
+                stdout=sys.stdout,
+                cwd=str(pathlib.Path(__file__).parent.parent.parent),
+                shell=False,
+            )
+
 
 class _AnonymousBatchJob(EntityList[Model]):
-    def __init__(
-        self, name: str, path: str, batch_settings: BatchSettings, **kwargs: t.Any
-    ) -> None:
-        super().__init__(name, path)
-        self.batch_settings = batch_settings
+    @staticmethod
+    def _validate(model: Model) -> None:
+        if model.batch_settings is None:
+            msg = "Unable to create _AnonymousBatchJob without batch_settings"
+            raise SmartSimError(msg)
+
+    def __init__(self, model: Model) -> None:
+        self._validate(model)
+        super().__init__(model.name, model.path)
+        self.entities = [model]
+        self.batch_settings = model.batch_settings
 
     def _initialize_entities(self, **kwargs: t.Any) -> None:
         ...
+ + +def _look_up_launched_data( + launcher: Launcher, +) -> t.Callable[[t.Tuple[str, Step]], "TStepLaunchMetaData"]: + def _unpack_launched_data(data: t.Tuple[str, Step]) -> "TStepLaunchMetaData": + # NOTE: we cannot assume that the name of the launched step + # ``launched_step_name`` is equal to the name of the step referring to + # the entity ``step.name`` as is the case when an entity list is + # launched as a batch job + launched_step_name, step = data + launched_step_map = launcher.step_mapping[launched_step_name] + out_file, err_file = step.get_output_files() + return ( + launched_step_map.step_id, + launched_step_map.task_id, + launched_step_map.managed, + out_file, + err_file, + pathlib.Path(step.meta.get("status_dir", step.cwd)), + ) + + return _unpack_launched_data diff --git a/smartsim/_core/control/job.py b/smartsim/_core/control/job.py index 2842c3c14..3a54c0d00 100644 --- a/smartsim/_core/control/job.py +++ b/smartsim/_core/control/job.py @@ -27,10 +27,44 @@ import time import typing as t +from dataclasses import dataclass from ...entity import SmartSimEntity, EntitySequence from ...status import STATUS_NEW +@dataclass(frozen=True) +class _JobKey(): + step_id: str + task_id: str + + +class JobEntity: + """API required for a job processed in the JobManager with support for + telemetry monitoring + """ + + def __init__(self) -> None: + self.name: str = "" + self.path: str = "" + self.step_id: str = "" + self.task_id: str = "" + self.type: str = "" + self.timestamp: int = 0 + self.status_dir: str = "" + + @property + def is_db(self) -> bool: + return self.type in ["orchestrator", "dbnode"] + + @property + def is_managed(self) -> bool: + return bool(self.step_id) + + @property + def key(self) -> _JobKey: + return _JobKey(self.step_id, self.task_id) + + class Job: """Keep track of various information for the controller. In doing so, continuously add various fields of information @@ -42,7 +76,7 @@ def __init__( self, job_name: str, job_id: t.Optional[str], - entity: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]], + entity: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity], JobEntity], launcher: str, is_task: bool, ) -> None: @@ -53,7 +87,7 @@ def __init__( :param job_id: The id associated with the job :type job_id: str :param entity: The SmartSim entity(list) associated with the job - :type entity: SmartSimEntity | EntitySequence + :type entity: SmartSimEntity | EntitySequence | JobEntity :param launcher: Launcher job was started with :type launcher: str :param is_task: process monitored by TaskManager (True) or the WLM (True) diff --git a/smartsim/_core/control/jobmanager.py b/smartsim/_core/control/jobmanager.py index 022a2c85c..90eedd229 100644 --- a/smartsim/_core/control/jobmanager.py +++ b/smartsim/_core/control/jobmanager.py @@ -24,9 +24,11 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ import itertools import time import typing as t +from collections import ChainMap from threading import Thread, RLock from types import FrameType @@ -38,7 +40,8 @@ from ..config import CONFIG from ..launcher import LocalLauncher, Launcher from ..utils.network import get_ip_from_host -from .job import Job +from .job import Job, JobEntity + logger = get_logger(__name__) @@ -145,13 +148,8 @@ def __getitem__(self, entity_name: str) -> Job: :rtype: Job """ with self._lock: - if entity_name in self.db_jobs: - return self.db_jobs[entity_name] - if entity_name in self.jobs: - return self.jobs[entity_name] - if entity_name in self.completed: - return self.completed[entity_name] - raise KeyError + entities = ChainMap(self.db_jobs, self.jobs, self.completed) + return entities[entity_name] def __call__(self) -> t.Dict[str, Job]: """Returns dictionary all jobs for () operator @@ -166,7 +164,7 @@ def add_job( self, job_name: str, job_id: t.Optional[str], - entity: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]], + entity: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity], JobEntity], is_task: bool = True, ) -> None: """Add a job to the job manager which holds specific jobs by type. @@ -185,7 +183,8 @@ def add_job( job = Job(job_name, job_id, entity, launcher, is_task) if isinstance(entity, (DBNode, Orchestrator)): self.db_jobs[entity.name] = job - + elif isinstance(entity, JobEntity) and entity.is_db: + self.db_jobs[entity.name] = job else: self.jobs[entity.name] = job @@ -310,7 +309,8 @@ def get_db_host_addresses(self) -> t.Dict[str, t.List[str]]: for corresponding database identifiers :return: dictionary of host ip addresses - :rtype: Dict[str, list]""" + :rtype: Dict[str, list] + """ address_dict = {} for db_job in self.db_jobs.values(): diff --git a/smartsim/_core/control/manifest.py b/smartsim/_core/control/manifest.py index 65aa8a898..ec1d79165 100644 --- a/smartsim/_core/control/manifest.py +++ b/smartsim/_core/control/manifest.py @@ -24,12 +24,22 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import pathlib import typing as t +from dataclasses import dataclass, field from ...database import Orchestrator -from ...entity import EntitySequence, SmartSimEntity, Model, Ensemble +from ...entity import DBNode, Ensemble, EntitySequence, Model, SmartSimEntity from ...error import SmartSimError -from ..utils.helpers import fmt_dict +from ..utils import helpers as _helpers +from ..utils import serialize as _serialize + +_T = t.TypeVar("_T") +_U = t.TypeVar("_U") +_AtomicLaunchableT = t.TypeVar("_AtomicLaunchableT", Model, DBNode) + +if t.TYPE_CHECKING: + import os class Manifest: @@ -92,7 +102,6 @@ def all_entity_lists(self) -> t.List[EntitySequence[SmartSimEntity]]: """ _all_entity_lists: t.List[EntitySequence[SmartSimEntity]] = list(self.ensembles) - for db in self.dbs: _all_entity_lists.append(db) @@ -150,7 +159,7 @@ def __str__(self) -> str: output += f"{model.batch_settings}\n" output += f"{model.run_settings}\n" if model.params: - output += f"Parameters: \n{fmt_dict(model.params)}\n" + output += f"Parameters: \n{_helpers.fmt_dict(model.params)}\n" output += "\n" for adb in self.dbs: @@ -214,3 +223,131 @@ def has_db_scripts( # `has_db_objects` should be False here return has_db_objects + + + +class _LaunchedManifestMetadata(t.NamedTuple): + run_id: str + exp_name: str + exp_path: str + launcher_name: str + + @property + def exp_telemetry_subdirectory(self) -> pathlib.Path: + return _format_exp_telemetry_path(self.exp_path) + + @property + def run_telemetry_subdirectory(self) -> pathlib.Path: + return _format_run_telemetry_path(self.exp_path, self.exp_name, self.run_id) + + @property + def manifest_file_path(self) -> pathlib.Path: + return self.exp_telemetry_subdirectory / _serialize.MANIFEST_FILENAME + + +@dataclass(frozen=True) +class LaunchedManifest(t.Generic[_T]): + """Immutable manifest mapping launched entities or collections of launched + entities to other pieces of external data. This is commonly used to map a + launch-able entity to its constructed ``Step`` instance without assuming + that ``step.name == job.name`` or querying the ``JobManager`` which itself + can be ephemeral. + """ + + metadata: _LaunchedManifestMetadata + models: t.Tuple[t.Tuple[Model, _T], ...] + ensembles: t.Tuple[t.Tuple[Ensemble, t.Tuple[t.Tuple[Model, _T], ...]], ...] + databases: t.Tuple[t.Tuple[Orchestrator, t.Tuple[t.Tuple[DBNode, _T], ...]], ...] + + def map(self, func: t.Callable[[_T], _U]) -> "LaunchedManifest[_U]": + def _map_entity_data( + fn: t.Callable[[_T], _U], + entity_list: t.Sequence[t.Tuple[_AtomicLaunchableT, _T]], + ) -> t.Tuple[t.Tuple[_AtomicLaunchableT, _U], ...]: + return tuple((entity, fn(data)) for entity, data in entity_list) + + return LaunchedManifest( + metadata=self.metadata, + models=_map_entity_data(func, self.models), + ensembles=tuple( + (ens, _map_entity_data(func, model_data)) + for ens, model_data in self.ensembles + ), + databases=tuple( + (db_, _map_entity_data(func, node_data)) + for db_, node_data in self.databases + ), + ) + + +@dataclass(frozen=True) +class LaunchedManifestBuilder(t.Generic[_T]): + """A class comprised of mutable collections of SmartSim entities that is + used to build a ``LaunchedManifest`` while going through the launching + process. 
+ """ + + exp_name: str + exp_path: str + launcher_name: str + run_id: str = field(default_factory=_helpers.create_short_id_str) + + _models: t.List[t.Tuple[Model, _T]] = field(default_factory=list, init=False) + _ensembles: t.List[t.Tuple[Ensemble, t.Tuple[t.Tuple[Model, _T], ...]]] = field( + default_factory=list, init=False + ) + _databases: t.List[ + t.Tuple[Orchestrator, t.Tuple[t.Tuple[DBNode, _T], ...]] + ] = field(default_factory=list, init=False) + + @property + def exp_telemetry_subdirectory(self) -> pathlib.Path: + return _format_exp_telemetry_path(self.exp_path) + + @property + def run_telemetry_subdirectory(self) -> pathlib.Path: + return _format_run_telemetry_path(self.exp_path, self.exp_name, self.run_id) + + def add_model(self, model: Model, data: _T) -> None: + self._models.append((model, data)) + + def add_ensemble(self, ens: Ensemble, data: t.Sequence[_T]) -> None: + self._ensembles.append((ens, self._entities_to_data(ens.entities, data))) + + def add_database(self, db_: Orchestrator, data: t.Sequence[_T]) -> None: + self._databases.append((db_, self._entities_to_data(db_.entities, data))) + + @staticmethod + def _entities_to_data( + entities: t.Sequence[_AtomicLaunchableT], data: t.Sequence[_T] + ) -> t.Tuple[t.Tuple[_AtomicLaunchableT, _T], ...]: + if not entities: + raise ValueError("Cannot map data to an empty entity sequence") + if len(entities) != len(data): + raise ValueError( + f"Cannot map data sequence of length {len(data)} to entity " + f"sequence of length {len(entities)}" + ) + return tuple(zip(entities, data)) + + def finalize(self) -> LaunchedManifest[_T]: + return LaunchedManifest( + metadata=_LaunchedManifestMetadata( + self.run_id, self.exp_name, self.exp_path, self.launcher_name + ), + models=tuple(self._models), + ensembles=tuple(self._ensembles), + databases=tuple(self._databases), + ) + + +def _format_exp_telemetry_path( + exp_path: t.Union[str, "os.PathLike[str]"] +) -> pathlib.Path: + return pathlib.Path(exp_path, _serialize.TELMON_SUBDIR) + + +def _format_run_telemetry_path( + exp_path: t.Union[str, "os.PathLike[str]"], exp_name: str, run_id: str +) -> pathlib.Path: + return _format_exp_telemetry_path(exp_path) / f"{exp_name}/{run_id}" diff --git a/smartsim/_core/entrypoints/indirect.py b/smartsim/_core/entrypoints/indirect.py new file mode 100644 index 000000000..18d27601f --- /dev/null +++ b/smartsim/_core/entrypoints/indirect.py @@ -0,0 +1,242 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2023 Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import argparse
+import logging
+import os
+import pathlib
+import signal
+import sys
+import typing as t
+from types import FrameType
+
+import coloredlogs
+import psutil
+
+import smartsim.log
+from smartsim._core.entrypoints.telemetrymonitor import track_event
+from smartsim._core.utils.helpers import decode_cmd, get_ts
+
+STEP_PID: t.Optional[int] = None
+logger = smartsim.log.get_logger(__name__)
+
+# kill is not catchable
+SIGNALS = [signal.SIGINT, signal.SIGTERM, signal.SIGQUIT, signal.SIGABRT]
+
+
+def main(
+    cmd: str,
+    etype: str,
+    cwd: str,
+    status_dir: str,
+) -> int:
+    """The main function of the entrypoint. This function takes an encoded step
+    command and runs it in a subprocess. The entrypoint then monitors the
+    subprocess and writes status events, such as when the subprocess starts
+    or stops, to a status directory.
+    """
+    global STEP_PID  # pylint: disable=global-statement
+    proxy_pid = os.getpid()
+
+    status_path = pathlib.Path(status_dir)
+    if not status_path.exists():
+        status_path.mkdir(parents=True, exist_ok=True)
+
+    if not cmd.strip():
+        raise ValueError("Invalid cmd supplied")
+
+    cleaned_cmd = decode_cmd(cmd)
+    ret_code: int = 1
+    logger.debug("Indirect step starting")
+
+    start_detail = f"Proxy process {proxy_pid}"
+    start_rc: t.Optional[int] = None
+
+    try:
+        process = psutil.Popen(
+            cleaned_cmd,
+            cwd=cwd,
+            stdout=sys.stdout,
+            stderr=sys.stderr,
+        )
+        STEP_PID = process.pid
+        logger.info(f"Indirect proxy {proxy_pid} child process {STEP_PID} started")
+        start_detail += f" started child process {STEP_PID}"
+
+    except Exception as ex:
+        start_detail += f" failed to start child process. {ex}"
+        start_rc = 1
+        logger.error("Failed to create process", exc_info=True)
+        cleanup()
+        return 1
+    finally:
+        track_event(
+            get_ts(),
+            proxy_pid,
+            "",  # step_id for unmanaged task is always empty
+            etype,
+            "start",
+            status_path,
+            logger,
+            detail=start_detail,
+            return_code=start_rc,
+        )
+
+    logger.info(f"Waiting for child process {STEP_PID} to complete")
+    ret_code = process.wait()
+
+    logger.info(
+        f"Indirect proxy {proxy_pid} child process {STEP_PID} complete."
+        f" return code: {ret_code}"
+    )
+    msg = f"Process {STEP_PID} finished with return code: {ret_code}"
+    track_event(
+        get_ts(),
+        proxy_pid,
+        "",  # step_id for unmanaged task is always empty
+        etype,
+        "stop",
+        status_path,
+        logger,
+        detail=msg,
+        return_code=ret_code,
+    )
+    cleanup()
+
+    return ret_code
+
+
+def cleanup() -> None:
+    """Perform cleanup required for clean termination"""
+    logger.info("Performing cleanup")
+    global STEP_PID  # pylint: disable=global-statement
+    if STEP_PID is None:
+        return
+
+    try:
+        # attempt to stop the subprocess performing step-execution
+        if psutil.pid_exists(STEP_PID):
+            process = psutil.Process(STEP_PID)
+            process.terminate()
+    except psutil.NoSuchProcess:
+        # swallow exception to avoid overwriting outputs from cmd
+        ...
+
+    except OSError as ex:
+        logger.warning(f"Failed to clean up step executor gracefully: {ex}")
+    finally:
+        STEP_PID = None
+
+
+def handle_signal(signo: int, _frame: t.Optional[FrameType]) -> None:
+    """Helper function to ensure clean process termination"""
+    logger.info(f"handling signal {signo}")
+    if not signo:
+        logger.warning("Received signal with no signo")
+
+    cleanup()
+
+
+def register_signal_handlers() -> None:
+    """Register a signal handling function for all termination events"""
+    for sig in SIGNALS:
+        signal.signal(sig, handle_signal)
+
+
+def get_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        prefix_chars="+", description="SmartSim Step Executor"
+    )
+    parser.add_argument(
+        "+name", type=str, help="Name of the step being executed", required=True
+    )
+    parser.add_argument(
+        "+command", type=str, help="The command to execute", required=True
+    )
+    parser.add_argument(
+        "+entity_type",
+        type=str,
+        help="The type of entity related to the step",
+        required=True,
+    )
+    parser.add_argument(
+        "+working_dir",
+        type=str,
+        help="The working directory of the executable",
+        required=True,
+    )
+    parser.add_argument(
+        "+telemetry_dir",
+        type=str,
+        help="Directory for telemetry output",
+        required=True,
+    )
+    return parser
+
+
+if __name__ == "__main__":
+    arg_parser = get_parser()
+    os.environ["PYTHONUNBUFFERED"] = "1"
+    parsed_args = arg_parser.parse_args()
+
+    # Set up a local private logger for when this module is run as an entry point
+    level = logger.getEffectiveLevel()
+    logger = logging.getLogger(f"{__name__}.{parsed_args.name}")
+    logger.propagate = False
+    logger.setLevel(level)
+
+    fh = logging.FileHandler(f"{parsed_args.name}.indirect.log")
+    coloredlogs.HostNameFilter.install(fh)
+    fh.setFormatter(
+        logging.Formatter(
+            smartsim.log.DEFAULT_LOG_FORMAT,
+            datefmt=smartsim.log.DEFAULT_DATE_FORMAT,
+        )
+    )
+    logger.addHandler(fh)
+
+    try:
+        logger.debug("Starting indirect step execution")
+
+        # make sure to register the cleanup before we start the process
+        # so our signal handler will be able to stop the child process.
+        register_signal_handlers()
+
+        rc = main(
+            cmd=parsed_args.command,
+            etype=parsed_args.entity_type,
+            cwd=parsed_args.working_dir,
+            status_dir=parsed_args.telemetry_dir,
+        )
+        sys.exit(rc)
+
+    # any unexpected failure during step execution is logged and the
+    # proxy exits with a non-zero return code
+    except Exception as e:
+        logger.exception(f"An unexpected error caused step execution to fail: {e}")
+        sys.exit(1)
diff --git a/smartsim/_core/entrypoints/telemetrymonitor.py b/smartsim/_core/entrypoints/telemetrymonitor.py
new file mode 100644
index 000000000..cb80e6918
--- /dev/null
+++ b/smartsim/_core/entrypoints/telemetrymonitor.py
@@ -0,0 +1,691 @@
+# BSD 2-Clause License
+#
+# Copyright (c) 2021-2023 Hewlett Packard Enterprise
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import argparse +import json +import logging +import os +import pathlib +import signal +import sys +import threading +import time +import typing as t + +from dataclasses import dataclass, field +from types import FrameType + +from watchdog.observers import Observer +from watchdog.observers.api import BaseObserver +from watchdog.events import PatternMatchingEventHandler, LoggingEventHandler +from watchdog.events import FileCreatedEvent, FileModifiedEvent + +from smartsim._core.config import CONFIG +from smartsim._core.control.job import JobEntity, _JobKey +from smartsim._core.control.jobmanager import JobManager +from smartsim._core.launcher.stepInfo import StepInfo + + +from smartsim._core.launcher.cobalt.cobaltLauncher import CobaltLauncher +from smartsim._core.launcher.launcher import Launcher +from smartsim._core.launcher.local.local import LocalLauncher +from smartsim._core.launcher.lsf.lsfLauncher import LSFLauncher +from smartsim._core.launcher.pbs.pbsLauncher import PBSLauncher +from smartsim._core.launcher.slurm.slurmLauncher import SlurmLauncher +from smartsim._core.utils.helpers import get_ts +from smartsim._core.utils.serialize import TELMON_SUBDIR, MANIFEST_FILENAME + +from smartsim.error.errors import SmartSimError +from smartsim.status import STATUS_COMPLETED, TERMINAL_STATUSES + + +"""Telemetry Monitor entrypoint""" + +# kill is not catchable +SIGNALS = [signal.SIGINT, signal.SIGQUIT, signal.SIGTERM, signal.SIGABRT] +_EventClass = t.Literal["start", "stop", "timestep"] +_MAX_MANIFEST_LOAD_ATTEMPTS: t.Final[int] = 6 + + +@dataclass +class Run: + """Model containing entities of an individual start call for an experiment""" + + timestamp: int + models: t.List[JobEntity] + orchestrators: t.List[JobEntity] + ensembles: t.List[JobEntity] + + def flatten( + self, filter_fn: t.Optional[t.Callable[[JobEntity], bool]] = None + ) -> t.List[JobEntity]: + """Flatten runs into a list of SmartSimEntity run events""" + entities = self.models + self.orchestrators + self.ensembles + if filter_fn: + entities = [entity for entity in entities if filter_fn(entity)] + return entities + + +@dataclass +class RuntimeManifest: + """The runtime manifest holds meta information about the experiment entities created + at runtime to satisfy the experiment requirements. 
+ """ + + name: str + path: pathlib.Path + launcher: str + runs: t.List[Run] = field(default_factory=list) + + +def _hydrate_persistable( + persistable_entity: t.Dict[str, t.Any], + entity_type: str, + exp_dir: str, +) -> JobEntity: + """Populate JobEntity instance with supplied metdata and instance details""" + entity = JobEntity() + + metadata = persistable_entity["telemetry_metadata"] + status_dir = pathlib.Path(metadata.get("status_dir")) + + entity.type = entity_type + entity.name = persistable_entity["name"] + entity.step_id = str(metadata.get("step_id") or "") + entity.task_id = str(metadata.get("task_id") or "") + entity.timestamp = int(persistable_entity.get("timestamp", "0")) + entity.path = str(exp_dir) + entity.status_dir = str(status_dir) + + return entity + + +def hydrate_persistable( + entity_type: str, + persistable_entity: t.Dict[str, t.Any], + exp_dir: pathlib.Path, +) -> t.List[JobEntity]: + """Map entity data persisted in a manifest file to an object""" + entities = [] + + # an entity w/parent key creates persistables for entities it contains + parent_keys = {"shards", "models"} + parent_keys = parent_keys.intersection(persistable_entity.keys()) + if parent_keys: + container = "shards" if "shards" in parent_keys else "models" + child_type = "orchestrator" if container == "shards" else "model" + for child_entity in persistable_entity[container]: + entity = _hydrate_persistable(child_entity, child_type, str(exp_dir)) + entities.append(entity) + + return entities + + entity = _hydrate_persistable(persistable_entity, entity_type, str(exp_dir)) + entities.append(entity) + return entities + + +def hydrate_persistables( + entity_type: str, + run: t.Dict[str, t.Any], + exp_dir: pathlib.Path, +) -> t.Dict[str, t.List[JobEntity]]: + """Map a collection of entity data persisted in a manifest file to an object""" + persisted: t.Dict[str, t.List[JobEntity]] = { + "model": [], + "orchestrator": [], + } + for item in run[entity_type]: + entities = hydrate_persistable(entity_type, item, exp_dir) + for new_entity in entities: + persisted[new_entity.type].append(new_entity) + + return persisted + + +def hydrate_runs( + persisted_runs: t.List[t.Dict[str, t.Any]], exp_dir: pathlib.Path +) -> t.List[Run]: + """Map run data persisted in a manifest file to an object""" + the_runs: t.List[Run] = [] + for run_instance in persisted_runs: + run_entities: t.Dict[str, t.List[JobEntity]] = { + "model": [], + "orchestrator": [], + "ensemble": [], + } + + for key in run_entities: + _entities = hydrate_persistables(key, run_instance, exp_dir) + for entity_type, new_entities in _entities.items(): + if new_entities: + run_entities[entity_type].extend(new_entities) + + run = Run( + run_instance["timestamp"], + run_entities["model"], + run_entities["orchestrator"], + run_entities["ensemble"], + ) + the_runs.append(run) + + return the_runs + + +def load_manifest(file_path: str) -> t.Optional[RuntimeManifest]: + """Load a persisted manifest and return the content""" + manifest_dict: t.Optional[t.Dict[str, t.Any]] = None + try_count = 1 + + while manifest_dict is None and try_count < _MAX_MANIFEST_LOAD_ATTEMPTS: + source = pathlib.Path(file_path) + source = source.resolve() + + try: + if text := source.read_text(encoding="utf-8").strip(): + manifest_dict = json.loads(text) + except json.JSONDecodeError as ex: + print(f"Error loading manifest: {ex}") + # hack/fix: handle issues reading file before it is fully written + time.sleep(0.5 * try_count) + finally: + try_count += 1 + + if not manifest_dict: + return 
None + + exp = manifest_dict.get("experiment", None) + if not exp: + raise ValueError("Manifest missing required experiment") + + runs = manifest_dict.get("runs", None) + if runs is None: + raise ValueError("Manifest missing required runs") + + exp_dir = pathlib.Path(exp["path"]) + runs = hydrate_runs(runs, exp_dir) + + manifest = RuntimeManifest( + name=exp["name"], + path=exp_dir, + launcher=exp["launcher"], + runs=runs, + ) + return manifest + + +def track_event( + timestamp: int, + task_id: t.Union[int, str], + step_id: str, + etype: str, + action: _EventClass, + status_dir: pathlib.Path, + logger: logging.Logger, + detail: str = "", + return_code: t.Optional[int] = None, +) -> None: + """Persist a tracking event for an entity""" + tgt_path = status_dir / f"{action}.json" + tgt_path.parent.mkdir(parents=True, exist_ok=True) + + try: + task_id = int(task_id) + except ValueError: + pass + + entity_dict = { + "timestamp": timestamp, + "job_id": task_id, + "step_id": step_id, + "type": etype, + "action": action, + } + + if detail is not None: + entity_dict["detail"] = detail + + if return_code is not None: + entity_dict["return_code"] = return_code + + try: + if not tgt_path.exists(): + # Don't overwrite existing tracking files + bytes_written = tgt_path.write_text(json.dumps(entity_dict, indent=2)) + if bytes_written < 1: + logger.warning("event tracking failed to write tracking file.") + except Exception: + logger.error("Unable to write tracking file.", exc_info=True) + + +def faux_return_code(step_info: StepInfo) -> t.Optional[int]: + """Create a faux return code for a task run by the WLM. Must not be + called with non-terminal statuses or results may be confusing + """ + if step_info.status not in TERMINAL_STATUSES: + return None + + if step_info.status == STATUS_COMPLETED: + return os.EX_OK + + return 1 + + +class ManifestEventHandler(PatternMatchingEventHandler): + """The ManifestEventHandler monitors an experiment for changes and updates + a telemetry datastore as needed. + + It contains event handlers that are triggered by changes to a runtime experiment + manifest. The runtime manifest differs from a standard manifest. A runtime manifest + may contain multiple experiment executions in a `runs` collection. + + It also contains a long-polling loop that checks experiment entities for updates + at each timestep. + """ + + def __init__( + self, + pattern: str, + logger: logging.Logger, + ignore_patterns: t.Any = None, + ignore_directories: bool = True, + case_sensitive: bool = False, + ) -> None: + super().__init__( + [pattern], ignore_patterns, ignore_directories, case_sensitive + ) # type: ignore + self._logger = logger + self._tracked_runs: t.Dict[int, Run] = {} + self._tracked_jobs: t.Dict[_JobKey, JobEntity] = {} + self._completed_jobs: t.Dict[_JobKey, JobEntity] = {} + self._launcher: t.Optional[Launcher] = None + self.job_manager: JobManager = JobManager(threading.RLock()) + self._launcher_map: t.Dict[str, t.Type[Launcher]] = { + "slurm": SlurmLauncher, + "pbs": PBSLauncher, + "cobalt": CobaltLauncher, + "lsf": LSFLauncher, + "local": LocalLauncher, + } + + def init_launcher(self, launcher: str) -> Launcher: + """Initialize the controller with a specific type of launcher. + SmartSim currently supports slurm, pbs(pro), cobalt, lsf, + and local launching + + :param launcher: which launcher to initialize + :type launcher: str + :raises SSUnsupportedError: if a string is passed that is not + a supported launcher + :raises TypeError: if no launcher argument is provided. 
+ """ + if not launcher: + raise TypeError("Must provide a 'launcher' argument") + + if launcher_type := self._launcher_map.get(launcher.lower(), None): + return launcher_type() + + raise ValueError("Launcher type not supported: " + launcher) + + def set_launcher(self, launcher_type: str) -> None: + """Set the launcher for the experiment""" + self._launcher = self.init_launcher(launcher_type) + self.job_manager.set_launcher(self._launcher) + self.job_manager.start() + + def process_manifest(self, manifest_path: str) -> None: + """Read the runtime manifest for the experiment and track new entities + + :param manifest_path: The full path to the manifest file + :type manifest_path: str + """ + try: + manifest = load_manifest(manifest_path) + if not manifest: + return + except json.JSONDecodeError: + self._logger.error(f"Malformed manifest encountered: {manifest_path}") + return + except ValueError: + self._logger.error("Manifest content error", exc_info=True) + return + + if self._launcher is None: + self.set_launcher(manifest.launcher) + + if not self._launcher: + raise SmartSimError(f"Unable to set launcher from {manifest_path}") + + runs = [run for run in manifest.runs if run.timestamp not in self._tracked_runs] + + exp_dir = pathlib.Path(manifest_path).parent.parent.parent + + for run in runs: + for entity in run.flatten( + filter_fn=lambda e: e.key not in self._tracked_jobs and e.is_managed + ): + entity.path = str(exp_dir) + + self._tracked_jobs[entity.key] = entity + track_event( + run.timestamp, + entity.task_id, + entity.step_id, + entity.type, + "start", + pathlib.Path(entity.status_dir), + self._logger, + ) + + if entity.is_managed: + self.job_manager.add_job( + entity.name, + entity.task_id, + entity, + False, + ) + self._launcher.step_mapping.add( + entity.name, entity.step_id, entity.task_id, True + ) + self._tracked_runs[run.timestamp] = run + + def on_modified(self, event: FileModifiedEvent) -> None: + """Event handler for when a file or directory is modified. + + :param event: Event representing file/directory modification. + :type event: FileModifiedEvent + """ + super().on_modified(event) # type: ignore + self._logger.info(f"processing manifest modified @ {event.src_path}") + self.process_manifest(event.src_path) + + def on_created(self, event: FileCreatedEvent) -> None: + """Event handler for when a file or directory is created. + + :param event: Event representing file/directory creation. + :type event: FileCreatedEvent + """ + super().on_created(event) # type: ignore + self._logger.info(f"processing manifest created @ {event.src_path}") + self.process_manifest(event.src_path) + + def _to_completed( + self, + timestamp: int, + entity: JobEntity, + step_info: StepInfo, + ) -> None: + """Move a monitored entity from the active to completed collection to + stop monitoring for updates during timesteps. 
+
+        :param timestamp: the current timestamp for event logging
+        :type timestamp: int
+        :param entity: the running SmartSim Job
+        :type entity: JobEntity
+        :param step_info: the StepInfo received when requesting a Job status update
+        :type step_info: StepInfo
+        """
+        inactive_entity = self._tracked_jobs.pop(entity.key)
+        if entity.key not in self._completed_jobs:
+            self._completed_jobs[entity.key] = inactive_entity
+
+        job = self.job_manager[entity.name]
+        self.job_manager.move_to_completed(job)
+
+        status_clause = f"status: {step_info.status}"
+        error_clause = f", error: {step_info.error}" if step_info.error else ""
+        detail = f"{status_clause}{error_clause}"
+
+        write_path = pathlib.Path(entity.status_dir)
+        if hasattr(job.entity, "status_dir"):
+            write_path = pathlib.Path(job.entity.status_dir)
+
+        track_event(
+            timestamp,
+            entity.task_id,
+            entity.step_id,
+            entity.type,
+            "stop",
+            write_path,
+            self._logger,
+            detail=detail,
+            return_code=faux_return_code(step_info),
+        )
+
+    def on_timestep(self, timestamp: int) -> None:
+        """Called at polling frequency to request status updates on
+        monitored entities
+
+        :param timestamp: the current timestamp for event logging
+        :type timestamp: int
+        """
+        entity_map = self._tracked_jobs
+
+        if not self._launcher:
+            return
+
+        # consider not using name to avoid collisions
+        names = {entity.name: entity for entity in entity_map.values()}
+
+        if names:
+            step_updates = self._launcher.get_step_update(list(names.keys()))
+
+            for step_name, step_info in step_updates:
+                if step_info and step_info.status in TERMINAL_STATUSES:
+                    completed_entity = names[step_name]
+                    self._to_completed(timestamp, completed_entity, step_info)
+
+
+def can_shutdown(action_handler: ManifestEventHandler, logger: logging.Logger) -> bool:
+    jobs = action_handler.job_manager.jobs
+    db_jobs = action_handler.job_manager.db_jobs
+
+    has_jobs = bool(jobs)
+    has_dbs = bool(db_jobs)
+    has_running_jobs = has_jobs or has_dbs
+
+    if has_jobs:
+        logger.debug(f"telemetry monitor is monitoring {len(jobs)} jobs")
+    if has_dbs:
+        logger.debug(f"telemetry monitor is monitoring {len(db_jobs)} dbs")
+
+    return not has_running_jobs
+
+
+def event_loop(
+    observer: BaseObserver,
+    action_handler: ManifestEventHandler,
+    frequency: t.Union[int, float],
+    logger: logging.Logger,
+    cooldown_duration: int,
+) -> None:
+    """Execute all attached timestep handlers every `frequency` seconds
+
+    :param observer: a preconfigured watchdog Observer to inject
+    :type observer: BaseObserver
+    :param action_handler: The manifest event processor instance
+    :type action_handler: ManifestEventHandler
+    :param frequency: frequency (in seconds) of update loop
+    :type frequency: t.Union[int, float]
+    :param logger: a preconfigured Logger instance
+    :type logger: logging.Logger
+    :param cooldown_duration: number of seconds the telemetry monitor should
+        poll for new jobs before attempting to shutdown
+    :type cooldown_duration: int
+    """
+    elapsed: int = 0
+    last_ts: int = get_ts()
+
+    while observer.is_alive():
+        timestamp = get_ts()
+        logger.debug(f"Telemetry timestep: {timestamp}")
+        action_handler.on_timestep(timestamp)
+
+        elapsed += timestamp - last_ts
+        last_ts = timestamp
+
+        if can_shutdown(action_handler, logger):
+            if elapsed >= cooldown_duration:
+                logger.info("beginning telemetry manager shutdown")
+                observer.stop()  # type: ignore
+        else:
+            # reset cooldown any time there are still jobs running
+            elapsed = 0
+
+        time.sleep(frequency)
+
+
+def main(
+    frequency: t.Union[int, float],
+    experiment_dir: pathlib.Path,
+    logger: logging.Logger,
+    observer: t.Optional[BaseObserver] = None,
+    cooldown_duration: t.Optional[int] = 0,
+) -> int:
+    """Set up the monitoring entities and start the timer-based loop that
+    will poll for telemetry data
+
+    :param frequency: frequency (in seconds) of update loop
+    :type frequency: t.Union[int, float]
+    :param experiment_dir: the experiment directory to monitor for changes
+    :type experiment_dir: pathlib.Path
+    :param logger: a preconfigured Logger instance
+    :type logger: logging.Logger
+    :param observer: (optional) a preconfigured Observer to inject
+    :type observer: t.Optional[BaseObserver]
+    :param cooldown_duration: number of seconds the telemetry monitor should
+        poll for new jobs before attempting to shutdown
+    :type cooldown_duration: t.Optional[int]
+    """
+    manifest_relpath = pathlib.Path(TELMON_SUBDIR) / MANIFEST_FILENAME
+    manifest_path = experiment_dir / manifest_relpath
+    monitor_pattern = str(manifest_relpath)
+
+    logger.info(
+        f"Executing telemetry monitor with frequency: {frequency}s"
+        f", on target directory: {experiment_dir}"
+        f" matching pattern: {monitor_pattern}"
+    )
+
+    cooldown_duration = cooldown_duration or CONFIG.telemetry_cooldown
+    log_handler = LoggingEventHandler(logger)  # type: ignore
+    action_handler = ManifestEventHandler(monitor_pattern, logger)
+
+    if observer is None:
+        observer = Observer()
+
+    try:
+        if manifest_path.exists():
+            # a manifest may not exist depending on startup timing
+            action_handler.process_manifest(str(manifest_path))
+
+        observer.schedule(log_handler, experiment_dir, recursive=True)  # type:ignore
+        observer.schedule(action_handler, experiment_dir, recursive=True)  # type:ignore
+        observer.start()  # type: ignore
+
+        event_loop(observer, action_handler, frequency, logger, cooldown_duration)
+        return os.EX_OK
+    except Exception as ex:
+        logger.error(ex)
+    finally:
+        if observer.is_alive():
+            observer.stop()  # type: ignore
+        observer.join()
+
+    return os.EX_SOFTWARE
+
+
+def handle_signal(signo: int, _frame: t.Optional[FrameType]) -> None:
+    """Helper function to ensure clean process termination"""
+    if not signo:
+        logger = logging.getLogger()
+        logger.warning("Received signal with no signo")
+
+
+def register_signal_handlers() -> None:
+    """Register a signal handling function for all termination events"""
+    for sig in SIGNALS:
+        signal.signal(sig, handle_signal)
+
+
+def get_parser() -> argparse.ArgumentParser:
+    """Instantiate a parser to process command line arguments"""
+    arg_parser = argparse.ArgumentParser(description="SmartSim Telemetry Monitor")
+    arg_parser.add_argument(
+        "-frequency",
+        type=int,
+        help="Frequency of telemetry updates (in seconds)",
+        required=True,
+    )
+    arg_parser.add_argument(
+        "-exp_dir",
+        type=str,
+        help="Experiment root directory",
+        required=True,
+    )
+    arg_parser.add_argument(
+        "-cooldown",
+        type=int,
+        help="Default lifetime of telemetry monitor (in seconds) before auto-shutdown",
+        default=CONFIG.telemetry_cooldown,
+    )
+    return arg_parser
+
+
+if __name__ == "__main__":
+    os.environ["PYTHONUNBUFFERED"] = "1"
+
+    parser = get_parser()
+    args = parser.parse_args()
+
+    log = logging.getLogger(f"{__name__}.TelemetryMonitor")
+    log.setLevel(logging.DEBUG)
+    log.propagate = False
+
+    log_path = os.path.join(args.exp_dir, TELMON_SUBDIR, "telemetrymonitor.log")
+    fh = logging.FileHandler(log_path,
"a") + log.addHandler(fh) + + # Must register cleanup before the main loop is running + register_signal_handlers() + + try: + main( + int(args.frequency), + pathlib.Path(args.exp_dir), + log, + cooldown_duration=args.cooldown, + ) + sys.exit(0) + except Exception: + log.exception( + "Shutting down telemetry monitor due to unexpected error", exc_info=True + ) + + sys.exit(1) diff --git a/smartsim/_core/launcher/cobalt/cobaltLauncher.py b/smartsim/_core/launcher/cobalt/cobaltLauncher.py index ca0b88a3b..4c7206969 100644 --- a/smartsim/_core/launcher/cobalt/cobaltLauncher.py +++ b/smartsim/_core/launcher/cobalt/cobaltLauncher.py @@ -117,16 +117,13 @@ def run(self, step: Step) -> t.Optional[str]: # aprun doesn't direct output for us. out, err = step.get_output_files() - # LocalStep.run_command omits env, include it here - passed_env = step.env if isinstance(step, LocalStep) else None - # pylint: disable-next=consider-using-with output = open(out, "w+", encoding="utf-8") # pylint: disable-next=consider-using-with error = open(err, "w+", encoding="utf-8") task_id = self.task_manager.start_task( - cmd_list, step.cwd, passed_env, out=output.fileno(), err=error.fileno() + cmd_list, step.cwd, step.env, out=output.fileno(), err=error.fileno() ) # if batch submission did not successfully retrieve job ID diff --git a/smartsim/_core/launcher/launcher.py b/smartsim/_core/launcher/launcher.py index ec8bb0120..1441fe8b0 100644 --- a/smartsim/_core/launcher/launcher.py +++ b/smartsim/_core/launcher/launcher.py @@ -47,11 +47,6 @@ class Launcher(abc.ABC): # pragma: no cover step_mapping: StepMapping task_manager: TaskManager - @property - @abc.abstractmethod - def supported_rs(self) -> t.Dict[t.Type[SettingsBase], t.Type[Step]]: - raise NotImplementedError - @abc.abstractmethod def create_step(self, name: str, cwd: str, step_settings: SettingsBase) -> Step: raise NotImplementedError @@ -86,6 +81,11 @@ def __init__(self) -> None: self.task_manager = TaskManager() self.step_mapping = StepMapping() + @property + @abc.abstractmethod + def supported_rs(self) -> t.Dict[t.Type[SettingsBase], t.Type[Step]]: + raise NotImplementedError + # every launcher utilizing this interface must have a map # of supported RunSettings types (see slurmLauncher.py for ex) def create_step( @@ -176,6 +176,6 @@ def _get_unmanaged_step_update( # pylint: disable-next=no-self-use def _get_managed_step_update( self, - step_ids: t.List[str], # pylint: disable=unused-argument + step_ids: t.List[str], # pylint: disable=unused-argument ) -> t.List[StepInfo]: # pragma: no cover return [] diff --git a/smartsim/_core/launcher/local/local.py b/smartsim/_core/launcher/local/local.py index 7e5c56f7b..3f0f2d8d2 100644 --- a/smartsim/_core/launcher/local/local.py +++ b/smartsim/_core/launcher/local/local.py @@ -24,29 +24,24 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import os
+import sys
 import typing as t
 
 from ..launcher import Launcher
 from ....log import get_logger
 from ....settings import RunSettings, SettingsBase
-from ..step import LocalStep
-from ..step import Step
+from ..step import LocalStep, Step
 from ..stepInfo import UnmanagedStepInfo, StepInfo
 from ..stepMapping import StepMapping
 from ..taskManager import TaskManager
-
-logger = get_logger(__name__)
+from ...utils.helpers import encode_cmd
+from ...config import CONFIG
 
 
 class LocalLauncher(Launcher):
     """Launcher used for spawning processes on the localhost machine."""
 
-    @property
-    def supported_rs(self) -> t.Dict[t.Type[SettingsBase], t.Type[Step]]:
-        return {
-            RunSettings: LocalStep,
-        }
-
     def __init__(self) -> None:
         self.task_manager = TaskManager()
         self.step_mapping = StepMapping()
@@ -60,16 +55,17 @@ def create_step(self, name: str, cwd: str, step_settings: SettingsBase) -> Step:
             raise TypeError(
                 f"Local Launcher only supports entities with RunSettings, not {type(step_settings)}"
             )
-        step = LocalStep(name, cwd, step_settings)
-        return step
+        return LocalStep(name, cwd, step_settings)
 
-    def get_step_update(self, step_names: t.List[str]) -> t.List[t.Tuple[str, t.Optional[StepInfo]]]:
+    def get_step_update(
+        self, step_names: t.List[str]
+    ) -> t.List[t.Tuple[str, t.Optional[StepInfo]]]:
         """Get status updates of each job step name provided
 
         :param step_names: list of step_names
         :type step_names: list[str]
         :return: list of tuples for update
-        :rtype: list[(str, UnmanagedStepInfo)]
+        :rtype: list[tuple[str, StepInfo | None]]
         """
         # step ids are process ids of the tasks
         # as there is no WLM intermediary
@@ -85,8 +81,12 @@ def get_step_nodes(self, step_names: t.List[str]) -> t.List[t.List[str]]:
         """Return the address of nodes assigned to the step
 
+        :param step_names: list of step_names
+        :type step_names: list[str]
+        :return: list of node addresses
+        :rtype: list[list[str]]
+
         TODO: Use socket to find the actual Lo address?
-        :return: a list containing the local host address
         """
         return [["127.0.0.1"] * len(step_names)]
 
@@ -104,16 +104,17 @@ def run(self, step: Step) -> str:
             self.task_manager.start()
 
         out, err = step.get_output_files()
-        output = open(out, "w+")
-        error = open(err, "w+")
 
         cmd = step.get_launch_cmd()
-        # LocalStep.run_command omits env, include it here
-        passed_env = step.env if isinstance(step, LocalStep) else None
+        # pylint: disable-next=consider-using-with
+        output = open(out, "w+", encoding="utf-8")
+        # pylint: disable-next=consider-using-with
+        error = open(err, "w+", encoding="utf-8")
 
         task_id = self.task_manager.start_task(
-            cmd, step.cwd, env=passed_env, out=output.fileno(), err=error.fileno()
+            cmd, step.cwd, env=step.env, out=output.fileno(), err=error.fileno()
         )
+
         self.step_mapping.add(step.name, task_id=task_id, managed=False)
         return task_id
 
@@ -127,7 +128,7 @@ def stop(self, step_name: str) -> UnmanagedStepInfo:
         """
         # step_id is task_id for local.
Naming for consistency step_id = self.step_mapping[step_name].task_id - + self.task_manager.remove_task(str(step_id)) _, rc, out, err = self.task_manager.get_task_update(str(step_id)) step_info = UnmanagedStepInfo("Cancelled", rc, out, err) diff --git a/smartsim/_core/launcher/lsf/lsfLauncher.py b/smartsim/_core/launcher/lsf/lsfLauncher.py index a8d0e27aa..13b3be9bb 100644 --- a/smartsim/_core/launcher/lsf/lsfLauncher.py +++ b/smartsim/_core/launcher/lsf/lsfLauncher.py @@ -42,13 +42,13 @@ from ...config import CONFIG from ..launcher import WLMLauncher from ..step import ( - Step, BsubBatchStep, JsrunStep, LocalStep, MpiexecStep, MpirunStep, OrterunStep, + Step, ) from ..stepInfo import LSFBatchStepInfo, LSFJsrunStepInfo, StepInfo from .lsfCommands import bjobs, bkill, jskill, jslist @@ -115,19 +115,16 @@ def run(self, step: Step) -> t.Optional[str]: time.sleep(1) step_id = self._get_lsf_step_id(step) logger.debug(f"Gleaned jsrun step id: {step_id} for {step.name}") - else: # isinstance(step, MpirunStep) or isinstance(step, LocalStep) + else: # mpirun and local launch don't direct output for us out, err = step.get_output_files() - # LocalStep.run_command omits env, include it here - passed_env = step.env if isinstance(step, LocalStep) else None - # pylint: disable-next=consider-using-with output = open(out, "w+", encoding="utf-8") # pylint: disable-next=consider-using-with error = open(err, "w+", encoding="utf-8") task_id = self.task_manager.start_task( - cmd_list, step.cwd, passed_env, out=output.fileno(), err=error.fileno() + cmd_list, step.cwd, step.env, out=output.fileno(), err=error.fileno() ) self.step_mapping.add(step.name, step_id, task_id, step.managed) diff --git a/smartsim/_core/launcher/pbs/pbsLauncher.py b/smartsim/_core/launcher/pbs/pbsLauncher.py index cbb85337c..f7d854a7b 100644 --- a/smartsim/_core/launcher/pbs/pbsLauncher.py +++ b/smartsim/_core/launcher/pbs/pbsLauncher.py @@ -111,15 +111,12 @@ def run(self, step: Step) -> t.Optional[str]: # aprun/local doesn't direct output for us. 
out, err = step.get_output_files() - # LocalStep.run_command omits env, include it here - passed_env = step.env if isinstance(step, LocalStep) else None - # pylint: disable-next=consider-using-with output = open(out, "w+", encoding="utf-8") # pylint: disable-next=consider-using-with error = open(err, "w+", encoding="utf-8") task_id = self.task_manager.start_task( - cmd_list, step.cwd, passed_env, out=output.fileno(), err=error.fileno() + cmd_list, step.cwd, step.env, out=output.fileno(), err=error.fileno() ) # if batch submission did not successfully retrieve job ID diff --git a/smartsim/_core/launcher/slurm/slurmLauncher.py b/smartsim/_core/launcher/slurm/slurmLauncher.py index 70bdab5a2..ae44ddc8e 100644 --- a/smartsim/_core/launcher/slurm/slurmLauncher.py +++ b/smartsim/_core/launcher/slurm/slurmLauncher.py @@ -155,15 +155,12 @@ def run(self, step: Step) -> t.Optional[str]: # MPI/local steps don't direct output like slurm steps out, err = step.get_output_files() - # LocalStep.run_command omits env, include it here - passed_env = step.env if isinstance(step, LocalStep) else None - # pylint: disable-next=consider-using-with output = open(out, "w+", encoding="utf-8") # pylint: disable-next=consider-using-with error = open(err, "w+", encoding="utf-8") task_id = self.task_manager.start_task( - cmd_list, step.cwd, passed_env, out=output.fileno(), err=error.fileno() + cmd_list, step.cwd, step.env, out=output.fileno(), err=error.fileno() ) if not step_id and step.managed: diff --git a/smartsim/_core/launcher/step/alpsStep.py b/smartsim/_core/launcher/step/alpsStep.py index 80e7e7658..6169df083 100644 --- a/smartsim/_core/launcher/step/alpsStep.py +++ b/smartsim/_core/launcher/step/alpsStep.py @@ -31,7 +31,7 @@ from ....error import AllocationError from ....log import get_logger -from .step import Step +from .step import Step, proxyable_launch_cmd from ....settings import AprunSettings, RunSettings, Singularity logger = get_logger(__name__) @@ -56,9 +56,11 @@ def __init__(self, name: str, cwd: str, run_settings: AprunSettings) -> None: def _get_mpmd(self) -> t.List[RunSettings]: """Temporary convenience function to return a typed list - of attached RunSettings""" + of attached RunSettings + """ return self.run_settings.mpmd + @proxyable_launch_cmd def get_launch_cmd(self) -> t.List[str]: """Get the command to launch this step diff --git a/smartsim/_core/launcher/step/localStep.py b/smartsim/_core/launcher/step/localStep.py index d15a48381..709137e5b 100644 --- a/smartsim/_core/launcher/step/localStep.py +++ b/smartsim/_core/launcher/step/localStep.py @@ -28,7 +28,7 @@ import shutil import typing as t -from .step import Step +from .step import Step, proxyable_launch_cmd from ....settings.base import RunSettings from ....settings import Singularity @@ -37,8 +37,13 @@ class LocalStep(Step): def __init__(self, name: str, cwd: str, run_settings: RunSettings): super().__init__(name, cwd, run_settings) self.run_settings = run_settings - self.env = self._set_env() + self._env = self._set_env() + @property + def env(self) -> t.Dict[str, str]: + return self._env + + @proxyable_launch_cmd def get_launch_cmd(self) -> t.List[str]: cmd = [] diff --git a/smartsim/_core/launcher/step/lsfStep.py b/smartsim/_core/launcher/step/lsfStep.py index ae6c3525b..a10827950 100644 --- a/smartsim/_core/launcher/step/lsfStep.py +++ b/smartsim/_core/launcher/step/lsfStep.py @@ -213,7 +213,8 @@ def _set_alloc(self) -> None: def _get_mpmd(self) -> t.List[RunSettings]: """Temporary convenience function to return a typed list - 
of attached RunSettings""" + of attached RunSettings + """ if isinstance(self.step_settings, JsrunSettings): return self.step_settings.mpmd return [] diff --git a/smartsim/_core/launcher/step/mpiStep.py b/smartsim/_core/launcher/step/mpiStep.py index 9a0796c0f..8ab6c0d47 100644 --- a/smartsim/_core/launcher/step/mpiStep.py +++ b/smartsim/_core/launcher/step/mpiStep.py @@ -26,12 +26,12 @@ import os import shutil -from shlex import split as sh_split import typing as t +from shlex import split as sh_split from ....error import AllocationError, SmartSimError from ....log import get_logger -from .step import Step +from .step import Step, proxyable_launch_cmd from ....settings import MpirunSettings, MpiexecSettings, OrterunSettings from ....settings.base import RunSettings @@ -59,6 +59,7 @@ def __init__(self, name: str, cwd: str, run_settings: RunSettings) -> None: _supported_launchers = ["PBS", "COBALT", "SLURM", "LSB"] + @proxyable_launch_cmd def get_launch_cmd(self) -> t.List[str]: """Get the command to launch this step @@ -118,7 +119,8 @@ def _set_alloc(self) -> None: def _get_mpmd(self) -> t.List[RunSettings]: """Temporary convenience function to return a typed list - of attached RunSettings""" + of attached RunSettings + """ if hasattr(self.run_settings, "mpmd") and self.run_settings.mpmd: rs_mpmd: t.List[RunSettings] = self.run_settings.mpmd return rs_mpmd diff --git a/smartsim/_core/launcher/step/slurmStep.py b/smartsim/_core/launcher/step/slurmStep.py index 18575e4e9..67353faa7 100644 --- a/smartsim/_core/launcher/step/slurmStep.py +++ b/smartsim/_core/launcher/step/slurmStep.py @@ -26,13 +26,13 @@ import os import shutil -from shlex import split as sh_split import typing as t +from shlex import split as sh_split from ....error import AllocationError from ....log import get_logger from .step import Step -from ....settings import SrunSettings, SbatchSettings, RunSettings, Singularity +from ....settings import RunSettings, SbatchSettings, Singularity, SrunSettings logger = get_logger(__name__) @@ -189,13 +189,15 @@ def _set_alloc(self) -> None: def _get_mpmd(self) -> t.List[RunSettings]: """Temporary convenience function to return a typed list - of attached RunSettings""" + of attached RunSettings + """ return self.run_settings.mpmd @staticmethod def _get_exe_args_list(run_setting: RunSettings) -> t.List[str]: """Convenience function to encapsulate checking the - runsettings.exe_args type to always return a list""" + runsettings.exe_args type to always return a list + """ exe_args = run_setting.exe_args args: t.List[str] = exe_args if isinstance(exe_args, list) else [exe_args] return args diff --git a/smartsim/_core/launcher/step/step.py b/smartsim/_core/launcher/step/step.py index 2aa995768..d77616cc2 100644 --- a/smartsim/_core/launcher/step/step.py +++ b/smartsim/_core/launcher/step/step.py @@ -26,17 +26,20 @@ from __future__ import annotations +import functools import os.path as osp +import sys import time import typing as t - from os import makedirs -from smartsim.error.errors import SmartSimError + +from smartsim.error.errors import SmartSimError, UnproxyableStepError +from smartsim._core.config import CONFIG from ....log import get_logger -from ...utils.helpers import get_base_36_repr +from ...utils.helpers import get_base_36_repr, encode_cmd from ..colocated import write_colocated_launch_script -from ....settings.base import SettingsBase, RunSettings +from ....settings.base import RunSettings, SettingsBase logger = get_logger(__name__) @@ -48,6 +51,12 @@ def __init__(self, name: 
str, cwd: str, step_settings: SettingsBase) -> None:
         self.cwd = cwd
         self.managed = False
         self.step_settings = step_settings
+        self.meta: t.Dict[str, str] = {}
+
+    @property
+    def env(self) -> t.Optional[t.Dict[str, str]]:
+        """Overridable, read-only property for a step to specify its environment"""
+        return None
 
     def get_launch_cmd(self) -> t.List[str]:
         raise NotImplementedError
@@ -68,7 +77,8 @@ def get_step_file(
     ) -> str:
         """Get the name for a file/script created by the step class
 
-        Used for Batch scripts, mpmd scripts, etc"""
+        Used for Batch scripts, mpmd scripts, etc.
+        """
         if script_name:
             script_name = script_name if "." in script_name else script_name + ending
         return osp.join(self.cwd, script_name)
@@ -107,3 +117,49 @@ def add_to_batch(self, step: Step) -> None:
         :type step: Step
         """
         raise SmartSimError("add_to_batch not implemented for this step type")
+
+
+_StepT = t.TypeVar("_StepT", bound=Step)
+
+
+def proxyable_launch_cmd(
+    fn: t.Callable[[_StepT], t.List[str]], /
+) -> t.Callable[[_StepT], t.List[str]]:
+    @functools.wraps(fn)
+    def _get_launch_cmd(self: _StepT) -> t.List[str]:
+        original_cmd_list = fn(self)
+
+        if not CONFIG.telemetry_enabled:
+            return original_cmd_list
+
+        if self.managed:
+            raise UnproxyableStepError(
+                f"Attempting to proxy managed step of type {type(self)} "
+                "through the unmanaged step proxy entry point"
+            )
+
+        proxy_module = "smartsim._core.entrypoints.indirect"
+        etype = self.meta["entity_type"]
+        status_dir = self.meta["status_dir"]
+        encoded_cmd = encode_cmd(original_cmd_list)
+
+        # NOTE: this is NOT safe. should either 1) sign cmd and verify OR 2)
+        #       serialize step and let the indirect entrypoint rebuild the
+        #       cmd... for now, test away...
+        return [
+            sys.executable,
+            "-m",
+            proxy_module,
+            "+name",
+            self.name,
+            "+command",
+            encoded_cmd,
+            "+entity_type",
+            etype,
+            "+telemetry_dir",
+            status_dir,
+            "+working_dir",
+            self.cwd,
+        ]
+
+    return _get_launch_cmd
diff --git a/smartsim/_core/utils/helpers.py b/smartsim/_core/utils/helpers.py
index 5d6b6d769..fea0269f0 100644
--- a/smartsim/_core/utils/helpers.py
+++ b/smartsim/_core/utils/helpers.py
@@ -27,9 +27,11 @@
 """
 A file of helper functions for SmartSim
 """
+import base64
 import os
 import uuid
 import typing as t
+from datetime import datetime
 from functools import lru_cache
 from pathlib import Path
 from shutil import which
@@ -64,21 +66,20 @@ def unpack_colo_db_identifier(db_id: str) -> str:
     return "_" + db_id if db_id else ""
 
 
+def create_short_id_str() -> str:
+    return str(uuid.uuid4())[:7]
+
+
 def create_lockfile_name() -> str:
     """Generate a unique lock filename using UUID"""
-    lock_suffix = str(uuid.uuid4())[:7]
+    lock_suffix = create_short_id_str()
     return f"smartsim-{lock_suffix}.lock"
 
 
 @lru_cache(maxsize=20, typed=False)
 def check_dev_log_level() -> bool:
-    try:
-        lvl = os.environ["SMARTSIM_LOG_LEVEL"]
-        if lvl == "developer":
-            return True
-        return False
-    except KeyError:
-        return False
+    lvl = os.environ.get("SMARTSIM_LOG_LEVEL", "")
+    return lvl == "developer"
 
 
 def fmt_dict(value: t.Dict[str, t.Any]) -> str:
@@ -273,3 +274,31 @@ def installed_redisai_backends(
     }
 
     return {backend for backend in backends if _installed(base_path, backend)}
+
+
+def get_ts() -> int:
+    """Return the current timestamp (accurate to seconds) cast to an integer"""
+    return int(datetime.timestamp(datetime.now()))
+
+
+def encode_cmd(cmd: t.List[str]) -> str:
+    """Transform a standard command list into an encoded string safe for providing as an
+    argument to a proxy entrypoint
+    """
+    if not cmd:
+        raise
ValueError("Invalid cmd supplied") + + ascii_cmd = "|".join(cmd).encode("ascii") + encoded_cmd = base64.b64encode(ascii_cmd).decode("ascii") + return encoded_cmd + + +def decode_cmd(encoded_cmd: str) -> t.List[str]: + """Decode an encoded command string to the original command list format""" + if not encoded_cmd.strip(): + raise ValueError("Invalid cmd supplied") + + decoded_cmd = base64.b64decode(encoded_cmd.encode("ascii")) + cleaned_cmd = decoded_cmd.decode("ascii").split("|") + + return cleaned_cmd diff --git a/smartsim/_core/utils/serialize.py b/smartsim/_core/utils/serialize.py new file mode 100644 index 000000000..5547a49f8 --- /dev/null +++ b/smartsim/_core/utils/serialize.py @@ -0,0 +1,246 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2023, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
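A quick round-trip sketch of the `encode_cmd`/`decode_cmd` helpers above. Note that the scheme joins argv entries with "|" before base64-encoding, so entries containing "|" (or non-ASCII bytes) are not representable:

from smartsim._core.utils.helpers import decode_cmd, encode_cmd

cmd = ["echo", "hello", "world"]
encoded = encode_cmd(cmd)          # opaque ASCII token, safe as a single CLI argument
assert decode_cmd(encoded) == cmd  # the original argv list is restored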
+ +from __future__ import annotations + +import json +import time +import typing as t +from pathlib import Path + +import smartsim.log +import smartsim._core._cli.utils as _utils +from smartsim._core.config import CONFIG + +if t.TYPE_CHECKING: + from smartsim import Experiment + from smartsim._core.control.manifest import LaunchedManifest as _Manifest + from smartsim.database.orchestrator import Orchestrator + from smartsim.entity import DBNode, Ensemble, Model + from smartsim.entity.dbobject import DBModel, DBScript + from smartsim.settings.base import BatchSettings, RunSettings + + +TStepLaunchMetaData = t.Tuple[ + t.Optional[str], t.Optional[str], t.Optional[bool], str, str, Path +] +TELMON_SUBDIR: t.Final[str] = ".smartsim/telemetry" +MANIFEST_FILENAME: t.Final[str] = "manifest.json" + +_LOGGER = smartsim.log.get_logger(__name__) + + +def save_launch_manifest(manifest: _Manifest[TStepLaunchMetaData]) -> None: + if not CONFIG.telemetry_enabled: + return + + manifest.metadata.run_telemetry_subdirectory.mkdir(parents=True, exist_ok=True) + + new_run = { + "run_id": manifest.metadata.run_id, + "timestamp": int(time.time_ns()), + "model": [ + _dictify_model(model, *telemetry_metadata) + for model, telemetry_metadata in manifest.models + ], + "orchestrator": [ + _dictify_db(db, nodes_info) for db, nodes_info in manifest.databases + ], + "ensemble": [ + _dictify_ensemble(ens, member_info) + for ens, member_info in manifest.ensembles + ], + } + try: + with open(manifest.metadata.manifest_file_path, "r", encoding="utf-8") as file: + manifest_dict = json.load(file) + except (FileNotFoundError, json.JSONDecodeError): + manifest_dict = { + "schema info": { + "schema_name": "entity manifest", + "version": "0.0.1", + }, + "experiment": { + "name": manifest.metadata.exp_name, + "path": manifest.metadata.exp_path, + "launcher": manifest.metadata.launcher_name, + }, + "runs": [new_run], + } + else: + manifest_dict["runs"].append(new_run) + finally: + with open(manifest.metadata.manifest_file_path, "w", encoding="utf-8") as file: + json.dump(manifest_dict, file, indent=2) + + +def _dictify_model( + model: Model, + step_id: t.Optional[str], + task_id: t.Optional[str], + managed: t.Optional[bool], + out_file: str, + err_file: str, + telemetry_data_path: Path, +) -> t.Dict[str, t.Any]: + colo_settings = (model.run_settings.colocated_db_settings or {}).copy() + db_scripts = t.cast("t.List[DBScript]", colo_settings.pop("db_scripts", [])) + db_models = t.cast("t.List[DBModel]", colo_settings.pop("db_models", [])) + return { + "name": model.name, + "path": model.path, + "exe_args": model.run_settings.exe_args, + "run_settings": _dictify_run_settings(model.run_settings), + "batch_settings": _dictify_batch_settings(model.batch_settings) + if model.batch_settings + else {}, + "params": model.params, + "files": { + "Symlink": model.files.link, + "Configure": model.files.tagged, + "Copy": model.files.copy, + } + if model.files + else { + "Symlink": [], + "Configure": [], + "Copy": [], + }, + "colocated_db": { + "settings": colo_settings, + "scripts": [ + { + script.name: { + "backend": "TORCH", + "device": script.device, + } + } + for script in db_scripts + ], + "models": [ + { + model.name: { + "backend": model.backend, + "device": model.device, + } + } + for model in db_models + ], + } + if colo_settings + else {}, + "telemetry_metadata": { + "status_dir": str(telemetry_data_path), + "step_id": step_id, + "task_id": task_id, + "managed": managed, + }, + "out_file": out_file, + "err_file": err_file, + } + + +def 
_dictify_ensemble( + ens: Ensemble, + members: t.Sequence[t.Tuple[Model, TStepLaunchMetaData]], +) -> t.Dict[str, t.Any]: + return { + "name": ens.name, + "params": ens.params, + "batch_settings": _dictify_batch_settings(ens.batch_settings) + # FIXME: Typehint here is wrong, ``ens.batch_settings`` can + # also be an empty dict for no discernible reason... + if ens.batch_settings else {}, + "models": [ + _dictify_model(model, *launching_metadata) + for model, launching_metadata in members + ], + } + + +def _dictify_run_settings(run_settings: RunSettings) -> t.Dict[str, t.Any]: + # TODO: remove this downcast + if hasattr(run_settings, "mpmd") and run_settings.mpmd: + _LOGGER.warning( + "SmartSim currently cannot properly serialize all information in " + "MPMD run settings" + ) + return { + "exe": run_settings.exe, + # TODO: We should try to move this back + # "exe_args": run_settings.exe_args, + "run_command": run_settings.run_command, + "run_args": run_settings.run_args, + # TODO: We currently do not have a way to represent MPMD commands! + # Maybe add a ``"mpmd"`` key here that is a + # ``list[TDictifiedRunSettings]``? + } + + +def _dictify_batch_settings(batch_settings: BatchSettings) -> t.Dict[str, t.Any]: + return { + "batch_command": batch_settings.batch_cmd, + "batch_args": batch_settings.batch_args, + } + + +def _dictify_db( + db: Orchestrator, + nodes: t.Sequence[t.Tuple[DBNode, TStepLaunchMetaData]], +) -> t.Dict[str, t.Any]: + db_path = _utils.get_db_path() + if db_path: + db_type, _ = db_path.name.split("-", 1) + else: + db_type = "Unknown" + return { + "name": db.name, + "type": db_type, + "interface": db._interfaces, # pylint: disable=protected-access + "shards": [ + { + **shard.to_dict(), + "conf_file": shard.cluster_conf_file, + "out_file": out_file, + "err_file": err_file, + "telemetry_metadata": { + "status_dir": str(status_dir), + "step_id": step_id, + "task_id": task_id, + "managed": managed, + }, + } + for dbnode, ( + step_id, + task_id, + managed, + out_file, + err_file, + status_dir, + ) in nodes + for shard in dbnode.get_launched_shard_info() + ], + } diff --git a/smartsim/entity/dbnode.py b/smartsim/entity/dbnode.py index 35445c42d..ba9a50c80 100644 --- a/smartsim/entity/dbnode.py +++ b/smartsim/entity/dbnode.py @@ -76,11 +76,12 @@ def __init__( @property def num_shards(self) -> int: - try: - return len(self.run_settings.mpmd) + 1 # type: ignore[attr-defined] - except AttributeError: + if not hasattr(self.run_settings, "mpmd"): + # return default number of shards if mpmd is not set return 1 + return len(self.run_settings.mpmd) + 1 + @property def host(self) -> str: try: @@ -99,11 +100,12 @@ def hosts(self) -> t.List[str]: @property def is_mpmd(self) -> bool: - try: - return bool(self.run_settings.mpmd) # type: ignore[attr-defined] - except AttributeError: + if not hasattr(self.run_settings, "mpmd"): + # missing mpmd property guarantees this is not an mpmd run return False + return bool(self.run_settings.mpmd) + def set_hosts(self, hosts: t.List[str]) -> None: self._hosts = [str(host) for host in hosts] diff --git a/smartsim/error/errors.py b/smartsim/error/errors.py index ffa1cfb17..ad67ae88b 100644 --- a/smartsim/error/errors.py +++ b/smartsim/error/errors.py @@ -39,12 +39,14 @@ class SSUnsupportedError(Exception): class EntityExistsError(SmartSimError): """Raised when a user tries to create an entity or files/directories for - an entity and either the entity/files/directories already exist""" + an entity and either the entity/files/directories already exist + """ 
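For orientation, the manifest written by `save_launch_manifest` above takes roughly the following shape; every value below is an illustrative placeholder rather than output captured from this patch:

# abridged sketch of <exp_dir>/.smartsim/telemetry/manifest.json
manifest = {
    "schema info": {"schema_name": "entity manifest", "version": "0.0.1"},
    "experiment": {"name": "my-exp", "path": "/path/to/exp", "launcher": "local"},
    "runs": [
        {
            "run_id": "d041b90",               # placeholder run id
            "timestamp": 1701890000000000000,  # from time.time_ns()
            "model": [],                       # _dictify_model(...) entries
            "orchestrator": [],                # _dictify_db(...) entries
            "ensemble": [],                    # _dictify_ensemble(...) entries
        }
    ],
}

Each new `Experiment.start` call appends another entry to `runs`, which is why the telemetry monitor's runtime manifest can hold multiple experiment executions.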
class UserStrategyError(SmartSimError): """Raised when there is an error with model creation inside an ensemble - that is from a user provided permutation strategy""" + that is from a user provided permutation strategy + """ def __init__(self, perm_strat: str) -> None: message = self.create_message(perm_strat) @@ -80,16 +82,15 @@ class SSReservedKeywordError(SmartSimError): class SSDBIDConflictError(SmartSimError): """Raised in the event that a database identifier - is not unique when multiple databases are created""" + is not unique when multiple databases are created + """ # Internal Exceptions class SSInternalError(Exception): - """ - SSInternalError is raised when an internal error is encountered. - """ + """SSInternalError is raised when an internal error is encountered""" class SSConfigError(SSInternalError): @@ -106,7 +107,8 @@ class AllocationError(LauncherError): class ShellError(LauncherError): """Raised when error arises from function within launcher.shell - Closely related to error from subprocess(Popen) commands""" + Closely related to error from subprocess(Popen) commands + """ def __init__( self, @@ -130,3 +132,17 @@ def create_message( if details: msg += f"\nError from shell: {details}" return msg + + +class TelemetryError(SSInternalError): + """Raised when SmartSim runs into trouble establishing or communicating + telemetry information + """ + +class UnproxyableStepError(TelemetryError): + """Raised when a user attempts to proxy a managed ``Step`` through the + unmanaged step proxy entry point + """ + +class SmartSimCLIActionCancelled(SmartSimError): + """Raised when a `smart` CLI command is terminated""" diff --git a/smartsim/experiment.py b/smartsim/experiment.py index a9d275088..b3ef2fd09 100644 --- a/smartsim/experiment.py +++ b/smartsim/experiment.py @@ -24,6 +24,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import os import os.path as osp import typing as t from os import getcwd @@ -194,6 +195,8 @@ def start( if summary: self._launch_summary(start_manifest) self._control.start( + exp_name=self.name, + exp_path=self.exp_path, manifest=start_manifest, block=block, kill_on_interrupt=kill_on_interrupt, @@ -878,3 +881,35 @@ def append_to_db_identifier_list(self, db_identifier: str) -> None: ) # Otherwise, add self.db_identifiers.add(db_identifier) + + def enable_telemetry(self) -> None: + """Experiments will start producing telemetry for all entities run + through ``Experiment.start`` + + .. warning:: + + This method is currently implemented so that ALL ``Experiment`` + instances will begin producing telemetry data. In the future it + is planned to have this method work on a "per instance" basis! + """ + self._set_telemetry(True) + + def disable_telemetry(self) -> None: + """Experiments will stop producing telemetry for all entities run + through ``Experiment.start`` + + .. warning:: + + This method is currently implemented so that ALL ``Experiment`` + instances will stop producing telemetry data. In the future it + is planned to have this method work on a "per instance" basis! 
+        """
+        self._set_telemetry(False)
+
+    @staticmethod
+    def _set_telemetry(switch: bool, /) -> None:
+        tm_key = "SMARTSIM_FLAG_TELEMETRY"
+        if switch:
+            os.environ[tm_key] = "1"
+        else:
+            os.environ[tm_key] = "0"
diff --git a/smartsim/log.py b/smartsim/log.py
index 9011b3d1b..72d5ad817 100644
--- a/smartsim/log.py
+++ b/smartsim/log.py
@@ -30,12 +30,16 @@
 
 import coloredlogs
 
-# constants for logging
-coloredlogs.DEFAULT_DATE_FORMAT = "%H:%M:%S"
-coloredlogs.DEFAULT_LOG_FORMAT = (
+# constants
+DEFAULT_DATE_FORMAT: t.Final[str] = "%H:%M:%S"
+DEFAULT_LOG_FORMAT: t.Final[str] = (
     "%(asctime)s %(hostname)s %(name)s[%(process)d] %(levelname)s %(message)s"
 )
+# configure colored logs
+coloredlogs.DEFAULT_DATE_FORMAT = DEFAULT_DATE_FORMAT
+coloredlogs.DEFAULT_LOG_FORMAT = DEFAULT_LOG_FORMAT
+
 
 def _get_log_level() -> str:
     """Get the logging level based on environment variable
diff --git a/smartsim/wlm/slurm.py b/smartsim/wlm/slurm.py
index 8fe12b3f9..ba46fb64c 100644
--- a/smartsim/wlm/slurm.py
+++ b/smartsim/wlm/slurm.py
@@ -237,7 +237,8 @@ def _get_alloc_cmd(
     options: t.Optional[t.Dict[str, str]] = None,
 ) -> t.List[str]:
     """Return the command to request an allocation from Slurm with
-    the class variables as the slurm options."""
+    the class variables as the slurm options.
+    """
 
     salloc_args = [
         "--no-shell",
diff --git a/tests/backends/test_dbmodel.py b/tests/backends/test_dbmodel.py
index 386631a50..7472db706 100644
--- a/tests/backends/test_dbmodel.py
+++ b/tests/backends/test_dbmodel.py
@@ -398,7 +398,7 @@ def test_colocated_db_model_tf(fileutils, wlmutils, mlutils):
     test_script = fileutils.get_test_conf_path("run_tf_dbmodel_smartredis.py")
 
     # Create SmartSim Experiment
-    exp = Experiment(exp_name, launcher=test_launcher)
+    exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir)
 
     # Create RunSettings
     colo_settings = exp.create_run_settings(exe=sys.executable, exe_args=test_script)
@@ -469,7 +469,7 @@ def test_colocated_db_model_pytorch(fileutils, wlmutils, mlutils):
     test_script = fileutils.get_test_conf_path("run_pt_dbmodel_smartredis.py")
 
     # Create the SmartSim Experiment
-    exp = Experiment(exp_name, launcher=test_launcher)
+    exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir)
 
     # Create colocated RunSettings
     colo_settings = exp.create_run_settings(exe=sys.executable, exe_args=test_script)
@@ -633,7 +633,7 @@ def test_colocated_db_model_ensemble_reordered(fileutils, wlmutils, mlutils):
     test_script = fileutils.get_test_conf_path("run_tf_dbmodel_smartredis.py")
 
     # Create the SmartSim Experiment
-    exp = Experiment(exp_name, launcher=test_launcher)
+    exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir)
 
     # Create colocated RunSettings
     colo_settings = exp.create_run_settings(exe=sys.executable, exe_args=test_script)
@@ -735,7 +735,7 @@ def test_colocated_db_model_errors(fileutils, wlmutils, mlutils):
     test_script = fileutils.get_test_conf_path("run_tf_dbmodel_smartredis.py")
 
     # Create SmartSim Experiment
-    exp = Experiment(exp_name, launcher=test_launcher)
+    exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir)
 
     # Create colocated RunSettings
     colo_settings = exp.create_run_settings(exe=sys.executable, exe_args=test_script)
@@ -818,6 +818,7 @@ def test_colocated_db_model_errors(fileutils, wlmutils, mlutils):
     with pytest.raises(SSUnsupportedError):
         colo_ensemble.add_model(colo_model)
 
+
 @pytest.mark.skipif(not should_run_tf, reason="Test needs TensorFlow to run")
 def test_inconsistent_params_db_model():
     """Test error when devices_per_node parameter > 1 while devices is set to CPU in DBModel"""
diff --git a/tests/backends/test_dbscript.py b/tests/backends/test_dbscript.py
index c92be31de..4d1743402 100644
--- a/tests/backends/test_dbscript.py
+++ b/tests/backends/test_dbscript.py
@@ -245,7 +245,7 @@ def test_colocated_db_script(fileutils, wlmutils, mlutils):
     torch_script = fileutils.get_test_conf_path("torchscript.py")
 
     # Create the SmartSim Experiment
-    exp = Experiment(exp_name, launcher=test_launcher)
+    exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir)
 
     # Create RunSettings
     colo_settings = exp.create_run_settings(exe=sys.executable, exe_args=test_script)
@@ -313,7 +313,7 @@ def test_colocated_db_script_ensemble(fileutils, wlmutils, mlutils):
     torch_script = fileutils.get_test_conf_path("torchscript.py")
 
     # Create SmartSim Experiment
-    exp = Experiment(exp_name, launcher=test_launcher)
+    exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir)
 
     # Create RunSettings
     colo_settings = exp.create_run_settings(exe=sys.executable, exe_args=test_script)
@@ -412,7 +412,7 @@ def test_colocated_db_script_ensemble_reordered(fileutils, wlmutils, mlutils):
     torch_script = fileutils.get_test_conf_path("torchscript.py")
 
     # Create SmartSim Experiment
-    exp = Experiment(exp_name, launcher=test_launcher)
+    exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir)
 
     # Create RunSettings
     colo_settings = exp.create_run_settings(exe=sys.executable, exe_args=test_script)
@@ -509,7 +509,7 @@ def test_db_script_errors(fileutils, wlmutils, mlutils):
     torch_script = fileutils.get_test_conf_path("torchscript.py")
 
     # Create SmartSim experiment
-    exp = Experiment(exp_name, launcher=test_launcher)
+    exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir)
 
     # Create RunSettings
     colo_settings = exp.create_run_settings(exe=sys.executable, exe_args=test_script)
diff --git a/tests/full_wlm/test_generic_batch_launch.py b/tests/full_wlm/test_generic_batch_launch.py
index 4beccd41b..e3d07118d 100644
--- a/tests/full_wlm/test_generic_batch_launch.py
+++ b/tests/full_wlm/test_generic_batch_launch.py
@@ -39,8 +39,10 @@ def test_batch_model(fileutils, wlmutils):
     """Test the launch of a manually constructed batch model"""
 
     exp_name = "test-batch-model"
-    exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher())
     test_dir = fileutils.make_test_dir()
+    exp = Experiment(
+        exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir
+    )
 
     script = fileutils.get_test_conf_path("sleep.py")
     batch_settings = exp.create_batch_settings(nodes=1, time="00:01:00")
@@ -64,8 +66,10 @@ def test_batch_ensemble(fileutils, wlmutils):
     """Test the launch of a manually constructed batch ensemble"""
 
     exp_name = "test-batch-ensemble"
-    exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher())
     test_dir = fileutils.make_test_dir()
+    exp = Experiment(
+        exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir
+    )
 
     script = fileutils.get_test_conf_path("sleep.py")
     settings = wlmutils.get_run_settings("python", f"{script} --time=5")
@@ -89,8 +93,10 @@ def test_batch_ensemble_replicas(fileutils, wlmutils):
     exp_name = "test-batch-ensemble-replicas"
-    exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher())
     test_dir = fileutils.make_test_dir()
+    exp = Experiment(
+        exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir
+    )
 
     script = fileutils.get_test_conf_path("sleep.py")
     settings = wlmutils.get_run_settings("python", f"{script} --time=5")
diff --git 
a/tests/full_wlm/test_generic_orc_launch_batch.py b/tests/full_wlm/test_generic_orc_launch_batch.py index 7e5591a30..ab4a3dc59 100644 --- a/tests/full_wlm/test_generic_orc_launch_batch.py +++ b/tests/full_wlm/test_generic_orc_launch_batch.py @@ -41,8 +41,8 @@ def test_launch_orc_auto_batch(fileutils, wlmutils): launcher = wlmutils.get_test_launcher() exp_name = "test-launch-auto-orc-batch" - exp = Experiment(exp_name, launcher=launcher) test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) # batch = False to launch on existing allocation network_interface = wlmutils.get_test_interface() @@ -77,8 +77,8 @@ def test_launch_cluster_orc_batch_single(fileutils, wlmutils): launcher = wlmutils.get_test_launcher() exp_name = "test-launch-auto-cluster-orc-batch-single" - exp = Experiment(exp_name, launcher=launcher) test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) # batch = False to launch on existing allocation network_interface = wlmutils.get_test_interface() @@ -116,8 +116,8 @@ def test_launch_cluster_orc_batch_multi(fileutils, wlmutils): launcher = wlmutils.get_test_launcher() exp_name = "test-launch-auto-cluster-orc-batch-multi" - exp = Experiment(exp_name, launcher=launcher) test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) # batch = False to launch on existing allocation network_interface = wlmutils.get_test_interface() @@ -153,8 +153,8 @@ def test_launch_cluster_orc_reconnect(fileutils, wlmutils): """test reconnecting to clustered 3-node orchestrator""" launcher = wlmutils.get_test_launcher() exp_name = "test-launch-cluster-orc-batch-reconect" - exp = Experiment(exp_name, launcher=launcher) test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) # batch = False to launch on existing allocation network_interface = wlmutils.get_test_interface() diff --git a/tests/full_wlm/test_mpmd.py b/tests/full_wlm/test_mpmd.py index 19f4660c2..0ec9fb2c7 100644 --- a/tests/full_wlm/test_mpmd.py +++ b/tests/full_wlm/test_mpmd.py @@ -61,7 +61,8 @@ def test_mpmd(fileutils, wlmutils): "cobalt": ["mpirun"], } - exp = Experiment(exp_name, launcher=launcher) + test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) def prune_commands(launcher): available_commands = [] @@ -77,7 +78,6 @@ def prune_commands(launcher): f"MPMD on {launcher} only supported for run commands {by_launcher[launcher]}" ) - test_dir = fileutils.make_test_dir() for run_command in run_commands: script = fileutils.get_test_conf_path("sleep.py") settings = exp.create_run_settings( diff --git a/tests/on_wlm/test_base_settings_on_wlm.py b/tests/on_wlm/test_base_settings_on_wlm.py index 3aa77983f..f555336ec 100644 --- a/tests/on_wlm/test_base_settings_on_wlm.py +++ b/tests/on_wlm/test_base_settings_on_wlm.py @@ -42,8 +42,10 @@ def test_model_on_wlm(fileutils, wlmutils): exp_name = "test-base-settings-model-launch" - exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher()) test_dir = fileutils.make_test_dir() + exp = Experiment( + exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir + ) script = fileutils.get_test_conf_path("sleep.py") settings1 = wlmutils.get_base_run_settings("python", f"{script} --time=5") @@ -60,8 +62,10 @@ def test_model_on_wlm(fileutils, wlmutils): def test_model_stop_on_wlm(fileutils, wlmutils): exp_name = "test-base-settings-model-stop" - exp = 
Experiment(exp_name, launcher=wlmutils.get_test_launcher()) test_dir = fileutils.make_test_dir() + exp = Experiment( + exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir + ) script = fileutils.get_test_conf_path("sleep.py") settings1 = wlmutils.get_base_run_settings("python", f"{script} --time=5") diff --git a/tests/on_wlm/test_colocated_model.py b/tests/on_wlm/test_colocated_model.py index a38fabd06..92db78a11 100644 --- a/tests/on_wlm/test_colocated_model.py +++ b/tests/on_wlm/test_colocated_model.py @@ -47,7 +47,8 @@ def test_launch_colocated_model_defaults(fileutils, coloutils, db_type): db_args = { } - exp = Experiment("colocated_model_defaults", launcher=launcher) + test_dir = fileutils.make_test_dir() + exp = Experiment("colocated_model_defaults", launcher=launcher, exp_path=test_dir) colo_model = coloutils.setup_test_colo( fileutils, db_type, @@ -69,7 +70,12 @@ def test_launch_colocated_model_defaults(fileutils, coloutils, db_type): @pytest.mark.parametrize("db_type", supported_dbs) def test_colocated_model_disable_pinning(fileutils, coloutils, db_type): - exp = Experiment("colocated_model_pinning_auto_1cpu", launcher=launcher) + test_dir = fileutils.make_test_dir() + exp = Experiment( + "colocated_model_pinning_auto_1cpu", + launcher=launcher, + exp_path=test_dir + ) db_args = { "db_cpus": 1, "custom_pinning": [], @@ -91,7 +97,12 @@ def test_colocated_model_disable_pinning(fileutils, coloutils, db_type): @pytest.mark.parametrize("db_type", supported_dbs) def test_colocated_model_pinning_auto_2cpu(fileutils, coloutils, db_type): - exp = Experiment("colocated_model_pinning_auto_2cpu", launcher=launcher) + test_dir = fileutils.make_test_dir() + exp = Experiment( + "colocated_model_pinning_auto_2cpu", + launcher=launcher, + exp_path=test_dir, + ) db_args = { "db_cpus": 2, @@ -115,7 +126,12 @@ def test_colocated_model_pinning_range(fileutils, coloutils, db_type): # Check to make sure that the CPU mask was correctly generated # Assume that there are at least 4 cpus on the node - exp = Experiment("colocated_model_pinning_manual", launcher=launcher) + test_dir = fileutils.make_test_dir() + exp = Experiment( + "colocated_model_pinning_manual", + launcher=launcher, + exp_path=test_dir, + ) db_args = { "db_cpus": 4, @@ -139,7 +155,12 @@ def test_colocated_model_pinning_list(fileutils, coloutils, db_type): # Check to make sure that the CPU mask was correctly generated # note we presume that this has more than 2 CPUs on the supercomputer node - exp = Experiment("colocated_model_pinning_manual", launcher=launcher) + test_dir = fileutils.make_test_dir() + exp = Experiment( + "colocated_model_pinning_manual", + launcher=launcher, + exp_path=test_dir, + ) db_args = { "db_cpus": 2, @@ -163,7 +184,12 @@ def test_colocated_model_pinning_mixed(fileutils, coloutils, db_type): # Check to make sure that the CPU mask was correctly generated # note we presume that this at least 4 CPUs on the supercomputer node - exp = Experiment("colocated_model_pinning_manual", launcher=launcher) + test_dir = fileutils.make_test_dir() + exp = Experiment( + "colocated_model_pinning_manual", + launcher=launcher, + exp_path=test_dir, + ) db_args = { "db_cpus": 2, diff --git a/tests/on_wlm/test_generic_orc_launch.py b/tests/on_wlm/test_generic_orc_launch.py index 919317c73..7d8143789 100644 --- a/tests/on_wlm/test_generic_orc_launch.py +++ b/tests/on_wlm/test_generic_orc_launch.py @@ -38,8 +38,8 @@ def test_launch_orc_auto(fileutils, wlmutils): launcher = wlmutils.get_test_launcher() exp_name = 
"test-launch-auto-orc" - exp = Experiment(exp_name, launcher=launcher) test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) # batch = False to launch on existing allocation network_interface = wlmutils.get_test_interface() @@ -71,8 +71,8 @@ def test_launch_cluster_orc_single(fileutils, wlmutils): launcher = wlmutils.get_test_launcher() exp_name = "test-launch-auto-cluster-orc-single" - exp = Experiment(exp_name, launcher=launcher) test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) # batch = False to launch on existing allocation network_interface = wlmutils.get_test_interface() @@ -105,8 +105,8 @@ def test_launch_cluster_orc_multi(fileutils, wlmutils): launcher = wlmutils.get_test_launcher() exp_name = "test-launch-auto-cluster-orc-multi" - exp = Experiment(exp_name, launcher=launcher) test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) # batch = False to launch on existing allocation network_interface = wlmutils.get_test_interface() diff --git a/tests/on_wlm/test_launch_errors.py b/tests/on_wlm/test_launch_errors.py index 77ba8a69a..1b8aeb1f1 100644 --- a/tests/on_wlm/test_launch_errors.py +++ b/tests/on_wlm/test_launch_errors.py @@ -40,8 +40,10 @@ def test_failed_status(fileutils, wlmutils): """Test when a failure occurs deep into model execution""" exp_name = "test-report-failure" - exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher()) test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, + launcher=wlmutils.get_test_launcher(), + exp_path=test_dir) script = fileutils.get_test_conf_path("bad.py") settings = exp.create_run_settings( @@ -69,8 +71,8 @@ def test_bad_run_command_args(fileutils, wlmutils): pytest.skip(f"Only fails with slurm. 
Launcher is {launcher}") exp_name = "test-bad-run-command-args" - exp = Experiment(exp_name, launcher=launcher) test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) script = fileutils.get_test_conf_path("bad.py") diff --git a/tests/on_wlm/test_launch_ompi_lsf.py b/tests/on_wlm/test_launch_ompi_lsf.py index e3327514a..144b699ca 100644 --- a/tests/on_wlm/test_launch_ompi_lsf.py +++ b/tests/on_wlm/test_launch_ompi_lsf.py @@ -39,8 +39,8 @@ def test_launch_openmpi_lsf(wlmutils, fileutils): if launcher != "lsf": pytest.skip("Test only runs on systems with LSF as WLM") exp_name = "test-launch-openmpi-lsf" - exp = Experiment(exp_name, launcher=launcher) test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) script = fileutils.get_test_conf_path("sleep.py") settings = exp.create_run_settings("python", script, "mpirun") diff --git a/tests/on_wlm/test_restart.py b/tests/on_wlm/test_restart.py index 86d883358..baed9c97b 100644 --- a/tests/on_wlm/test_restart.py +++ b/tests/on_wlm/test_restart.py @@ -38,8 +38,10 @@ def test_restart(fileutils, wlmutils): exp_name = "test-restart" - exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher()) test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, + launcher=wlmutils.get_test_launcher(), + exp_path=test_dir) script = fileutils.get_test_conf_path("sleep.py") settings = exp.create_run_settings("python", f"{script} --time=5") diff --git a/tests/on_wlm/test_simple_base_settings_on_wlm.py b/tests/on_wlm/test_simple_base_settings_on_wlm.py index d46a46aae..6a8e3d24f 100644 --- a/tests/on_wlm/test_simple_base_settings_on_wlm.py +++ b/tests/on_wlm/test_simple_base_settings_on_wlm.py @@ -56,8 +56,10 @@ def test_simple_model_on_wlm(fileutils, wlmutils): ) exp_name = "test-simplebase-settings-model-launch" - exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher()) test_dir = fileutils.make_test_dir() + exp = Experiment( + exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir + ) script = fileutils.get_test_conf_path("sleep.py") settings = RunSettings("python", exe_args=f"{script} --time=5") @@ -77,8 +79,10 @@ def test_simple_model_stop_on_wlm(fileutils, wlmutils): ) exp_name = "test-simplebase-settings-model-stop" - exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher()) test_dir = fileutils.make_test_dir() + exp = Experiment( + exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir + ) script = fileutils.get_test_conf_path("sleep.py") settings = RunSettings("python", exe_args=f"{script} --time=5") diff --git a/tests/on_wlm/test_simple_entity_launch.py b/tests/on_wlm/test_simple_entity_launch.py index 16cfa8f38..f909325cb 100644 --- a/tests/on_wlm/test_simple_entity_launch.py +++ b/tests/on_wlm/test_simple_entity_launch.py @@ -48,8 +48,10 @@ def test_models(fileutils, wlmutils): exp_name = "test-models-launch" - exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher()) test_dir = fileutils.make_test_dir() + exp = Experiment( + exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir + ) script = fileutils.get_test_conf_path("sleep.py") settings = exp.create_run_settings("python", f"{script} --time=5") @@ -65,8 +67,10 @@ def test_models(fileutils, wlmutils): def test_ensemble(fileutils, wlmutils): exp_name = "test-ensemble-launch" - exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher()) test_dir = fileutils.make_test_dir() + exp = Experiment( + exp_name, 
launcher=wlmutils.get_test_launcher(), exp_path=test_dir + ) script = fileutils.get_test_conf_path("sleep.py") settings = exp.create_run_settings("python", f"{script} --time=5") @@ -84,8 +88,10 @@ def test_summary(fileutils, wlmutils): """Fairly rudimentary test of the summary dataframe""" exp_name = "test-launch-summary" - exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher()) test_dir = fileutils.make_test_dir() + exp = Experiment( + exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir + ) sleep = fileutils.get_test_conf_path("sleep.py") bad = fileutils.get_test_conf_path("bad.py") diff --git a/tests/on_wlm/test_stop.py b/tests/on_wlm/test_stop.py index a786ce1a4..7f0255f01 100644 --- a/tests/on_wlm/test_stop.py +++ b/tests/on_wlm/test_stop.py @@ -44,8 +44,10 @@ def test_stop_entity(fileutils, wlmutils): exp_name = "test-launch-stop-model" - exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher()) test_dir = fileutils.make_test_dir() + exp = Experiment( + exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir + ) script = fileutils.get_test_conf_path("sleep.py") settings = exp.create_run_settings("python", f"{script} --time=10") @@ -62,8 +64,10 @@ def test_stop_entity(fileutils, wlmutils): def test_stop_entity_list(fileutils, wlmutils): exp_name = "test-launch-stop-ensemble" - exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher()) test_dir = fileutils.make_test_dir() + exp = Experiment( + exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir + ) script = fileutils.get_test_conf_path("sleep.py") settings = exp.create_run_settings("python", f"{script} --time=10") diff --git a/tests/test_cli.py b/tests/test_cli.py index 79471a355..31fce4cd0 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -26,12 +26,15 @@ import argparse from contextlib import contextmanager +import logging +import os +import pathlib import typing as t import pytest import smartsim -from smartsim._core._cli import build, cli +from smartsim._core._cli import build, cli, plugin from smartsim._core._cli.build import configure_parser as build_parser from smartsim._core._cli.build import execute as build_execute from smartsim._core._cli.clean import configure_parser as clean_parser @@ -48,6 +51,14 @@ # The tests in this file belong to the group_a group pytestmark = pytest.mark.group_a +_TEST_LOGGER = logging.getLogger(__name__) + +try: + import smartdashboard +except ImportError: + test_dash_plugin = False +else: + test_dash_plugin = True def mock_execute_custom(msg: str = None, good: bool = True) -> int: retval = 0 if good else 1 return retval -def mock_execute_good(_ns: argparse.Namespace) -> int: +def mock_execute_good(_ns: argparse.Namespace, _unparsed: t.Optional[t.List[str]] = None) -> int: return mock_execute_custom("GOOD THINGS", good = True) -def mock_execute_fail(_ns: argparse.Namespace) -> int: +def mock_execute_fail(_ns: argparse.Namespace, _unparsed: t.Optional[t.List[str]] = None) -> int: return mock_execute_custom("BAD THINGS", good = False) @@ -220,8 +231,8 @@ def test_cli_command_execution(capsys): exp_b_help = "this is my mock help text for build" exp_b_cmd = "build" - dbcli_exec = lambda x: mock_execute_custom(msg="Database", good=True) - build_exec = lambda x: mock_execute_custom(msg="Builder", good=True) + dbcli_exec = lambda x, y: mock_execute_custom(msg="Database", good=True) + build_exec = lambda x, y: mock_execute_custom(msg="Builder", good=True) menu 
= [cli.MenuItemConfig(exp_a_cmd, exp_a_help, @@ -269,7 +280,7 @@ def test_cli_default_cli(capsys): # show that `smart dbcli` calls the build parser and build execute function assert "usage: smart [-h] " in captured.out assert "Available commands" in captured.out - assert ret_val == 0 + assert ret_val == os.EX_USAGE # execute with `build` argument, expect build-specific help text with pytest.raises(SystemExit) as e: @@ -281,7 +292,7 @@ def test_cli_default_cli(capsys): assert "usage: smart build [-h]" in captured.out assert "Build SmartSim dependencies" in captured.out assert "optional arguments:" in captured.out or "options:" in captured.out - assert ret_val == 0 + assert ret_val == os.EX_USAGE # execute with `clean` argument, expect clean-specific help text with pytest.raises(SystemExit) as e: @@ -294,7 +305,7 @@ def test_cli_default_cli(capsys): assert "Remove previous ML runtime installation" in captured.out assert "optional arguments:" in captured.out or "options:" in captured.out assert "--clobber" in captured.out - assert ret_val == 0 + assert ret_val == os.EX_USAGE # execute with `dbcli` argument, expect dbcli-specific help text with pytest.raises(SystemExit) as e: @@ -306,7 +317,7 @@ def test_cli_default_cli(capsys): assert "usage: smart dbcli [-h]" in captured.out assert "Print the path to the redis-cli binary" in captured.out assert "optional arguments:" in captured.out or "options:" in captured.out - assert ret_val == 0 + assert ret_val == os.EX_USAGE # execute with `site` argument, expect site-specific help text with pytest.raises(SystemExit) as e: @@ -318,7 +329,7 @@ def test_cli_default_cli(capsys): assert "usage: smart site [-h]" in captured.out assert "Print the installation site of SmartSim" in captured.out assert "optional arguments:" in captured.out or "options:" in captured.out - assert ret_val == 0 + assert ret_val == os.EX_USAGE # execute with `clobber` argument, expect clobber-specific help text with pytest.raises(SystemExit) as e: @@ -331,8 +342,61 @@ def test_cli_default_cli(capsys): assert "Remove all previous dependency installations" in captured.out assert "optional arguments:" in captured.out or "options:" in captured.out # assert "--clobber" not in captured.out - assert ret_val == 0 + assert ret_val == os.EX_USAGE + + +@pytest.mark.skipif(not test_dash_plugin, reason="plugin not found") +def test_cli_plugin_dashboard(capfd): + """Ensure expected dashboard CLI plugin commands are supported""" + smart_cli = cli.default_cli() + capfd.readouterr() # throw away existing output + + # execute with `dashboard` argument, expect dashboard-specific help text + build_args = ["smart", "dashboard", "-h"] + rc = smart_cli.execute(build_args) + + captured = capfd.readouterr() # capture new output + + assert "[-d DIRECTORY]" in captured.out + assert "[-p PORT]" in captured.out + + assert "optional arguments:" in captured.out + assert rc == 0 + + +def test_cli_plugin_invalid( + monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +): + """Ensure unexpected CLI plugins are reported""" + import smartsim._core._cli.cli + import smartsim._core._cli.plugin + plugin_module = "notinstalled.Experiment_Overview" + bad_plugins = [ + lambda: MenuItemConfig( + "dashboard", + "Start the SmartSim dashboard", + plugin.dynamic_execute(plugin_module, "Dashboard!"), + is_plugin=True, + ) + ] + monkeypatch.setattr(smartsim._core._cli.cli, "plugins", bad_plugins) + # Coloredlogs doesn't play nice with caplog + monkeypatch.setattr( + smartsim._core._cli.plugin, + "_LOGGER", + 
_TEST_LOGGER, + ) + + smart_cli = cli.default_cli() + # execute with `dashboard` argument, expect failure to find dashboard plugin + build_args = ["smart", "dashboard", "-h"] + + rc = smart_cli.execute(build_args) + + assert plugin_module in caplog.text + assert "not found" in caplog.text + assert rc == os.EX_CONFIG @pytest.mark.parametrize( "command,mock_location,exp_output", @@ -348,7 +412,7 @@ def test_cli_default_cli(capsys): ) def test_cli_action(capsys, monkeypatch, command, mock_location, exp_output): """Ensure the default CLI executes the build action""" - def mock_execute(ns: argparse.Namespace): + def mock_execute(ns: argparse.Namespace, _unparsed: t.Optional[t.List[str]] = None): print(exp_output) return 0 @@ -400,7 +464,7 @@ def test_cli_optional_args(capsys, check_prop: str, exp_prop_val: t.Any): """Ensure the parser for a command handles expected optional arguments""" - def mock_execute(ns: argparse.Namespace): + def mock_execute(ns: argparse.Namespace, _unparsed: t.Optional[t.List[str]] = None): print(exp_output) return 0 @@ -418,9 +482,6 @@ def mock_execute(ns: argparse.Namespace): assert exp_output in captured.out # did the expected execution method occur? assert ret_val == 0 # is the retval is non-failure code? - - # is the value from the optional argument set in the parsed args? - assert smart_cli.args.__dict__[check_prop] == exp_prop_val else: with pytest.raises(SystemExit) as e: ret_val = smart_cli.execute(build_args) @@ -449,7 +510,7 @@ def test_cli_help_support(capsys, mock_output: str, exp_output: str): """Ensure the parser supports help optional for commands as expected""" - def mock_execute(ns: argparse.Namespace): + def mock_execute(ns: argparse.Namespace, unparsed: t.Optional[t.List[str]] = None): print(mock_output) return 0 @@ -487,7 +548,7 @@ def test_cli_invalid_optional_args(capsys, mock_location: str, exp_output: str): """Ensure the parser throws expected error for an invalid argument""" - def mock_execute(ns: argparse.Namespace): + def mock_execute(ns: argparse.Namespace, unparsed: t.Optional[t.List[str]] = None): print(exp_output) return 0 @@ -540,12 +601,12 @@ def test_cli_full_clean_execute(capsys, monkeypatch): exp_retval = 0 exp_output = "mocked-clean utility" - def mock_operation(*args, **kwargs) -> int: + # mock out the internal clean method so we don't actually delete anything + def mock_clean(core_path: pathlib.Path, _all: bool = False) -> int: print(exp_output) return exp_retval - - # mock out the internal clean method so we don't actually delete anything - monkeypatch.setattr(smartsim._core._cli.clean, "clean", mock_operation) + + monkeypatch.setattr(smartsim._core._cli.clean, "clean", mock_clean) command = "clean" cfg = MenuItemConfig(command, @@ -692,7 +753,7 @@ def mock_operation(*args, **kwargs) -> int: def _good_build(*args, **kwargs): - print("LGTM") + _TEST_LOGGER.info("LGTM") def _bad_build(*args, **kwargs): @@ -707,17 +768,17 @@ def _mock_temp_dir(*a, **kw): @pytest.mark.parametrize( "mock_verify_fn, expected_stdout, expected_retval", [ - pytest.param(_good_build, 'LGTM', 0, id="Configured Correctly"), + pytest.param(_good_build, 'LGTM', os.EX_OK, id="Configured Correctly"), pytest.param( _bad_build, "SmartSim failed to run a simple experiment", - 2, + os.EX_SOFTWARE, id="Configured Incorrectly", ) ], ) -def test_cli_build_test_execute( - capsys, +def test_cli_validation_test_execute( + caplog, monkeypatch, mock_verify_fn, expected_stdout, @@ -728,6 +789,7 @@ def test_cli_build_test_execute( checks that if at any point the test raises 
an exception an appropriate error code and error msg are returned. """ + caplog.set_level(logging.INFO) # Mock out the verification tests/avoid file system ops monkeypatch.setattr(smartsim._core._cli.validate, "test_install", mock_verify_fn) @@ -736,11 +798,11 @@ def test_cli_build_test_execute( "_VerificationTempDir", _mock_temp_dir, ) - # Coloredlogs doesn't play nice with capsys + # Coloredlogs doesn't play nice with caplog monkeypatch.setattr( - smartsim._core._cli.validate.logger, - "error", - print, + smartsim._core._cli.validate, + "logger", + _TEST_LOGGER, ) command = "validate" @@ -751,12 +813,8 @@ def test_cli_build_test_execute( menu = [cfg] smart_cli = cli.SmartCli(menu) - captured = capsys.readouterr() # throw away existing output - verify_args = ["smart", command] actual_retval = smart_cli.execute(verify_args) - captured = capsys.readouterr() # capture new output - - assert expected_stdout in captured.out + assert expected_stdout in caplog.text assert actual_retval == expected_retval diff --git a/tests/test_colo_model_local.py b/tests/test_colo_model_local.py index df5f65350..8cd8a575a 100644 --- a/tests/test_colo_model_local.py +++ b/tests/test_colo_model_local.py @@ -49,7 +49,8 @@ def test_macosx_warning(fileutils, coloutils): db_args = {"custom_pinning": [1]} db_type = "uds" # Test is insensitive to choice of db - exp = Experiment("colocated_model_defaults", launcher="local") + test_dir = fileutils.make_test_dir() + exp = Experiment("colocated_model_defaults", launcher="local", exp_path=test_dir) with pytest.warns( RuntimeWarning, match="CPU pinning is not supported on MacOSX. Ignoring pinning specification.", @@ -67,7 +68,8 @@ def test_unsupported_limit_app(fileutils, coloutils): db_args = {"limit_app_cpus": True} db_type = "uds" # Test is insensitive to choice of db - exp = Experiment("colocated_model_defaults", launcher="local") + test_dir = fileutils.make_test_dir() + exp = Experiment("colocated_model_defaults", launcher="local", exp_path=test_dir) with pytest.raises(SSUnsupportedError): coloutils.setup_test_colo( fileutils, @@ -84,7 +86,8 @@ def test_unsupported_custom_pinning(fileutils, coloutils, custom_pinning): db_type = "uds" # Test is insensitive to choice of db db_args = {"custom_pinning": custom_pinning} - exp = Experiment("colocated_model_defaults", launcher="local") + test_dir = fileutils.make_test_dir() + exp = Experiment("colocated_model_defaults", launcher="local", exp_path=test_dir) with pytest.raises(TypeError): coloutils.setup_test_colo( fileutils, @@ -120,7 +123,8 @@ def test_launch_colocated_model_defaults( db_args = {} - exp = Experiment("colocated_model_defaults", launcher=launcher) + test_dir = fileutils.make_test_dir() + exp = Experiment("colocated_model_defaults", launcher=launcher, exp_path=test_dir) colo_model = coloutils.setup_test_colo( fileutils, db_type, @@ -150,12 +154,12 @@ def test_launch_colocated_model_defaults( def test_launch_multiple_colocated_models( fileutils, coloutils, wlmutils, db_type, launcher="local" ): - """Test the concurrent launch of two models with a colocated database and local launcher - """ + """Test the concurrent launch of two models with a colocated database and local launcher""" db_args = {} - exp = Experiment("multi_colo_models", launcher=launcher) + test_dir = fileutils.make_test_dir() + exp = Experiment("multi_colo_models", launcher=launcher, exp_path=test_dir) colo_models = [ coloutils.setup_test_colo( fileutils, @@ -191,7 +195,10 @@ def test_launch_multiple_colocated_models( def 
test_colocated_model_disable_pinning( fileutils, coloutils, db_type, launcher="local" ): - exp = Experiment("colocated_model_pinning_auto_1cpu", launcher=launcher) + test_dir = fileutils.make_test_dir() + exp = Experiment( + "colocated_model_pinning_auto_1cpu", launcher=launcher, exp_path=test_dir + ) db_args = { "db_cpus": 1, "custom_pinning": [], @@ -214,7 +221,10 @@ def test_colocated_model_disable_pinning( def test_colocated_model_pinning_auto_2cpu( fileutils, coloutils, db_type, launcher="local" ): - exp = Experiment("colocated_model_pinning_auto_2cpu", launcher=launcher) + test_dir = fileutils.make_test_dir() + exp = Experiment( + "colocated_model_pinning_auto_2cpu", launcher=launcher, exp_path=test_dir + ) db_args = { "db_cpus": 2, @@ -245,7 +255,10 @@ def test_colocated_model_pinning_auto_2cpu( def test_colocated_model_pinning_range(fileutils, coloutils, db_type, launcher="local"): # Check to make sure that the CPU mask was correctly generated - exp = Experiment("colocated_model_pinning_manual", launcher=launcher) + test_dir = fileutils.make_test_dir() + exp = Experiment( + "colocated_model_pinning_manual", launcher=launcher, exp_path=test_dir + ) db_args = {"db_cpus": 2, "custom_pinning": range(2)} @@ -267,7 +280,10 @@ def test_colocated_model_pinning_range(fileutils, coloutils, db_type, launcher=" def test_colocated_model_pinning_list(fileutils, coloutils, db_type, launcher="local"): # Check to make sure that the CPU mask was correctly generated - exp = Experiment("colocated_model_pinning_manual", launcher=launcher) + test_dir = fileutils.make_test_dir() + exp = Experiment( + "colocated_model_pinning_manual", launcher=launcher, exp_path=test_dir + ) db_args = {"db_cpus": 1, "custom_pinning": [1]} diff --git a/tests/test_config.py b/tests/test_config.py index e33ea7dfd..2321e008f 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -190,3 +190,57 @@ def test_redis_cli(): with pytest.raises(SSConfigError): config.database_cli os.environ.pop("REDIS_CLI_PATH") + + +@pytest.mark.parametrize( + "value, exp_result", [ + pytest.param("0", False, id="string zero"), + pytest.param("1", True, id="string one"), + pytest.param("-1", False, id="string negative one"), + pytest.param(None, False, id="not in env"), + ] +) +def test_telemetry_flag(monkeypatch: pytest.MonkeyPatch, + value: t.Optional[str], + exp_result: bool): + if value is not None: + monkeypatch.setenv("SMARTSIM_FLAG_TELEMETRY", value) + else: + monkeypatch.delenv("SMARTSIM_FLAG_TELEMETRY", raising=False) + config = Config() + assert config.telemetry_enabled == exp_result +@pytest.mark.parametrize( + "value, exp_result", [ + pytest.param("1", 1, id="1"), + pytest.param("123", 123, id="123"), + pytest.param(None, 5, id="not in env"), + ] +) +def test_telemetry_frequency( + monkeypatch: pytest.MonkeyPatch, value: t.Optional[str], exp_result: int +): + if value is not None: + monkeypatch.setenv("SMARTSIM_TELEMETRY_FREQUENCY", value) + else: + monkeypatch.delenv("SMARTSIM_TELEMETRY_FREQUENCY", raising=False) + config = Config() + assert config.telemetry_frequency == exp_result + + +@pytest.mark.parametrize( + "value, exp_result", [ + pytest.param("30", 30, id="30"), + pytest.param("123", 123, id="123"), + pytest.param(None, 90, id="not in env"), + ] +) +def test_telemetry_cooldown( + monkeypatch: pytest.MonkeyPatch, value: t.Optional[str], exp_result: int +): + if value is not None: + monkeypatch.setenv("SMARTSIM_TELEMETRY_COOLDOWN", value) + else: + monkeypatch.delenv("SMARTSIM_TELEMETRY_COOLDOWN", raising=False) + 
config = Config() + assert config.telemetry_cooldown == exp_result diff --git a/tests/test_configs/echo.py b/tests/test_configs/echo.py new file mode 100644 index 000000000..8a34a0b6f --- /dev/null +++ b/tests/test_configs/echo.py @@ -0,0 +1,42 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2023, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import argparse +import time + + +def echo(message: str, sleep_time: int): + if sleep_time > 0: + time.sleep(sleep_time) + print(f"Echoing: {message}") + +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + parser.add_argument("--message", type=str, default="Lorem ipsum") + parser.add_argument("--sleep_time", type=int, default=0) + args = parser.parse_args() + echo(args.message, args.sleep_time) diff --git a/tests/test_configs/printing_model.py b/tests/test_configs/printing_model.py new file mode 100644 index 000000000..044b2a03b --- /dev/null +++ b/tests/test_configs/printing_model.py @@ -0,0 +1,18 @@ +import time +import sys + + +def main() -> int: + print(";START;") + time.sleep(20) + print(";MID;") + print("This is an error msg", file=sys.stderr) + time.sleep(20) + print(";END;") + + print("yay!!") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/test_configs/telemetry/colocatedmodel.json b/tests/test_configs/telemetry/colocatedmodel.json new file mode 100644 index 000000000..f3e93ac76 --- /dev/null +++ b/tests/test_configs/telemetry/colocatedmodel.json @@ -0,0 +1,69 @@ +{ + "schema info": { + "schema_name": "entity manifest", + "version": "0.0.1" + }, + "experiment": { + "name": "my-exp", + "path": "/tmp/my-exp", + "launcher": "Slurm" + }, + "runs": [ + { + "run_id": "002816b", + "timestamp": 1699037041106269774, + "model": [ + { + "name": "colocated_model", + "path": "/tmp/my-exp/colocated_model", + "exe_args": [ + "/path/to/my/script.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": {} + }, + "batch_settings": {}, + "params": {}, + "files": { + "Symlink": [], + "Configure": [], + "Copy": [] + }, + "colocated_db": { + "settings": { + "unix_socket": "/tmp/redis.socket", + "socket_permissions": 755, + "port": 0, + "cpus": 1, + 
"custom_pinning": "0", + "debug": false, + "db_identifier": "", + "rai_args": { + "threads_per_queue": null, + "inter_op_parallelism": null, + "intra_op_parallelism": null + }, + "extra_db_args": {} + }, + "scripts": [], + "models": [] + }, + "telemetry_metadata": { + "status_dir": "/tmp/my-exp/.smartsim/telemetry/telemetry_ensemble/002816b/model/colocated_model", + "step_id": "4139111.21", + "task_id": "21529", + "managed": true + }, + "out_file": "/tmp/my-exp/colocated_model/colocated_model.out", + "err_file": "/tmp/my-exp/colocated_model/colocated_model.err" + } + ], + "orchestrator": [], + "ensemble": [] + } + ] +} diff --git a/tests/test_configs/telemetry/db_and_model.json b/tests/test_configs/telemetry/db_and_model.json new file mode 100644 index 000000000..58c1c841a --- /dev/null +++ b/tests/test_configs/telemetry/db_and_model.json @@ -0,0 +1,86 @@ +{ + "schema info": { + "schema_name": "entity manifest", + "version": "0.0.1" + }, + "experiment": { + "name": "my-exp", + "path": "/tmp/my-exp", + "launcher": "Slurm" + }, + "runs": [ + { + "run_id": "2ca19ad", + "timestamp": 1699038647234488933, + "model": [], + "orchestrator": [ + { + "name": "orchestrator", + "type": "redis", + "interface": [ + "ipogif0" + ], + "shards": [ + { + "name": "orchestrator_0", + "hostname": "10.128.0.4", + "port": 6780, + "cluster": false, + "conf_file": null, + "out_file": "/path/to/some/file.out", + "err_file": "/path/to/some/file.err", + "telemetry_metadata": { + "status_dir": "/tmp/my-exp/.smartsim/telemetry/telemetry_db_and_model/2ca19ad/database/orchestrator/orchestrator_0", + "step_id": "4139111.27", + "task_id": "1452", + "managed": true + } + } + ] + } + ], + "ensemble": [] + }, + { + "run_id": "4b5507a", + "timestamp": 1699038661491043211, + "model": [ + { + "name": "perroquet", + "path": "/tmp/my-exp/perroquet", + "exe_args": [ + "/path/to/my/script.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks-per-node": 1 + } + }, + "batch_settings": {}, + "params": {}, + "files": { + "Symlink": [], + "Configure": [], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/tmp/my-exp/.smartsim/telemetry/telemetry_db_and_model/4b5507a/model/perroquet", + "step_id": "4139111.28", + "task_id": "2929", + "managed": true + }, + "out_file": "/tmp/my-exp/perroquet/perroquet.out", + "err_file": "/tmp/my-exp/perroquet/perroquet.err" + } + ], + "orchestrator": [], + "ensemble": [] + } + ] +} diff --git a/tests/test_configs/telemetry/db_and_model_1run.json b/tests/test_configs/telemetry/db_and_model_1run.json new file mode 100644 index 000000000..44e32bfe4 --- /dev/null +++ b/tests/test_configs/telemetry/db_and_model_1run.json @@ -0,0 +1,79 @@ +{ + "schema info": { + "schema_name": "entity manifest", + "version": "0.0.1" + }, + "experiment": { + "name": "my-exp", + "path": "/tmp/my-exp", + "launcher": "Slurm" + }, + "runs": [ + { + "run_id": "4b5507a", + "timestamp": 1699038661491043211, + "model": [ + { + "name": "perroquet", + "path": "/tmp/my-exp/perroquet", + "exe_args": [ + "/path/to/my/script.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks-per-node": 1 + } + }, + "batch_settings": {}, + "params": {}, + "files": { + "Symlink": [], + "Configure": [], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": 
"/tmp/my-exp/.smartsim/telemetry/telemetry_db_and_model/4b5507a/model/perroquet", + "step_id": "4139111.28", + "task_id": "2929", + "managed": true + }, + "out_file": "/tmp/my-exp/perroquet/perroquet.out", + "err_file": "/tmp/my-exp/perroquet/perroquet.err" + } + ], + "orchestrator": [ + { + "name": "orchestrator", + "type": "redis", + "interface": [ + "ipogif0" + ], + "shards": [ + { + "name": "orchestrator_0", + "hostname": "10.128.0.4", + "port": 6780, + "cluster": false, + "conf_file": null, + "out_file": "/path/to/some/file.out", + "err_file": "/path/to/some/file.err", + "telemetry_metadata": { + "status_dir": "/tmp/my-exp/.smartsim/telemetry/telemetry_db_and_model/2ca19ad/database/orchestrator/orchestrator_0", + "step_id": "4139111.27", + "task_id": "1452", + "managed": true + } + } + ] + } + ], + "ensemble": [] + } + ] +} diff --git a/tests/test_configs/telemetry/ensembles.json b/tests/test_configs/telemetry/ensembles.json new file mode 100644 index 000000000..841324ec6 --- /dev/null +++ b/tests/test_configs/telemetry/ensembles.json @@ -0,0 +1,329 @@ +{ + "schema info": { + "schema_name": "entity manifest", + "version": "0.0.1" + }, + "experiment": { + "name": "my-exp", + "path": "/home/someuser/code/ss/my-exp", + "launcher": "Local" + }, + "runs": [ + { + "run_id": "d041b90", + "timestamp": 1698679830384608928, + "model": [], + "orchestrator": [], + "ensemble": [ + { + "name": "my-ens", + "params": { + "START": [ + "spam", + "foo" + ], + "MID": [ + "eggs", + "bar" + ], + "END": [ + "ham", + "baz" + ] + }, + "batch_settings": {}, + "models": [ + { + "name": "my-ens_0", + "path": "/home/someuser/code/ss", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" + ], + "run_command": null, + "run_args": {} + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "eggs", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/home/someuser/code/ss/manifest/demo/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_0", + "step_id": null, + "task_id": "88118", + "managed": false + }, + "out_file": "/home/someuser/code/ss/my-ens_0.out", + "err_file": "/home/someuser/code/ss/my-ens_0.err" + }, + { + "name": "my-ens_1", + "path": "/home/someuser/code/ss", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" + ], + "run_command": null, + "run_args": {} + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "eggs", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/home/someuser/code/ss/manifest/demo/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_1", + "step_id": null, + "task_id": "88131", + "managed": false + }, + "out_file": "/home/someuser/code/ss/my-ens_1.out", + "err_file": "/home/someuser/code/ss/my-ens_1.err" + }, + { + "name": "my-ens_2", + "path": "/home/someuser/code/ss", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" + ], + "run_command": null, + "run_args": {} + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "bar", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/home/someuser/code/ss/manifest/demo/yo.py" 
+ ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_2", + "step_id": null, + "task_id": "88146", + "managed": false + }, + "out_file": "/home/someuser/code/ss/my-ens_2.out", + "err_file": "/home/someuser/code/ss/my-ens_2.err" + }, + { + "name": "my-ens_3", + "path": "/home/someuser/code/ss", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" + ], + "run_command": null, + "run_args": {} + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "bar", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/home/someuser/code/ss/manifest/demo/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_3", + "step_id": null, + "task_id": "88170", + "managed": false + }, + "out_file": "/home/someuser/code/ss/my-ens_3.out", + "err_file": "/home/someuser/code/ss/my-ens_3.err" + }, + { + "name": "my-ens_4", + "path": "/home/someuser/code/ss", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" + ], + "run_command": null, + "run_args": {} + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "eggs", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/home/someuser/code/ss/manifest/demo/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_4", + "step_id": null, + "task_id": "88178", + "managed": false + }, + "out_file": "/home/someuser/code/ss/my-ens_4.out", + "err_file": "/home/someuser/code/ss/my-ens_4.err" + }, + { + "name": "my-ens_5", + "path": "/home/someuser/code/ss", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" + ], + "run_command": null, + "run_args": {} + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "eggs", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/home/someuser/code/ss/manifest/demo/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_5", + "step_id": null, + "task_id": "88193", + "managed": false + }, + "out_file": "/home/someuser/code/ss/my-ens_5.out", + "err_file": "/home/someuser/code/ss/my-ens_5.err" + }, + { + "name": "my-ens_6", + "path": "/home/someuser/code/ss", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" + ], + "run_command": null, + "run_args": {} + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "bar", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/home/someuser/code/ss/manifest/demo/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_6", + "step_id": null, + "task_id": "88221", + "managed": false + }, + "out_file": "/home/someuser/code/ss/my-ens_6.out", + "err_file": "/home/someuser/code/ss/my-ens_6.err" + }, + { + "name": "my-ens_7", + "path": "/home/someuser/code/ss", + "exe_args": [ + "yo.py" + ], 
+ "run_settings": { + "exe": [ + "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" + ], + "run_command": null, + "run_args": {} + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "bar", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/home/someuser/code/ss/manifest/demo/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_7", + "step_id": null, + "task_id": "88241", + "managed": false + }, + "out_file": "/home/someuser/code/ss/my-ens_7.out", + "err_file": "/home/someuser/code/ss/my-ens_7.err" + } + ] + } + ] + } + ] + } diff --git a/tests/test_configs/telemetry/serialmodels.json b/tests/test_configs/telemetry/serialmodels.json new file mode 100644 index 000000000..40337eceb --- /dev/null +++ b/tests/test_configs/telemetry/serialmodels.json @@ -0,0 +1,186 @@ +{ + "schema info": { + "schema_name": "entity manifest", + "version": "0.0.1" + }, + "experiment": { + "name": "my-exp", + "path": "/tmp/my-exp", + "launcher": "Slurm" + }, + "runs": [ + { + "run_id": "8c0fbb1", + "timestamp": 1699037881502730708, + "model": [ + { + "name": "perroquet_0", + "path": "/tmp/my-exp/perroquet_0", + "exe_args": [ + "/tmp/echo.py" + ], + "run_settings": { + "exe": [ + "/path/to/some/python" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks-per-node": 1 + } + }, + "batch_settings": {}, + "params": {}, + "files": { + "Symlink": [], + "Configure": [], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/tmp/my-exp/.smartsim/telemetry/telemetry_serial_models/8c0fbb1/model/perroquet_0", + "step_id": "4139111.22", + "task_id": "17966", + "managed": true + }, + "out_file": "/tmp/my-exp/perroquet_0/perroquet_0.out", + "err_file": "/tmp/my-exp/perroquet_0/perroquet_0.err" + }, + { + "name": "perroquet_1", + "path": "/tmp/my-exp/perroquet_1", + "exe_args": [ + "/tmp/echo.py" + ], + "run_settings": { + "exe": [ + "/path/to/some/python" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks-per-node": 1 + } + }, + "batch_settings": {}, + "params": {}, + "files": { + "Symlink": [], + "Configure": [], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/tmp/my-exp/.smartsim/telemetry/telemetry_serial_models/8c0fbb1/model/perroquet_1", + "step_id": "4139111.23", + "task_id": "18100", + "managed": true + }, + "out_file": "/tmp/my-exp/perroquet_1/perroquet_1.out", + "err_file": "/tmp/my-exp/perroquet_1/perroquet_1.err" + }, + { + "name": "perroquet_2", + "path": "/tmp/my-exp/perroquet_2", + "exe_args": [ + "/tmp/echo.py" + ], + "run_settings": { + "exe": [ + "/path/to/some/python" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks-per-node": 1 + } + }, + "batch_settings": {}, + "params": {}, + "files": { + "Symlink": [], + "Configure": [], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/tmp/my-exp/.smartsim/telemetry/telemetry_serial_models/8c0fbb1/model/perroquet_2", + "step_id": "4139111.24", + "task_id": "18159", + "managed": true + }, + "out_file": "/tmp/my-exp/perroquet_2/perroquet_2.out", + "err_file": "/tmp/my-exp/perroquet_2/perroquet_2.err" + }, + { + "name": "perroquet_3", + "path": "/tmp/my-exp/perroquet_3", + "exe_args": [ + "/tmp/echo.py" + ], + "run_settings": { + "exe": [ + "/path/to/some/python" + ], + "run_command": 
"/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks-per-node": 1 + } + }, + "batch_settings": {}, + "params": {}, + "files": { + "Symlink": [], + "Configure": [], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/tmp/my-exp/.smartsim/telemetry/telemetry_serial_models/8c0fbb1/model/perroquet_3", + "step_id": "4139111.25", + "task_id": "18499", + "managed": true + }, + "out_file": "/tmp/my-exp/perroquet_3/perroquet_3.out", + "err_file": "/tmp/my-exp/perroquet_3/perroquet_3.err" + }, + { + "name": "perroquet_4", + "path": "/tmp/my-exp/perroquet_4", + "exe_args": [ + "/tmp/echo.py" + ], + "run_settings": { + "exe": [ + "/path/to/some/python" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks-per-node": 1 + } + }, + "batch_settings": {}, + "params": {}, + "files": { + "Symlink": [], + "Configure": [], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/tmp/my-exp/.smartsim/telemetry/telemetry_serial_models/8c0fbb1/model/perroquet_4", + "step_id": "4139111.26", + "task_id": "18832", + "managed": true + }, + "out_file": "/tmp/my-exp/perroquet_4/perroquet_4.out", + "err_file": "/tmp/my-exp/perroquet_4/perroquet_4.err" + } + ], + "orchestrator": [], + "ensemble": [] + } + ] +} diff --git a/tests/test_configs/telemetry/telemetry.json b/tests/test_configs/telemetry/telemetry.json new file mode 100644 index 000000000..a380bc5fb --- /dev/null +++ b/tests/test_configs/telemetry/telemetry.json @@ -0,0 +1,946 @@ +{ + "experiment": { + "name": "my-exp", + "path": "/path/to/my-exp", + "launcher": "Slurm" + }, + "runs": [ + { + "run_id": "d999ad89-020f-4e6a-b834-dbd88658ce84", + "timestamp": 1697824072792854287, + "model": [ + { + "name": "my-model", + "path": "/path/to/my-exp/my-model", + "exe_args": [ + "hello", + "world" + ], + "run_settings": { + "exe": [ + "/usr/bin/echo" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": {}, + "files": { + "Symlink": [], + "Configure": [], + "Copy": [] + }, + "colocated_db": { + "settings": { + "port": 5757, + "ifname": "lo", + "cpus": 1, + "custom_pinning": "0", + "debug": false, + "db_identifier": "COLO", + "rai_args": { + "threads_per_queue": null, + "inter_op_parallelism": null, + "intra_op_parallelism": null + }, + "extra_db_args": {} + }, + "scripts": [], + "models": [ + { + "cnn": { + "backend": "TORCH", + "device": "CPU" + } + } + ] + }, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d999ad89-020f-4e6a-b834-dbd88658ce84/model/my-model", + "step_id": "4121050.30", + "task_id": "25230", + "managed": true + }, + "out_file": "/path/to/my-exp/my-model/my-model.out", + "err_file": "/path/to/my-exp/my-model/my-model.err" + } + ], + "orchestrator": [], + "ensemble": [] + }, + { + "run_id": "fd3cd1a8-cb8f-4f61-b847-73a8eb0881fa", + "timestamp": 1697824102122439975, + "model": [], + "orchestrator": [ + { + "name": "orchestrator", + "type": "redis", + "interface": [ + "ipogif0" + ], + "shards": [ + { + "name": "orchestrator_1", + "hostname": "10.128.0.70", + "port": 2424, + "cluster": true, + "conf_file": "nodes-orchestrator_1-2424.conf", + "out_file": "/path/to/my-exp/orchestrator/orchestrator.out", + "err_file": "/path/to/my-exp/orchestrator/orchestrator.err", + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/fd3cd1a8-cb8f-4f61-b847-73a8eb0881fa/database/orchestrator/orchestrator", 
+ "step_id": "4121050.31+2", + "task_id": "25241", + "managed": true + } + }, + { + "name": "orchestrator_2", + "hostname": "10.128.0.71", + "port": 2424, + "cluster": true, + "conf_file": "nodes-orchestrator_2-2424.conf", + "out_file": "/path/to/my-exp/orchestrator/orchestrator.out", + "err_file": "/path/to/my-exp/orchestrator/orchestrator.err", + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/fd3cd1a8-cb8f-4f61-b847-73a8eb0881fa/database/orchestrator/orchestrator", + "step_id": "4121050.31+2", + "task_id": "25241", + "managed": true + } + }, + { + "name": "orchestrator_0", + "hostname": "10.128.0.69", + "port": 2424, + "cluster": true, + "conf_file": "nodes-orchestrator_0-2424.conf", + "out_file": "/path/to/my-exp/orchestrator/orchestrator.out", + "err_file": "/path/to/my-exp/orchestrator/orchestrator.err", + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/fd3cd1a8-cb8f-4f61-b847-73a8eb0881fa/database/orchestrator/orchestrator", + "step_id": "4121050.31+2", + "task_id": "25241", + "managed": true + } + } + ] + } + ], + "ensemble": [] + }, + { + "run_id": "d65ae1df-cb5e-45d9-ab09-6fa641755997", + "timestamp": 1697824127962219505, + "model": [], + "orchestrator": [], + "ensemble": [ + { + "name": "my-ens", + "params": { + "START": [ + "spam", + "foo" + ], + "MID": [ + "eggs", + "bar" + ], + "END": [ + "ham", + "baz" + ] + }, + "batch_settings": {}, + "models": [ + { + "name": "my-ens_0", + "path": "/path/to/my-exp/my-ens/my-ens_0", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "eggs", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_0", + "step_id": "4121050.32", + "task_id": "25639", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_0/my-ens_0.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_0/my-ens_0.err" + }, + { + "name": "my-ens_1", + "path": "/path/to/my-exp/my-ens/my-ens_1", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "eggs", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_1", + "step_id": "4121050.33", + "task_id": "25768", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_1/my-ens_1.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_1/my-ens_1.err" + }, + { + "name": "my-ens_2", + "path": "/path/to/my-exp/my-ens/my-ens_2", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "bar", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + 
"colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_2", + "step_id": "4121050.34", + "task_id": "25817", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_2/my-ens_2.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_2/my-ens_2.err" + }, + { + "name": "my-ens_3", + "path": "/path/to/my-exp/my-ens/my-ens_3", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "bar", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_3", + "step_id": "4121050.35", + "task_id": "25837", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_3/my-ens_3.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_3/my-ens_3.err" + }, + { + "name": "my-ens_4", + "path": "/path/to/my-exp/my-ens/my-ens_4", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "eggs", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_4", + "step_id": "4121050.36", + "task_id": "25872", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_4/my-ens_4.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_4/my-ens_4.err" + }, + { + "name": "my-ens_5", + "path": "/path/to/my-exp/my-ens/my-ens_5", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "eggs", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_5", + "step_id": "4121050.37", + "task_id": "25930", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_5/my-ens_5.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_5/my-ens_5.err" + }, + { + "name": "my-ens_6", + "path": "/path/to/my-exp/my-ens/my-ens_6", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "bar", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_6", + "step_id": "4121050.38", + "task_id": "25945", + "managed": true + }, + "out_file": 
"/path/to/my-exp/my-ens/my-ens_6/my-ens_6.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_6/my-ens_6.err" + }, + { + "name": "my-ens_7", + "path": "/path/to/my-exp/my-ens/my-ens_7", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "bar", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_7", + "step_id": "4121050.39", + "task_id": "25967", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_7/my-ens_7.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_7/my-ens_7.err" + } + ] + } + ] + }, + { + "run_id": "e41f8e17-c4b2-441d-adf9-707443ee2c72", + "timestamp": 1697835227560376025, + "model": [ + { + "name": "my-model", + "path": "/path/to/my-exp/my-model", + "exe_args": [ + "hello", + "world" + ], + "run_settings": { + "exe": [ + "/usr/bin/echo" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": {}, + "files": { + "Symlink": [], + "Configure": [], + "Copy": [] + }, + "colocated_db": { + "settings": { + "port": 5757, + "ifname": "lo", + "cpus": 1, + "custom_pinning": "0", + "debug": false, + "db_identifier": "COLO", + "rai_args": { + "threads_per_queue": null, + "inter_op_parallelism": null, + "intra_op_parallelism": null + }, + "extra_db_args": {} + }, + "scripts": [], + "models": [ + { + "cnn": { + "backend": "TORCH", + "device": "CPU" + } + } + ] + }, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/e41f8e17-c4b2-441d-adf9-707443ee2c72/model/my-model", + "step_id": "4121904.0", + "task_id": "28277", + "managed": true + }, + "out_file": "/path/to/my-exp/my-model/my-model.out", + "err_file": "/path/to/my-exp/my-model/my-model.err" + } + ], + "orchestrator": [], + "ensemble": [] + }, + { + "run_id": "b33a5d27-6822-4795-8e0e-cfea18551fa4", + "timestamp": 1697835261956135240, + "model": [], + "orchestrator": [ + { + "name": "orchestrator", + "type": "redis", + "interface": [ + "ipogif0" + ], + "shards": [ + { + "name": "orchestrator_0", + "hostname": "10.128.0.2", + "port": 2424, + "cluster": true, + "conf_file": "nodes-orchestrator_0-2424.conf", + "out_file": "/path/to/my-exp/orchestrator/orchestrator.out", + "err_file": "/path/to/my-exp/orchestrator/orchestrator.err", + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/b33a5d27-6822-4795-8e0e-cfea18551fa4/database/orchestrator/orchestrator", + "step_id": "4121904.1+2", + "task_id": "28289", + "managed": true + } + }, + { + "name": "orchestrator_2", + "hostname": "10.128.0.4", + "port": 2424, + "cluster": true, + "conf_file": "nodes-orchestrator_2-2424.conf", + "out_file": "/path/to/my-exp/orchestrator/orchestrator.out", + "err_file": "/path/to/my-exp/orchestrator/orchestrator.err", + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/b33a5d27-6822-4795-8e0e-cfea18551fa4/database/orchestrator/orchestrator", + "step_id": "4121904.1+2", + "task_id": "28289", + "managed": true + } + }, + { + "name": "orchestrator_1", + "hostname": "10.128.0.3", + "port": 2424, + "cluster": true, + "conf_file": 
"nodes-orchestrator_1-2424.conf", + "out_file": "/path/to/my-exp/orchestrator/orchestrator.out", + "err_file": "/path/to/my-exp/orchestrator/orchestrator.err", + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/b33a5d27-6822-4795-8e0e-cfea18551fa4/database/orchestrator/orchestrator", + "step_id": "4121904.1+2", + "task_id": "28289", + "managed": true + } + } + ] + } + ], + "ensemble": [] + }, + { + "run_id": "45772df2-fd80-43fd-adf0-d5e319870182", + "timestamp": 1697835287798613875, + "model": [], + "orchestrator": [], + "ensemble": [ + { + "name": "my-ens", + "params": { + "START": [ + "spam", + "foo" + ], + "MID": [ + "eggs", + "bar" + ], + "END": [ + "ham", + "baz" + ] + }, + "batch_settings": {}, + "models": [ + { + "name": "my-ens_0", + "path": "/path/to/my-exp/my-ens/my-ens_0", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "eggs", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_0", + "step_id": "4121904.2", + "task_id": "28333", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_0/my-ens_0.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_0/my-ens_0.err" + }, + { + "name": "my-ens_1", + "path": "/path/to/my-exp/my-ens/my-ens_1", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "eggs", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_1", + "step_id": "4121904.3", + "task_id": "28342", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_1/my-ens_1.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_1/my-ens_1.err" + }, + { + "name": "my-ens_2", + "path": "/path/to/my-exp/my-ens/my-ens_2", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "bar", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_2", + "step_id": "4121904.4", + "task_id": "28353", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_2/my-ens_2.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_2/my-ens_2.err" + }, + { + "name": "my-ens_3", + "path": "/path/to/my-exp/my-ens/my-ens_3", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "bar", + "END": 
"baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_3", + "step_id": "4121904.5", + "task_id": "28362", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_3/my-ens_3.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_3/my-ens_3.err" + }, + { + "name": "my-ens_4", + "path": "/path/to/my-exp/my-ens/my-ens_4", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "eggs", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_4", + "step_id": "4121904.6", + "task_id": "28371", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_4/my-ens_4.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_4/my-ens_4.err" + }, + { + "name": "my-ens_5", + "path": "/path/to/my-exp/my-ens/my-ens_5", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "eggs", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_5", + "step_id": "4121904.7", + "task_id": "28380", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_5/my-ens_5.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_5/my-ens_5.err" + }, + { + "name": "my-ens_6", + "path": "/path/to/my-exp/my-ens/my-ens_6", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "bar", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_6", + "step_id": "4121904.8", + "task_id": "28389", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_6/my-ens_6.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_6/my-ens_6.err" + }, + { + "name": "my-ens_7", + "path": "/path/to/my-exp/my-ens/my-ens_7", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "bar", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_7", + "step_id": 
"4121904.9", + "task_id": "28398", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_7/my-ens_7.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_7/my-ens_7.err" + } + ] + } + ] + } + ] + } + diff --git a/tests/test_controller.py b/tests/test_controller.py new file mode 100644 index 000000000..c00adce91 --- /dev/null +++ b/tests/test_controller.py @@ -0,0 +1,68 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2023, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import pytest + +import pathlib + +from smartsim._core.control.controller import Controller +from smartsim.settings.slurmSettings import SbatchSettings, SrunSettings +from smartsim._core.launcher.step import Step +from smartsim.entity.ensemble import Ensemble +from smartsim.database.orchestrator import Orchestrator + +controller = Controller() + +rs = SrunSettings('echo', ['spam', 'eggs']) +bs = SbatchSettings() + +ens = Ensemble("ens", params={}, run_settings=rs, batch_settings=bs, replicas=3) +orc = Orchestrator(db_nodes=3, batch=True, launcher="slurm", run_command="srun") + +class MockStep(Step): + @staticmethod + def _create_unique_name(name): + return name + + def add_to_batch(self, step): + ... 
+ + def get_launch_cmd(self): + return [] + +@pytest.mark.parametrize("collection", [ + pytest.param(ens, id="Ensemble"), + pytest.param(orc, id="Database"), +]) +def test_controller_batch_step_creation_preserves_entity_order(collection, monkeypatch): + monkeypatch.setattr(controller._launcher, "create_step", + lambda name, path, settings: MockStep(name, path, settings)) + entity_names = [x.name for x in collection.entities] + assert len(entity_names) == len(set(entity_names)) + _, steps = controller._create_batch_job_step(collection, pathlib.Path("mock/exp/path")) + assert entity_names == [step.name for step in steps] + + diff --git a/tests/test_controller_errors.py b/tests/test_controller_errors.py index 30d9870cf..a40ccdf66 100644 --- a/tests/test_controller_errors.py +++ b/tests/test_controller_errors.py @@ -100,7 +100,7 @@ def test_wrong_orchestrator(wlmutils): cont = Controller(launcher="local") manifest = Manifest(orc) with pytest.raises(SmartSimError): - cont._launch(manifest) + cont._launch("exp_name", "exp_path", manifest) def test_bad_orc_checkpoint(): diff --git a/tests/test_dbnode.py b/tests/test_dbnode.py index bf5604c41..273c6de20 100644 --- a/tests/test_dbnode.py +++ b/tests/test_dbnode.py @@ -51,8 +51,8 @@ def test_parse_db_host_error(): def test_hosts(fileutils, wlmutils): exp_name = "test_hosts" - exp = Experiment(exp_name) test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, exp_path=test_dir) orc = Orchestrator(port=wlmutils.get_test_port(), interface="lo", launcher="local") orc.set_path(test_dir) diff --git a/tests/test_experiment.py b/tests/test_experiment.py index edc69527c..de6ab37e3 100644 --- a/tests/test_experiment.py +++ b/tests/test_experiment.py @@ -26,10 +26,13 @@ import pytest +import os + from smartsim import Experiment from smartsim.entity import Model from smartsim.error import SmartSimError from smartsim.settings import RunSettings +from smartsim._core.config import CONFIG # The tests in this file belong to the slow_tests group pytestmark = pytest.mark.slow_tests @@ -111,8 +114,8 @@ def test_bad_ensemble_init_no_rs_bs(): def test_stop_entity(fileutils): exp_name = "test_stop_entity" - exp = Experiment(exp_name) test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, exp_path=test_dir) m = exp.create_model("model", path=test_dir, run_settings=RunSettings("sleep", "5")) exp.start(m, block=False) assert exp.finished(m) == False @@ -123,8 +126,8 @@ def test_stop_entity(fileutils): def test_poll(fileutils): # Ensure that a SmartSimError is not raised exp_name = "test_exp_poll" - exp = Experiment(exp_name) test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, exp_path=test_dir) model = exp.create_model( "model", path=test_dir, run_settings=RunSettings("sleep", "5") ) @@ -135,8 +138,8 @@ def test_poll(fileutils): def test_summary(fileutils): exp_name = "test_exp_summary" - exp = Experiment(exp_name) test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, exp_path=test_dir) m = exp.create_model( "model", path=test_dir, run_settings=RunSettings("echo", "Hello") ) @@ -156,6 +159,7 @@ def test_summary(fileutils): assert 0 == int(row["RunID"]) assert 0 == int(row["Returncode"]) + def test_launcher_detection(wlmutils, monkeypatch): if wlmutils.get_test_launcher() == "pals": pytest.skip(reason="Launcher detection cannot currently detect pbs vs pals") @@ -165,3 +169,16 @@ def test_launcher_detection(wlmutils, monkeypatch): exp = Experiment("test-launcher-detection", launcher="auto") assert exp._launcher == 
wlmutils.get_test_launcher()
+
+
+def test_enable_disable_telemetry(monkeypatch):
+    # TODO: Currently these are implemented by setting an environment variable
+    # so that ALL experiments instantiated in a driver script will begin
+    # producing telemetry data. In the future it is planned to have this
+    # work on a "per-instance" basis
+    monkeypatch.setattr(os, "environ", {})
+    exp = Experiment("my-exp")
+    exp.enable_telemetry()
+    assert CONFIG.telemetry_enabled
+    exp.disable_telemetry()
+    assert not CONFIG.telemetry_enabled
diff --git a/tests/test_generator.py b/tests/test_generator.py
index 91f242ab1..a99d5795d 100644
--- a/tests/test_generator.py
+++ b/tests/test_generator.py
@@ -264,10 +264,8 @@ def test_multiple_tags(fileutils):
     exp.start(parameterized_model, block=True)
 
     with open(osp.join(parameterized_model.path, "multi-tags.out")) as f:
-        line = f.readline()
-        assert (
-            line.strip() == "My two parameters are 6379 and unbreakable_password, OK?"
-        )
+        log_content = f.read()
+        assert "My two parameters are 6379 and unbreakable_password, OK?" in log_content
 
 
 def test_generation_log(fileutils):
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index 55dd7cbe3..ca145042e 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -27,6 +27,7 @@
 import pytest
 
 from smartsim._core.utils.helpers import cat_arg_and_value
+from smartsim._core.utils import helpers
 
 # The tests in this file belong to the group_a group
 pytestmark = pytest.mark.group_a
@@ -47,3 +48,17 @@ def test_single_char_concat():
 def test_fallthrough_concat():
     result = cat_arg_and_value("xx", "FOO")  # <-- no dashes, > 1 char
     assert result == "--xx=FOO"
+
+def test_encode_decode_cmd_round_trip():
+    orig_cmd = ["this", "is", "a", "cmd"]
+    decoded_cmd = helpers.decode_cmd(helpers.encode_cmd(orig_cmd))
+    assert orig_cmd == decoded_cmd
+    assert orig_cmd is not decoded_cmd
+
+def test_encode_raises_on_empty():
+    with pytest.raises(ValueError):
+        helpers.encode_cmd([])
+
+def test_decode_raises_on_empty():
+    with pytest.raises(ValueError):
+        helpers.decode_cmd("")
diff --git a/tests/test_indirect.py b/tests/test_indirect.py
new file mode 100644
index 000000000..09f630304
--- /dev/null
+++ b/tests/test_indirect.py
@@ -0,0 +1,195 @@
+# BSD 2-Clause License
+#
+# Copyright (c) 2021-2023, Hewlett Packard Enterprise
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +import pathlib +import psutil +import pytest +import sys +import uuid + +from smartsim._core.entrypoints.indirect import get_parser, cleanup, get_ts, main +from smartsim._core.utils.serialize import TELMON_SUBDIR, MANIFEST_FILENAME +from smartsim._core.utils.helpers import encode_cmd + +ALL_ARGS = {"+command", "+entity_type", "+telemetry_dir", "+output_file", "+error_file", "+working_dir"} + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + +@pytest.mark.parametrize( + ["cmd", "missing"], + [ + pytest.param("indirect.py", {"+name", "+command", "+entity_type", "+telemetry_dir", "+working_dir"}, id="no args"), + pytest.param("indirect.py -c echo +entity_type ttt +telemetry_dir ddd +output_file ooo +working_dir www +error_file eee", {"+command"}, id="cmd typo"), + pytest.param("indirect.py -t orchestrator +command ccc +telemetry_dir ddd +output_file ooo +working_dir www +error_file eee", {"+entity_type"}, id="etype typo"), + pytest.param("indirect.py -d /foo/bar +entity_type ttt +command ccc +output_file ooo +working_dir www +error_file eee", {"+telemetry_dir"}, id="dir typo"), + pytest.param("indirect.py +entity_type ttt +telemetry_dir ddd +output_file ooo +working_dir www +error_file eee", {"+command"}, id="no cmd"), + pytest.param("indirect.py +command ccc +telemetry_dir ddd +output_file ooo +working_dir www +error_file eee", {"+entity_type"}, id="no etype"), + pytest.param("indirect.py +command ccc +entity_type ttt +output_file ooo +working_dir www +error_file eee", {"+telemetry_dir"}, id="no dir"), + ] +) +def test_parser(capsys, cmd, missing): + """Test that the parser reports any missing required arguments""" + parser = get_parser() + + args = cmd.split() + + captured = capsys.readouterr() # throw away existing output + with pytest.raises(SystemExit) as ex: + ns = parser.parse_args(args) + + captured = capsys.readouterr() + assert "the following arguments are required" in captured.err + for arg in missing: + assert arg in captured.err + + expected = ALL_ARGS - missing + msg_tuple = captured.err.split("the following arguments are required: ") + if len(msg_tuple) < 2: + assert False, "error message indicates no missing arguments" + + actual_missing = msg_tuple[1].strip() + for exp in expected: + assert f"{exp}/" not in actual_missing + + +def test_cleanup(capsys, monkeypatch): + """Ensure cleanup attempts termination of correct process""" + mock_pid = 123 + create_msg = "creating: {0}" + term_msg = "terminating: {0}" + + class MockProc: + def __init__(self, pid: int): + print(create_msg.format(pid)) + def terminate(self): + print(term_msg.format(mock_pid)) + + captured = capsys.readouterr() # throw away existing output + + with monkeypatch.context() as ctx: + ctx.setattr('psutil.pid_exists', lambda pid: True) + ctx.setattr('psutil.Process', MockProc) + ctx.setattr('smartsim._core.entrypoints.indirect.STEP_PID', mock_pid) + cleanup() + + captured = capsys.readouterr() + assert create_msg.format(mock_pid) in captured.out + 
assert term_msg.format(mock_pid) in captured.out
+
+
+def test_cleanup_late(capsys, monkeypatch):
+    """Ensure cleanup exceptions are swallowed if a process is already terminated"""
+    mock_pid = 123
+    create_msg = "creating: {0}"
+    term_msg = "terminating: {0}"
+
+    class MockMissingProc:
+        def __init__(self, pid: int) -> None:
+            print(create_msg.format(mock_pid))
+            raise psutil.NoSuchProcess(pid)
+
+        def terminate(self) -> None:
+            print(term_msg.format(mock_pid))
+
+    captured = capsys.readouterr()  # throw away existing output
+
+    with monkeypatch.context() as ctx:
+        ctx.setattr('psutil.pid_exists', lambda pid: True)
+        ctx.setattr('psutil.Process', MockMissingProc)
+        ctx.setattr('smartsim._core.entrypoints.indirect.STEP_PID', mock_pid)
+        cleanup()
+
+    captured = capsys.readouterr()
+    assert create_msg.format(mock_pid) in captured.out
+
+
+def test_ts():
+    """Ensure expected output type"""
+    ts = get_ts()
+    assert isinstance(ts, int)
+
+
+def test_indirect_main_dir_check(fileutils):
+    """Ensure that the proxy creates the telemetry status directory when missing"""
+    test_dir = fileutils.make_test_dir()
+    exp_dir = pathlib.Path(test_dir)
+
+    cmd = ["echo", "unit-test"]
+    encoded_cmd = encode_cmd(cmd)
+
+    status_path = exp_dir / TELMON_SUBDIR
+
+    # show that a missing status_path is created
+    main(encoded_cmd, "application", exp_dir, status_path)
+
+    assert status_path.exists()
+
+
+def test_indirect_main_cmd_check(capsys, fileutils, monkeypatch):
+    """Ensure that the proxy validates the cmd is not empty or whitespace-only"""
+    test_dir = fileutils.make_test_dir()
+    exp_dir = pathlib.Path(test_dir)
+
+    captured = capsys.readouterr()  # throw away existing output
+    with monkeypatch.context() as ctx, pytest.raises(ValueError) as ex:
+        ctx.setattr('smartsim._core.entrypoints.indirect.logger.error', print)
+        _ = main("", "application", exp_dir, exp_dir / TELMON_SUBDIR)
+
+    captured = capsys.readouterr()
+    assert "Invalid cmd supplied" in ex.value.args[0]
+
+    # test with a whitespace-only cmd
+    with monkeypatch.context() as ctx, pytest.raises(ValueError) as ex:
+        ctx.setattr('smartsim._core.entrypoints.indirect.logger.error', print)
+        _ = main("  \n  \t   ", "application", exp_dir, exp_dir / TELMON_SUBDIR)
+
+    captured = capsys.readouterr()
+    assert "Invalid cmd supplied" in ex.value.args[0]
+
+
+def test_complete_process(fileutils):
+    """Ensure the happy-path completes and returns a success return code"""
+    script = fileutils.get_test_conf_path("sleep.py")
+
+    test_dir = fileutils.make_test_dir()
+    exp_dir = pathlib.Path(test_dir)
+
+    raw_cmd = f"{sys.executable} {script} --time=1"
+    cmd = encode_cmd(raw_cmd.split())
+
+    rc = main(cmd, "application", exp_dir, exp_dir / TELMON_SUBDIR)
+    assert rc == 0
+
+    assert exp_dir.exists()
+
+    # NOTE: don't have a manifest so we're falling back to default event path
+    data_dir = exp_dir / TELMON_SUBDIR
+    start_events = list(data_dir.rglob("start.json"))
+    stop_events = list(data_dir.rglob("stop.json"))
+
+    assert start_events
+    assert stop_events
diff --git a/tests/test_launch_errors.py b/tests/test_launch_errors.py
index 89659db42..c40db4d18 100644
--- a/tests/test_launch_errors.py
+++ b/tests/test_launch_errors.py
@@ -48,8 +48,8 @@ def test_unsupported_run_settings():
 def test_model_failure(fileutils):
     exp_name = "test-model-failure"
-    exp = Experiment(exp_name, launcher="local")
     test_dir = fileutils.make_test_dir()
+    exp = Experiment(exp_name, launcher="local", exp_path=test_dir)
 
     script = fileutils.get_test_conf_path("bad.py")
     settings = RunSettings("python", f"{script}
--time=3") @@ -64,8 +64,8 @@ def test_model_failure(fileutils): def test_orchestrator_relaunch(fileutils, wlmutils): """Test when users try to launch second orchestrator""" exp_name = "test-orc-on-relaunch" - exp = Experiment(exp_name, launcher="local") test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, launcher="local", exp_path=test_dir) orc = Orchestrator(port=wlmutils.get_test_port()) orc.set_path(test_dir) diff --git a/tests/test_local_launch.py b/tests/test_local_launch.py index 30389a0b0..9cacd810e 100644 --- a/tests/test_local_launch.py +++ b/tests/test_local_launch.py @@ -38,8 +38,8 @@ def test_models(fileutils): exp_name = "test-models-local-launch" - exp = Experiment(exp_name, launcher="local") test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, launcher="local", exp_path=test_dir) script = fileutils.get_test_conf_path("sleep.py") settings = exp.create_run_settings("python", f"{script} --time=3") @@ -54,8 +54,8 @@ def test_models(fileutils): def test_ensemble(fileutils): exp_name = "test-ensemble-launch" - exp = Experiment(exp_name, launcher="local") test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, launcher="local", exp_path=test_dir) script = fileutils.get_test_conf_path("sleep.py") settings = exp.create_run_settings("python", f"{script} --time=3") diff --git a/tests/test_local_multi_run.py b/tests/test_local_multi_run.py index 7f1f5c624..5edaff1c9 100644 --- a/tests/test_local_multi_run.py +++ b/tests/test_local_multi_run.py @@ -38,8 +38,8 @@ def test_models(fileutils): exp_name = "test-models-local-launch" - exp = Experiment(exp_name, launcher="local") test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, launcher="local", exp_path=test_dir) script = fileutils.get_test_conf_path("sleep.py") settings = exp.create_run_settings("python", f"{script} --time=5") diff --git a/tests/test_local_restart.py b/tests/test_local_restart.py index c9078f125..54884a3f4 100644 --- a/tests/test_local_restart.py +++ b/tests/test_local_restart.py @@ -39,8 +39,8 @@ def test_restart(fileutils): exp_name = "test-models-local-restart" - exp = Experiment(exp_name, launcher="local") test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, launcher="local", exp_path=test_dir) script = fileutils.get_test_conf_path("sleep.py") settings = exp.create_run_settings("python", f"{script} --time=3") @@ -59,8 +59,8 @@ def test_restart(fileutils): def test_ensemble(fileutils): exp_name = "test-ensemble-restart" - exp = Experiment(exp_name, launcher="local") test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, launcher="local", exp_path=test_dir) script = fileutils.get_test_conf_path("sleep.py") settings = exp.create_run_settings("python", f"{script} --time=3") diff --git a/tests/test_manifest.py b/tests/test_manifest.py index f68219c73..5bb373fc1 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -26,11 +26,17 @@ from copy import deepcopy +import os.path import pytest from smartsim import Experiment -from smartsim._core.control import Manifest +from smartsim._core.control.manifest import ( + Manifest, + LaunchedManifest, + LaunchedManifestBuilder, + _LaunchedManifestMetadata as LaunchedManifestMetadata, +) from smartsim.database import Orchestrator from smartsim.error import SmartSimError from smartsim.settings import RunSettings @@ -48,7 +54,6 @@ model_2 = exp.create_model("model_1", run_settings=rs) ensemble = exp.create_ensemble("ensemble", run_settings=rs, replicas=1) - orc = Orchestrator() orc_1 = deepcopy(orc) 
orc_1.name = "orc2"
@@ -97,3 +102,67 @@ class Person:
     p = Person()
     with pytest.raises(TypeError):
         _ = Manifest(p)
+
+def test_launched_manifest_transform_data():
+    models = [(model, 1), (model_2, 2)]
+    ensembles = [(ensemble, [(m, i) for i, m in enumerate(ensemble.entities)])]
+    dbs = [(orc, [(n, i) for i, n in enumerate(orc.entities)])]
+    launched = LaunchedManifest(
+        metadata=LaunchedManifestMetadata("name", "path", "launcher", "run_id"),
+        models=models,
+        ensembles=ensembles,
+        databases=dbs,
+    )
+    transformed = launched.map(lambda x: str(x))
+    assert transformed.models == tuple((m, str(i)) for m, i in models)
+    assert transformed.ensembles[0][1] == tuple((m, str(i)) for m, i in ensembles[0][1])
+    assert transformed.databases[0][1] == tuple((n, str(i)) for n, i in dbs[0][1])
+
+
+def test_launched_manifest_builder_correctly_maps_data():
+    lmb = LaunchedManifestBuilder("name", "path", "launcher name")
+    lmb.add_model(model, 1)
+    lmb.add_model(model_2, 1)
+    lmb.add_ensemble(ensemble, [i for i in range(len(ensemble.entities))])
+    lmb.add_database(orc, [i for i in range(len(orc.entities))])
+
+    manifest = lmb.finalize()
+    assert len(manifest.models) == 2
+    assert len(manifest.ensembles) == 1
+    assert len(manifest.databases) == 1
+
+
+def test_launched_manifest_builder_raises_if_lens_do_not_match():
+    lmb = LaunchedManifestBuilder("name", "path", "launcher name")
+    with pytest.raises(ValueError):
+        lmb.add_ensemble(ensemble, list(range(123)))
+    with pytest.raises(ValueError):
+        lmb.add_database(orc, list(range(123)))
+
+
+def test_launched_manifest_builder_raises_if_attaching_data_to_empty_collection(
+    monkeypatch
+):
+    lmb = LaunchedManifestBuilder("name", "path", "launcher")
+    monkeypatch.setattr(ensemble, "entities", [])
+    with pytest.raises(ValueError):
+        lmb.add_ensemble(ensemble, [])
+
+
+def test_lmb_and_launched_manifest_have_same_paths_for_launched_metadata():
+    exp_path = "/path/to/some/exp"
+    lmb = LaunchedManifestBuilder("exp_name", exp_path, "launcher")
+    manifest = lmb.finalize()
+    assert lmb.exp_telemetry_subdirectory == manifest.metadata.exp_telemetry_subdirectory
+    assert lmb.run_telemetry_subdirectory == manifest.metadata.run_telemetry_subdirectory
+    assert os.path.commonprefix([
+        manifest.metadata.run_telemetry_subdirectory,
+        manifest.metadata.exp_telemetry_subdirectory,
+        manifest.metadata.manifest_file_path,
+        exp_path,
+    ]) == exp_path
+    assert os.path.commonprefix([
+        manifest.metadata.run_telemetry_subdirectory,
+        manifest.metadata.exp_telemetry_subdirectory,
+        manifest.metadata.manifest_file_path,
+    ]) == str(manifest.metadata.exp_telemetry_subdirectory)
diff --git a/tests/test_model.py b/tests/test_model.py
index 103e8a09c..76af50b54 100644
--- a/tests/test_model.py
+++ b/tests/test_model.py
@@ -28,6 +28,7 @@
 from smartsim import Experiment
 from smartsim._core.launcher.step import SbatchStep, SrunStep
+from smartsim._core.control.manifest import LaunchedManifestBuilder
 from smartsim.entity import Ensemble, Model
 from smartsim.error import EntityExistsError, SSUnsupportedError
 from smartsim.settings import RunSettings, SbatchSettings, SrunSettings
@@ -88,8 +89,10 @@ def monkeypatch_exp_controller(monkeypatch):
     def _monkeypatch_exp_controller(exp):
         entity_steps = []
 
-        def start_wo_job_manager(self, manifest, block=True, kill_on_interrupt=True):
-            self._launch(manifest)
+        def start_wo_job_manager(self, exp_name, exp_path, manifest,
+                                 block=True, kill_on_interrupt=True):
+            self._launch(exp_name, exp_path, manifest)
+            return LaunchedManifestBuilder("name", "path",
"launcher").finalize() def launch_step_nop(self, step, entity): entity_steps.append((step, entity)) diff --git a/tests/test_multidb.py b/tests/test_multidb.py index 9b50f1e80..7bea4e0c8 100644 --- a/tests/test_multidb.py +++ b/tests/test_multidb.py @@ -59,7 +59,7 @@ def test_db_identifier_standard_then_colo(fileutils, wlmutils, coloutils, db_typ test_script = fileutils.get_test_conf_path("smartredis/db_id_err.py") # Create SmartSim Experiment - exp = Experiment(exp_name, launcher=test_launcher) + exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir) # create regular database orc = exp.create_database( @@ -129,7 +129,7 @@ def test_db_identifier_colo_then_standard(fileutils, wlmutils, coloutils, db_typ test_script = fileutils.get_test_conf_path("smartredis/db_id_err.py") # Create SmartSim Experiment - exp = Experiment(exp_name, launcher=test_launcher) + exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir) # Create run settings colo_settings = exp.create_run_settings("python", test_script) @@ -172,7 +172,7 @@ def test_db_identifier_colo_then_standard(fileutils, wlmutils, coloutils, db_typ exp.stop(orc) -def test_db_identifier_standard_twice_not_unique(wlmutils): +def test_db_identifier_standard_twice_not_unique(wlmutils, fileutils): """Test uniqueness of db_identifier several calls to create_database, with non unique names, checking error is raised before exp start is called""" @@ -183,9 +183,10 @@ def test_db_identifier_standard_twice_not_unique(wlmutils): test_launcher = wlmutils.get_test_launcher() test_interface = wlmutils.get_test_interface() test_port = wlmutils.get_test_port() + test_dir = fileutils.make_test_dir() # Create SmartSim Experiment - exp = Experiment(exp_name, launcher=test_launcher) + exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir) # CREATE DATABASE with db_identifier orc = exp.create_database( @@ -297,7 +298,9 @@ def test_multidb_colo_once(fileutils, wlmutils, coloutils, db_type): test_script = fileutils.get_test_conf_path("smartredis/dbid.py") # start a new Experiment for this section - exp = Experiment("test_multidb_colo_once", launcher=test_launcher) + exp = Experiment("test_multidb_colo_once", + launcher=test_launcher, + exp_path=test_dir) # create run settings run_settings = exp.create_run_settings("python", test_script) @@ -463,8 +466,8 @@ def test_launch_cluster_orc_single_dbid(fileutils, wlmutils): exp_name = "test_launch_cluster_orc_single_dbid" launcher = wlmutils.get_test_launcher() - exp = Experiment(exp_name, launcher=launcher) test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) # batch = False to launch on existing allocation network_interface = wlmutils.get_test_interface() diff --git a/tests/test_orchestrator.py b/tests/test_orchestrator.py index e61139931..586dbcefa 100644 --- a/tests/test_orchestrator.py +++ b/tests/test_orchestrator.py @@ -70,8 +70,8 @@ def test_inactive_orc_get_address(): def test_orc_active_functions(fileutils, wlmutils): exp_name = "test_orc_active_functions" - exp = Experiment(exp_name, launcher="local") test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, launcher="local", exp_path=test_dir) db = Orchestrator(port=wlmutils.get_test_port()) db.set_path(test_dir) @@ -98,8 +98,8 @@ def test_orc_active_functions(fileutils, wlmutils): def test_multiple_interfaces(fileutils, wlmutils): exp_name = "test_multiple_interfaces" - exp = Experiment(exp_name, launcher="local") test_dir = fileutils.make_test_dir() + exp = 
Experiment(exp_name, launcher="local", exp_path=test_dir) net_if_addrs = psutil.net_if_addrs() net_if_addrs = [ diff --git a/tests/test_pals_settings.py b/tests/test_pals_settings.py index 1edb183fa..ce35e135f 100644 --- a/tests/test_pals_settings.py +++ b/tests/test_pals_settings.py @@ -30,6 +30,9 @@ import shutil import sys +import smartsim._core.config.config +from smartsim.error import SSUnsupportedError + from smartsim.settings import PalsMpiexecSettings from smartsim._core.launcher import PBSLauncher from smartsim._core.launcher.step.mpiStep import MpiexecStep @@ -41,6 +44,15 @@ default_exe = sys.executable default_kwargs = {"fail_if_missing_exec": False} + +@pytest.fixture(autouse=True) +def turn_off_telemetry_indirect(monkeypatch): + monkeypatch.setattr( + smartsim._core.config.config.Config, + "telemetry_enabled", False) + yield + + # Uncomment when # @pytest.mark.parametrize( # "function_name",[ @@ -56,6 +68,7 @@ # with pytest.raises(SSUnsupportedError): # func(None) + def test_affinity_script(): settings = PalsMpiexecSettings(default_exe, **default_kwargs) settings.set_gpu_affinity_script("/path/to/set_affinity_gpu.sh", 1, 2) diff --git a/tests/test_reconnect_orchestrator.py b/tests/test_reconnect_orchestrator.py index ecda7f1e6..60105da41 100644 --- a/tests/test_reconnect_orchestrator.py +++ b/tests/test_reconnect_orchestrator.py @@ -45,8 +45,8 @@ def test_local_orchestrator(fileutils, wlmutils): """Test launching orchestrator locally""" global first_dir exp_name = "test-orc-launch-local" - exp = Experiment(exp_name, launcher="local") test_dir = fileutils.make_test_dir() + exp = Experiment(exp_name, launcher="local", exp_path=test_dir) first_dir = test_dir orc = Orchestrator(port=wlmutils.get_test_port()) @@ -61,12 +61,13 @@ def test_local_orchestrator(fileutils, wlmutils): exp._control._launcher.task_manager.actively_monitoring = False -def test_reconnect_local_orc(): +def test_reconnect_local_orc(fileutils): """Test reconnecting to orchestrator from first experiment""" global first_dir # start new experiment exp_name = "test-orc-local-reconnect-2nd" - exp_2 = Experiment(exp_name, launcher="local") + test_dir = fileutils.make_test_dir() + exp_2 = Experiment(exp_name, launcher="local", exp_path=test_dir) checkpoint = osp.join(first_dir, "smartsim_db.dat") reloaded_orc = exp_2.reconnect_orchestrator(checkpoint) diff --git a/tests/test_serialize.py b/tests/test_serialize.py new file mode 100644 index 000000000..2010b77e2 --- /dev/null +++ b/tests/test_serialize.py @@ -0,0 +1,175 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2023, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import logging
+from pathlib import Path
+import json
+
+from smartsim import Experiment
+from smartsim.database.orchestrator import Orchestrator
+from smartsim._core.utils import serialize
+from smartsim._core._cli import utils
+from smartsim._core.control.manifest import LaunchedManifestBuilder
+import smartsim._core.config.config
+
+_REL_MANIFEST_PATH = f"{serialize.TELMON_SUBDIR}/{serialize.MANIFEST_FILENAME}"
+_CFG_TM_ENABLED_ATTR = "telemetry_enabled"
+
+# The tests in this file belong to the group_b group
+pytestmark = pytest.mark.group_b
+
+@pytest.fixture(autouse=True)
+def turn_on_tm(monkeypatch):
+    monkeypatch.setattr(
+        smartsim._core.config.config.Config,
+        _CFG_TM_ENABLED_ATTR,
+        property(lambda self: True))
+    yield
+
+
+def test_serialize_creates_a_manifest_json_file_if_dne(fileutils):
+    test_dir = fileutils.get_test_dir()
+    lmb = LaunchedManifestBuilder("exp", test_dir, "launcher")
+    serialize.save_launch_manifest(lmb.finalize())
+    manifest_json = Path(test_dir) / _REL_MANIFEST_PATH
+
+    assert manifest_json.is_file()
+    with open(manifest_json, 'r') as f:
+        manifest = json.load(f)
+        assert manifest["experiment"]["name"] == "exp"
+        assert manifest["experiment"]["launcher"] == "launcher"
+        assert isinstance(manifest["runs"], list)
+        assert len(manifest["runs"]) == 1
+
+
+def test_serialize_does_not_write_manifest_json_if_telemetry_monitor_is_off(
+    fileutils, monkeypatch
+):
+    monkeypatch.setattr(
+        smartsim._core.config.config.Config,
+        _CFG_TM_ENABLED_ATTR,
+        property(lambda self: False))
+    test_dir = fileutils.get_test_dir()
+    lmb = LaunchedManifestBuilder("exp", test_dir, "launcher")
+    serialize.save_launch_manifest(lmb.finalize())
+    manifest_json = Path(test_dir) / _REL_MANIFEST_PATH
+    assert not manifest_json.exists()
+
+
+def test_serialize_appends_a_manifest_json_exists(fileutils):
+    test_dir = fileutils.get_test_dir()
+    manifest_json = Path(test_dir) / _REL_MANIFEST_PATH
+    serialize.save_launch_manifest(
+        LaunchedManifestBuilder("exp", test_dir, "launcher").finalize())
+    serialize.save_launch_manifest(
+        LaunchedManifestBuilder("exp", test_dir, "launcher").finalize())
+    serialize.save_launch_manifest(
+        LaunchedManifestBuilder("exp", test_dir, "launcher").finalize())
+
+    assert manifest_json.is_file()
+    with open(manifest_json, 'r') as f:
+        manifest = json.load(f)
+        assert isinstance(manifest["runs"], list)
+        assert len(manifest["runs"]) == 3
+        assert len({run["run_id"] for run in manifest["runs"]}) == 3
+
+
+def test_serialize_overwrites_file_if_not_json(fileutils):
+    test_dir = fileutils.get_test_dir()
+    manifest_json = Path(test_dir) / _REL_MANIFEST_PATH
+    manifest_json.parent.mkdir(parents=True, exist_ok=True)
+    with open(manifest_json, 'w') as f:
+        f.write("This is not a json\n")
+
+    lmb = LaunchedManifestBuilder("exp", test_dir, "launcher")
+    serialize.save_launch_manifest(lmb.finalize())
+    with open(manifest_json, 'r') as f:
+        assert isinstance(json.load(f), dict)
+
+
+def test_started_entities_are_serialized(fileutils):
+    exp_name = "test-exp"
+    test_dir = Path(fileutils.make_test_dir()) / exp_name
+    test_dir.mkdir(parents=True)
+    exp = Experiment(exp_name, exp_path=str(test_dir), launcher="local")
+
+    rs1 = exp.create_run_settings("echo", ["hello", "world"])
+    rs2 = exp.create_run_settings("echo", ["spam", "eggs"])
+
+    hello_world_model = exp.create_model("echo-hello", run_settings=rs1)
+    spam_eggs_model = exp.create_model("echo-spam", run_settings=rs2)
+    hello_ensemble = exp.create_ensemble('echo-ensemble', run_settings=rs1, replicas=3)
+
+    exp.generate(hello_world_model, spam_eggs_model, hello_ensemble)
+    exp.start(hello_world_model, spam_eggs_model, block=False)
+    exp.start(hello_ensemble, block=False)
+
+    manifest_json = Path(exp.exp_path) / _REL_MANIFEST_PATH
+    try:
+        with open(manifest_json, 'r') as f:
+            manifest = json.load(f)
+            assert len(manifest["runs"]) == 2
+            assert len(manifest["runs"][0]["model"]) == 2
+            assert len(manifest["runs"][0]["ensemble"]) == 0
+            assert len(manifest["runs"][1]["model"]) == 0
+            assert len(manifest["runs"][1]["ensemble"]) == 1
+            assert len(manifest["runs"][1]["ensemble"][0]["models"]) == 3
+    finally:
+        exp.stop(hello_world_model, spam_eggs_model, hello_ensemble)
+
+
+def test_serialized_database_does_not_break_if_using_a_non_standard_install(
+    monkeypatch
+):
+    monkeypatch.setattr(utils, "get_db_path", lambda: None)
+    db = Orchestrator()
+    dict_ = serialize._dictify_db(db, [])
+    assert dict_["type"] == "Unknown"
+
+
+def test_dictify_run_settings_warns_when_attempting_to_dictify_mpmd(
+    monkeypatch, caplog, fileutils
+):
+    # TODO: Eventually this test should be removed and we should be able to
+    # handle MPMD run settings as part of the output dict
+    exp_name = "test-exp"
+    test_dir = Path(fileutils.make_test_dir()) / exp_name
+    test_dir.mkdir(parents=True)
+    exp = Experiment(exp_name, exp_path=str(test_dir), launcher="local")
+
+    rs1 = exp.create_run_settings("echo", ["hello", "world"])
+    rs2 = exp.create_run_settings("echo", ["spam", "eggs"])
+
+    # Make rs1 "MPMD"
+    monkeypatch.setattr(rs1, "mpmd", [rs2], raising=False)
+    # Swap in a plain logger so caplog can capture the (colored) log record
+    monkeypatch.setattr(serialize, "_LOGGER", logging.getLogger())
+    serialize._dictify_run_settings(rs1)
+    (rec,) = caplog.records
+    assert rec.levelno == logging.WARNING
+    assert "MPMD run settings" in rec.msg
diff --git a/tests/test_telemetry_monitor.py b/tests/test_telemetry_monitor.py
new file mode 100644
index 000000000..cf85e26e2
--- /dev/null
+++ b/tests/test_telemetry_monitor.py
@@ -0,0 +1,1139 @@
+# BSD 2-Clause License
+#
+# Copyright (c) 2021-2023, Hewlett Packard Enterprise
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +import logging +import pathlib +from random import sample +import pytest +import shutil +import sys +import typing as t +import time +import uuid +from conftest import FileUtils, MLUtils, WLMUtils +import smartsim + +from smartsim._core.control.jobmanager import JobManager +from smartsim._core.control.job import Job, JobEntity, _JobKey +from smartsim._core.launcher.launcher import WLMLauncher +from smartsim._core.launcher.slurm.slurmLauncher import SlurmLauncher +from smartsim._core.launcher.step.step import Step, proxyable_launch_cmd +from smartsim._core.launcher.stepInfo import StepInfo +from smartsim.error.errors import UnproxyableStepError +from smartsim.settings.base import RunSettings +from smartsim.status import ( + STATUS_COMPLETED, + STATUS_CANCELLED, + STATUS_FAILED, + STATUS_NEW, + STATUS_PAUSED, + STATUS_RUNNING, + TERMINAL_STATUSES, +) +import smartsim._core.config.config as cfg + +from smartsim._core.entrypoints.telemetrymonitor import ( + can_shutdown, + event_loop, + faux_return_code, + get_parser, + get_ts, + track_event, + load_manifest, + hydrate_persistable, + ManifestEventHandler, +) +from smartsim._core.utils import serialize +from smartsim import Experiment + + +ALL_ARGS = {"-exp_dir", "-frequency"} +PROXY_ENTRY_POINT = "smartsim._core.entrypoints.indirect" +CFG_TM_ENABLED_ATTR = "telemetry_enabled" + + +for_all_wlm_launchers = pytest.mark.parametrize( + "wlm_launcher", + [pytest.param(cls(), id=cls.__name__) for cls in WLMLauncher.__subclasses__()], +) + +requires_wlm = pytest.mark.skipif( + pytest.test_launcher == "local", + reason="Test requires WLM" +) + + +logger = logging.getLogger() + +# The tests in this file belong to the slow_tests group +pytestmark = pytest.mark.slow_tests + + +@pytest.fixture(autouse=True) +def turn_on_tm(monkeypatch): + monkeypatch.setattr( + cfg.Config, + CFG_TM_ENABLED_ATTR, + property(lambda self: True)) + yield + + +def snooze_nonblocking(test_dir: str, max_delay: int = 20, post_data_delay: int = 2): + telmon_subdir = pathlib.Path(test_dir) / serialize.TELMON_SUBDIR + # let the non-blocking experiment complete. 
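+    # poll up to max_delay seconds for the telemetry subdir to appear, then
+    # wait post_data_delay more seconds so in-flight events can be written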
+ for _ in range(max_delay): + time.sleep(1) + if telmon_subdir.exists(): + time.sleep(post_data_delay) + break + + +@pytest.mark.parametrize( + ["cmd", "missing"], + [ + pytest.param("", {"-exp_dir", "-frequency"}, id="no args"), + pytest.param("-exp_dir /foo/bar", {"-frequency"}, id="no freq"), + pytest.param("-frequency 123", {"-exp_dir"}, id="no dir"), + ], +) +def test_parser_reqd_args(capsys, cmd, missing): + """Test that the parser reports any missing required arguments""" + parser = get_parser() + + args = cmd.split() + + captured = capsys.readouterr() # throw away existing output + with pytest.raises(SystemExit) as ex: + ns = parser.parse_args(args) + + captured = capsys.readouterr() + assert "the following arguments are required" in captured.err + err_desc = captured.err.split("the following arguments are required:")[-1] + for arg in missing: + assert arg in err_desc + + expected = ALL_ARGS - missing + for exp in expected: + assert exp not in err_desc + + +def test_parser(): + """Test that the parser succeeds when receiving expected args""" + parser = get_parser() + + test_dir = "/foo/bar" + test_freq = 123 + + cmd = f"-exp_dir {test_dir} -frequency {test_freq}" + args = cmd.split() + + ns = parser.parse_args(args) + + assert ns.exp_dir == test_dir + assert ns.frequency == test_freq + + +def test_ts(): + """Ensure expected output type""" + ts = get_ts() + assert isinstance(ts, int) + + +@pytest.mark.parametrize( + ["etype", "task_id", "step_id", "timestamp", "evt_type"], + [ + pytest.param("ensemble", "", "123", get_ts(), "start", id="start event"), + pytest.param("ensemble", "", "123", get_ts(), "stop", id="stop event"), + ], +) +def test_track_event( + etype: str, + task_id: str, + step_id: str, + timestamp: int, + evt_type: str, + fileutils, +): + """Ensure that track event writes a file to the expected location""" + exp_dir = fileutils.make_test_dir() + exp_path = pathlib.Path(exp_dir) + track_event(timestamp, task_id, step_id, etype, evt_type, exp_path, logger) + + expected_output = exp_path / f"{evt_type}.json" + + assert expected_output.exists() + assert expected_output.is_file() + + +def test_load_manifest(fileutils: FileUtils): + """Ensure that the runtime manifest loads correctly""" + sample_manifest_path = fileutils.get_test_conf_path("telemetry/telemetry.json") + sample_manifest = pathlib.Path(sample_manifest_path) + assert sample_manifest.exists() + + test_manifest_path = fileutils.make_test_file( + serialize.MANIFEST_FILENAME, + serialize.TELMON_SUBDIR, + sample_manifest.read_text(), + ) + test_manifest = pathlib.Path(test_manifest_path) + assert test_manifest.exists() + + manifest = load_manifest(test_manifest_path) + assert manifest.name == "my-exp" + assert str(manifest.path) == "/path/to/my-exp" + assert manifest.launcher == "Slurm" + assert len(manifest.runs) == 6 + + assert len(manifest.runs[0].models) == 1 + assert len(manifest.runs[2].models) == 8 # 8 models in ensemble + assert len(manifest.runs[0].orchestrators) == 0 + assert len(manifest.runs[1].orchestrators) == 3 # 3 shards in db + + +def test_load_manifest_colo_model(fileutils: FileUtils): + """Ensure that the runtime manifest loads correctly when containing a colocated model""" + # NOTE: for regeneration, this manifest can use `test_telemetry_colo` + sample_manifest_path = fileutils.get_test_conf_path("telemetry/colocatedmodel.json") + sample_manifest = pathlib.Path(sample_manifest_path) + assert sample_manifest.exists() + + manifest = load_manifest(sample_manifest_path) + assert manifest.name == 
"my-exp" + assert ( + str(manifest.path) + == "/tmp/my-exp" + ) + assert manifest.launcher == "Slurm" + assert len(manifest.runs) == 1 + + assert len(manifest.runs[0].models) == 1 + + +def test_load_manifest_serial_models(fileutils: FileUtils): + """Ensure that the runtime manifest loads correctly when containing multiple models""" + # NOTE: for regeneration, this manifest can use `test_telemetry_colo` + sample_manifest_path = fileutils.get_test_conf_path("telemetry/serialmodels.json") + sample_manifest = pathlib.Path(sample_manifest_path) + assert sample_manifest.exists() + + manifest = load_manifest(sample_manifest_path) + assert manifest.name == "my-exp" + assert str(manifest.path) == "/tmp/my-exp" + assert manifest.launcher == "Slurm" + assert len(manifest.runs) == 1 + + assert len(manifest.runs[0].models) == 5 + + +def test_load_manifest_db_and_models(fileutils: FileUtils): + """Ensure that the runtime manifest loads correctly when containing models & + orchestrator across 2 separate runs""" + # NOTE: for regeneration, this manifest can use `test_telemetry_colo` + sample_manifest_path = fileutils.get_test_conf_path("telemetry/db_and_model.json") + sample_manifest = pathlib.Path(sample_manifest_path) + assert sample_manifest.exists() + + manifest = load_manifest(sample_manifest_path) + assert manifest.name == "my-exp" + assert str(manifest.path) == "/tmp/my-exp" + assert manifest.launcher == "Slurm" + assert len(manifest.runs) == 2 + + assert len(manifest.runs[0].orchestrators) == 1 + assert len(manifest.runs[1].models) == 1 + + +def test_load_manifest_db_and_models_1run(fileutils: FileUtils): + """Ensure that the runtime manifest loads correctly when containing models & + orchestrator in a single run""" + # NOTE: for regeneration, this manifest can use `test_telemetry_colo` + sample_manifest_path = fileutils.get_test_conf_path( + "telemetry/db_and_model_1run.json" + ) + sample_manifest = pathlib.Path(sample_manifest_path) + assert sample_manifest.exists() + + manifest = load_manifest(sample_manifest_path) + assert manifest.name == "my-exp" + assert str(manifest.path) == "/tmp/my-exp" + assert manifest.launcher == "Slurm" + assert len(manifest.runs) == 1 + + assert len(manifest.runs[0].orchestrators) == 1 + assert len(manifest.runs[0].models) == 1 + + +@pytest.mark.parametrize( + ["task_id", "step_id", "etype", "exp_isorch", "exp_ismanaged"], + [ + pytest.param("123", "", "model", False, False, id="unmanaged, non-orch"), + pytest.param("456", "123", "ensemble", False, True, id="managed, non-orch"), + pytest.param("789", "987", "orchestrator", True, True, id="managed, orch"), + pytest.param("987", "", "orchestrator", True, False, id="unmanaged, orch"), + ], +) +def test_persistable_computed_properties( + task_id: str, step_id: str, etype: str, exp_isorch: bool, exp_ismanaged: bool +): + name = f"test-{etype}-{uuid.uuid4()}" + timestamp = get_ts() + exp_dir = pathlib.Path("/foo/bar") + stored = { + "name": name, + "run_id": timestamp, + "telemetry_metadata": { + "status_dir": str(exp_dir), + "task_id": task_id, + "step_id": step_id, + }, + } + persistables = hydrate_persistable(etype, stored, exp_dir) + persistable = persistables[0] if persistables else None + + assert persistable.is_managed == exp_ismanaged + assert persistable.is_db == exp_isorch + + +def test_deserialize_ensemble(fileutils: FileUtils): + """Ensure that the children of ensembles (models) are correctly + placed in the models collection""" + sample_manifest_path = fileutils.get_test_conf_path("telemetry/ensembles.json") 
+ sample_manifest = pathlib.Path(sample_manifest_path) + assert sample_manifest.exists() + + manifest = load_manifest(sample_manifest_path) + assert manifest + + assert len(manifest.runs) == 1 + + # NOTE: no longer returning ensembles, only children... + # assert len(manifest.runs[0].ensembles) == 1 + assert len(manifest.runs[0].models) == 8 + + +def test_shutdown_conditions(): + """Ensure conditions to shutdown telemetry monitor are correctly evaluated""" + job_entity1 = JobEntity() + job_entity1.name = "xyz" + job_entity1.step_id = "123" + job_entity1.task_id = "" + + logger = logging.getLogger() + + # show that an event handler w/no monitored jobs can shutdown + mani_handler = ManifestEventHandler("xyz", logger) + assert can_shutdown(mani_handler, logger) + + # show that an event handler w/a monitored job cannot shutdown + mani_handler = ManifestEventHandler("xyz", logger) + mani_handler.job_manager.add_job( + job_entity1.name, job_entity1.step_id, job_entity1, False + ) + assert not can_shutdown(mani_handler, logger) + assert not bool(mani_handler.job_manager.db_jobs) + assert bool(mani_handler.job_manager.jobs) + + # show that an event handler w/a monitored db cannot shutdown + mani_handler = ManifestEventHandler("xyz", logger) + job_entity1.type = "orchestrator" + mani_handler.job_manager.add_job( + job_entity1.name, job_entity1.step_id, job_entity1, False + ) + assert not can_shutdown(mani_handler, logger) + assert bool(mani_handler.job_manager.db_jobs) + assert not bool(mani_handler.job_manager.jobs) + + # show that an event handler w/a dbs & tasks cannot shutdown + job_entity2 = JobEntity() + job_entity2.name = "xyz" + job_entity2.step_id = "123" + job_entity2.task_id = "" + + mani_handler = ManifestEventHandler("xyz", logger) + job_entity1.type = "orchestrator" + mani_handler.job_manager.add_job( + job_entity1.name, job_entity1.step_id, job_entity1, False + ) + + mani_handler.job_manager.add_job( + job_entity2.name, job_entity2.step_id, job_entity2, False + ) + assert not can_shutdown(mani_handler, logger) + assert bool(mani_handler.job_manager.db_jobs) + assert bool(mani_handler.job_manager.jobs) + + # ... now, show that removing 1 of 2 jobs still doesn't shutdown + mani_handler.job_manager.db_jobs.popitem() + assert not can_shutdown(mani_handler, logger) + + # ... 
now, show that removing the final job will allow shutdown
+    mani_handler.job_manager.jobs.popitem()
+    assert can_shutdown(mani_handler, logger)
+
+
+def test_auto_shutdown():
+    """Ensure that the cooldown timer is respected"""
+
+    class FauxObserver:
+        """Stand-in for the telemetry monitor's file-system observer"""
+        def __init__(self):
+            self.stop_count = 0
+
+        def stop(self):
+            self.stop_count += 1
+
+        def is_alive(self) -> bool:
+            if self.stop_count > 0:
+                return False
+
+            return True
+
+    job_entity1 = JobEntity()
+    job_entity1.name = "xyz"
+    job_entity1.step_id = "123"
+    job_entity1.task_id = ""
+
+    frequency = 1
+
+    # show that an event handler w/out a monitored task will automatically stop
+    mani_handler = ManifestEventHandler("xyz", logger)
+    observer = FauxObserver()
+    duration = 2
+
+    ts0 = get_ts()
+    event_loop(observer, mani_handler, frequency, logger, cooldown_duration=duration)
+    ts1 = get_ts()
+
+    assert ts1 - ts0 >= duration
+    assert observer.stop_count == 1
+
+    # show that the new cooldown duration is respected
+    mani_handler = ManifestEventHandler("xyz", logger)
+    observer = FauxObserver()
+    duration = 5
+
+    ts0 = get_ts()
+    event_loop(observer, mani_handler, frequency, logger, cooldown_duration=duration)
+    ts1 = get_ts()
+
+    assert ts1 - ts0 >= duration
+    assert observer.stop_count == 1
+
+
+def test_telemetry_single_model(fileutils, wlmutils):
+    """Ensure a single launched model produces exactly one start and one
+    stop telemetry event"""
+
+    # Set experiment name
+    exp_name = "telemetry_single_model"
+
+    # Retrieve parameters from testing environment
+    test_launcher = wlmutils.get_test_launcher()
+    test_dir = fileutils.make_test_dir()
+    test_script = fileutils.get_test_conf_path("echo.py")
+
+    # Create SmartSim Experiment
+    exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir)
+
+    # create run settings
+    app_settings = exp.create_run_settings("python", test_script)
+    app_settings.set_nodes(1)
+    app_settings.set_tasks_per_node(1)
+
+    # Create the SmartSim Model
+    smartsim_model = exp.create_model("perroquet", app_settings)
+    exp.generate(smartsim_model)
+    exp.start(smartsim_model, block=True)
+    assert exp.get_status(smartsim_model)[0] == STATUS_COMPLETED
+
+    telemetry_output_path = pathlib.Path(test_dir) / serialize.TELMON_SUBDIR
+    start_events = list(telemetry_output_path.rglob("start.json"))
+    stop_events = list(telemetry_output_path.rglob("stop.json"))
+
+    assert len(start_events) == 1
+    assert len(stop_events) == 1
+
+
+def test_telemetry_single_model_nonblocking(fileutils, wlmutils, monkeypatch):
+    """Ensure that the telemetry monitor logs exist when the experiment
+    is non-blocking"""
+    with monkeypatch.context() as ctx:
+        ctx.setattr(cfg.Config, "telemetry_frequency", 1)
+
+        # Set experiment name
+        exp_name = "test_telemetry_single_model_nonblocking"
+
+        # Retrieve parameters from testing environment
+        test_launcher = wlmutils.get_test_launcher()
+        test_dir = fileutils.make_test_dir()
+        test_script = fileutils.get_test_conf_path("echo.py")
+
+        # Create SmartSim Experiment
+        exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir)
+
+        # create run settings
+        app_settings = exp.create_run_settings("python", test_script)
+        app_settings.set_nodes(1)
+        app_settings.set_tasks_per_node(1)
+
+        # Create the SmartSim Model
+        smartsim_model = exp.create_model("perroquet", app_settings)
+        exp.generate(smartsim_model)
+        exp.start(smartsim_model)
+
+        snooze_nonblocking(test_dir, max_delay=60, post_data_delay=30)
+
+        assert exp.get_status(smartsim_model)[0] == STATUS_COMPLETED
+
+
telemetry_output_path = pathlib.Path(test_dir) / serialize.TELMON_SUBDIR + start_events = list(telemetry_output_path.rglob("start.json")) + stop_events = list(telemetry_output_path.rglob("stop.json")) + + assert len(start_events) == 1 + assert len(stop_events) == 1 + + +def test_telemetry_serial_models(fileutils, wlmutils, monkeypatch): + """ + Test telemetry with models being run in serial (one after each other) + """ + with monkeypatch.context() as ctx: + ctx.setattr(cfg.Config, "telemetry_frequency", 1) + + # Set experiment name + exp_name = "telemetry_serial_models" + + # Retrieve parameters from testing environment + test_launcher = wlmutils.get_test_launcher() + test_dir = fileutils.make_test_dir() + test_script = fileutils.get_test_conf_path("echo.py") + + # Create SmartSim Experiment + exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir) + + # create run settings + app_settings = exp.create_run_settings("python", test_script) + app_settings.set_nodes(1) + app_settings.set_tasks_per_node(1) + + # Create the SmartSim Model + smartsim_models = [ + exp.create_model(f"perroquet_{i}", app_settings) for i in range(5) + ] + exp.generate(*smartsim_models) + exp.start(*smartsim_models, block=True) + assert all( + [status == STATUS_COMPLETED for status in exp.get_status(*smartsim_models)] + ) + + telemetry_output_path = pathlib.Path(test_dir) / serialize.TELMON_SUBDIR + start_events = list(telemetry_output_path.rglob("start.json")) + stop_events = list(telemetry_output_path.rglob("stop.json")) + + assert len(start_events) == 5 + assert len(stop_events) == 5 + + +def test_telemetry_serial_models_nonblocking(fileutils, wlmutils, monkeypatch): + """ + Test telemetry with models being run in serial (one after each other) + in a non-blocking experiment + """ + with monkeypatch.context() as ctx: + ctx.setattr(cfg.Config, "telemetry_frequency", 1) + + # Set experiment name + exp_name = "telemetry_serial_models" + + # Retrieve parameters from testing environment + test_launcher = wlmutils.get_test_launcher() + test_dir = fileutils.make_test_dir() + test_script = fileutils.get_test_conf_path("echo.py") + + # Create SmartSim Experiment + exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir) + + # create run settings + app_settings = exp.create_run_settings("python", test_script) + app_settings.set_nodes(1) + app_settings.set_tasks_per_node(1) + + # Create the SmartSim Model + smartsim_models = [ + exp.create_model(f"perroquet_{i}", app_settings) for i in range(5) + ] + exp.generate(*smartsim_models) + exp.start(*smartsim_models) + + snooze_nonblocking(test_dir, max_delay=60, post_data_delay=10) + + assert all( + [status == STATUS_COMPLETED for status in exp.get_status(*smartsim_models)] + ) + + telemetry_output_path = pathlib.Path(test_dir) / serialize.TELMON_SUBDIR + start_events = list(telemetry_output_path.rglob("start.json")) + stop_events = list(telemetry_output_path.rglob("stop.json")) + + assert len(start_events) == 5 + assert len(stop_events) == 5 + + +def test_telemetry_db_only_with_generate(fileutils, wlmutils, monkeypatch): + """ + Test telemetry with only a database running + """ + with monkeypatch.context() as ctx: + ctx.setattr(cfg.Config, "telemetry_frequency", 1) + + # Set experiment name + exp_name = "telemetry_db_with_generate" + + # Retrieve parameters from testing environment + test_launcher = wlmutils.get_test_launcher() + test_interface = wlmutils.get_test_interface() + test_port = wlmutils.get_test_port() + test_dir = fileutils.make_test_dir() + + 
+        # Create SmartSim Experiment
+        exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir)
+
+        # create regular database
+        orc = exp.create_database(port=test_port, interface=test_interface)
+        exp.generate(orc)
+        try:
+            exp.start(orc, block=True)
+
+            snooze_nonblocking(test_dir, max_delay=60, post_data_delay=10)
+
+            telemetry_output_path = pathlib.Path(test_dir) / serialize.TELMON_SUBDIR
+            start_events = list(telemetry_output_path.rglob("start.json"))
+            stop_events = list(telemetry_output_path.rglob("stop.json"))
+
+            assert len(start_events) == 1
+            assert len(stop_events) <= 1
+        finally:
+            exp.stop(orc)
+            snooze_nonblocking(test_dir, max_delay=60, post_data_delay=10)
+
+        assert exp.get_status(orc)[0] == STATUS_CANCELLED
+
+        stop_events = list(telemetry_output_path.rglob("stop.json"))
+        assert len(stop_events) == 1
+
+
+def test_telemetry_db_only_without_generate(fileutils, wlmutils, monkeypatch):
+    """
+    Test telemetry with only a database running, without calling
+    exp.generate first
+    """
+    with monkeypatch.context() as ctx:
+        ctx.setattr(cfg.Config, "telemetry_frequency", 1)
+
+        # Set experiment name
+        exp_name = "telemetry_db_only_without_generate"
+
+        # Retrieve parameters from testing environment
+        test_launcher = wlmutils.get_test_launcher()
+        test_interface = wlmutils.get_test_interface()
+        test_port = wlmutils.get_test_port()
+        test_dir = fileutils.make_test_dir()
+
+        # Create SmartSim Experiment
+        exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir)
+
+        # create regular database
+        orc = exp.create_database(port=test_port, interface=test_interface)
+        try:
+            exp.start(orc)
+
+            snooze_nonblocking(test_dir, max_delay=60, post_data_delay=30)
+
+            telemetry_output_path = pathlib.Path(test_dir) / serialize.TELMON_SUBDIR
+            start_events = list(telemetry_output_path.rglob("start.json"))
+            stop_events = list(telemetry_output_path.rglob("stop.json"))
+
+            assert len(start_events) == 1
+            assert len(stop_events) == 0
+        finally:
+            exp.stop(orc)
+
+        snooze_nonblocking(test_dir, max_delay=60, post_data_delay=10)
+        assert exp.get_status(orc)[0] == STATUS_CANCELLED
+
+        stop_events = list(telemetry_output_path.rglob("stop.json"))
+        assert len(stop_events) == 1
+
+
+def test_telemetry_db_and_model(fileutils, wlmutils, monkeypatch):
+    """
+    Test telemetry with a database and a model running
+    """
+
+    with monkeypatch.context() as ctx:
+        ctx.setattr(cfg.Config, "telemetry_frequency", 1)
+
+        # Set experiment name
+        exp_name = "telemetry_db_and_model"
+
+        # Retrieve parameters from testing environment
+        test_launcher = wlmutils.get_test_launcher()
+        test_interface = wlmutils.get_test_interface()
+        test_port = wlmutils.get_test_port()
+        test_dir = fileutils.make_test_dir()
+        test_script = fileutils.get_test_conf_path("echo.py")
+
+        # Create SmartSim Experiment
+        exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir)
+
+        # create regular database
+        orc = exp.create_database(port=test_port, interface=test_interface)
+        try:
+            exp.start(orc)
+
+            # create run settings
+            app_settings = exp.create_run_settings("python", test_script)
+            app_settings.set_nodes(1)
+            app_settings.set_tasks_per_node(1)
+
+            # Create the SmartSim Model
+            smartsim_model = exp.create_model("perroquet", app_settings)
+            exp.generate(smartsim_model)
+            exp.start(smartsim_model, block=True)
+        finally:
+            exp.stop(orc)
+
+        snooze_nonblocking(test_dir, max_delay=60, post_data_delay=30)
+
+        assert exp.get_status(orc)[0] == STATUS_CANCELLED
+        assert exp.get_status(smartsim_model)[0] == STATUS_COMPLETED
+
+        telemetry_output_path = pathlib.Path(test_dir) / serialize.TELMON_SUBDIR
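+        # events are written per entity type, so prefixing the rglob pattern
+        # with "database/" or "model/" isolates the events of a single entity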
+
+        start_events = list(telemetry_output_path.rglob("database/**/start.json"))
+        stop_events = list(telemetry_output_path.rglob("database/**/stop.json"))
+
+        assert len(start_events) == 1
+        assert len(stop_events) == 1
+
+        start_events = list(telemetry_output_path.rglob("model/**/start.json"))
+        stop_events = list(telemetry_output_path.rglob("model/**/stop.json"))
+        assert len(start_events) == 1
+        assert len(stop_events) == 1
+
+
+def test_telemetry_ensemble(fileutils, wlmutils, monkeypatch):
+    """
+    Test telemetry with an ensemble of models running
+    """
+
+    with monkeypatch.context() as ctx:
+        ctx.setattr(cfg.Config, "telemetry_frequency", 1)
+
+        # Set experiment name
+        exp_name = "telemetry_ensemble"
+
+        # Retrieve parameters from testing environment
+        test_launcher = wlmutils.get_test_launcher()
+        test_dir = fileutils.make_test_dir()
+        test_script = fileutils.get_test_conf_path("echo.py")
+
+        # Create SmartSim Experiment
+        exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir)
+
+        app_settings = exp.create_run_settings("python", test_script)
+        app_settings.set_nodes(1)
+        app_settings.set_tasks_per_node(1)
+
+        ens = exp.create_ensemble("troupeau", run_settings=app_settings, replicas=5)
+        exp.generate(ens)
+        exp.start(ens, block=True)
+        assert all(status == STATUS_COMPLETED for status in exp.get_status(ens))
+
+        snooze_nonblocking(test_dir, max_delay=60, post_data_delay=30)
+        telemetry_output_path = pathlib.Path(test_dir) / serialize.TELMON_SUBDIR
+        start_events = list(telemetry_output_path.rglob("start.json"))
+        stop_events = list(telemetry_output_path.rglob("stop.json"))
+
+        assert len(start_events) == 5
+        assert len(stop_events) == 5
+
+
+def test_telemetry_colo(fileutils, wlmutils, coloutils, monkeypatch):
+    """
+    Test telemetry with a colocated model running
+    """
+
+    with monkeypatch.context() as ctx:
+        ctx.setattr(cfg.Config, "telemetry_frequency", 1)
+
+        # Set experiment name
+        exp_name = "telemetry_colo"
+
+        # Retrieve parameters from testing environment
+        test_launcher = wlmutils.get_test_launcher()
+        test_dir = fileutils.make_test_dir()
+
+        # Create SmartSim Experiment
+        exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir)
+
+        smartsim_model = coloutils.setup_test_colo(
+            fileutils,
+            "uds",
+            exp,
+            "echo.py",
+            {},
+        )
+
+        exp.generate(smartsim_model)
+        exp.start(smartsim_model, block=True)
+        assert all(
+            status == STATUS_COMPLETED for status in exp.get_status(smartsim_model)
+        )
+
+        telemetry_output_path = pathlib.Path(test_dir) / serialize.TELMON_SUBDIR
+        start_events = list(telemetry_output_path.rglob("start.json"))
+        stop_events = list(telemetry_output_path.rglob("stop.json"))
+
+        # the colocated db does NOT show up as a unique entity in the telemetry
+        assert len(start_events) == 1
+        assert len(stop_events) == 1
+
+
+@pytest.mark.parametrize(
+    "frequency, cooldown",
+    [
+        pytest.param(1, 1, id="1s shutdown"),
+        pytest.param(1, 5, id="5s shutdown"),
+        pytest.param(1, 15, id="15s shutdown"),
+    ],
+)
+def test_telemetry_autoshutdown(fileutils, wlmutils, monkeypatch, frequency, cooldown):
+    """
+    Ensure that the telemetry monitor process shuts down after the desired
+    cooldown period
+    """
+
+    with monkeypatch.context() as ctx:
+        ctx.setattr(cfg.Config, "telemetry_frequency", frequency)
+        ctx.setattr(cfg.Config, "telemetry_cooldown", cooldown)
+
+        # Set experiment name
+        exp_name = "telemetry_autoshutdown"
+
+        # Retrieve parameters from testing environment
+        test_launcher = wlmutils.get_test_launcher()
+        test_dir = fileutils.make_test_dir()
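+        # telemetry_cooldown is the grace period (in seconds) the monitor
+        # should linger after its last tracked job exits before shutting down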
+
+        # Create SmartSim Experiment
+        exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir)
+
+        start_time = get_ts()
+        stop_time = start_time
+        exp.start(block=False)
+
+        telemetry_output_path = pathlib.Path(test_dir) / serialize.TELMON_SUBDIR
+        empty_mani = list(telemetry_output_path.rglob("manifest.json"))
+        assert len(empty_mani) == 1, "a manifest.json should be created"
+
+        popen = exp._control._telemetry_monitor
+        assert popen.pid > 0
+        assert popen.returncode is None
+
+        # give some leeway during testing for the cooldown to get hit
+        for i in range(10):
+            if popen.poll() is not None:
+                stop_time = get_ts()
+                print(f"Completed polling for telemetry shutdown after {i} attempts")
+                break
+            time.sleep(3)
+
+        assert popen.returncode is not None
+        assert stop_time >= (start_time + cooldown)
+
+
+class MockStep(Step):
+    """Mock step implementing the abstract methods so that it can be
+    instantiated for test purposes
+    """
+
+    def get_launch_cmd(self):
+        return ["spam", "eggs"]
+
+
+@pytest.fixture
+def mock_step_meta_dict(fileutils):
+    test_dir = fileutils.make_test_dir()
+    telemetry_output_path = pathlib.Path(test_dir) / serialize.TELMON_SUBDIR
+    yield {
+        "entity_type": "mock",
+        "status_dir": telemetry_output_path,
+    }
+
+
+@pytest.fixture
+def mock_step(fileutils, mock_step_meta_dict):
+    test_dir = fileutils.make_test_dir()
+    rs = RunSettings("echo")
+    step = MockStep("mock-step", test_dir, rs)
+    step.meta = mock_step_meta_dict
+    yield step
+
+
+def test_proxy_launch_cmd_decorator_reformats_cmds(mock_step, monkeypatch):
+    monkeypatch.setattr(cfg.Config, CFG_TM_ENABLED_ATTR, True)
+    get_launch_cmd = proxyable_launch_cmd(lambda step: ["some", "cmd", "list"])
+    cmd = get_launch_cmd(mock_step)
+    assert cmd != ["some", "cmd", "list"]
+    assert sys.executable in cmd
+    assert PROXY_ENTRY_POINT in cmd
+
+
+def test_proxy_launch_cmd_decorator_does_not_reformat_cmds_if_the_tm_is_off(
+    mock_step, monkeypatch
+):
+    monkeypatch.setattr(cfg.Config, CFG_TM_ENABLED_ATTR, False)
+    get_launch_cmd = proxyable_launch_cmd(lambda step: ["some", "cmd", "list"])
+    cmd = get_launch_cmd(mock_step)
+    assert cmd == ["some", "cmd", "list"]
+
+
+def test_proxy_launch_cmd_decorator_errors_if_attempt_to_proxy_a_managed_step(
+    mock_step, monkeypatch
+):
+    monkeypatch.setattr(cfg.Config, CFG_TM_ENABLED_ATTR, True)
+    mock_step.managed = True
+    get_launch_cmd = proxyable_launch_cmd(lambda step: ["some", "cmd", "list"])
+    with pytest.raises(UnproxyableStepError):
+        get_launch_cmd(mock_step)
+
+
+@for_all_wlm_launchers
+def test_unmanaged_steps_are_proxied_through_indirect(
+    wlm_launcher, mock_step_meta_dict, fileutils, monkeypatch
+):
+    monkeypatch.setattr(cfg.Config, CFG_TM_ENABLED_ATTR, True)
+    test_dir = fileutils.make_test_dir()
+    rs = RunSettings("echo", ["hello", "world"])
+    step = wlm_launcher.create_step("test-step", test_dir, rs)
+    step.meta = mock_step_meta_dict
+    assert isinstance(step, Step)
+    assert not step.managed
+    cmd = step.get_launch_cmd()
+    assert sys.executable in cmd
+    assert PROXY_ENTRY_POINT in cmd
+    assert "hello" not in cmd
+    assert "world" not in cmd
+
+
+@for_all_wlm_launchers
+def test_unmanaged_steps_are_not_proxied_if_the_telemetry_monitor_is_disabled(
+    wlm_launcher, mock_step_meta_dict, fileutils, monkeypatch
+):
+    monkeypatch.setattr(cfg.Config, CFG_TM_ENABLED_ATTR, False)
+    test_dir = fileutils.make_test_dir()
+    rs = RunSettings("echo", ["hello", "world"])
+    step = wlm_launcher.create_step("test-step", test_dir, rs)
+    step.meta = mock_step_meta_dict
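+    # with the telemetry monitor disabled, the original command should be
+    # returned unmodified: no interpreter or proxy entry point injected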
+    assert isinstance(step, Step)
+    assert not step.managed
+    cmd = step.get_launch_cmd()
+    assert PROXY_ENTRY_POINT not in cmd
+    assert "hello" in cmd
+    assert "world" in cmd
+
+
+@requires_wlm
+@pytest.mark.parametrize(
+    "run_command",
+    [
+        pytest.param("", id="Unmanaged"),
+        pytest.param("auto", id="Managed"),
+    ],
+)
+def test_multistart_experiment(
+    wlmutils: WLMUtils,
+    fileutils: FileUtils,
+    monkeypatch: pytest.MonkeyPatch,
+    run_command: str,
+):
+    """Run an experiment with multiple start calls to ensure that telemetry is
+    saved correctly for each run
+    """
+    test_dir = fileutils.make_test_dir(sub_dir=str(uuid.uuid4()))
+
+    exp_name = "my-exp"
+    exp = Experiment(
+        exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir
+    )
+    rs_e = exp.create_run_settings(
+        sys.executable, ["printing_model.py"], run_command=run_command
+    )
+    rs_e.set_nodes(1)
+    rs_e.set_tasks(1)
+    ens = exp.create_ensemble(
+        "my-ens",
+        run_settings=rs_e,
+        perm_strategy="all_perm",
+        params={
+            "START": ["spam"],
+            "MID": ["eggs"],
+            "END": ["sausage", "and spam"],
+        },
+    )
+
+    test_script_path = fileutils.get_test_conf_path("printing_model.py")
+    ens.attach_generator_files(to_configure=[test_script_path])
+
+    rs_m = exp.create_run_settings("echo", ["hello", "world"], run_command=run_command)
+    rs_m.set_nodes(1)
+    rs_m.set_tasks(1)
+    model = exp.create_model("my-model", run_settings=rs_m)
+
+    db = exp.create_database(
+        db_nodes=1,
+        port=wlmutils.get_test_port(),
+        interface=wlmutils.get_test_interface(),
+    )
+
+    exp.generate(db, ens, model, overwrite=True)
+
+    with monkeypatch.context() as ctx:
+        ctx.setattr(cfg.Config, "telemetry_frequency", 1)
+        ctx.setattr(cfg.Config, "telemetry_cooldown", 45)
+
+        exp.start(model, block=False)
+
+        # track the PID to verify the telmon cooldown avoids restarting the process
+        tm_pid = exp._control._telemetry_monitor.pid
+
+        exp.start(db, block=False)
+        # check that the same TM process is still active
+        assert tm_pid == exp._control._telemetry_monitor.pid
+        try:
+            exp.start(ens, block=True, summary=True)
+        finally:
+            exp.stop(db)
+            assert tm_pid == exp._control._telemetry_monitor.pid
+            time.sleep(3)  # time for telmon to write the db stop event
+
+        telemetry_output_path = pathlib.Path(test_dir) / serialize.TELMON_SUBDIR
+
+        db_start_events = list(telemetry_output_path.rglob("database/**/start.json"))
+        db_stop_events = list(telemetry_output_path.rglob("database/**/stop.json"))
+        assert len(db_start_events) == 1
+        assert len(db_stop_events) == 1
+
+        m_start_events = list(telemetry_output_path.rglob("model/**/start.json"))
+        m_stop_events = list(telemetry_output_path.rglob("model/**/stop.json"))
+        assert len(m_start_events) == 1
+        assert len(m_stop_events) == 1
+
+        e_start_events = list(telemetry_output_path.rglob("ensemble/**/start.json"))
+        e_stop_events = list(telemetry_output_path.rglob("ensemble/**/stop.json"))
+        assert len(e_start_events) == 2
+        assert len(e_stop_events) == 2
+
+
+@pytest.mark.parametrize(
+    "status_in, expected_out",
+    [
+        pytest.param(STATUS_CANCELLED, 1, id="failure on cancellation"),
+        pytest.param(STATUS_COMPLETED, 0, id="success on completion"),
+        pytest.param(STATUS_FAILED, 1, id="failure on failed"),
+        pytest.param(STATUS_NEW, None, id="no rc on new"),
+        pytest.param(STATUS_PAUSED, None, id="no rc on paused"),
+        pytest.param(STATUS_RUNNING, None, id="no rc on running"),
+    ],
+)
+def test_faux_rc(status_in: str, expected_out: t.Optional[int]):
+    """Ensure faux return codes match expectations."""
+    step_info = StepInfo(status=status_in)
+
+    rc = faux_return_code(step_info)
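+    # terminal statuses map to a synthetic exit code (0 or 1); non-terminal
+    # statuses yield None because no real return code exists yet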
+    assert rc == expected_out
+
+
+@pytest.mark.parametrize(
+    "status_in, expected_out, expected_has_jobs",
+    [
+        pytest.param(STATUS_CANCELLED, 1, False, id="failure on cancellation"),
+        pytest.param(STATUS_COMPLETED, 0, False, id="success on completion"),
+        pytest.param(STATUS_FAILED, 1, False, id="failure on failed"),
+        pytest.param(STATUS_NEW, None, True, id="no rc on new"),
+        pytest.param(STATUS_PAUSED, None, True, id="no rc on paused"),
+        pytest.param(STATUS_RUNNING, None, True, id="no rc on running"),
+    ],
+)
+def test_wlm_completion_handling(
+    fileutils: FileUtils,
+    monkeypatch: pytest.MonkeyPatch,
+    status_in: str,
+    expected_out: t.Optional[int],
+    expected_has_jobs: bool,
+):
+    test_dir = fileutils.make_test_dir(sub_dir=str(uuid.uuid4()))
+
+    def get_faux_update(status: str) -> t.Callable:
+        def _faux_updates(
+            _self: WLMLauncher, _names: t.List[str]
+        ) -> t.List[t.Tuple[str, StepInfo]]:
+            return [("faux-name", StepInfo(status=status))]
+
+        return _faux_updates
+
+    ts = get_ts()
+    with monkeypatch.context() as ctx:
+        # don't actually start a job manager
+        ctx.setattr(JobManager, "start", lambda x: ...)
+        ctx.setattr(SlurmLauncher, "get_step_update", get_faux_update(status_in))
+
+        mani_handler = ManifestEventHandler("xyz", logger)
+        mani_handler.set_launcher("slurm")
+
+        # prep a fake job to request updates for
+        job_entity = JobEntity()
+        job_entity.name = "faux-name"
+        job_entity.step_id = "faux-step-id"
+        job_entity.task_id = 1234
+        job_entity.status_dir = test_dir
+        job_entity.type = "orchestrator"
+
+        job = Job(job_entity.name, job_entity.step_id, job_entity, "slurm", True)
+
+        # populate our tracking collections
+        mani_handler._tracked_jobs = {job_entity.key: job_entity}
+        mani_handler.job_manager.jobs[job.name] = job
+
+        mani_handler.on_timestep(ts)
+
+        # see that the job queue was properly manipulated
+        has_jobs = bool(mani_handler._tracked_jobs)
+        assert expected_has_jobs == has_jobs
+
+        # see that the event was properly written
+        stop_event_path = pathlib.Path(test_dir) / "stop.json"
+
+        # if a status wasn't terminal, no stop event should have been written
+        should_have_stop_event = expected_out is not None
+        assert should_have_stop_event == stop_event_path.exists()
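+        # NOTE: on_timestep polls the launcher for updates on tracked jobs;
+        # when a job reaches a terminal status it is dropped from
+        # _tracked_jobs and a stop event is persisted to its status directory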