Skip to content

Commit

Permalink
Add support for producing dashboard outputs (#426)
Browse files Browse the repository at this point in the history
Add support for producing & consuming telemetry outputs.

- Adds telemetry monitor to check for updates and produce events for the dashboard
- Updates controller to conditionally start telemetry monitor
- Updates controller to produce a runtime manifest to trigger telemetry collection
- Adds indirect proxy to produce events for the dashboard for unmanaged tasks
- Adds CLI capability to launch dashboard

[ committed by @ankona, @MattToast, @AlyssaCote ]
[ reviewed by @al-rigazzi, @ashao ]

---------

Co-authored-by: Matt Drozt <[email protected]>
Co-authored-by: Alyssa Cote <[email protected]>
  • Loading branch information
3 people authored Dec 6, 2023
1 parent 508cba3 commit d8fba1b
Show file tree
Hide file tree
Showing 87 changed files with 5,735 additions and 302 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,8 @@ smartsim/_core/bin/*-cli

# created upon install
smartsim/_core/lib

**/manifest/
**/*.err
**/*.out
**/.smartsim/*
13 changes: 8 additions & 5 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,10 +380,10 @@ def local_db(
"""Yield fixture for startup and teardown of a local orchestrator"""

exp_name = request.function.__name__
exp = Experiment(exp_name, launcher="local")
test_dir = fileutils.make_test_dir(
caller_function=exp_name, caller_fspath=request.fspath
)
exp = Experiment(exp_name, launcher="local", exp_path=test_dir)
db = Orchestrator(port=wlmutils.get_test_port(), interface="lo")
db.set_path(test_dir)
exp.start(db)
Expand All @@ -402,10 +402,10 @@ def db(
launcher = wlmutils.get_test_launcher()

exp_name = request.function.__name__
exp = Experiment(exp_name, launcher=launcher)
test_dir = fileutils.make_test_dir(
caller_function=exp_name, caller_fspath=request.fspath
)
exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir)
db = wlmutils.get_orchestrator()
db.set_path(test_dir)
exp.start(db)
Expand All @@ -427,10 +427,10 @@ def db_cluster(
launcher = wlmutils.get_test_launcher()

exp_name = request.function.__name__
exp = Experiment(exp_name, launcher=launcher)
test_dir = fileutils.make_test_dir(
caller_function=exp_name, caller_fspath=request.fspath
)
exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir)
db = wlmutils.get_orchestrator(nodes=3)
db.set_path(test_dir)
exp.start(db)
Expand Down Expand Up @@ -630,7 +630,7 @@ def get_test_dir_path(dirname: str) -> str:
return dir_path

@staticmethod
def make_test_file(file_name: str, file_dir: t.Optional[str] = None) -> str:
def make_test_file(file_name: str, file_dir: t.Optional[str] = None, file_content: t.Optional[str] = None) -> str:
"""Create a dummy file in the test output directory.
:param file_name: name of file to create, e.g. "file.txt"
Expand All @@ -644,7 +644,10 @@ def make_test_file(file_name: str, file_dir: t.Optional[str] = None) -> str:
file_path = os.path.join(test_dir, file_name)

with open(file_path, "w+", encoding="utf-8") as dummy_file:
dummy_file.write("dummy\n")
if not file_content:
dummy_file.write("dummy\n")
else:
dummy_file.write(file_content)

return file_path

Expand Down
6 changes: 6 additions & 0 deletions doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@
sr_runtime
api/smartredis_api

.. toctree::
:maxdepth: 2
:caption: SmartDashboard

smartdashboard

.. toctree::
:maxdepth: 2
:caption: Reference
Expand Down
7 changes: 7 additions & 0 deletions doc/smartdashboard.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@

**************
SmartDashboard
**************

.. include:: ../smartdashboard/doc/overview.rst
:start-line: 4
6 changes: 6 additions & 0 deletions docker/docs/dev/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ RUN git clone https://github.com/CrayLabs/SmartRedis.git --branch develop --dept
&& python -m pip install . \
&& rm -rf ~/.cache/pip

# Install smartdashboard
RUN git clone https://github.com/CrayLabs/SmartDashboard.git --branch develop --depth=1 smartdashboard \
&& cd smartdashboard \
&& python -m pip install . \
&& rm -rf ~/.cache/pip

RUN cd doc/tutorials/ && \
ln -s ../../tutorials/* .

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ module = [
"keras",
"torch",
"smartsim.ml.torch.*", # must solve/ignore inheritance issues
"watchdog",
]
ignore_missing_imports = true
ignore_errors = true
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ def has_ext_modules(_placeholder):
"tqdm>=4.50.2",
"filelock>=3.4.2",
"protobuf~=3.20",
"watchdog>=3.0.0",
]

# Add SmartRedis at specific version
Expand Down
19 changes: 18 additions & 1 deletion smartsim/_core/_cli/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,31 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys

from smartsim._core._cli.cli import default_cli
from smartsim._core._cli.utils import SMART_LOGGER_FORMAT
from smartsim.error.errors import SmartSimCLIActionCancelled
from smartsim.log import get_logger


logger = get_logger("Smart", fmt=SMART_LOGGER_FORMAT)


def main() -> int:
smart_cli = default_cli()
return smart_cli.execute(sys.argv)
exception_trace_back_msg = "SmartSim exited with the following exception info:"

try:
return smart_cli.execute(sys.argv)
except SmartSimCLIActionCancelled as ssi:
logger.info(str(ssi))
logger.debug(exception_trace_back_msg, exc_info=ssi)
except KeyboardInterrupt as e:
logger.info("SmartSim was terminated by user")
logger.debug(exception_trace_back_msg, exc_info=e)
return os.EX_OK


if __name__ == "__main__":
Expand Down
10 changes: 6 additions & 4 deletions smartsim/_core/_cli/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,9 @@ def _format_incompatible_python_env_message(
)


def execute(args: argparse.Namespace) -> int:
def execute(
args: argparse.Namespace, _unparsed_args: t.Optional[t.List[str]] = None, /
) -> int:
verbose = args.v
keydb = args.keydb
device: _TDeviceStr = args.device
Expand Down Expand Up @@ -416,7 +418,7 @@ def execute(args: argparse.Namespace) -> int:
)
except (SetupError, BuildError) as e:
logger.error(str(e))
return 1
return os.EX_SOFTWARE

backends = installed_redisai_backends()
backends_str = ", ".join(s.capitalize() for s in backends) if backends else "No"
Expand All @@ -431,10 +433,10 @@ def execute(args: argparse.Namespace) -> int:
check_py_onnx_version(versions)
except (SetupError, BuildError) as e:
logger.error(str(e))
return 1
return os.EX_SOFTWARE

logger.info("SmartSim build complete!")
return 0
return os.EX_OK


def configure_parser(parser: argparse.ArgumentParser) -> None:
Expand Down
9 changes: 7 additions & 2 deletions smartsim/_core/_cli/clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import typing as t

from smartsim._core._cli.utils import clean, get_install_path

Expand All @@ -39,10 +40,14 @@ def configure_parser(parser: argparse.ArgumentParser) -> None:
)


def execute(args: argparse.Namespace) -> int:
def execute(
args: argparse.Namespace, _unparsed_args: t.Optional[t.List[str]] = None, /
) -> int:
return clean(get_install_path() / "_core", _all=args.clobber)


def execute_all(args: argparse.Namespace) -> int:
def execute_all(
args: argparse.Namespace, _unparsed_args: t.Optional[t.List[str]] = None, /
) -> int:
args.clobber = True
return execute(args)
57 changes: 40 additions & 17 deletions smartsim/_core/_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import os
import typing as t

from smartsim._core._cli.build import configure_parser as build_parser
Expand All @@ -41,46 +42,68 @@
execute as validate_execute,
configure_parser as validate_parser,
)
from smartsim._core._cli.plugin import plugins
from smartsim._core._cli.utils import MenuItemConfig


class SmartCli:
def __init__(self, menu: t.List[MenuItemConfig]) -> None:
self.menu: t.Dict[str, MenuItemConfig] = {item.command: item for item in menu}
parser = argparse.ArgumentParser(
self.menu: t.Dict[str, MenuItemConfig] = {}
self.parser = argparse.ArgumentParser(
prog="smart",
description="SmartSim command line interface",
)
self.parser = parser
self.args: t.Optional[argparse.Namespace] = None

subparsers = parser.add_subparsers(
self.subparsers = self.parser.add_subparsers(
dest="command",
required=True,
metavar="<command>",
help="Available commands",
)

for cmd, item in self.menu.items():
parser = subparsers.add_parser(
cmd, description=item.description, help=item.description
)
if item.configurator:
item.configurator(parser)
self.register_menu_items(menu)
self.register_menu_items([plugin() for plugin in plugins])

def execute(self, cli_args: t.List[str]) -> int:
if len(cli_args) < 2:
self.parser.print_help()
return 0
return os.EX_USAGE

app_args = cli_args[1:]
self.args = self.parser.parse_args(app_args)
app_args = cli_args[1:] # exclude the path to executable
subcommand = cli_args[1] # first positional arg is the subcommand

if not (menu_item := self.menu.get(app_args[0], None)):
menu_item = self.menu.get(subcommand, None)
if not menu_item:
self.parser.print_help()
return 0
return os.EX_USAGE

args = argparse.Namespace()
unparsed_args = []

if menu_item.is_plugin:
unparsed_args = app_args[1:]
else:
args = self.parser.parse_args(app_args)

return menu_item.handler(args, unparsed_args)

def _register_menu_item(self, item: MenuItemConfig) -> None:
parser = self.subparsers.add_parser(
item.command, description=item.description, help=item.description
)
if item.configurator:
item.configurator(parser)

if item.command in self.menu:
raise ValueError(
f"{item.command} cannot overwrite existing CLI command"
)

self.menu[item.command] = item

return menu_item.handler(self.args)
def register_menu_items(self, menu_items: t.List[MenuItemConfig]) -> None:
for item in menu_items:
self._register_menu_item(item)


def default_cli() -> SmartCli:
Expand Down
10 changes: 7 additions & 3 deletions smartsim/_core/_cli/dbcli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,17 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import os
import typing as t

from smartsim._core._cli.utils import get_db_path


def execute(_args: argparse.Namespace) -> int:
def execute(
_args: argparse.Namespace, _unparsed_args: t.Optional[t.List[str]] = None, /
) -> int:
if db_path := get_db_path():
print(db_path)
return 0
return os.EX_OK
print("Database (Redis or KeyDB) dependencies not found")
return 1
return os.EX_SOFTWARE
7 changes: 5 additions & 2 deletions smartsim/_core/_cli/info.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import argparse
import importlib.metadata
import os
import pathlib
import typing as t

Expand All @@ -12,7 +13,9 @@
_MISSING_DEP = _helpers.colorize("Not Installed", "red")


def execute(_args: argparse.Namespace, /) -> int:
def execute(
_args: argparse.Namespace, _unparsed_args: t.Optional[t.List[str]] = None, /
) -> int:
print("\nSmart Python Packages:")
print(
tabulate(
Expand Down Expand Up @@ -66,7 +69,7 @@ def execute(_args: argparse.Namespace, /) -> int:
),
end="\n\n",
)
return 0
return os.EX_OK


def _fmt_installed_db(db_path: t.Optional[pathlib.Path]) -> str:
Expand Down
Loading

0 comments on commit d8fba1b

Please sign in to comment.