diff --git a/conftest.py b/conftest.py index 3facd09a9..c407681d7 100644 --- a/conftest.py +++ b/conftest.py @@ -51,8 +51,8 @@ from smartsim import Experiment from smartsim._core.config import CONFIG from smartsim._core.config.config import Config -from smartsim._core.launcher.dragon.dragonConnector import DragonConnector -from smartsim._core.launcher.dragon.dragonLauncher import DragonLauncher +from smartsim._core.launcher.dragon.dragon_connector import DragonConnector +from smartsim._core.launcher.dragon.dragon_launcher import DragonLauncher from smartsim._core.utils.telemetry.telemetry import JobEntity from smartsim.database import FeatureStore from smartsim.entity import Application diff --git a/doc/tutorials/doc_examples/experiment_doc_examples/exp.py b/doc/tutorials/doc_examples/experiment_doc_examples/exp.py index b5374e7bd..b4b4e0100 100644 --- a/doc/tutorials/doc_examples/experiment_doc_examples/exp.py +++ b/doc/tutorials/doc_examples/experiment_doc_examples/exp.py @@ -1,5 +1,5 @@ from smartsim import Experiment -from smartsim._core.control.previewrenderer import Verbosity +from smartsim._core.control.preview_renderer import Verbosity from smartsim.log import get_logger # Initialize an Experiment diff --git a/pyproject.toml b/pyproject.toml index c62a849be..5b81676a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -147,10 +147,8 @@ module = [ # FIXME: DO NOT MERGE THIS INTO DEVELOP BRANCH UNLESS THESE ARE PASSING OR # REMOVED!! 
"smartsim._core._cli.*", - "smartsim._core.control.controller", "smartsim._core.control.manifest", "smartsim._core.entrypoints.dragon_client", - "smartsim._core.launcher.colocated", "smartsim._core.launcher.launcher", "smartsim._core.launcher.local.*", "smartsim._core.launcher.lsf.*", @@ -158,13 +156,13 @@ module = [ "smartsim._core.launcher.sge.*", "smartsim._core.launcher.slurm.*", "smartsim._core.launcher.step.*", - "smartsim._core.launcher.stepInfo", - "smartsim._core.launcher.stepMapping", - "smartsim._core.launcher.taskManager", + "smartsim._core.launcher.step_info", + "smartsim._core.launcher.step_mapping", + "smartsim._core.launcher.task_manager", "smartsim._core.utils.serialize", "smartsim._core.utils.telemetry.*", "smartsim.database.*", - "smartsim.settings.sgeSettings", + "smartsim.settings.sge_settings", "smartsim._core.control.controller_utils", "smartsim.entity.dbnode", ] diff --git a/smartsim/_core/__init__.py b/smartsim/_core/__init__.py index 490078770..ee8d3cc96 100644 --- a/smartsim/_core/__init__.py +++ b/smartsim/_core/__init__.py @@ -24,7 +24,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from .control import Controller, Manifest, previewrenderer +from .control import Manifest, preview_renderer from .generation import Generator -__all__ = ["Controller", "Manifest", "Generator", "previewrenderer"] +__all__ = ["Manifest", "Generator", "preview_renderer"] diff --git a/smartsim/_core/arguments/shell.py b/smartsim/_core/arguments/shell.py index 184d242cb..e4138d0eb 100644 --- a/smartsim/_core/arguments/shell.py +++ b/smartsim/_core/arguments/shell.py @@ -30,7 +30,7 @@ from abc import abstractmethod from smartsim.log import get_logger -from smartsim.settings.arguments.launchArguments import LaunchArguments +from smartsim.settings.arguments.launch_arguments import LaunchArguments logger = get_logger(__name__) diff --git a/smartsim/_core/commands/__init__.py b/smartsim/_core/commands/__init__.py index 72ef1f674..a35efc62f 100644 --- a/smartsim/_core/commands/__init__.py +++ b/smartsim/_core/commands/__init__.py @@ -25,5 +25,5 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from .command import Command -from .commandList import CommandList -from .launchCommands import LaunchCommands +from .command_list import CommandList +from .launch_commands import LaunchCommands diff --git a/smartsim/_core/commands/commandList.py b/smartsim/_core/commands/command_list.py similarity index 100% rename from smartsim/_core/commands/commandList.py rename to smartsim/_core/commands/command_list.py diff --git a/smartsim/_core/commands/launchCommands.py b/smartsim/_core/commands/launch_commands.py similarity index 97% rename from smartsim/_core/commands/launchCommands.py rename to smartsim/_core/commands/launch_commands.py index c62186671..74303ac94 100644 --- a/smartsim/_core/commands/launchCommands.py +++ b/smartsim/_core/commands/launch_commands.py @@ -1,4 +1,4 @@ -from .commandList import CommandList +from .command_list import CommandList class LaunchCommands: diff --git a/smartsim/_core/control/__init__.py b/smartsim/_core/control/__init__.py index 0acd80650..ba3af1440 100644 --- a/smartsim/_core/control/__init__.py +++ b/smartsim/_core/control/__init__.py @@ -24,5 +24,4 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from .controller import Controller from .manifest import Manifest diff --git a/smartsim/_core/control/controller.py b/smartsim/_core/control/controller.py deleted file mode 100644 index 5c1a4da3e..000000000 --- a/smartsim/_core/control/controller.py +++ /dev/null @@ -1,1018 +0,0 @@ -# BSD 2-Clause License -# -# Copyright (c) 2021-2024, Hewlett Packard Enterprise -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from __future__ import annotations - -import itertools -import os -import os.path as osp -import pathlib -import pickle -import signal -import subprocess -import sys -import threading -import time -import typing as t - -from smartsim._core.utils.network import get_ip_from_host -from smartsim.entity._mock import Mock - -from ..._core.launcher.step import Step -from ..._core.utils.helpers import ( - SignalInterceptionStack, - unpack_colo_fs_identifier, - unpack_fs_identifier, -) -from ...database import FeatureStore -from ...entity import Application, Ensemble, EntitySequence, SmartSimEntity -from ...error import ( - LauncherError, - SmartSimError, - SSDBIDConflictError, - SSInternalError, - SSUnsupportedError, -) -from ...log import get_logger -from ...servertype import CLUSTERED, STANDALONE -from ...status import TERMINAL_STATUSES, JobStatus -from ..config import CONFIG -from ..launcher import ( - DragonLauncher, - LocalLauncher, - LSFLauncher, - PBSLauncher, - SGELauncher, - 
SlurmLauncher, -) -from ..launcher.launcher import Launcher -from ..utils import serialize -from .controller_utils import _AnonymousBatchJob, _look_up_launched_data -from .job import Job -from .jobmanager import JobManager -from .manifest import LaunchedManifest, LaunchedManifestBuilder, Manifest - -if t.TYPE_CHECKING: - from types import FrameType - - from ..utils.serialize import TStepLaunchMetaData - - -logger = get_logger(__name__) - -# job manager lock -JM_LOCK = threading.RLock() - - -class Client(Mock): - """Mock Client""" - - -class ConfigOptions(Mock): - """Mock ConfigOptions""" - - -def fs_is_active(): - pass - - -def set_ml_model(): - pass - - -def set_script(): - pass - - -def shutdown_fs_node(): - pass - - -def create_cluster(): - pass - - -def check_cluster_status(): - pass - - -class Controller: - """The controller module provides an interface between the - smartsim entities created in the experiment and the - underlying workload manager or run framework. - """ - - def __init__(self, launcher: str = "local") -> None: - """Initialize a Controller - - :param launcher: the type of launcher being used - """ - self._jobs = JobManager(JM_LOCK) - self.init_launcher(launcher) - self._telemetry_monitor: t.Optional[subprocess.Popen[bytes]] = None - - def start( - self, - exp_name: str, - exp_path: str, - manifest: Manifest, - block: bool = True, - kill_on_interrupt: bool = True, - ) -> None: - """Start the passed SmartSim entities - - This function should not be called directly, but rather - through the experiment interface. - - The controller will start the job-manager thread upon - execution of all jobs. 
- """ - # launch a telemetry monitor to track job progress - if CONFIG.telemetry_enabled: - self._start_telemetry_monitor(exp_path) - - self._jobs.kill_on_interrupt = kill_on_interrupt - - # register custom signal handler for ^C (SIGINT) - SignalInterceptionStack.get(signal.SIGINT).push_unique( - self._jobs.signal_interrupt - ) - launched = self._launch(exp_name, exp_path, manifest) - - # start the job manager thread if not already started - if not self._jobs.actively_monitoring: - self._jobs.start() - - serialize.save_launch_manifest( - launched.map(_look_up_launched_data(self._launcher)) - ) - - # block until all non-feature store jobs are complete - if block: - # poll handles its own keyboard interrupt as - # it may be called separately - self.poll(5, True, kill_on_interrupt=kill_on_interrupt) - - @property - def active_feature_store_jobs(self) -> t.Dict[str, Job]: - """Return active feature store jobs.""" - return {**self._jobs.fs_jobs} - - @property - def feature_store_active(self) -> bool: - with JM_LOCK: - if len(self._jobs.fs_jobs) > 0: - return True - return False - - def poll( - self, interval: int, verbose: bool, kill_on_interrupt: bool = True - ) -> None: - """Poll running jobs and receive logging output of job status - - :param interval: number of seconds to wait before polling again - :param verbose: set verbosity - :param kill_on_interrupt: flag for killing jobs when SIGINT is received - """ - self._jobs.kill_on_interrupt = kill_on_interrupt - to_monitor = self._jobs.jobs - while len(to_monitor) > 0: - time.sleep(interval) - - # acquire lock to avoid "dictionary changed during iteration" error - # without having to copy dictionary each time. - if verbose: - with JM_LOCK: - for job in to_monitor.values(): - logger.info(job) - - def finished( - self, entity: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]] - ) -> bool: - """Return a boolean indicating wether a job has finished or not - - :param entity: object launched by SmartSim. 
- :returns: bool - :raises ValueError: if entity has not been launched yet - """ - try: - if isinstance(entity, FeatureStore): - raise TypeError("Finished() does not support FeatureStore instances") - if isinstance(entity, EntitySequence): - return all(self.finished(ent) for ent in entity.entities) - if not isinstance(entity, SmartSimEntity): - raise TypeError( - f"Argument was of type {type(entity)} not derived " - "from SmartSimEntity or EntitySequence" - ) - - return self._jobs.is_finished(entity) - except KeyError: - raise ValueError( - f"Entity {entity.name} has not been launched in this experiment" - ) from None - - def stop_entity( - self, entity: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]] - ) -> None: - """Stop an instance of an entity - - This function will also update the status of the job in - the jobmanager so that the job appears as "cancelled". - - :param entity: entity to be stopped - """ - with JM_LOCK: - job = self._jobs[entity.name] - if job.status not in TERMINAL_STATUSES: - logger.info( - " ".join( - ( - "Stopping application", - entity.name, - "with job name", - str(job.name), - ) - ) - ) - status = self._launcher.stop(job.name) - - job.set_status( - status.status, - status.launcher_status, - status.returncode, - error=status.error, - output=status.output, - ) - self._jobs.move_to_completed(job) - - def stop_fs(self, fs: FeatureStore) -> None: - """Stop an FeatureStore - - :param fs: FeatureStore to be stopped - """ - if fs.batch: - self.stop_entity(fs) - else: - with JM_LOCK: - for node in fs.entities: - for host_ip, port in itertools.product( - (get_ip_from_host(host) for host in node.hosts), fs.ports - ): - retcode, _, _ = shutdown_fs_node(host_ip, port) - # Sometimes the fs will not shutdown (unless we force NOSAVE) - if retcode != 0: - self.stop_entity(node) - continue - - job = self._jobs[node.name] - job.set_status( - JobStatus.CANCELLED, - "", - 0, - output=None, - error=None, - ) - self._jobs.move_to_completed(job) - - 
fs.reset_hosts() - - def stop_entity_list(self, entity_list: EntitySequence[SmartSimEntity]) -> None: - """Stop an instance of an entity list - - :param entity_list: entity list to be stopped - """ - - if entity_list.batch: - self.stop_entity(entity_list) - else: - for entity in entity_list.entities: - self.stop_entity(entity) - - def get_jobs(self) -> t.Dict[str, Job]: - """Return a dictionary of completed job data - - :returns: dict[str, Job] - """ - with JM_LOCK: - return self._jobs.completed - - def get_entity_status( - self, entity: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]] - ) -> JobStatus: - """Get the status of an entity - - :param entity: entity to get status of - :raises TypeError: if not SmartSimEntity | EntitySequence - :return: status of entity - """ - if not isinstance(entity, (SmartSimEntity, EntitySequence)): - raise TypeError( - "Argument must be of type SmartSimEntity or EntitySequence, " - f"not {type(entity)}" - ) - return self._jobs.get_status(entity) - - def get_entity_list_status( - self, entity_list: EntitySequence[SmartSimEntity] - ) -> t.List[JobStatus]: - """Get the statuses of an entity list - - :param entity_list: entity list containing entities to - get statuses of - :raises TypeError: if not EntitySequence - :return: list of SmartSimStatus statuses - """ - if not isinstance(entity_list, EntitySequence): - raise TypeError( - f"Argument was of type {type(entity_list)} not EntitySequence" - ) - if entity_list.batch: - return [self.get_entity_status(entity_list)] - statuses = [] - for entity in entity_list.entities: - statuses.append(self.get_entity_status(entity)) - return statuses - - def init_launcher(self, launcher: str) -> None: - """Initialize the controller with a specific type of launcher. 
- SmartSim currently supports slurm, pbs(pro), lsf, - and local launching - - :param launcher: which launcher to initialize - :raises SSUnsupportedError: if a string is passed that is not - a supported launcher - :raises TypeError: if no launcher argument is provided. - """ - launcher_map: t.Dict[str, t.Type[Launcher]] = { - "slurm": SlurmLauncher, - "pbs": PBSLauncher, - "pals": PBSLauncher, - "lsf": LSFLauncher, - "local": LocalLauncher, - "dragon": DragonLauncher, - "sge": SGELauncher, - } - - if launcher is not None: - launcher = launcher.lower() - if launcher in launcher_map: - # create new instance of the launcher - self._launcher = launcher_map[launcher]() - self._jobs.set_launcher(self._launcher) - else: - raise SSUnsupportedError("Launcher type not supported: " + launcher) - else: - raise TypeError("Must provide a 'launcher' argument") - - @staticmethod - def symlink_output_files( - job_step: Step, entity: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]] - ) -> None: - """Create symlinks for entity output files that point to the output files - under the .smartsim directory - - :param job_step: Job step instance - :param entity: Entity instance - """ - historical_out, historical_err = map(pathlib.Path, job_step.get_output_files()) - entity_out = pathlib.Path(entity.path) / f"{entity.name}.out" - entity_err = pathlib.Path(entity.path) / f"{entity.name}.err" - - # check if there is already a link to a previous run - if entity_out.is_symlink() or entity_err.is_symlink(): - entity_out.unlink() - entity_err.unlink() - - historical_err.touch() - historical_out.touch() - - if historical_err.exists() and historical_out.exists(): - entity_out.symlink_to(historical_out) - entity_err.symlink_to(historical_err) - else: - raise FileNotFoundError( - f"Output files for {entity.name} could not be found. " - "Symlinking files failed." 
- ) - - def _launch( - self, exp_name: str, exp_path: str, manifest: Manifest - ) -> LaunchedManifest[t.Tuple[str, Step]]: - """Main launching function of the controller - - FeatureStores are always launched first so that the - address of the feature store can be given to following entities - - :param exp_name: The name of the launching experiment - :param exp_path: path to location of ``Experiment`` directory if generated - :param manifest: Manifest of deployables to launch - """ - - manifest_builder = LaunchedManifestBuilder[t.Tuple[str, Step]]( - exp_name=exp_name, - exp_path=exp_path, - launcher_name=str(self._launcher), - ) - # Loop over deployables to launch and launch multiple FeatureStores - for featurestore in manifest.fss: - for key in self._jobs.get_fs_host_addresses(): - _, fs_id = unpack_fs_identifier(key, "_") - if featurestore.fs_identifier == fs_id: - raise SSDBIDConflictError( - f"Feature store identifier {featurestore.fs_identifier}" - " has already been used. Pass in a unique" - " name for fs_identifier" - ) - - if featurestore.num_shards > 1 and isinstance( - self._launcher, LocalLauncher - ): - raise SmartSimError( - "Local launcher does not support multi-host feature stores" - ) - self._launch_feature_store(featurestore, manifest_builder) - - if self.feature_store_active: - self._set_fsobjects(manifest) - - # create all steps prior to launch - steps: t.List[ - t.Tuple[Step, t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]]] - ] = [] - - symlink_substeps: t.List[ - t.Tuple[Step, t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]]] - ] = [] - - for elist in manifest.ensembles: - ens_telem_dir = manifest_builder.run_telemetry_subdirectory / "ensemble" - if elist.batch: - batch_step, substeps = self._create_batch_job_step(elist, ens_telem_dir) - manifest_builder.add_ensemble( - elist, [(batch_step.name, step) for step in substeps] - ) - - # symlink substeps to maintain directory structure - for substep, substep_entity in zip(substeps, 
elist.applications): - symlink_substeps.append((substep, substep_entity)) - - steps.append((batch_step, elist)) - else: - # if ensemble is to be run as separate job steps, aka not in a batch - job_steps = [ - (self._create_job_step(e, ens_telem_dir / elist.name), e) - for e in elist.entities - ] - manifest_builder.add_ensemble( - elist, [(step.name, step) for step, _ in job_steps] - ) - steps.extend(job_steps) - # applications themselves cannot be batch steps. If batch settings are - # attached, wrap them in an anonymous batch job step - for application in manifest.applications: - application_telem_dir = ( - manifest_builder.run_telemetry_subdirectory / "application" - ) - if application.batch_settings: - anon_entity_list = _AnonymousBatchJob(application) - batch_step, substeps = self._create_batch_job_step( - anon_entity_list, application_telem_dir - ) - manifest_builder.add_application( - application, (batch_step.name, batch_step) - ) - - symlink_substeps.append((substeps[0], application)) - steps.append((batch_step, application)) - else: - # create job step for aapplication with run settings - job_step = self._create_job_step(application, application_telem_dir) - manifest_builder.add_application(application, (job_step.name, job_step)) - steps.append((job_step, application)) - - # launch and symlink steps - for step, entity in steps: - self._launch_step(step, entity) - self.symlink_output_files(step, entity) - - # symlink substeps to maintain directory structure - for substep, entity in symlink_substeps: - self.symlink_output_files(substep, entity) - - return manifest_builder.finalize() - - def _launch_feature_store( - self, - featurestore: FeatureStore, - manifest_builder: LaunchedManifestBuilder[t.Tuple[str, Step]], - ) -> None: - """Launch an FeatureStore instance - - This function will launch the FeatureStore instance and - if on WLM, find the nodes where it was launched and - set them in the JobManager - - :param featurestore: FeatureStore to launch - :param 
manifest_builder: An `LaunchedManifestBuilder` to record the - names and `Step`s of the launched featurestore - """ - featurestore.remove_stale_files() - feature_store_telem_dir = ( - manifest_builder.run_telemetry_subdirectory / "database" - ) - - # if the featurestore was launched as a batch workload - if featurestore.batch: - feature_store_batch_step, substeps = self._create_batch_job_step( - featurestore, feature_store_telem_dir - ) - manifest_builder.add_feature_store( - featurestore, - [(feature_store_batch_step.name, step) for step in substeps], - ) - - self._launch_step(feature_store_batch_step, featurestore) - self.symlink_output_files(feature_store_batch_step, featurestore) - - # symlink substeps to maintain directory structure - for substep, substep_entity in zip(substeps, featurestore.entities): - self.symlink_output_files(substep, substep_entity) - - # if featurestore was run on existing allocation, locally, or in allocation - else: - fs_steps = [ - ( - self._create_job_step( - fs, feature_store_telem_dir / featurestore.name - ), - fs, - ) - for fs in featurestore.entities - ] - manifest_builder.add_feature_store( - featurestore, [(step.name, step) for step, _ in fs_steps] - ) - for fs_step in fs_steps: - self._launch_step(*fs_step) - self.symlink_output_files(*fs_step) - - # wait for featurestore to spin up - self._feature_store_launch_wait(featurestore) - - # set the jobs in the job manager to provide SSDB variable to entities - # if _host isnt set within each - self._jobs.set_fs_hosts(featurestore) - - # create the feature store cluster - if featurestore.num_shards > 2: - num_trials = 5 - cluster_created = False - while not cluster_created: - try: - create_cluster(featurestore.hosts, featurestore.ports) - check_cluster_status(featurestore.hosts, featurestore.ports) - num_shards = featurestore.num_shards - logger.info( - f"Feature store cluster created with {num_shards} shards" - ) - cluster_created = True - except SSInternalError: - if num_trials > 
0: - logger.debug( - "Cluster creation failed, attempting again in five seconds." - ) - num_trials -= 1 - time.sleep(5) - else: - # surface SSInternalError as we have no way to recover - raise - self._save_feature_store(featurestore) - logger.debug(f"FeatureStore launched on nodes: {featurestore.hosts}") - - def _launch_step( - self, - job_step: Step, - entity: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]], - ) -> None: - """Use the launcher to launch a job step - - :param job_step: a job step instance - :param entity: entity instance - :raises SmartSimError: if launch fails - """ - # attempt to retrieve entity name in JobManager.completed - completed_job = self._jobs.completed.get(entity.name, None) - - # if completed job DNE and is the entity name is not - # running in JobManager.jobs or JobManager.fs_jobs, - # launch the job - if completed_job is None and ( - entity.name not in self._jobs.jobs and entity.name not in self._jobs.fs_jobs - ): - try: - job_id = self._launcher.run(job_step) - except LauncherError as e: - msg = f"An error occurred when launching {entity.name} \n" - msg += "Check error and output files for details.\n" - msg += f"{entity}" - logger.error(msg) - raise SmartSimError(f"Job step {entity.name} failed to launch") from e - - # if the completed job does exist and the entity passed in is the same - # that has ran and completed, relaunch the entity. 
- elif completed_job is not None and completed_job.entity is entity: - try: - job_id = self._launcher.run(job_step) - except LauncherError as e: - msg = f"An error occurred when launching {entity.name} \n" - msg += "Check error and output files for details.\n" - msg += f"{entity}" - logger.error(msg) - raise SmartSimError(f"Job step {entity.name} failed to launch") from e - - # the entity is using a duplicate name of an existing entity in - # the experiment, throw an error - else: - raise SSUnsupportedError("SmartSim entities cannot have duplicate names.") - - # a job step is a task if it is not managed by a workload manager (i.e. Slurm) - # but is rather started, monitored, and exited through the Popen interface - # in the taskmanager - is_task = not job_step.managed - - if self._jobs.query_restart(entity.name): - logger.debug(f"Restarting {entity.name}") - self._jobs.restart_job(job_step.name, job_id, entity.name, is_task) - else: - logger.debug(f"Launching {entity.name}") - self._jobs.add_job(job_step, job_id, is_task) - - def _create_batch_job_step( - self, - entity_list: t.Union[FeatureStore, Ensemble, _AnonymousBatchJob], - telemetry_dir: pathlib.Path, - ) -> t.Tuple[Step, t.List[Step]]: - """Use launcher to create batch job step - - :param entity_list: EntityList to launch as batch - :param telemetry_dir: Path to a directory in which the batch job step - may write telemetry events - :return: batch job step instance and a list of run steps to be - executed within the batch job - """ - if not entity_list.batch_settings: - raise ValueError( - "EntityList must have batch settings to be launched as batch" - ) - - telemetry_dir = telemetry_dir / entity_list.name - batch_step = self._launcher.create_step(entity, entity_list.batch_settings) - batch_step.meta["entity_type"] = str(type(entity_list).__name__).lower() - batch_step.meta["status_dir"] = str(telemetry_dir) - - substeps = [] - for entity in entity_list.entities: - # tells step creation not to look for an 
allocation - entity.run_settings.in_batch = True - step = self._create_job_step(entity, telemetry_dir) - substeps.append(step) - batch_step.add_to_batch(step) - return batch_step, substeps - - def _create_job_step( - self, entity: SmartSimEntity, telemetry_dir: pathlib.Path - ) -> Step: - """Create job steps for all entities with the launcher - - :param entity: an entity to create a step for - :param telemetry_dir: Path to a directory in which the job step - may write telemetry events - :return: the job step - """ - # get SSDB, SSIN, SSOUT and add to entity run settings - if isinstance(entity, Application): - self._prep_entity_client_env(entity) - - # creating job step through the created launcher - step = self._launcher.create_step(entity, entity.run_settings) - - step.meta["entity_type"] = str(type(entity).__name__).lower() - step.meta["status_dir"] = str(telemetry_dir / entity.name) - - # return the job step that was created using the launcher since the launcher is defined in the exp - return step - - def _prep_entity_client_env(self, entity: Application) -> None: - """Retrieve all connections registered to this entity - - :param entity: The entity to retrieve connections from - """ - client_env: t.Dict[str, t.Union[str, int, float, bool]] = {} - address_dict = self._jobs.get_fs_host_addresses() - - for fs_id, addresses in address_dict.items(): - fs_name, _ = unpack_fs_identifier(fs_id, "_") - if addresses: - # Cap max length of SSDB - client_env[f"SSDB{fs_name}"] = ",".join(addresses[:128]) - - # Retrieve num_shards to append to client env - client_env[f"SR_fs_TYPE{fs_name}"] = ( - CLUSTERED if len(addresses) > 1 else STANDALONE - ) - - if entity.incoming_entities: - client_env["SSKEYIN"] = ",".join( - [in_entity.name for in_entity in entity.incoming_entities] - ) - if entity.query_key_prefixing(): - client_env["SSKEYOUT"] = entity.name - - # Set address to local if it's a colocated application - if entity.colocated and entity.run_settings.colocated_fs_settings 
is not None: - fs_name_colo = entity.run_settings.colocated_fs_settings["fs_identifier"] - assert isinstance(fs_name_colo, str) - for key in address_dict: - _, fs_id = unpack_fs_identifier(key, "_") - if fs_name_colo == fs_id: - raise SSDBIDConflictError( - f"Feature store identifier {fs_name_colo}" - " has already been used. Pass in a unique" - " name for fs_identifier" - ) - - fs_name_colo = unpack_colo_fs_identifier(fs_name_colo) - if colo_cfg := entity.run_settings.colocated_fs_settings: - port = colo_cfg.get("port", None) - socket = colo_cfg.get("unix_socket", None) - if socket and port: - raise SSInternalError( - "Co-located was configured for both TCP/IP and UDS" - ) - if port: - client_env[f"SSDB{fs_name_colo}"] = f"127.0.0.1:{str(port)}" - elif socket: - client_env[f"SSDB{fs_name_colo}"] = f"unix://{socket}" - else: - raise SSInternalError( - "Colocated feature store was not configured for either TCP or UDS" - ) - client_env[f"SR_fs_TYPE{fs_name_colo}"] = STANDALONE - entity.run_settings.update_env(client_env) - - def _save_feature_store(self, feature_store: FeatureStore) -> None: - """Save the FeatureStore object via pickle - - This function saves the feature store information to a pickle - file that can be imported by subsequent experiments to reconnect - to the featurestore. 
- - :param featurestore: FeatureStore configuration to be saved - """ - - if not feature_store.is_active(): - raise Exception("Feature store is not running") - - # Extract only the fs_jobs associated with this particular feature store - if feature_store.batch: - job_names = [feature_store.name] - else: - job_names = [fsnode.name for fsnode in feature_store.entities] - fs_jobs = { - name: job for name, job in self._jobs.fs_jobs.items() if name in job_names - } - - # Extract the associated steps - steps = [ - self._launcher.step_mapping[fs_job.name] for fs_job in fs_jobs.values() - ] - - feature_store_data = {"fs": feature_store, "fs_jobs": fs_jobs, "steps": steps} - - with open(feature_store.checkpoint_file, "wb") as pickle_file: - pickle.dump(feature_store_data, pickle_file) - - # Extract only the fs_jobs associated with this particular featurestore - if feature_store.batch: - job_names = [feature_store.name] - else: - job_names = [fsnode.name for fsnode in feature_store.entities] - fs_jobs = { - name: job for name, job in self._jobs.fs_jobs.items() if name in job_names - } - - # Extract the associated steps - steps = [ - self._launcher.step_mapping[fs_job.name] for fs_job in fs_jobs.values() - ] - - feature_store_data = {"fs": feature_store, "fs_jobs": fs_jobs, "steps": steps} - - with open(feature_store.checkpoint_file, "wb") as pickle_file: - pickle.dump(feature_store_data, pickle_file) - - def _feature_store_launch_wait(self, featurestore: FeatureStore) -> None: - """Wait for the featurestore instances to run - - In the case where the featurestore is launched as a batch - through a WLM, we wait for the featurestore to exit the - queue before proceeding so new launched entities can - be launched with SSDB address - - :param featurestore: FeatureStore instance - :raises SmartSimError: if launch fails or manually stopped by user - """ - if featurestore.batch: - logger.info("FeatureStore launched as a batch") - logger.info("While queued, SmartSim will wait for 
FeatureStore to run") - logger.info("CTRL+C interrupt to abort and cancel launch") - - ready = False - while not ready: - try: - time.sleep(CONFIG.jm_interval) - # manually trigger job update if JM not running - if not self._jobs.actively_monitoring: - self._jobs.check_jobs() - - # _jobs.get_status acquires JM lock for main thread, no need for locking - statuses = self.get_entity_list_status(featurestore) - if all(stat == JobStatus.RUNNING for stat in statuses): - ready = True - # TODO: Add a node status check - elif any(stat in TERMINAL_STATUSES for stat in statuses): - self.stop_fs(featurestore) - msg = "FeatureStore failed during startup" - msg += f" See {featurestore.path} for details" - raise SmartSimError(msg) - else: - logger.debug("Waiting for featurestore instances to spin up...") - except KeyboardInterrupt: - logger.info("FeatureStore launch cancelled - requesting to stop") - self.stop_fs(featurestore) - - # re-raise keyboard interrupt so the job manager will display - # any running and un-killed jobs as this method is only called - # during launch and we handle all keyboard interrupts during - # launch explicitly - raise - - def reload_saved_fs( - self, checkpoint_file: t.Union[str, os.PathLike[str]] - ) -> FeatureStore: - with JM_LOCK: - - if not osp.exists(checkpoint_file): - raise FileNotFoundError( - f"The SmartSim feature store config file {os.fspath(checkpoint_file)} " - "cannot be found." - ) - - try: - with open(checkpoint_file, "rb") as pickle_file: - fs_config = pickle.load(pickle_file) - except (OSError, IOError) as e: - msg = "Feature store checkpoint corrupted" - raise SmartSimError(msg) from e - - err_message = ( - "The SmartSim feature store checkpoint is incomplete or corrupted. " - ) - if not "fs" in fs_config: - raise SmartSimError( - err_message + "Could not find the featurestore object." - ) - - if not "fs_jobs" in fs_config: - raise SmartSimError( - err_message + "Could not find feature store job objects." 
- ) - - if not "steps" in fs_config: - raise SmartSimError( - err_message + "Could not find feature store job objects." - ) - feature_store: FeatureStore = fs_config["fs"] - - # TODO check that each fs_object is running - - job_steps = zip(fs_config["fs_jobs"].values(), fs_config["steps"]) - try: - for fs_job, step in job_steps: - self._jobs.fs_jobs[fs_job.ename] = fs_job - self._launcher.add_step_to_mapping_table(fs_job.name, step) - if step.task_id: - self._launcher.task_manager.add_existing(int(step.task_id)) - except LauncherError as e: - raise SmartSimError("Failed to reconnect feature store") from e - - # start job manager if not already started - if not self._jobs.actively_monitoring: - self._jobs.start() - - return feature_store - - def _set_fsobjects(self, manifest: Manifest) -> None: - if not manifest.has_fs_objects: - return - - address_dict = self._jobs.get_fs_host_addresses() - for ( - fs_id, - fs_addresses, - ) in address_dict.items(): - fs_name, name = unpack_fs_identifier(fs_id, "_") - - hosts = list({address.split(":")[0] for address in fs_addresses}) - ports = list({int(address.split(":")[-1]) for address in fs_addresses}) - - if not fs_is_active(hosts=hosts, ports=ports, num_shards=len(fs_addresses)): - raise SSInternalError("Cannot set FS Objects, FS is not running") - - os.environ[f"SSDB{fs_name}"] = fs_addresses[0] - - os.environ[f"SR_fs_TYPE{fs_name}"] = ( - CLUSTERED if len(fs_addresses) > 1 else STANDALONE - ) - - options = ConfigOptions.create_from_environment(name) - client = Client(options, logger_name="SmartSim") - - for application in manifest.applications: - if not application.colocated: - for fs_model in application.fs_models: - set_ml_model(fs_model, client) - for fs_script in application.fs_scripts: - set_script(fs_script, client) - - for ensemble in manifest.ensembles: - for fs_model in ensemble.fs_models: - set_ml_model(fs_model, client) - for fs_script in ensemble.fs_scripts: - set_script(fs_script, client) - for entity in 
ensemble.applications: - if not entity.colocated: - # Set models which could belong only - # to the entities and not to the ensemble - # but avoid duplicates - for fs_model in entity.fs_models: - if fs_model not in ensemble.fs_models: - set_ml_model(fs_model, client) - for fs_script in entity.fs_scripts: - if fs_script not in ensemble.fs_scripts: - set_script(fs_script, client) - - def _start_telemetry_monitor(self, exp_dir: str) -> None: - """Spawns a telemetry monitor process to keep track of the life times - of the processes launched through this controller. - - :param exp_dir: An experiment directory - """ - if ( - self._telemetry_monitor is None - or self._telemetry_monitor.returncode is not None - ): - logger.debug("Starting telemetry monitor process") - cmd = [ - sys.executable, - "-m", - "smartsim._core.entrypoints.telemetrymonitor", - "-exp_dir", - exp_dir, - "-frequency", - str(CONFIG.telemetry_frequency), - "-cooldown", - str(CONFIG.telemetry_cooldown), - ] - # pylint: disable-next=consider-using-with - self._telemetry_monitor = subprocess.Popen( - cmd, - stderr=sys.stderr, - stdout=sys.stdout, - cwd=str(pathlib.Path(__file__).parent.parent.parent), - shell=False, - ) diff --git a/smartsim/_core/control/controller_utils.py b/smartsim/_core/control/controller_utils.py deleted file mode 100644 index 57694ce7c..000000000 --- a/smartsim/_core/control/controller_utils.py +++ /dev/null @@ -1,77 +0,0 @@ -# BSD 2-Clause License -# -# Copyright (c) 2021-2024, Hewlett Packard Enterprise -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from __future__ import annotations - -import pathlib -import typing as t - -from ..._core.launcher.step import Step -from ...entity import Application, EntityList -from ...error import SmartSimError -from ..launcher.launcher import Launcher - -if t.TYPE_CHECKING: - from ..utils.serialize import TStepLaunchMetaData - - -class _AnonymousBatchJob(EntityList[Application]): - @staticmethod - def _validate(application: Application) -> None: - if application.batch_settings is None: - msg = "Unable to create _AnonymousBatchJob without batch_settings" - raise SmartSimError(msg) - - def __init__(self, application: Application) -> None: - self._validate(application) - super().__init__(application.name, application.path) - self.entities = [application] - self.batch_settings = application.batch_settings - - def _initialize_entities(self, **kwargs: t.Any) -> None: ... 
- - -def _look_up_launched_data( - launcher: Launcher, -) -> t.Callable[[t.Tuple[str, Step]], "TStepLaunchMetaData"]: - def _unpack_launched_data(data: t.Tuple[str, Step]) -> "TStepLaunchMetaData": - # NOTE: we cannot assume that the name of the launched step - # ``launched_step_name`` is equal to the name of the step referring to - # the entity ``step.name`` as is the case when an entity list is - # launched as a batch job - launched_step_name, step = data - launched_step_map = launcher.step_mapping[launched_step_name] - out_file, err_file = step.get_output_files() - return ( - launched_step_map.step_id, - launched_step_map.task_id, - launched_step_map.managed, - out_file, - err_file, - pathlib.Path(step.meta.get("status_dir", step.cwd)), - ) - - return _unpack_launched_data diff --git a/smartsim/_core/control/job.py b/smartsim/_core/control/job.py index 7e752cecd..fd79c0656 100644 --- a/smartsim/_core/control/job.py +++ b/smartsim/_core/control/job.py @@ -29,7 +29,9 @@ import typing as t from dataclasses import dataclass -from ...entity import EntitySequence, SmartSimEntity +from smartsim.entity._mock import Mock + +from ...entity import SmartSimEntity from ...status import JobStatus @@ -47,8 +49,7 @@ class _JobKey: class JobEntity: """An entity containing run-time SmartSimEntity metadata. The run-time metadata - is required to perform telemetry collection. The `JobEntity` satisfies the core - API necessary to use a `JobManager` to manage retrieval of managed step updates. + is required to perform telemetry collection. """ def __init__(self) -> None: @@ -190,27 +191,23 @@ def from_manifest( class Job: - """Keep track of various information for the controller. - In doing so, continuously add various fields of information - that is queryable by the user through interface methods in - the controller class. + """Keep track of various information. + In doing so, continuously add various fields of information. 
""" def __init__( self, job_name: str, job_id: t.Optional[str], - entity: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity], JobEntity], + entity: t.Union[SmartSimEntity, JobEntity], launcher: str, - is_task: bool, ) -> None: """Initialize a Job. :param job_name: Name of the job step :param job_id: The id associated with the job - :param entity: The SmartSim entity(list) associated with the job + :param entity: The SmartSim entity associated with the job :param launcher: Launcher job was started with - :param is_task: process monitored by TaskManager (True) or the WLM (True) """ self.name = job_name self.jid = job_id @@ -224,7 +221,6 @@ def __init__( self.error: t.Optional[str] = None # same as output self.hosts: t.List[str] = [] # currently only used for FS jobs self.launched_with = launcher - self.is_task = is_task self.start_time = time.time() self.history = History() @@ -263,14 +259,11 @@ def record_history(self) -> None: """Record the launching history of a job.""" self.history.record(self.jid, self.status, self.returncode, self.elapsed) - def reset( - self, new_job_name: str, new_job_id: t.Optional[str], is_task: bool - ) -> None: + def reset(self, new_job_name: str, new_job_id: t.Optional[str]) -> None: """Reset the job in order to be able to restart it. :param new_job_name: name of the new job step :param new_job_id: new job id to launch under - :param is_task: process monitored by TaskManager (True) or the WLM (True) """ self.name = new_job_name self.jid = new_job_id @@ -279,7 +272,6 @@ def reset( self.output = None self.error = None self.hosts = [] - self.is_task = is_task self.start_time = time.time() self.history.new_run() diff --git a/smartsim/_core/control/jobmanager.py b/smartsim/_core/control/jobmanager.py deleted file mode 100644 index 69bf015d1..000000000 --- a/smartsim/_core/control/jobmanager.py +++ /dev/null @@ -1,364 +0,0 @@ -# BSD 2-Clause License -# -# Copyright (c) 2021-2024, Hewlett Packard Enterprise -# All rights reserved. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - -import itertools -import time -import typing as t -from collections import ChainMap -from threading import RLock, Thread -from types import FrameType - -from ..._core.launcher.step import Step -from ...database import FeatureStore -from ...entity import EntitySequence, FSNode, SmartSimEntity -from ...log import ContextThread, get_logger -from ...status import TERMINAL_STATUSES, InvalidJobStatus, JobStatus -from ..config import CONFIG -from ..launcher import Launcher, LocalLauncher -from ..utils.network import get_ip_from_host -from .job import Job, JobEntity - -logger = get_logger(__name__) - - -class JobManager: - """The JobManager maintains a mapping between user defined entities - and the steps launched through the launcher. The JobManager - holds jobs according to entity type. - - The JobManager is threaded and runs during the course of an experiment - to update the statuses of Jobs. - - The JobManager and Controller share a single instance of a launcher - object that allows both the Controller and launcher access to the - wlm to query information about jobs that the user requests. - """ - - def __init__(self, lock: RLock, launcher: t.Optional[Launcher] = None) -> None: - """Initialize a Jobmanager - - :param launcher: a Launcher object to manage jobs - """ - self.monitor: t.Optional[Thread] = None - - # active jobs - self.jobs: t.Dict[str, Job] = {} - self.fs_jobs: t.Dict[str, Job] = {} - - # completed jobs - self.completed: t.Dict[str, Job] = {} - - self.actively_monitoring = False # on/off flag - self._launcher = launcher # reference to launcher - self._lock = lock # thread lock - - self.kill_on_interrupt = True # flag for killing jobs on SIGINT - - def start(self) -> None: - """Start a thread for the job manager""" - self.monitor = ContextThread(name="JobManager", daemon=True, target=self.run) - self.monitor.start() - - def run(self) -> None: - """Start the JobManager thread to continually check - the status of all jobs. 
Whichever launcher is selected - by the user will be responsible for returning statuses - that progress the state of the job. - - The interval of the checks is controlled by - smartsim.constats.TM_INTERVAL and should be set to values - above 20 for congested, multi-user systems - - The job manager thread will exit when no jobs are left - or when the main thread dies - """ - logger.debug("Starting Job Manager") - self.actively_monitoring = True - while self.actively_monitoring: - self._thread_sleep() - self.check_jobs() # update all job statuses at once - for _, job in self().items(): - # if the job has errors then output the report - # this should only output once - if job.returncode is not None and job.status in TERMINAL_STATUSES: - if int(job.returncode) != 0: - logger.warning(job) - logger.warning(job.error_report()) - self.move_to_completed(job) - else: - # job completed without error - logger.info(job) - self.move_to_completed(job) - - # if no more jobs left to actively monitor - if not self(): - self.actively_monitoring = False - logger.debug("Sleeping, no jobs to monitor") - - def move_to_completed(self, job: Job) -> None: - """Move job to completed queue so that its no longer - actively monitored by the job manager - - :param job: job instance we are transitioning - """ - with self._lock: - self.completed[job.ename] = job - job.record_history() - - # remove from actively monitored jobs - if job.ename in self.fs_jobs: - del self.fs_jobs[job.ename] - elif job.ename in self.jobs: - del self.jobs[job.ename] - - def __getitem__(self, entity_name: str) -> Job: - """Return the job associated with the name of the entity - from which it was created. 
- - :param entity_name: The name of the entity of a job - :returns: the Job associated with the entity_name - """ - with self._lock: - entities = ChainMap(self.fs_jobs, self.jobs, self.completed) - return entities[entity_name] - - def __call__(self) -> t.Dict[str, Job]: - """Returns dictionary all jobs for () operator - - :returns: Dictionary of all jobs - """ - all_jobs = {**self.jobs, **self.fs_jobs} - return all_jobs - - def __contains__(self, key: str) -> bool: - try: - self[key] # pylint: disable=pointless-statement - return True - except KeyError: - return False - - def add_job( - self, - step: Step, - job_id: t.Optional[str], - is_task: bool = True, - ) -> None: - """Add a job to the job manager which holds specific jobs by type. - - :param job_name: name of the job step - :param job_id: job step id created by launcher - :param entity: entity that was launched on job step - :param is_task: process monitored by TaskManager (True) or the WLM (True) - """ - launcher = str(self._launcher) - # all operations here should be atomic - job = Job(step.name, job_id, step.entity, launcher, is_task) - if isinstance(step.entity, (FSNode, FeatureStore)): - self.fs_jobs[step.entity.name] = job - elif isinstance(step.entity, JobEntity) and step.entity.is_fs: - self.fs_jobs[step.entity.name] = job - else: - self.jobs[step.entity.name] = job - - def is_finished(self, entity: SmartSimEntity) -> bool: - """Detect if a job has completed - - :param entity: entity to check - :return: True if finished - """ - with self._lock: - job = self[entity.name] # locked operation - if entity.name in self.completed: - if job.status in TERMINAL_STATUSES: - return True - return False - - def check_jobs(self) -> None: - """Update all jobs in jobmanager - - Update all jobs returncode, status, error and output - through one call to the launcher. 
- - """ - with self._lock: - jobs = self().values() - job_name_map = {job.name: job.ename for job in jobs} - - # returns (job step name, StepInfo) tuples - if self._launcher: - step_names = list(job_name_map.keys()) - statuses = self._launcher.get_step_update(step_names) - for job_name, status in statuses: - job = self[job_name_map[job_name]] - - if status: - # uses abstract step interface - job.set_status( - status.status, - status.launcher_status, - status.returncode, - error=status.error, - output=status.output, - ) - - def get_status( - self, - entity: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]], - ) -> t.Union[JobStatus, InvalidJobStatus]: - """Return the status of a job. - - :param entity: SmartSimEntity or EntitySequence instance - :returns: a SmartSimStatus status - """ - with self._lock: - if entity.name in self.completed: - return self.completed[entity.name].status - - if entity.name in self: - job: Job = self[entity.name] # locked - return job.status - - return InvalidJobStatus.NEVER_STARTED - - def set_launcher(self, launcher: Launcher) -> None: - """Set the launcher of the job manager to a specific launcher instance - - :param launcher: child of Launcher - """ - self._launcher = launcher - - def query_restart(self, entity_name: str) -> bool: - """See if the job just started should be restarted or not. - - :param entity_name: name of entity to check for a job for - :return: if job should be restarted instead of started - """ - if entity_name in self.completed: - return True - return False - - def restart_job( - self, - job_name: str, - job_id: t.Optional[str], - entity_name: str, - is_task: bool = True, - ) -> None: - """Function to reset a job to record history and be - ready to launch again. 
- - :param job_name: new job step name - :param job_id: new job id - :param entity_name: name of the entity of the job - :param is_task: process monitored by TaskManager (True) or the WLM (True) - - """ - with self._lock: - job = self.completed[entity_name] - del self.completed[entity_name] - job.reset(job_name, job_id, is_task) - - if isinstance(job.entity, (FSNode, FeatureStore)): - self.fs_jobs[entity_name] = job - else: - self.jobs[entity_name] = job - - def get_fs_host_addresses(self) -> t.Dict[str, t.List[str]]: - """Retrieve the list of hosts for the feature store - for corresponding feature store identifiers - - :return: dictionary of host ip addresses - """ - - address_dict: t.Dict[str, t.List[str]] = {} - for fs_job in self.fs_jobs.values(): - addresses = [] - if isinstance(fs_job.entity, (FSNode, FeatureStore)): - fs_entity = fs_job.entity - for combine in itertools.product(fs_job.hosts, fs_entity.ports): - ip_addr = get_ip_from_host(combine[0]) - addresses.append(":".join((ip_addr, str(combine[1])))) - - dict_entry: t.List[str] = address_dict.get(fs_entity.fs_identifier, []) - dict_entry.extend(addresses) - address_dict[fs_entity.fs_identifier] = dict_entry - - return address_dict - - def set_fs_hosts(self, FeatureStore: FeatureStore) -> None: - """Set the fs hosts in fs_jobs so future entities can query this - - :param FeatureStore: FeatureStore instance - """ - # should only be called during launch in the controller - - with self._lock: - if FeatureStore.batch: - self.fs_jobs[FeatureStore.name].hosts = FeatureStore.hosts - - else: - for fsnode in FeatureStore.entities: - if not fsnode.is_mpmd: - self.fs_jobs[fsnode.name].hosts = [fsnode.host] - else: - self.fs_jobs[fsnode.name].hosts = fsnode.hosts - - def signal_interrupt(self, signo: int, _frame: t.Optional[FrameType]) -> None: - """Custom handler for whenever SIGINT is received""" - if not signo: - logger.warning("Received SIGINT with no signal number") - if self.actively_monitoring and len(self) > 
0: - if self.kill_on_interrupt: - for _, job in self().items(): - if job.status not in TERMINAL_STATUSES and self._launcher: - self._launcher.stop(job.name) - else: - logger.warning("SmartSim process interrupted before resource cleanup") - logger.warning("You may need to manually stop the following:") - - for job_name, job in self().items(): - if job.is_task: - # this will be the process id - logger.warning(f"Task {job_name} with id: {job.jid}") - else: - logger.warning( - f"Job {job_name} with {job.launched_with} id: {job.jid}" - ) - - def _thread_sleep(self) -> None: - """Sleep the job manager for a specific constant - set for the launcher type. - """ - local_jm_interval = 2 - if isinstance(self._launcher, (LocalLauncher)): - time.sleep(local_jm_interval) - else: - time.sleep(CONFIG.jm_interval) - - def __len__(self) -> int: - # number of active jobs - return len(self.fs_jobs) + len(self.jobs) diff --git a/smartsim/_core/control/manifest.py b/smartsim/_core/control/manifest.py index 36b030504..89b80c217 100644 --- a/smartsim/_core/control/manifest.py +++ b/smartsim/_core/control/manifest.py @@ -29,8 +29,11 @@ import typing as t from dataclasses import dataclass, field +from smartsim.entity._mock import Mock + +from ...builders import Ensemble from ...database import FeatureStore -from ...entity import Application, Ensemble, EntitySequence, FSNode, SmartSimEntity +from ...entity import Application, FSNode, SmartSimEntity from ...error import SmartSimError from ..config import CONFIG from ..utils import helpers as _helpers @@ -47,20 +50,17 @@ class Manifest: """This class is used to keep track of all deployables generated by an experiment. Different types of deployables (i.e. different - `SmartSimEntity`-derived objects or `EntitySequence`-derived objects) can + `SmartSimEntity`-derived objects) can be accessed by using the corresponding accessor. 
Instances of ``Application``, ``Ensemble`` and ``FeatureStore`` can all be passed as arguments """ - def __init__( - self, *args: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]] - ) -> None: + def __init__(self, *args: t.Union[SmartSimEntity]) -> None: self._deployables = list(args) self._check_types(self._deployables) self._check_names(self._deployables) - self._check_entity_lists_nonempty() @property def fss(self) -> t.List[FeatureStore]: @@ -91,20 +91,6 @@ def ensembles(self) -> t.List[Ensemble]: """ return [e for e in self._deployables if isinstance(e, Ensemble)] - @property - def all_entity_lists(self) -> t.List[EntitySequence[SmartSimEntity]]: - """All entity lists, including ensembles and - exceptional ones like FeatureStore - - :return: list of entity lists - """ - _all_entity_lists: t.List[EntitySequence[SmartSimEntity]] = list(self.ensembles) - - for fs in self.fss: - _all_entity_lists.append(fs) - - return _all_entity_lists - @property def has_deployable(self) -> bool: """ @@ -127,24 +113,16 @@ def _check_names(deployables: t.List[t.Any]) -> None: @staticmethod def _check_types(deployables: t.List[t.Any]) -> None: for deployable in deployables: - if not isinstance(deployable, (SmartSimEntity, EntitySequence)): + if not isinstance(deployable, SmartSimEntity): raise TypeError( - f"Entity has type {type(deployable)}, not " - + "SmartSimEntity or EntitySequence" + f"Entity has type {type(deployable)}, not " + "SmartSimEntity" ) - def _check_entity_lists_nonempty(self) -> None: - """Check deployables for sanity before launching""" - - for entity_list in self.all_entity_lists: - if len(entity_list) < 1: - raise ValueError(f"{entity_list.name} is empty. 
Nothing to launch.") - def __str__(self) -> str: output = "" e_header = "=== Ensembles ===\n" - m_header = "=== Applications ===\n" - db_header = "=== Feature Stores ===\n" + a_header = "=== Applications ===\n" + fs_header = "=== Feature Stores ===\n" if self.ensembles: output += e_header @@ -158,7 +136,7 @@ def __str__(self) -> str: output += "\n" if self.applications: - output += m_header + output += a_header for application in self.applications: output += f"{application.name}\n" if application.batch_settings: @@ -215,8 +193,7 @@ class LaunchedManifest(t.Generic[_T]): """Immutable manifest mapping launched entities or collections of launched entities to other pieces of external data. This is commonly used to map a launch-able entity to its constructed ``Step`` instance without assuming - that ``step.name == job.name`` or querying the ``JobManager`` which itself - can be ephemeral. + that ``step.name == job.name``. """ metadata: _LaunchedManifestMetadata diff --git a/smartsim/_core/control/previewrenderer.py b/smartsim/_core/control/preview_renderer.py similarity index 98% rename from smartsim/_core/control/previewrenderer.py rename to smartsim/_core/control/preview_renderer.py index d3e4f6be2..17d9ceac1 100644 --- a/smartsim/_core/control/previewrenderer.py +++ b/smartsim/_core/control/preview_renderer.py @@ -33,10 +33,10 @@ import jinja2.utils as u from jinja2 import pass_eval_context -from ..._core.config import CONFIG -from ..._core.control import Manifest from ...error.errors import PreviewFormatError from ...log import get_logger +from ..config import CONFIG +from . 
import Manifest from .job import Job logger = get_logger(__name__) diff --git a/smartsim/_core/entrypoints/dragon.py b/smartsim/_core/entrypoints/dragon.py index 2bfde74f2..b0b941d10 100644 --- a/smartsim/_core/entrypoints/dragon.py +++ b/smartsim/_core/entrypoints/dragon.py @@ -40,8 +40,8 @@ import zmq.auth.thread from smartsim._core.config import get_config -from smartsim._core.launcher.dragon import dragonSockets -from smartsim._core.launcher.dragon.dragonBackend import DragonBackend +from smartsim._core.launcher.dragon import dragon_sockets +from smartsim._core.launcher.dragon.dragon_backend import DragonBackend from smartsim._core.schemas import ( DragonBootstrapRequest, DragonBootstrapResponse, @@ -164,12 +164,12 @@ def run( dragon_pid: int, ) -> None: logger.debug(f"Opening socket {dragon_head_address}") - dragon_head_socket = dragonSockets.get_secure_socket(zmq_context, zmq.REP, True) + dragon_head_socket = dragon_sockets.get_secure_socket(zmq_context, zmq.REP, True) dragon_head_socket.bind(dragon_head_address) dragon_backend = DragonBackend(pid=dragon_pid) backend_updater = start_updater(dragon_backend, None) - server = dragonSockets.as_server(dragon_head_socket) + server = dragon_sockets.as_server(dragon_head_socket) logger.debug(f"Listening to {dragon_head_address}") @@ -236,14 +236,14 @@ def execute_entrypoint(args: DragonEntrypointArgs) -> int: else: dragon_head_address += ":5555" - zmq_authenticator = dragonSockets.get_authenticator(zmq_context, timeout=-1) + zmq_authenticator = dragon_sockets.get_authenticator(zmq_context, timeout=-1) logger.debug("Getting launcher socket") - launcher_socket = dragonSockets.get_secure_socket(zmq_context, zmq.REQ, False) + launcher_socket = dragon_sockets.get_secure_socket(zmq_context, zmq.REQ, False) logger.debug(f"Connecting launcher socket to: {args.launching_address}") launcher_socket.connect(args.launching_address) - client = dragonSockets.as_client(launcher_socket) + client = 
dragon_sockets.as_client(launcher_socket) logger.debug( f"Sending bootstrap request to launcher_socket with {dragon_head_address}" diff --git a/smartsim/_core/entrypoints/dragon_client.py b/smartsim/_core/entrypoints/dragon_client.py index e998ddce1..013112412 100644 --- a/smartsim/_core/entrypoints/dragon_client.py +++ b/smartsim/_core/entrypoints/dragon_client.py @@ -37,7 +37,7 @@ import zmq -from smartsim._core.launcher.dragon.dragonConnector import DragonConnector +from smartsim._core.launcher.dragon.dragon_connector import DragonConnector from smartsim._core.schemas import ( DragonHandshakeRequest, DragonRequest, diff --git a/smartsim/_core/entrypoints/telemetrymonitor.py b/smartsim/_core/entrypoints/telemetry_monitor.py similarity index 100% rename from smartsim/_core/entrypoints/telemetrymonitor.py rename to smartsim/_core/entrypoints/telemetry_monitor.py diff --git a/smartsim/_core/launcher/__init__.py b/smartsim/_core/launcher/__init__.py index c6584ee3d..3047aaed4 100644 --- a/smartsim/_core/launcher/__init__.py +++ b/smartsim/_core/launcher/__init__.py @@ -24,13 +24,13 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from .dragon.dragonLauncher import DragonLauncher +from .dragon.dragon_launcher import DragonLauncher from .launcher import Launcher from .local.local import LocalLauncher -from .lsf.lsfLauncher import LSFLauncher -from .pbs.pbsLauncher import PBSLauncher -from .sge.sgeLauncher import SGELauncher -from .slurm.slurmLauncher import SlurmLauncher +from .lsf.lsf_launcher import LSFLauncher +from .pbs.pbs_launcher import PBSLauncher +from .sge.sge_launcher import SGELauncher +from .slurm.slurm_launcher import SlurmLauncher __all__ = [ "Launcher", diff --git a/smartsim/_core/launcher/dragon/dragonBackend.py b/smartsim/_core/launcher/dragon/dragon_backend.py similarity index 99% rename from smartsim/_core/launcher/dragon/dragonBackend.py rename to smartsim/_core/launcher/dragon/dragon_backend.py index ec38e53eb..7d77aaaac 100644 --- a/smartsim/_core/launcher/dragon/dragonBackend.py +++ b/smartsim/_core/launcher/dragon/dragon_backend.py @@ -45,8 +45,10 @@ # pylint: enable=import-error # isort: on -from ...._core.config import get_config -from ...._core.schemas import ( +from ....log import get_logger +from ....status import TERMINAL_STATUSES, JobStatus +from ...config import get_config +from ...schemas import ( DragonHandshakeRequest, DragonHandshakeResponse, DragonRequest, @@ -60,9 +62,7 @@ DragonUpdateStatusRequest, DragonUpdateStatusResponse, ) -from ...._core.utils.helpers import create_short_id_str -from ....log import get_logger -from ....status import TERMINAL_STATUSES, JobStatus +from ...utils.helpers import create_short_id_str logger = get_logger(__name__) diff --git a/smartsim/_core/launcher/dragon/dragonConnector.py b/smartsim/_core/launcher/dragon/dragon_connector.py similarity index 98% rename from smartsim/_core/launcher/dragon/dragonConnector.py rename to smartsim/_core/launcher/dragon/dragon_connector.py index 60fbf3ce7..7ff4cdc1c 100644 --- a/smartsim/_core/launcher/dragon/dragonConnector.py +++ b/smartsim/_core/launcher/dragon/dragon_connector.py @@ 
-42,7 +42,6 @@ import zmq import zmq.auth.thread -from ...._core.launcher.dragon import dragonSockets from ....error.errors import SmartSimError from ....log import get_logger from ...config import get_config @@ -56,6 +55,7 @@ DragonShutdownRequest, ) from ...utils.network import find_free_port, get_best_interface_and_address +from . import dragon_sockets if t.TYPE_CHECKING: from typing_extensions import Self @@ -113,7 +113,7 @@ def can_monitor(self) -> bool: return self._dragon_head_pid is not None def _handshake(self, address: str) -> None: - self._dragon_head_socket = dragonSockets.get_secure_socket( + self._dragon_head_socket = dragon_sockets.get_secure_socket( self._context, zmq.REQ, False ) self._dragon_head_socket.connect(address) @@ -176,7 +176,7 @@ def _get_new_authenticator( except zmq.Again: logger.debug("Could not stop authenticator") try: - self._authenticator = dragonSockets.get_authenticator( + self._authenticator = dragon_sockets.get_authenticator( self._context, timeout ) return @@ -224,7 +224,9 @@ def _start_connector_socket(self, socket_addr: str) -> zmq.Socket[t.Any]: connector_socket: t.Optional[zmq.Socket[t.Any]] = None self._reset_timeout(config.dragon_server_startup_timeout) self._get_new_authenticator(-1) - connector_socket = dragonSockets.get_secure_socket(self._context, zmq.REP, True) + connector_socket = dragon_sockets.get_secure_socket( + self._context, zmq.REP, True + ) logger.debug(f"Binding connector to {socket_addr}") connector_socket.bind(socket_addr) if connector_socket is None: @@ -353,7 +355,7 @@ def connect_to_dragon(self) -> None: start_new_session=True, ) - server = dragonSockets.as_server(connector_socket) + server = dragon_sockets.as_server(connector_socket) logger.debug(f"Listening to {socket_addr}") request = _assert_schema_type(server.recv(), DragonBootstrapRequest) server.send( @@ -460,7 +462,7 @@ def _send_req_with_socket( send_flags: int = 0, recv_flags: int = 0, ) -> DragonResponse: - client = 
dragonSockets.as_client(socket) + client = dragon_sockets.as_client(socket) with DRG_LOCK: logger.debug(f"Sending {type(request).__name__}: {request}") client.send(request, send_flags) diff --git a/smartsim/_core/launcher/dragon/dragonLauncher.py b/smartsim/_core/launcher/dragon/dragon_launcher.py similarity index 98% rename from smartsim/_core/launcher/dragon/dragonLauncher.py rename to smartsim/_core/launcher/dragon/dragon_launcher.py index 398596049..d540ce081 100644 --- a/smartsim/_core/launcher/dragon/dragonLauncher.py +++ b/smartsim/_core/launcher/dragon/dragon_launcher.py @@ -30,11 +30,10 @@ import pathlib import typing as t -from smartsim._core.schemas.dragonRequests import DragonRunPolicy +from smartsim._core.schemas.dragon_requests import DragonRunPolicy from smartsim.error import errors from smartsim.types import LaunchedJobID -from ...._core.launcher.stepMapping import StepMap from ....error import LauncherError, SmartSimError from ....log import get_logger from ....settings import ( @@ -55,11 +54,12 @@ DragonUpdateStatusResponse, ) from ..launcher import WLMLauncher -from ..pbs.pbsLauncher import PBSLauncher -from ..slurm.slurmLauncher import SlurmLauncher +from ..pbs.pbs_launcher import PBSLauncher +from ..slurm.slurm_launcher import SlurmLauncher from ..step import DragonBatchStep, DragonStep, LocalStep, Step -from ..stepInfo import StepInfo -from .dragonConnector import DragonConnector, _SchemaT +from ..step_info import StepInfo +from ..step_mapping import StepMap +from .dragon_connector import DragonConnector, _SchemaT if t.TYPE_CHECKING: from typing_extensions import Self diff --git a/smartsim/_core/launcher/dragon/dragonSockets.py b/smartsim/_core/launcher/dragon/dragon_sockets.py similarity index 97% rename from smartsim/_core/launcher/dragon/dragonSockets.py rename to smartsim/_core/launcher/dragon/dragon_sockets.py index 80acd61a2..0ffe857e6 100644 --- a/smartsim/_core/launcher/dragon/dragonSockets.py +++ 
b/smartsim/_core/launcher/dragon/dragon_sockets.py @@ -30,8 +30,8 @@ import zmq.auth.thread from smartsim._core.config.config import get_config -from smartsim._core.schemas import dragonRequests as _dragonRequests -from smartsim._core.schemas import dragonResponses as _dragonResponses +from smartsim._core.schemas import dragon_requests as _dragonRequests +from smartsim._core.schemas import dragon_responses as _dragonResponses from smartsim._core.schemas import utils as _utils from smartsim._core.utils.security import KeyManager from smartsim.log import get_logger diff --git a/smartsim/_core/launcher/launcher.py b/smartsim/_core/launcher/launcher.py index bbc9b59d6..5b2894cf3 100644 --- a/smartsim/_core/launcher/launcher.py +++ b/smartsim/_core/launcher/launcher.py @@ -27,14 +27,13 @@ import abc import typing as t -from ..._core.launcher.stepMapping import StepMap from ...entity import SmartSimEntity from ...error import AllocationError, LauncherError, SSUnsupportedError from ...settings import SettingsBase from .step import Step -from .stepInfo import StepInfo, UnmanagedStepInfo -from .stepMapping import StepMapping -from .taskManager import TaskManager +from .step_info import StepInfo, UnmanagedStepInfo +from .step_mapping import StepMap, StepMapping +from .task_manager import TaskManager class Launcher(abc.ABC): # pragma: no cover diff --git a/smartsim/_core/launcher/local/local.py b/smartsim/_core/launcher/local/local.py index 2b3f47997..9a902f003 100644 --- a/smartsim/_core/launcher/local/local.py +++ b/smartsim/_core/launcher/local/local.py @@ -30,9 +30,9 @@ from ....settings import RunSettings, SettingsBase from ..launcher import Launcher from ..step import LocalStep, Step -from ..stepInfo import StepInfo, UnmanagedStepInfo -from ..stepMapping import StepMapping -from ..taskManager import TaskManager +from ..step_info import StepInfo, UnmanagedStepInfo +from ..step_mapping import StepMapping +from ..task_manager import TaskManager class 
LocalLauncher(Launcher): diff --git a/smartsim/_core/launcher/lsf/lsfCommands.py b/smartsim/_core/launcher/lsf/lsf_commands.py similarity index 100% rename from smartsim/_core/launcher/lsf/lsfCommands.py rename to smartsim/_core/launcher/lsf/lsf_commands.py diff --git a/smartsim/_core/launcher/lsf/lsfLauncher.py b/smartsim/_core/launcher/lsf/lsf_launcher.py similarity index 98% rename from smartsim/_core/launcher/lsf/lsfLauncher.py rename to smartsim/_core/launcher/lsf/lsf_launcher.py index 387607f30..472d66b89 100644 --- a/smartsim/_core/launcher/lsf/lsfLauncher.py +++ b/smartsim/_core/launcher/lsf/lsf_launcher.py @@ -50,9 +50,9 @@ OrterunStep, Step, ) -from ..stepInfo import LSFBatchStepInfo, LSFJsrunStepInfo, StepInfo -from .lsfCommands import bjobs, bkill, jskill, jslist -from .lsfParser import ( +from ..step_info import LSFBatchStepInfo, LSFJsrunStepInfo, StepInfo +from .lsf_commands import bjobs, bkill, jskill, jslist +from .lsf_parser import ( parse_bjobs_jobid, parse_bsub, parse_jslist_stepid, diff --git a/smartsim/_core/launcher/lsf/lsfParser.py b/smartsim/_core/launcher/lsf/lsf_parser.py similarity index 100% rename from smartsim/_core/launcher/lsf/lsfParser.py rename to smartsim/_core/launcher/lsf/lsf_parser.py diff --git a/smartsim/_core/launcher/pbs/pbsCommands.py b/smartsim/_core/launcher/pbs/pbs_commands.py similarity index 100% rename from smartsim/_core/launcher/pbs/pbsCommands.py rename to smartsim/_core/launcher/pbs/pbs_commands.py diff --git a/smartsim/_core/launcher/pbs/pbsLauncher.py b/smartsim/_core/launcher/pbs/pbs_launcher.py similarity index 98% rename from smartsim/_core/launcher/pbs/pbsLauncher.py rename to smartsim/_core/launcher/pbs/pbs_launcher.py index 2419144cf..fe8a9538b 100644 --- a/smartsim/_core/launcher/pbs/pbsLauncher.py +++ b/smartsim/_core/launcher/pbs/pbs_launcher.py @@ -51,9 +51,9 @@ QsubBatchStep, Step, ) -from ..stepInfo import PBSStepInfo, StepInfo -from .pbsCommands import qdel, qstat -from .pbsParser import ( +from 
..step_info import PBSStepInfo, StepInfo +from .pbs_commands import qdel, qstat +from .pbs_parser import ( parse_qstat_jobid, parse_qstat_jobid_json, parse_step_id_from_qstat, diff --git a/smartsim/_core/launcher/pbs/pbsParser.py b/smartsim/_core/launcher/pbs/pbs_parser.py similarity index 100% rename from smartsim/_core/launcher/pbs/pbsParser.py rename to smartsim/_core/launcher/pbs/pbs_parser.py diff --git a/smartsim/_core/launcher/sge/sgeCommands.py b/smartsim/_core/launcher/sge/sge_commands.py similarity index 100% rename from smartsim/_core/launcher/sge/sgeCommands.py rename to smartsim/_core/launcher/sge/sge_commands.py diff --git a/smartsim/_core/launcher/sge/sgeLauncher.py b/smartsim/_core/launcher/sge/sge_launcher.py similarity index 97% rename from smartsim/_core/launcher/sge/sgeLauncher.py rename to smartsim/_core/launcher/sge/sge_launcher.py index fc848b9a2..82c1f8fe9 100644 --- a/smartsim/_core/launcher/sge/sgeLauncher.py +++ b/smartsim/_core/launcher/sge/sge_launcher.py @@ -48,9 +48,9 @@ SgeQsubBatchStep, Step, ) -from ..stepInfo import SGEStepInfo, StepInfo -from .sgeCommands import qacct, qdel, qstat -from .sgeParser import parse_qacct_job_output, parse_qstat_jobid_xml +from ..step_info import SGEStepInfo, StepInfo +from .sge_commands import qacct, qdel, qstat +from .sge_parser import parse_qacct_job_output, parse_qstat_jobid_xml logger = get_logger(__name__) diff --git a/smartsim/_core/launcher/sge/sgeParser.py b/smartsim/_core/launcher/sge/sge_parser.py similarity index 100% rename from smartsim/_core/launcher/sge/sgeParser.py rename to smartsim/_core/launcher/sge/sge_parser.py diff --git a/smartsim/_core/launcher/slurm/slurmCommands.py b/smartsim/_core/launcher/slurm/slurm_commands.py similarity index 100% rename from smartsim/_core/launcher/slurm/slurmCommands.py rename to smartsim/_core/launcher/slurm/slurm_commands.py diff --git a/smartsim/_core/launcher/slurm/slurmLauncher.py b/smartsim/_core/launcher/slurm/slurm_launcher.py similarity index 
98% rename from smartsim/_core/launcher/slurm/slurmLauncher.py rename to smartsim/_core/launcher/slurm/slurm_launcher.py index 660ba3484..038176d93 100644 --- a/smartsim/_core/launcher/slurm/slurmLauncher.py +++ b/smartsim/_core/launcher/slurm/slurm_launcher.py @@ -52,9 +52,9 @@ SrunStep, Step, ) -from ..stepInfo import SlurmStepInfo, StepInfo -from .slurmCommands import sacct, scancel, sstat -from .slurmParser import parse_sacct, parse_sstat_nodes, parse_step_id_from_sacct +from ..step_info import SlurmStepInfo, StepInfo +from .slurm_commands import sacct, scancel, sstat +from .slurm_parser import parse_sacct, parse_sstat_nodes, parse_step_id_from_sacct logger = get_logger(__name__) diff --git a/smartsim/_core/launcher/slurm/slurmParser.py b/smartsim/_core/launcher/slurm/slurm_parser.py similarity index 100% rename from smartsim/_core/launcher/slurm/slurmParser.py rename to smartsim/_core/launcher/slurm/slurm_parser.py diff --git a/smartsim/_core/launcher/step/__init__.py b/smartsim/_core/launcher/step/__init__.py index 8331a18bf..b11e54a50 100644 --- a/smartsim/_core/launcher/step/__init__.py +++ b/smartsim/_core/launcher/step/__init__.py @@ -24,12 +24,12 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from .alpsStep import AprunStep -from .dragonStep import DragonBatchStep, DragonStep -from .localStep import LocalStep -from .lsfStep import BsubBatchStep, JsrunStep -from .mpiStep import MpiexecStep, MpirunStep, OrterunStep -from .pbsStep import QsubBatchStep -from .sgeStep import SgeQsubBatchStep -from .slurmStep import SbatchStep, SrunStep +from .alps_step import AprunStep +from .dragon_step import DragonBatchStep, DragonStep +from .local_step import LocalStep +from .lsf_step import BsubBatchStep, JsrunStep +from .mpi_step import MpiexecStep, MpirunStep, OrterunStep +from .pbs_step import QsubBatchStep +from .sge_step import SgeQsubBatchStep +from .slurm_step import SbatchStep, SrunStep from .step import Step diff --git a/smartsim/_core/launcher/step/alpsStep.py b/smartsim/_core/launcher/step/alps_step.py similarity index 100% rename from smartsim/_core/launcher/step/alpsStep.py rename to smartsim/_core/launcher/step/alps_step.py diff --git a/smartsim/_core/launcher/step/dragonStep.py b/smartsim/_core/launcher/step/dragon_step.py similarity index 99% rename from smartsim/_core/launcher/step/dragonStep.py rename to smartsim/_core/launcher/step/dragon_step.py index 2f4a80cd5..26864e89f 100644 --- a/smartsim/_core/launcher/step/dragonStep.py +++ b/smartsim/_core/launcher/step/dragon_step.py @@ -30,11 +30,6 @@ import sys import typing as t -from ...._core.schemas.dragonRequests import ( - DragonRunPolicy, - DragonRunRequest, - request_registry, -) from ....error.errors import SSUnsupportedError from ....log import get_logger from ....settings import ( @@ -43,6 +38,11 @@ SbatchSettings, Singularity, ) +from ...schemas.dragon_requests import ( + DragonRunPolicy, + DragonRunRequest, + request_registry, +) from .step import Step logger = get_logger(__name__) diff --git a/smartsim/_core/launcher/step/localStep.py b/smartsim/_core/launcher/step/local_step.py similarity index 100% rename from smartsim/_core/launcher/step/localStep.py rename to 
smartsim/_core/launcher/step/local_step.py diff --git a/smartsim/_core/launcher/step/lsfStep.py b/smartsim/_core/launcher/step/lsf_step.py similarity index 100% rename from smartsim/_core/launcher/step/lsfStep.py rename to smartsim/_core/launcher/step/lsf_step.py diff --git a/smartsim/_core/launcher/step/mpiStep.py b/smartsim/_core/launcher/step/mpi_step.py similarity index 100% rename from smartsim/_core/launcher/step/mpiStep.py rename to smartsim/_core/launcher/step/mpi_step.py diff --git a/smartsim/_core/launcher/step/pbsStep.py b/smartsim/_core/launcher/step/pbs_step.py similarity index 100% rename from smartsim/_core/launcher/step/pbsStep.py rename to smartsim/_core/launcher/step/pbs_step.py diff --git a/smartsim/_core/launcher/step/sgeStep.py b/smartsim/_core/launcher/step/sge_step.py similarity index 100% rename from smartsim/_core/launcher/step/sgeStep.py rename to smartsim/_core/launcher/step/sge_step.py diff --git a/smartsim/_core/launcher/step/slurmStep.py b/smartsim/_core/launcher/step/slurm_step.py similarity index 99% rename from smartsim/_core/launcher/step/slurmStep.py rename to smartsim/_core/launcher/step/slurm_step.py index 3f178d974..2a9046a3a 100644 --- a/smartsim/_core/launcher/step/slurmStep.py +++ b/smartsim/_core/launcher/step/slurm_step.py @@ -29,7 +29,8 @@ import typing as t from shlex import split as sh_split -from ....entity import Application, Ensemble, FSNode +from ....builders import Ensemble +from ....entity import Application, FSNode from ....error import AllocationError from ....log import get_logger from ....settings import RunSettings, SbatchSettings, Singularity, SrunSettings diff --git a/smartsim/_core/launcher/step/step.py b/smartsim/_core/launcher/step/step.py index 46bcebf7f..b5e79a363 100644 --- a/smartsim/_core/launcher/step/step.py +++ b/smartsim/_core/launcher/step/step.py @@ -38,7 +38,8 @@ from smartsim._core.config import CONFIG from smartsim.error.errors import SmartSimError, UnproxyableStepError -from ....entity 
import Application, Ensemble, FSNode +from ....builders import Ensemble +from ....entity import Application, FSNode from ....log import get_logger from ....settings import RunSettings, SettingsBase from ...utils.helpers import encode_cmd, get_base_36_repr diff --git a/smartsim/_core/launcher/stepInfo.py b/smartsim/_core/launcher/step_info.py similarity index 100% rename from smartsim/_core/launcher/stepInfo.py rename to smartsim/_core/launcher/step_info.py diff --git a/smartsim/_core/launcher/stepMapping.py b/smartsim/_core/launcher/step_mapping.py similarity index 100% rename from smartsim/_core/launcher/stepMapping.py rename to smartsim/_core/launcher/step_mapping.py diff --git a/smartsim/_core/launcher/taskManager.py b/smartsim/_core/launcher/task_manager.py similarity index 100% rename from smartsim/_core/launcher/taskManager.py rename to smartsim/_core/launcher/task_manager.py diff --git a/smartsim/_core/launcher/util/launcherUtil.py b/smartsim/_core/launcher/util/launcher_util.py similarity index 100% rename from smartsim/_core/launcher/util/launcherUtil.py rename to smartsim/_core/launcher/util/launcher_util.py diff --git a/smartsim/_core/schemas/__init__.py b/smartsim/_core/schemas/__init__.py index d7ee9d83d..54ae3947d 100644 --- a/smartsim/_core/schemas/__init__.py +++ b/smartsim/_core/schemas/__init__.py @@ -24,8 +24,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from .dragonRequests import * -from .dragonResponses import * +from .dragon_requests import * +from .dragon_responses import * __all__ = [ "DragonRequest", diff --git a/smartsim/_core/schemas/dragonRequests.py b/smartsim/_core/schemas/dragon_requests.py similarity index 100% rename from smartsim/_core/schemas/dragonRequests.py rename to smartsim/_core/schemas/dragon_requests.py diff --git a/smartsim/_core/schemas/dragonResponses.py b/smartsim/_core/schemas/dragon_responses.py similarity index 100% rename from smartsim/_core/schemas/dragonResponses.py rename to smartsim/_core/schemas/dragon_responses.py diff --git a/smartsim/_core/shell/shellLauncher.py b/smartsim/_core/shell/shell_launcher.py similarity index 99% rename from smartsim/_core/shell/shellLauncher.py rename to smartsim/_core/shell/shell_launcher.py index a24786698..9f88d0545 100644 --- a/smartsim/_core/shell/shellLauncher.py +++ b/smartsim/_core/shell/shell_launcher.py @@ -40,7 +40,7 @@ from smartsim._core.utils.launcher import create_job_id from smartsim.error import errors from smartsim.log import get_logger -from smartsim.settings.arguments.launchArguments import LaunchArguments +from smartsim.settings.arguments.launch_arguments import LaunchArguments from smartsim.status import JobStatus from smartsim.types import LaunchedJobID diff --git a/smartsim/_core/utils/serialize.py b/smartsim/_core/utils/serialize.py index aad38c778..46c0a2c1d 100644 --- a/smartsim/_core/utils/serialize.py +++ b/smartsim/_core/utils/serialize.py @@ -36,8 +36,9 @@ if t.TYPE_CHECKING: from smartsim._core.control.manifest import LaunchedManifest as _Manifest + from smartsim.builders import Ensemble from smartsim.database.orchestrator import FeatureStore - from smartsim.entity import Application, Ensemble, FSNode + from smartsim.entity import Application, FSNode from smartsim.entity.dbobject import FSModel, FSScript from smartsim.settings.base import BatchSettings, RunSettings diff --git 
a/smartsim/_core/utils/telemetry/telemetry.py b/smartsim/_core/utils/telemetry/telemetry.py index 8a9a99aed..c8ff3bf25 100644 --- a/smartsim/_core/utils/telemetry/telemetry.py +++ b/smartsim/_core/utils/telemetry/telemetry.py @@ -41,14 +41,13 @@ from smartsim._core.config import CONFIG from smartsim._core.control.job import JobEntity, _JobKey -from smartsim._core.control.jobmanager import JobManager -from smartsim._core.launcher.dragon.dragonLauncher import DragonLauncher +from smartsim._core.launcher.dragon.dragon_launcher import DragonLauncher from smartsim._core.launcher.launcher import Launcher from smartsim._core.launcher.local.local import LocalLauncher -from smartsim._core.launcher.lsf.lsfLauncher import LSFLauncher -from smartsim._core.launcher.pbs.pbsLauncher import PBSLauncher -from smartsim._core.launcher.slurm.slurmLauncher import SlurmLauncher -from smartsim._core.launcher.stepInfo import StepInfo +from smartsim._core.launcher.lsf.lsf_launcher import LSFLauncher +from smartsim._core.launcher.pbs.pbs_launcher import PBSLauncher +from smartsim._core.launcher.slurm.slurm_launcher import SlurmLauncher +from smartsim._core.launcher.step_info import StepInfo from smartsim._core.utils.helpers import get_ts_ms from smartsim._core.utils.serialize import MANIFEST_FILENAME from smartsim._core.utils.telemetry.collector import CollectorManager @@ -95,7 +94,6 @@ def __init__( self._tracked_jobs: t.Dict[_JobKey, JobEntity] = {} self._completed_jobs: t.Dict[_JobKey, JobEntity] = {} self._launcher: t.Optional[Launcher] = None - self.job_manager: JobManager = JobManager(threading.RLock()) self._launcher_map: t.Dict[str, t.Type[Launcher]] = { "slurm": SlurmLauncher, "pbs": PBSLauncher, @@ -132,14 +130,6 @@ def init_launcher(self, launcher: str) -> None: raise ValueError("Launcher type not supported: " + launcher) - def init_job_manager(self) -> None: - """Initialize the job manager instance""" - if not self._launcher: - raise TypeError("self._launcher must be 
initialized") - - self.job_manager.set_launcher(self._launcher) - self.job_manager.start() - def set_launcher(self, launcher_type: str) -> None: """Set the launcher for the experiment :param launcher_type: the name of the workload manager used by the experiment @@ -149,9 +139,6 @@ def set_launcher(self, launcher_type: str) -> None: if self._launcher is None: raise SmartSimError("Launcher init failed") - self.job_manager.set_launcher(self._launcher) - self.job_manager.start() - def process_manifest(self, manifest_path: str) -> None: """Read the manifest for the experiment. Process the `RuntimeManifest` by updating the set of tracked jobs @@ -210,14 +197,6 @@ def process_manifest(self, manifest_path: str) -> None: ) if entity.is_managed: - # Tell JobManager the task is unmanaged. This collects - # status updates but does not try to start a new copy - self.job_manager.add_job( - entity.name, - entity.step_id, - entity, - False, - ) # Tell the launcher it's managed so it doesn't attempt # to look for a PID that may no longer exist self._launcher.step_mapping.add( @@ -264,9 +243,6 @@ async def _to_completed( # remove all the registered collectors for the completed entity await self._collector_mgr.remove(entity) - job = self.job_manager[entity.name] - self.job_manager.move_to_completed(job) - status_clause = f"status: {step_info.status}" error_clause = f", error: {step_info.error}" if step_info.error else "" @@ -432,8 +408,7 @@ class TelemetryMonitor: """The telemetry monitor is a standalone process managed by SmartSim to perform long-term retrieval of experiment status updates and resource usage metrics. Note that a non-blocking driver script is likely to complete before - the SmartSim entities complete. Also, the JobManager performs status updates - only as long as the driver is running. This telemetry monitor entrypoint is + the SmartSim entities complete. 
This telemetry monitor entrypoint is started automatically when a SmartSim experiment calls the `start` method on resources. The entrypoint runs until it has no resources to monitor.""" @@ -463,11 +438,7 @@ def _can_shutdown(self) -> bool: :return: return True if capable of automatically shutting down """ - managed_jobs = ( - list(self._action_handler.job_manager.jobs.values()) - if self._action_handler - else [] - ) + managed_jobs = [] unmanaged_jobs = ( list(self._action_handler.tracked_jobs) if self._action_handler else [] ) diff --git a/smartsim/_core/utils/telemetry/util.py b/smartsim/_core/utils/telemetry/util.py index e46e7855b..5a1c94d5c 100644 --- a/smartsim/_core/utils/telemetry/util.py +++ b/smartsim/_core/utils/telemetry/util.py @@ -30,7 +30,7 @@ import pathlib import typing as t -from smartsim._core.launcher.stepInfo import StepInfo +from smartsim._core.launcher.step_info import StepInfo from smartsim.status import TERMINAL_STATUSES, JobStatus _EventClass = t.Literal["start", "stop", "timestep"] diff --git a/smartsim/builders/__init__.py b/smartsim/builders/__init__.py new file mode 100644 index 000000000..866269f20 --- /dev/null +++ b/smartsim/builders/__init__.py @@ -0,0 +1,28 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from .ensemble import Ensemble +from .utils.strategies import ParamSet diff --git a/smartsim/entity/ensemble.py b/smartsim/builders/ensemble.py similarity index 54% rename from smartsim/entity/ensemble.py rename to smartsim/builders/ensemble.py index f228c4a8a..c4a57175f 100644 --- a/smartsim/entity/ensemble.py +++ b/smartsim/builders/ensemble.py @@ -32,19 +32,20 @@ import os.path import typing as t -from smartsim.entity import entity, strategies +from smartsim.builders.utils import strategies +from smartsim.builders.utils.strategies import ParamSet +from smartsim.entity import entity from smartsim.entity.application import Application from smartsim.entity.files import EntityFiles -from smartsim.entity.strategies import ParamSet from smartsim.launchable.job import Job if t.TYPE_CHECKING: - from smartsim.settings.launchSettings import LaunchSettings + from smartsim.settings.launch_settings import LaunchSettings class Ensemble(entity.CompoundEntity): - """Entity to help parameterize the creation multiple application - instances. + """An Ensemble is a builder class that parameterizes the creation of multiple + Applications. 
""" def __init__( @@ -59,7 +60,60 @@ def __init__( max_permutations: int = -1, replicas: int = 1, ) -> None: - """Initialize an ``Ensemble`` of application instances + """Initialize an ``Ensemble`` of Application instances + + An Ensemble can be tailored to align with one of the following + creation strategies: parameter expansion or replicas. + + **Parameter Expansion** + + Parameter expansion allows users to assign different parameter values to + multiple Applications. This is done by specifying input to `Ensemble.file_parameters`, + `Ensemble.exe_arg_parameters` and `Ensemble.permutation_strategy`. The `permutation_strategy` + argument accepts three options: + + 1. "all_perm": Generates all possible parameter permutations for exhaustive exploration. + 2. "step": Collects identically indexed values across parameter lists to create parameter sets. + 3. "random": Enables random selection from predefined parameter spaces. + + The example below demonstrates creating an Ensemble via parameter expansion, resulting in + the creation of two Applications: + + .. highlight:: python + .. code-block:: python + + file_params={"SPAM": ["a", "b"], "EGGS": ["c", "d"]} + exe_arg_parameters = {"EXE": [["a"], ["b", "c"]], "ARGS": [["d"], ["e", "f"]]} + ensemble = Ensemble(name="name",exe="python",exe_arg_parameters=exe_arg_parameters, + file_parameters=file_params,permutation_strategy="step") + + This configuration will yield the following permutations: + + .. highlight:: python + .. code-block:: python + [ParamSet(params={'SPAM': 'a', 'EGGS': 'c'}, exe_args={'EXE': ['a'], 'ARGS': ['d']}), + ParamSet(params={'SPAM': 'b', 'EGGS': 'd'}, exe_args={'EXE': ['b', 'c'], 'ARGS': ['e', 'f']})] + + Each ParamSet contains the parameters assigned from file_params and the corresponding executable + arguments from exe_arg_parameters. + + **Replication** + The replication strategy involves creating identical Applications within an Ensemble. 
+ This is achieved by specifying the `replicas` argument in the Ensemble. + + For example, by applying the `replicas` argument to the previous parameter expansion + example, we can double our Application output: + + .. highlight:: python + .. code-block:: python + + file_params={"SPAM": ["a", "b"], "EGGS": ["c", "d"]} + exe_arg_parameters = {"EXE": [["a"], ["b", "c"]], "ARGS": [["d"], ["e", "f"]]} + ensemble = Ensemble(name="name",exe="python",exe_arg_parameters=exe_arg_parameters, + file_parameters=file_params,permutation_strategy="step", replicas=2) + + This configuration will result in each ParamSet being replicated, effectively doubling + the number of Applications created. :param name: name of the ensemble :param exe: executable to run @@ -83,7 +137,7 @@ def __init__( copy.deepcopy(exe_arg_parameters) if exe_arg_parameters else {} ) """The parameters and values to be used when configuring entities""" - self._files = copy.deepcopy(files) if files else EntityFiles() + self._files = copy.deepcopy(files) if files else None """The files to be copied, symlinked, and/or configured prior to execution""" self._file_parameters = ( copy.deepcopy(file_parameters) if file_parameters else {} @@ -98,25 +152,25 @@ def __init__( @property def exe(self) -> str: - """Return executable to run. + """Return the attached executable. - :returns: application executable to run + :return: the executable """ return self._exe @exe.setter def exe(self, value: str | os.PathLike[str]) -> None: - """Set executable to run. + """Set the executable. - :param value: executable to run + :param value: the executable """ self._exe = os.fspath(value) @property def exe_args(self) -> t.List[str]: - """Return a list of attached executable arguments. + """Return attached list of executable arguments. 
- :returns: application executable arguments + :return: the executable arguments """ return self._exe_args @@ -124,15 +178,15 @@ def exe_args(self) -> t.List[str]: def exe_args(self, value: t.Sequence[str]) -> None: """Set the executable arguments. - :param value: executable arguments + :param value: the executable arguments """ self._exe_args = list(value) @property def exe_arg_parameters(self) -> t.Mapping[str, t.Sequence[t.Sequence[str]]]: - """Return the executable argument parameters + """Return attached executable argument parameters. - :returns: executable arguments parameters + :return: the executable argument parameters """ return self._exe_arg_parameters @@ -140,35 +194,35 @@ def exe_arg_parameters(self) -> t.Mapping[str, t.Sequence[t.Sequence[str]]]: def exe_arg_parameters( self, value: t.Mapping[str, t.Sequence[t.Sequence[str]]] ) -> None: - """Set the executable arguments. + """Set the executable argument parameters. - :param value: executable arguments + :param value: the executable argument parameters """ self._exe_arg_parameters = copy.deepcopy(value) @property - def files(self) -> EntityFiles: - """Return files to be copied, symlinked, and/or configured prior to - execution. + def files(self) -> t.Union[EntityFiles, None]: + """Return attached EntityFiles object. - :returns: files + :return: the EntityFiles object of files to be copied, symlinked, + and/or configured prior to execution """ return self._files @files.setter - def files(self, value: EntityFiles) -> None: - """Set files to be copied, symlinked, and/or configured prior to - execution. + def files(self, value: t.Optional[EntityFiles]) -> None: + """Set the EntityFiles object. - :param value: files + :param value: the EntityFiles object of files to be copied, symlinked, + and/or configured prior to execution """ self._files = copy.deepcopy(value) @property def file_parameters(self) -> t.Mapping[str, t.Sequence[str]]: - """Return file parameters. + """Return the attached file parameters. 
- :returns: application file parameters + :return: the file parameters """ return self._file_parameters @@ -176,7 +230,7 @@ def file_parameters(self) -> t.Mapping[str, t.Sequence[str]]: def file_parameters(self, value: t.Mapping[str, t.Sequence[str]]) -> None: """Set the file parameters. - :param value: file parameters + :param value: the file parameters """ self._file_parameters = dict(value) @@ -184,7 +238,7 @@ def file_parameters(self, value: t.Mapping[str, t.Sequence[str]]) -> None: def permutation_strategy(self) -> str | strategies.PermutationStrategyType: """Return the permutation strategy - :return: permutation strategy + :return: the permutation strategy """ return self._permutation_strategy @@ -194,7 +248,7 @@ def permutation_strategy( ) -> None: """Set the permutation strategy - :param value: permutation strategy + :param value: the permutation strategy """ self._permutation_strategy = value @@ -202,7 +256,7 @@ def permutation_strategy( def max_permutations(self) -> int: """Return the maximum permutations - :return: max permutations + :return: the max permutations """ return self._max_permutations @@ -210,29 +264,34 @@ def max_permutations(self) -> int: def max_permutations(self, value: int) -> None: """Set the maximum permutations - :param value: the maxpermutations + :param value: the max permutations """ self._max_permutations = value @property def replicas(self) -> int: - """Return the number of replicas + """Return the number of replicas. - :return: number of replicas + :return: the number of replicas """ return self._replicas @replicas.setter def replicas(self, value: int) -> None: - """Set the number of replicas + """Set the number of replicas. :return: the number of replicas """ self._replicas = value def _create_applications(self) -> tuple[Application, ...]: - """Concretize the ensemble attributes into a collection of - application instances. + """Generate a collection of Application instances based on the Ensemble's attributes.
+ + This method uses a permutation strategy to create various combinations of file + parameters and executable arguments. Each combination is then replicated according + to the specified number of replicas, resulting in a set of Application instances. + + :return: A tuple of Application instances """ permutation_strategy = strategies.resolve(self.permutation_strategy) @@ -254,7 +313,36 @@ def _create_applications(self) -> tuple[Application, ...]: for i, permutation in enumerate(permutations_) ) - def as_jobs(self, settings: LaunchSettings) -> tuple[Job, ...]: + def build_jobs(self, settings: LaunchSettings) -> tuple[Job, ...]: + """Expand an Ensemble into a list of deployable Jobs and apply + identical LaunchSettings to each Job. + + The number of Jobs returned is controlled by the Ensemble attributes: + - Ensemble.exe_arg_parameters + - Ensemble.file_parameters + - Ensemble.permutation_strategy + - Ensemble.max_permutations + - Ensemble.replicas + + Consider the example below: + + .. highlight:: python + .. code-block:: python + + # Create LaunchSettings + my_launch_settings = LaunchSettings(...) + + # Initialize the Ensemble + ensemble = Ensemble("my_name", "echo", "hello world", replicas=3) + # Expand Ensemble into Jobs + ensemble_as_jobs = ensemble.build_jobs(my_launch_settings) + + By calling `build_jobs` on `ensemble`, three Jobs are returned because + three replicas were specified. Each Job will have the provided LaunchSettings. 
+ + :param settings: LaunchSettings to apply to each Job + :return: Sequence of Jobs with the provided LaunchSettings + """ apps = self._create_applications() if not apps: raise ValueError("There are no members as part of this ensemble") diff --git a/smartsim/entity/strategies.py b/smartsim/builders/utils/strategies.py similarity index 100% rename from smartsim/entity/strategies.py rename to smartsim/builders/utils/strategies.py diff --git a/smartsim/database/orchestrator.py b/smartsim/database/orchestrator.py index a6bd01c07..c29c781a1 100644 --- a/smartsim/database/orchestrator.py +++ b/smartsim/database/orchestrator.py @@ -41,7 +41,7 @@ from .._core.utils.helpers import is_valid_cmd, unpack_fs_identifier from .._core.utils.network import get_ip_from_host from .._core.utils.shell import execute_cmd -from ..entity import EntityList, FSNode, TelemetryConfiguration +from ..entity import FSNode, TelemetryConfiguration from ..error import SmartSimError, SSDBFilesNotParseable, SSUnsupportedError from ..log import get_logger from ..servertype import CLUSTERED, STANDALONE @@ -165,7 +165,7 @@ def _check_local_constraints(launcher: str, batch: bool) -> None: # pylint: disable-next=too-many-public-methods -class FeatureStore(EntityList[FSNode]): +class FeatureStore: """The FeatureStore is an in-memory database that can be launched alongside entities in SmartSim. 
Data can be transferred between entities by using one of the Python, C, C++ or Fortran clients diff --git a/smartsim/entity/__init__.py b/smartsim/entity/__init__.py index 7ffa290b2..a12d737bb 100644 --- a/smartsim/entity/__init__.py +++ b/smartsim/entity/__init__.py @@ -27,7 +27,5 @@ from .application import Application from .dbnode import FSNode from .dbobject import * -from .ensemble import Ensemble from .entity import SmartSimEntity, TelemetryConfiguration -from .entityList import EntityList, EntitySequence from .files import TaggedFilesHierarchy diff --git a/smartsim/entity/application.py b/smartsim/entity/application.py index a8302fc1f..fb3ed2a7e 100644 --- a/smartsim/entity/application.py +++ b/smartsim/entity/application.py @@ -46,6 +46,14 @@ class Application(SmartSimEntity): + """The Application class enables users to execute computational tasks in an + Experiment workflow, such as launching compiled applications, running scripts, + or performing general computational operations. + + Applications are designed to be added to Jobs, where LaunchSettings are also + provided to inject launcher-specific behavior into the Job. + """ + def __init__( self, name: str, @@ -56,6 +64,16 @@ def __init__( ) -> None: """Initialize an ``Application`` + Applications require a name and an executable. Optionally, users may provide + executable arguments, files and file parameters. To create a simple Application + that echos `Hello World!`, consider the example below: + + .. highlight:: python + .. code-block:: python + + # Create an application that runs the 'echo' command + my_app = Application(name="my_app", exe="echo", exe_args="Hello World!") + :param name: name of the application :param exe: executable to run :param exe_args: executable arguments @@ -83,25 +101,25 @@ def __init__( @property def exe(self) -> str: - """Return executable to run. + """Return the executable. 
- :returns: application executable to run + :return: the executable """ return self._exe @exe.setter def exe(self, value: str) -> None: - """Set executable to run. + """Set the executable. - :param value: executable to run + :param value: the executable """ self._exe = copy.deepcopy(value) @property def exe_args(self) -> t.MutableSequence[str]: - """Return a list of attached executable arguments. + """Return the executable arguments. - :returns: application executable arguments + :return: the executable arguments """ return self._exe_args @@ -109,7 +127,7 @@ def exe_args(self) -> t.MutableSequence[str]: def exe_args(self, value: t.Union[str, t.Sequence[str], None]) -> None: """Set the executable arguments. - :param value: executable arguments + :param value: the executable arguments """ self._exe_args = self._build_exe_args(value) @@ -122,20 +140,20 @@ def add_exe_args(self, args: t.Union[str, t.List[str], None]) -> None: self._exe_args.extend(args) @property - def files(self) -> t.Optional[EntityFiles]: - """Return files to be copied, symlinked, and/or configured prior to - execution. + def files(self) -> t.Union[EntityFiles, None]: + """Return attached EntityFiles object. - :returns: files + :return: the EntityFiles object of files to be copied, symlinked, + and/or configured prior to execution """ return self._files @files.setter def files(self, value: t.Optional[EntityFiles]) -> None: - """Set files to be copied, symlinked, and/or configured prior to - execution. + """Set the EntityFiles object. - :param value: files + :param value: the EntityFiles object of files to be copied, symlinked, + and/or configured prior to execution """ self._files = copy.deepcopy(value) @@ -143,7 +161,7 @@ def files(self, value: t.Optional[EntityFiles]) -> None: def file_parameters(self) -> t.Mapping[str, str]: """Return file parameters. 
- :returns: application file parameters + :return: the file parameters """ return self._file_parameters @@ -151,7 +169,7 @@ def file_parameters(self) -> t.Mapping[str, str]: def file_parameters(self, value: t.Mapping[str, str]) -> None: """Set the file parameters. - :param value: file parameters + :param value: the file parameters """ self._file_parameters = copy.deepcopy(value) @@ -159,7 +177,7 @@ def file_parameters(self, value: t.Mapping[str, str]) -> None: def incoming_entities(self) -> t.List[SmartSimEntity]: """Return incoming entities. - :returns: incoming entities + :return: incoming entities """ return self._incoming_entities @@ -244,7 +262,7 @@ def attach_generator_files( def attached_files_table(self) -> str: """Return a list of attached files as a plain text table - :returns: String version of table + :return: String version of table """ if not self.files: return "No file attached to this application." diff --git a/smartsim/entity/entity.py b/smartsim/entity/entity.py index 1caebb607..3f5a9eabd 100644 --- a/smartsim/entity/entity.py +++ b/smartsim/entity/entity.py @@ -29,11 +29,11 @@ import abc import typing as t -from smartsim.launchable.jobGroup import JobGroup +from smartsim.launchable.job_group import JobGroup if t.TYPE_CHECKING: from smartsim.launchable.job import Job - from smartsim.settings.launchSettings import LaunchSettings + from smartsim.settings.launch_settings import LaunchSettings class TelemetryConfiguration: @@ -135,6 +135,6 @@ class CompoundEntity(abc.ABC): """ @abc.abstractmethod - def as_jobs(self, settings: LaunchSettings) -> t.Collection[Job]: ... + def build_jobs(self, settings: LaunchSettings) -> t.Collection[Job]: ... 
def as_job_group(self, settings: LaunchSettings) -> JobGroup: - return JobGroup(list(self.as_jobs(settings))) + return JobGroup(list(self.build_jobs(settings))) diff --git a/smartsim/entity/entityList.py b/smartsim/entity/entityList.py deleted file mode 100644 index e6bb64f8f..000000000 --- a/smartsim/entity/entityList.py +++ /dev/null @@ -1,138 +0,0 @@ -# BSD 2-Clause License -# -# Copyright (c) 2021-2024, Hewlett Packard Enterprise -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import typing as t - -from .entity import SmartSimEntity - -if t.TYPE_CHECKING: - # pylint: disable-next=unused-import - import smartsim - -_T = t.TypeVar("_T", bound=SmartSimEntity) -# Old style pyint from TF 2.6.x does not know about pep484 style ``TypeVar`` names -# pylint: disable-next=invalid-name -_T_co = t.TypeVar("_T_co", bound=SmartSimEntity, covariant=True) - - -class EntitySequence(t.Generic[_T_co]): - """Abstract class for containers for SmartSimEntities""" - - def __init__(self, name: str, **kwargs: t.Any) -> None: - self.name: str = name - - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> - # WARNING: This class cannot be made truly covariant until the - # following properties are made read-only. It is currently - # designed for in-house type checking only!! - # - # Despite the fact that these properties are type hinted as - # ``Sequence``s, the underlying types must remain ``list``s as that is - # what subclasses are expecting when implementing their - # ``_initialize_entities`` methods. - # - # I'm leaving it "as is" for now as to not introduce a potential API - # break in case any users subclassed the invariant version of this - # class (``EntityList``), but a "proper" solution would be to turn - # ``EntitySequence``/``EntityList`` into proper ``abc.ABC``s and have - # the properties we expect to be initialized represented as abstract - # properties. An additional benefit of this solution is would be that - # users could actually initialize their entities in the ``__init__`` - # method, and it would remove the need for the cumbersome and - # un-type-hint-able ``_initialize_entities`` method by returning all - # object construction into the class' constructor. 
- # --------------------------------------------------------------------- - # - self.entities: t.Sequence[_T_co] = [] - self._fs_models: t.Sequence["smartsim.entity.FSModel"] = [] - self._fs_scripts: t.Sequence["smartsim.entity.FSScript"] = [] - # - # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - - self._initialize_entities(**kwargs) - - def _initialize_entities(self, **kwargs: t.Any) -> None: - """Initialize the SmartSimEntity objects in the container""" - raise NotImplementedError - - @property - def fs_models(self) -> t.Iterable["smartsim.entity.FSModel"]: - """Return an immutable collection of attached models""" - return (model for model in self._fs_models) - - @property - def fs_scripts(self) -> t.Iterable["smartsim.entity.FSScript"]: - """Return an immutable collection of attached scripts""" - return (script for script in self._fs_scripts) - - @property - def batch(self) -> bool: - """Property indicating whether or not the entity sequence should be - launched as a batch job - - :return: ``True`` if entity sequence should be launched as a batch job, - ``False`` if the members will be launched individually. 
- """ - # pylint: disable-next=no-member - return hasattr(self, "batch_settings") and self.batch_settings - - @property - def type(self) -> str: - """Return the name of the class""" - return type(self).__name__ - - def __getitem__(self, name: str) -> t.Optional[_T_co]: - for entity in self.entities: - if entity.name == name: - return entity - return None - - def __iter__(self) -> t.Iterator[_T_co]: - for entity in self.entities: - yield entity - - def __len__(self) -> int: - return len(self.entities) - - -class EntityList(EntitySequence[_T]): - """An invariant subclass of an ``EntitySequence`` with mutable containers""" - - def __init__(self, name: str, **kwargs: t.Any) -> None: - super().__init__(name=name, **kwargs) - # Change container types to be invariant ``list``s - self.entities: t.List[_T] = list(self.entities) - self._fs_models: t.List["smartsim.entity.FSModel"] = list(self._fs_models) - self._fs_scripts: t.List["smartsim.entity.FSScript"] = list(self._fs_scripts) - - def _initialize_entities(self, **kwargs: t.Any) -> None: - """Initialize the SmartSimEntity objects in the container""" - # Need to identically re-define this "abstract method" or pylint - # complains that we are trying to define a concrete implementation of - # an abstract class despite the fact that we want this class to also be - # abstract. All the more reason to turn both of these classes into - # ``abc.ABC``s in my opinion. 
- raise NotImplementedError diff --git a/smartsim/experiment.py b/smartsim/experiment.py index 0861cc1ed..e42176c6e 100644 --- a/smartsim/experiment.py +++ b/smartsim/experiment.py @@ -43,7 +43,7 @@ from smartsim.error import errors from smartsim.status import TERMINAL_STATUSES, InvalidJobStatus, JobStatus -from ._core import Generator, Manifest, previewrenderer +from ._core import Generator, Manifest, preview_renderer from .entity import TelemetryConfiguration from .error import SmartSimError from .log import ctx_exp_path, get_logger, method_contextualizer @@ -358,8 +358,8 @@ def _generate( def preview( self, *args: t.Any, - verbosity_level: previewrenderer.Verbosity = previewrenderer.Verbosity.INFO, - output_format: previewrenderer.Format = previewrenderer.Format.PLAINTEXT, + verbosity_level: preview_renderer.Verbosity = preview_renderer.Verbosity.INFO, + output_format: preview_renderer.Format = preview_renderer.Format.PLAINTEXT, output_filename: t.Optional[str] = None, ) -> None: """Preview entity information prior to launch. This method @@ -386,7 +386,7 @@ def preview( preview_manifest = Manifest(*args) - previewrenderer.render( + preview_renderer.render( self, preview_manifest, verbosity_level, diff --git a/smartsim/launchable/__init__.py b/smartsim/launchable/__init__.py index 961032bf2..383b458f0 100644 --- a/smartsim/launchable/__init__.py +++ b/smartsim/launchable/__init__.py @@ -24,11 +24,11 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from .basejob import BaseJob -from .baseJobGroup import BaseJobGroup -from .colocatedJobGroup import ColocatedJobGroup +from .base_job import BaseJob +from .base_job_group import BaseJobGroup +from .colocated_job_group import ColocatedJobGroup from .job import Job -from .jobGroup import JobGroup +from .job_group import JobGroup from .launchable import Launchable -from .mpmdjob import MPMDJob -from .mpmdpair import MPMDPair +from .mpmd_job import MPMDJob +from .mpmd_pair import MPMDPair diff --git a/smartsim/launchable/basejob.py b/smartsim/launchable/base_job.py similarity index 96% rename from smartsim/launchable/basejob.py rename to smartsim/launchable/base_job.py index eedf9f6ff..878a59e53 100644 --- a/smartsim/launchable/basejob.py +++ b/smartsim/launchable/base_job.py @@ -30,7 +30,7 @@ from smartsim.launchable.launchable import Launchable if t.TYPE_CHECKING: - from smartsim._core.commands.launchCommands import LaunchCommands + from smartsim._core.commands.launch_commands import LaunchCommands class BaseJob(ABC, Launchable): diff --git a/smartsim/launchable/baseJobGroup.py b/smartsim/launchable/base_job_group.py similarity index 99% rename from smartsim/launchable/baseJobGroup.py rename to smartsim/launchable/base_job_group.py index b7becba56..9031705f3 100644 --- a/smartsim/launchable/baseJobGroup.py +++ b/smartsim/launchable/base_job_group.py @@ -33,7 +33,7 @@ from smartsim.launchable.launchable import Launchable -from .basejob import BaseJob +from .base_job import BaseJob class BaseJobGroup(Launchable, MutableSequence[BaseJob], ABC): diff --git a/smartsim/launchable/colocatedJobGroup.py b/smartsim/launchable/colocated_job_group.py similarity index 97% rename from smartsim/launchable/colocatedJobGroup.py rename to smartsim/launchable/colocated_job_group.py index 1c3b96fba..db187a46c 100644 --- a/smartsim/launchable/colocatedJobGroup.py +++ b/smartsim/launchable/colocated_job_group.py @@ -29,8 +29,8 @@ import typing as t from copy import deepcopy -from 
.basejob import BaseJob -from .baseJobGroup import BaseJobGroup +from .base_job import BaseJob +from .base_job_group import BaseJobGroup if t.TYPE_CHECKING: from typing_extensions import Self diff --git a/smartsim/launchable/job.py b/smartsim/launchable/job.py index a433319ac..6ec2bbbc4 100644 --- a/smartsim/launchable/job.py +++ b/smartsim/launchable/job.py @@ -26,13 +26,12 @@ from __future__ import annotations -import os import typing as t from copy import deepcopy -from smartsim._core.commands.launchCommands import LaunchCommands +from smartsim._core.commands.launch_commands import LaunchCommands from smartsim._core.utils.helpers import check_name -from smartsim.launchable.basejob import BaseJob +from smartsim.launchable.base_job import BaseJob from smartsim.log import get_logger from smartsim.settings import LaunchSettings @@ -45,11 +44,9 @@ @t.final class Job(BaseJob): """A Job holds a reference to a SmartSimEntity and associated - LaunchSettings prior to launch. It is responsible for turning - the stored entity and launch settings into commands that can be - executed by a launcher. - - Jobs will hold a deep copy of launch settings. + LaunchSettings prior to launch. It is responsible for turning + the stored SmartSimEntity and LaunchSettings into commands that can be + executed by a launcher. Jobs are designed to be started by the Experiment. """ def __init__( @@ -58,47 +55,91 @@ def __init__( launch_settings: LaunchSettings, name: str | None = None, ): + """Initialize a ``Job`` + + Jobs require a SmartSimEntity and a LaunchSettings. Optionally, users may provide + a name. To create a simple Job that echos `Hello World!`, consider the example below: + + .. highlight:: python + .. 
code-block:: python + + # Create an application that runs the 'echo' command + my_app = Application(name="my_app", exe="echo", exe_args="Hello World!") + # Define the launch settings using SLURM + srun_settings = LaunchSettings(launcher="slurm") + + # Create a Job with the `my_app` and `srun_settings` + my_job = Job(my_app, srun_settings, name="my_job") + + :param entity: the SmartSimEntity object + :param launch_settings: the LaunchSettings object + :param name: the Job name + """ super().__init__() + """Initialize the parent class BaseJob""" self._entity = deepcopy(entity) + """Deepcopy of the SmartSimEntity object""" self._launch_settings = deepcopy(launch_settings) + """Deepcopy of the LaunchSettings object""" self._name = name if name else entity.name + """Name of the Job""" check_name(self._name) @property def name(self) -> str: - """Retrieves the name of the Job.""" + """Return the name of the Job. + + :return: the name of the Job + """ return self._name @name.setter def name(self, name: str) -> None: - """Sets the name of the Job.""" + """Set the name of the Job. + + :param name: the name of the Job + """ check_name(name) logger.debug(f'Overwriting the Job name from "{self._name}" to "{name}"') self._name = name @property def entity(self) -> SmartSimEntity: - """Retrieves the Job entity.""" + """Return the attached entity. + + :return: the attached SmartSimEntity + """ return deepcopy(self._entity) @entity.setter def entity(self, value: SmartSimEntity) -> None: - """Sets the Job entity.""" + """Set the Job entity. + + :param value: the SmartSimEntity + """ self._entity = deepcopy(value) @property def launch_settings(self) -> LaunchSettings: - """Retrieves the Job LaunchSettings.""" + """Return the attached LaunchSettings. + + :return: the attached LaunchSettings + """ return deepcopy(self._launch_settings) @launch_settings.setter def launch_settings(self, value: LaunchSettings) -> None: - """Sets the Job LaunchSettings.""" + """Set the Jobs LaunchSettings. 
+ + :param value: the LaunchSettings + """ self._launch_settings = deepcopy(value) def get_launch_steps(self) -> LaunchCommands: """Return the launch steps corresponding to the internal data. + + :returns: The Jobs launch steps """ # TODO: return JobWarehouseRunner.run(self) raise NotImplementedError diff --git a/smartsim/launchable/jobGroup.py b/smartsim/launchable/job_group.py similarity index 97% rename from smartsim/launchable/jobGroup.py rename to smartsim/launchable/job_group.py index 3de767711..f06313dd8 100644 --- a/smartsim/launchable/jobGroup.py +++ b/smartsim/launchable/job_group.py @@ -32,8 +32,8 @@ from smartsim.log import get_logger from .._core.utils.helpers import check_name -from .basejob import BaseJob -from .baseJobGroup import BaseJobGroup +from .base_job import BaseJob +from .base_job_group import BaseJobGroup logger = get_logger(__name__) diff --git a/smartsim/launchable/mpmdjob.py b/smartsim/launchable/mpmd_job.py similarity index 94% rename from smartsim/launchable/mpmdjob.py rename to smartsim/launchable/mpmd_job.py index 7426d9136..e526f1074 100644 --- a/smartsim/launchable/mpmdjob.py +++ b/smartsim/launchable/mpmd_job.py @@ -31,12 +31,12 @@ from copy import deepcopy from smartsim.error.errors import SSUnsupportedError -from smartsim.launchable.basejob import BaseJob -from smartsim.launchable.mpmdpair import MPMDPair -from smartsim.settings.launchSettings import LaunchSettings +from smartsim.launchable.base_job import BaseJob +from smartsim.launchable.mpmd_pair import MPMDPair +from smartsim.settings.launch_settings import LaunchSettings if t.TYPE_CHECKING: - from smartsim._core.commands.launchCommands import LaunchCommands + from smartsim._core.commands.launch_commands import LaunchCommands from smartsim.entity.entity import SmartSimEntity diff --git a/smartsim/launchable/mpmdpair.py b/smartsim/launchable/mpmd_pair.py similarity index 96% rename from smartsim/launchable/mpmdpair.py rename to smartsim/launchable/mpmd_pair.py index 
3df8a0ee5..722a16cde 100644 --- a/smartsim/launchable/mpmdpair.py +++ b/smartsim/launchable/mpmd_pair.py @@ -29,7 +29,7 @@ import copy import typing as t -from smartsim.settings.launchSettings import LaunchSettings +from smartsim.settings.launch_settings import LaunchSettings if t.TYPE_CHECKING: from smartsim.entity.entity import SmartSimEntity diff --git a/smartsim/settings/__init__.py b/smartsim/settings/__init__.py index b9ede3cec..59aeeffbd 100644 --- a/smartsim/settings/__init__.py +++ b/smartsim/settings/__init__.py @@ -26,9 +26,9 @@ import typing as t -from .baseSettings import BaseSettings -from .batchSettings import BatchSettings -from .launchSettings import LaunchSettings +from .base_settings import BaseSettings +from .batch_settings import BatchSettings +from .launch_settings import LaunchSettings __all__ = ["LaunchSettings", "BaseSettings", "BatchSettings"] diff --git a/smartsim/settings/arguments/__init__.py b/smartsim/settings/arguments/__init__.py index cd216526c..f79a3b4bf 100644 --- a/smartsim/settings/arguments/__init__.py +++ b/smartsim/settings/arguments/__init__.py @@ -24,7 +24,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from .batchArguments import BatchArguments -from .launchArguments import LaunchArguments +from .batch_arguments import BatchArguments +from .launch_arguments import LaunchArguments __all__ = ["LaunchArguments", "BatchArguments"] diff --git a/smartsim/settings/arguments/batch/lsf.py b/smartsim/settings/arguments/batch/lsf.py index 10dc85763..23f948bd0 100644 --- a/smartsim/settings/arguments/batch/lsf.py +++ b/smartsim/settings/arguments/batch/lsf.py @@ -30,20 +30,24 @@ from smartsim.log import get_logger -from ...batchCommand import SchedulerType +from ...batch_command import BatchSchedulerType from ...common import StringArgument -from ..batchArguments import BatchArguments +from ..batch_arguments import BatchArguments logger = get_logger(__name__) class BsubBatchArguments(BatchArguments): + """A class to represent the arguments required for submitting batch + jobs using the bsub command. + """ + def scheduler_str(self) -> str: """Get the string representation of the scheduler :returns: The string representation of the scheduler """ - return SchedulerType.Lsf.value + return BatchSchedulerType.Lsf.value def set_walltime(self, walltime: str) -> None: """Set the walltime @@ -137,7 +141,7 @@ def format_batch_args(self) -> t.List[str]: """ opts = [] - for opt, value in self._scheduler_args.items(): + for opt, value in self._batch_args.items(): prefix = "-" # LSF only uses single dashses @@ -156,4 +160,4 @@ def set(self, key: str, value: str | None) -> None: argument (if applicable), otherwise `None` """ # Store custom arguments in the launcher_args - self._scheduler_args[key] = value + self._batch_args[key] = value diff --git a/smartsim/settings/arguments/batch/pbs.py b/smartsim/settings/arguments/batch/pbs.py index 192874c16..126207665 100644 --- a/smartsim/settings/arguments/batch/pbs.py +++ b/smartsim/settings/arguments/batch/pbs.py @@ -32,20 +32,24 @@ from smartsim.log import get_logger from ....error import SSConfigError -from ...batchCommand import SchedulerType 
+from ...batch_command import BatchSchedulerType from ...common import StringArgument -from ..batchArguments import BatchArguments +from ..batch_arguments import BatchArguments logger = get_logger(__name__) class QsubBatchArguments(BatchArguments): + """A class to represent the arguments required for submitting batch + jobs using the qsub command. + """ + def scheduler_str(self) -> str: """Get the string representation of the scheduler :returns: The string representation of the scheduler """ - return SchedulerType.Pbs.value + return BatchSchedulerType.Pbs.value def set_nodes(self, num_nodes: int) -> None: """Set the number of nodes for this batch job @@ -119,7 +123,7 @@ def format_batch_args(self) -> t.List[str]: :return: batch arguments for `qsub` :raises ValueError: if options are supplied without values """ - opts, batch_arg_copy = self._create_resource_list(self._scheduler_args) + opts, batch_arg_copy = self._create_resource_list(self._batch_args) for opt, value in batch_arg_copy.items(): prefix = "-" if not value: @@ -179,4 +183,4 @@ def set(self, key: str, value: str | None) -> None: :param value: A string representation of the value for the launch argument (if applicable), otherwise `None` """ - self._scheduler_args[key] = value + self._batch_args[key] = value diff --git a/smartsim/settings/arguments/batch/slurm.py b/smartsim/settings/arguments/batch/slurm.py index f4725a117..26f9cf854 100644 --- a/smartsim/settings/arguments/batch/slurm.py +++ b/smartsim/settings/arguments/batch/slurm.py @@ -31,20 +31,24 @@ from smartsim.log import get_logger -from ...batchCommand import SchedulerType +from ...batch_command import BatchSchedulerType from ...common import StringArgument -from ..batchArguments import BatchArguments +from ..batch_arguments import BatchArguments logger = get_logger(__name__) class SlurmBatchArguments(BatchArguments): + """A class to represent the arguments required for submitting batch + jobs using the sbatch command. 
+ """ + def scheduler_str(self) -> str: """Get the string representation of the scheduler :returns: The string representation of the scheduler """ - return SchedulerType.Slurm.value + return BatchSchedulerType.Slurm.value def set_walltime(self, walltime: str) -> None: """Set the walltime of the job @@ -127,7 +131,7 @@ def format_batch_args(self) -> t.List[str]: """ opts = [] # TODO add restricted here - for opt, value in self._scheduler_args.items(): + for opt, value in self._batch_args.items(): # attach "-" prefix if argument is 1 character otherwise "--" short_arg = len(opt) == 1 prefix = "-" if short_arg else "--" @@ -149,4 +153,4 @@ def set(self, key: str, value: str | None) -> None: argument (if applicable), otherwise `None` """ # Store custom arguments in the launcher_args - self._scheduler_args[key] = value + self._batch_args[key] = value diff --git a/smartsim/settings/arguments/batchArguments.py b/smartsim/settings/arguments/batch_arguments.py similarity index 92% rename from smartsim/settings/arguments/batchArguments.py rename to smartsim/settings/arguments/batch_arguments.py index a85148697..0fa8d3964 100644 --- a/smartsim/settings/arguments/batchArguments.py +++ b/smartsim/settings/arguments/batch_arguments.py @@ -44,8 +44,9 @@ class BatchArguments(ABC): the input parameter to a properly formatted launcher argument. 
""" - def __init__(self, scheduler_args: t.Dict[str, str | None] | None) -> None: - self._scheduler_args = copy.deepcopy(scheduler_args) or {} + def __init__(self, batch_args: t.Dict[str, str | None] | None) -> None: + self._batch_args = copy.deepcopy(batch_args) or {} + """A dictionary of batch arguments""" @abstractmethod def scheduler_str(self) -> str: @@ -104,5 +105,5 @@ def format_batch_args(self) -> t.List[str]: pass def __str__(self) -> str: # pragma: no-cover - string = f"\nScheduler Arguments:\n{fmt_dict(self._scheduler_args)}" + string = f"\nScheduler Arguments:\n{fmt_dict(self._batch_args)}" return string diff --git a/smartsim/settings/arguments/launch/alps.py b/smartsim/settings/arguments/launch/alps.py index 89a108a28..356a443d6 100644 --- a/smartsim/settings/arguments/launch/alps.py +++ b/smartsim/settings/arguments/launch/alps.py @@ -30,11 +30,11 @@ from smartsim._core.arguments.shell import ShellLaunchArguments from smartsim._core.dispatch import dispatch -from smartsim._core.shell.shellLauncher import ShellLauncher, make_shell_format_fn +from smartsim._core.shell.shell_launcher import ShellLauncher, make_shell_format_fn from smartsim.log import get_logger from ...common import set_check_input -from ...launchCommand import LauncherType +from ...launch_command import LauncherType logger = get_logger(__name__) _as_aprun_command = make_shell_format_fn(run_command="aprun") diff --git a/smartsim/settings/arguments/launch/dragon.py b/smartsim/settings/arguments/launch/dragon.py index 98b91059c..f1b34608d 100644 --- a/smartsim/settings/arguments/launch/dragon.py +++ b/smartsim/settings/arguments/launch/dragon.py @@ -33,8 +33,8 @@ from smartsim.log import get_logger from ...common import set_check_input -from ...launchCommand import LauncherType -from ..launchArguments import LaunchArguments +from ...launch_command import LauncherType +from ..launch_arguments import LaunchArguments logger = get_logger(__name__) diff --git 
a/smartsim/settings/arguments/launch/local.py b/smartsim/settings/arguments/launch/local.py index 1fb664790..2c589cb48 100644 --- a/smartsim/settings/arguments/launch/local.py +++ b/smartsim/settings/arguments/launch/local.py @@ -30,11 +30,11 @@ from smartsim._core.arguments.shell import ShellLaunchArguments from smartsim._core.dispatch import dispatch -from smartsim._core.shell.shellLauncher import ShellLauncher, make_shell_format_fn +from smartsim._core.shell.shell_launcher import ShellLauncher, make_shell_format_fn from smartsim.log import get_logger from ...common import StringArgument, set_check_input -from ...launchCommand import LauncherType +from ...launch_command import LauncherType logger = get_logger(__name__) _as_local_command = make_shell_format_fn(run_command=None) diff --git a/smartsim/settings/arguments/launch/lsf.py b/smartsim/settings/arguments/launch/lsf.py index 34d0f007e..ed2427198 100644 --- a/smartsim/settings/arguments/launch/lsf.py +++ b/smartsim/settings/arguments/launch/lsf.py @@ -32,11 +32,11 @@ from smartsim._core.arguments.shell import ShellLaunchArguments from smartsim._core.dispatch import EnvironMappingType, dispatch -from smartsim._core.shell.shellLauncher import ShellLauncher, ShellLauncherCommand +from smartsim._core.shell.shell_launcher import ShellLauncher, ShellLauncherCommand from smartsim.log import get_logger from ...common import set_check_input -from ...launchCommand import LauncherType +from ...launch_command import LauncherType logger = get_logger(__name__) diff --git a/smartsim/settings/arguments/launch/mpi.py b/smartsim/settings/arguments/launch/mpi.py index 54cb7e2fd..ce8c43aa5 100644 --- a/smartsim/settings/arguments/launch/mpi.py +++ b/smartsim/settings/arguments/launch/mpi.py @@ -30,11 +30,11 @@ from smartsim._core.arguments.shell import ShellLaunchArguments from smartsim._core.dispatch import dispatch -from smartsim._core.shell.shellLauncher import ShellLauncher, make_shell_format_fn +from 
smartsim._core.shell.shell_launcher import ShellLauncher, make_shell_format_fn from smartsim.log import get_logger from ...common import set_check_input -from ...launchCommand import LauncherType +from ...launch_command import LauncherType logger = get_logger(__name__) _as_mpirun_command = make_shell_format_fn("mpirun") diff --git a/smartsim/settings/arguments/launch/pals.py b/smartsim/settings/arguments/launch/pals.py index 915ae8622..d48dc799b 100644 --- a/smartsim/settings/arguments/launch/pals.py +++ b/smartsim/settings/arguments/launch/pals.py @@ -30,11 +30,11 @@ from smartsim._core.arguments.shell import ShellLaunchArguments from smartsim._core.dispatch import dispatch -from smartsim._core.shell.shellLauncher import ShellLauncher, make_shell_format_fn +from smartsim._core.shell.shell_launcher import ShellLauncher, make_shell_format_fn from smartsim.log import get_logger from ...common import set_check_input -from ...launchCommand import LauncherType +from ...launch_command import LauncherType logger = get_logger(__name__) _as_pals_command = make_shell_format_fn(run_command="mpiexec") diff --git a/smartsim/settings/arguments/launch/slurm.py b/smartsim/settings/arguments/launch/slurm.py index 6eff9c812..c5dceff62 100644 --- a/smartsim/settings/arguments/launch/slurm.py +++ b/smartsim/settings/arguments/launch/slurm.py @@ -34,11 +34,11 @@ from smartsim._core.arguments.shell import ShellLaunchArguments from smartsim._core.dispatch import EnvironMappingType, dispatch -from smartsim._core.shell.shellLauncher import ShellLauncher, ShellLauncherCommand +from smartsim._core.shell.shell_launcher import ShellLauncher, ShellLauncherCommand from smartsim.log import get_logger from ...common import set_check_input -from ...launchCommand import LauncherType +from ...launch_command import LauncherType logger = get_logger(__name__) diff --git a/smartsim/settings/arguments/launchArguments.py b/smartsim/settings/arguments/launch_arguments.py similarity index 98% rename from 
smartsim/settings/arguments/launchArguments.py rename to smartsim/settings/arguments/launch_arguments.py index 0e011339e..6ec741d91 100644 --- a/smartsim/settings/arguments/launchArguments.py +++ b/smartsim/settings/arguments/launch_arguments.py @@ -50,6 +50,7 @@ def __init__(self, launch_args: t.Dict[str, str | None] | None) -> None: :param launch_args: A mapping of arguments to (optional) values """ self._launch_args = copy.deepcopy(launch_args) or {} + """A dictionary of launch arguments""" @abstractmethod def launcher_str(self) -> str: diff --git a/smartsim/settings/baseSettings.py b/smartsim/settings/base_settings.py similarity index 95% rename from smartsim/settings/baseSettings.py rename to smartsim/settings/base_settings.py index 1acd5f605..2e8a87f57 100644 --- a/smartsim/settings/baseSettings.py +++ b/smartsim/settings/base_settings.py @@ -23,7 +23,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# fmt: off + class BaseSettings: - ... -# fmt: on + """ + A base class for LaunchSettings and BatchSettings. + """ diff --git a/smartsim/settings/batchSettings.py b/smartsim/settings/batchSettings.py deleted file mode 100644 index 6649fa5f7..000000000 --- a/smartsim/settings/batchSettings.py +++ /dev/null @@ -1,113 +0,0 @@ -# BSD 2-Clause License -# -# Copyright (c) 2021-2024, Hewlett Packard Enterprise -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from __future__ import annotations - -import copy -import typing as t - -from smartsim.log import get_logger - -from .._core.utils.helpers import fmt_dict -from .arguments import BatchArguments -from .arguments.batch.lsf import BsubBatchArguments -from .arguments.batch.pbs import QsubBatchArguments -from .arguments.batch.slurm import SlurmBatchArguments -from .baseSettings import BaseSettings -from .batchCommand import SchedulerType -from .common import StringArgument - -logger = get_logger(__name__) - - -class BatchSettings(BaseSettings): - def __init__( - self, - batch_scheduler: t.Union[SchedulerType, str], - scheduler_args: t.Dict[str, t.Union[str, None]] | None = None, - env_vars: StringArgument | None = None, - ) -> None: - try: - self._batch_scheduler = SchedulerType(batch_scheduler) - except ValueError: - raise ValueError(f"Invalid scheduler type: {batch_scheduler}") from None - self._arguments = self._get_arguments(scheduler_args) - self.env_vars = env_vars or {} - - @property - def scheduler(self) -> str: - """Return the launcher name.""" - return self._batch_scheduler.value - - @property - def batch_scheduler(self) -> str: - """Return the scheduler name.""" - return 
self._batch_scheduler.value - - @property - def scheduler_args(self) -> BatchArguments: - """Return the batch argument translator.""" - return self._arguments - - @property - def env_vars(self) -> StringArgument: - """Return an immutable list of attached environment variables.""" - return copy.deepcopy(self._env_vars) - - @env_vars.setter - def env_vars(self, value: t.Dict[str, str | None]) -> None: - """Set the environment variables.""" - self._env_vars = copy.deepcopy(value) - - def _get_arguments(self, scheduler_args: StringArgument | None) -> BatchArguments: - """Map the Scheduler to the BatchArguments. This method should only be - called once during construction. - - :param scheduler_args: A mapping of arguments names to values to be - used to initialize the arguments - :returns: The appropriate type for the settings instance. - """ - if self._batch_scheduler == SchedulerType.Slurm: - return SlurmBatchArguments(scheduler_args) - elif self._batch_scheduler == SchedulerType.Lsf: - return BsubBatchArguments(scheduler_args) - elif self._batch_scheduler == SchedulerType.Pbs: - return QsubBatchArguments(scheduler_args) - else: - raise ValueError(f"Invalid scheduler type: {self._batch_scheduler}") - - def format_batch_args(self) -> t.List[str]: - """Get the formatted batch arguments for a preview - - :return: batch arguments for Sbatch - """ - return self._arguments.format_batch_args() - - def __str__(self) -> str: # pragma: no-cover - string = f"\nScheduler: {self.scheduler}{self.scheduler_args}" - if self.env_vars: - string += f"\nEnvironment variables: \n{fmt_dict(self.env_vars)}" - return string diff --git a/smartsim/settings/batchCommand.py b/smartsim/settings/batch_command.py similarity index 94% rename from smartsim/settings/batchCommand.py rename to smartsim/settings/batch_command.py index 8f3b0c89d..a96492d39 100644 --- a/smartsim/settings/batchCommand.py +++ b/smartsim/settings/batch_command.py @@ -27,10 +27,8 @@ from enum import Enum -class 
SchedulerType(Enum): - """Schedulers that are supported by - SmartSim. - """ +class BatchSchedulerType(Enum): + """Schedulers supported by SmartSim.""" Slurm = "slurm" Pbs = "pbs" diff --git a/smartsim/settings/batch_settings.py b/smartsim/settings/batch_settings.py new file mode 100644 index 000000000..734e919ce --- /dev/null +++ b/smartsim/settings/batch_settings.py @@ -0,0 +1,174 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from __future__ import annotations + +import copy +import typing as t + +from smartsim.log import get_logger + +from .._core.utils.helpers import fmt_dict +from .arguments import BatchArguments +from .arguments.batch.lsf import BsubBatchArguments +from .arguments.batch.pbs import QsubBatchArguments +from .arguments.batch.slurm import SlurmBatchArguments +from .base_settings import BaseSettings +from .batch_command import BatchSchedulerType +from .common import StringArgument + +logger = get_logger(__name__) + + +class BatchSettings(BaseSettings): + """The BatchSettings class stores scheduler configuration settings and is + used to inject scheduler-specific behavior into a job. + + BatchSettings is designed to be extended by a BatchArguments child class that + corresponds to the scheduler provided during initialization. The supported schedulers + are Slurm, PBS, and LSF. Using the BatchSettings class, users can: + + - Set the scheduler type of a batch job. + - Configure batch arguments and environment variables. + - Access and modify custom batch arguments. + - Update environment variables. + - Retrieve information associated with the ``BatchSettings`` object. + - The scheduler value (BatchSettings.scheduler). + - The derived BatchArguments child class (BatchSettings.batch_args). + - The set environment variables (BatchSettings.env_vars). + - A formatted output of set batch arguments (BatchSettings.format_batch_args). + """ + + def __init__( + self, + batch_scheduler: t.Union[BatchSchedulerType, str], + batch_args: StringArgument | None = None, + env_vars: StringArgument | None = None, + ) -> None: + """Initialize a BatchSettings instance. + + The "batch_scheduler" of SmartSim BatchSettings will determine the + child type assigned to the BatchSettings.batch_args attribute. + For example, to configure a job for SLURM batch jobs, assign BatchSettings.batch_scheduler + to "slurm" or BatchSchedulerType.Slurm: + + .. highlight:: python + .. 
code-block:: python + + sbatch_settings = BatchSettings(batch_scheduler="slurm") + # OR + sbatch_settings = BatchSettings(batch_scheduler=BatchSchedulerType.Slurm) + + This will assign a SlurmBatchArguments object to ``sbatch_settings.batch_args``. + Using the object, users may access the child class functions to set + batch configurations. For example: + + .. highlight:: python + .. code-block:: python + + sbatch_settings.batch_args.set_nodes(5) + sbatch_settings.batch_args.set_cpus_per_task(2) + + To set customized batch arguments, use the `set()` function provided by + the BatchSettings child class. For example: + + .. highlight:: python + .. code-block:: python + + sbatch_settings.batch_args.set(key="nodes", value="6") + + If the key already exists in the existing batch arguments, the value will + be overwritten. + + :param batch_scheduler: The type of scheduler to initialize (e.g., Slurm, PBS, LSF) + :param batch_args: A dictionary of arguments for the scheduler, where the keys + are strings and the values can be either strings or None. This argument is optional + and defaults to None. + :param env_vars: Environment variables for the batch settings, where the keys + are strings and the values can be either strings or None. This argument is + also optional and defaults to None. + :raises ValueError: Raises if the scheduler provided does not exist. 
+ """ + try: + self._batch_scheduler = BatchSchedulerType(batch_scheduler) + """The scheduler type""" + except ValueError: + raise ValueError(f"Invalid scheduler type: {batch_scheduler}") from None + self._arguments = self._get_arguments(batch_args) + """The BatchSettings child class based on scheduler type""" + self.env_vars = env_vars or {} + """The environment configuration""" + + @property + def batch_scheduler(self) -> str: + """Return the scheduler type.""" + return self._batch_scheduler.value + + @property + def batch_args(self) -> BatchArguments: + """Return the BatchArguments child class.""" + return self._arguments + + @property + def env_vars(self) -> StringArgument: + """Return the mapping of attached environment variables.""" + return self._env_vars + + @env_vars.setter + def env_vars(self, value: t.Dict[str, str | None]) -> None: + """Set the environment variables.""" + self._env_vars = copy.deepcopy(value) + + def _get_arguments(self, batch_args: StringArgument | None) -> BatchArguments: + """Map the Scheduler to the BatchArguments. This method should only be + called once during construction. + + :param batch_args: A mapping of arguments names to values to be + used to initialize the arguments + :returns: The appropriate type for the settings instance. + :raises ValueError: An invalid scheduler type was provided. 
+ """ + if self._batch_scheduler == BatchSchedulerType.Slurm: + return SlurmBatchArguments(batch_args) + elif self._batch_scheduler == BatchSchedulerType.Lsf: + return BsubBatchArguments(batch_args) + elif self._batch_scheduler == BatchSchedulerType.Pbs: + return QsubBatchArguments(batch_args) + else: + raise ValueError(f"Invalid scheduler type: {self._batch_scheduler}") + + def format_batch_args(self) -> t.List[str]: + """Get the formatted batch arguments to preview + + :return: formatted batch arguments + """ + return self._arguments.format_batch_args() + + def __str__(self) -> str: # pragma: no-cover + string = f"\nBatch Scheduler: {self.batch_scheduler}{self.batch_args}" + if self.env_vars: + string += f"\nEnvironment variables: \n{fmt_dict(self.env_vars)}" + return string diff --git a/smartsim/settings/launchCommand.py b/smartsim/settings/launch_command.py similarity index 96% rename from smartsim/settings/launchCommand.py rename to smartsim/settings/launch_command.py index 491f01d86..b848e35e1 100644 --- a/smartsim/settings/launchCommand.py +++ b/smartsim/settings/launch_command.py @@ -28,9 +28,7 @@ class LauncherType(Enum): - """Launchers that are supported by - SmartSim. 
- """ + """Launchers supported by SmartSim.""" Dragon = "dragon" Slurm = "slurm" diff --git a/smartsim/settings/launchSettings.py b/smartsim/settings/launch_settings.py similarity index 66% rename from smartsim/settings/launchSettings.py rename to smartsim/settings/launch_settings.py index 14137481d..7b6083022 100644 --- a/smartsim/settings/launchSettings.py +++ b/smartsim/settings/launch_settings.py @@ -44,26 +44,92 @@ ) from .arguments.launch.pals import PalsMpiexecLaunchArguments from .arguments.launch.slurm import SlurmLaunchArguments -from .baseSettings import BaseSettings +from .base_settings import BaseSettings from .common import StringArgument -from .launchCommand import LauncherType +from .launch_command import LauncherType logger = get_logger(__name__) class LaunchSettings(BaseSettings): + """The LaunchSettings class stores launcher configuration settings and is + used to inject launcher-specific behavior into a job. + + LaunchSettings is designed to be extended by a LaunchArguments child class that + corresponds to the launcher provided during initialization. The supported launchers + are Dragon, Slurm, PALS, ALPS, Local, Mpiexec, Mpirun, Orterun, and LSF. Using the + LaunchSettings class, users can: + + - Set the launcher type of a job. + - Configure launch arguments and environment variables. + - Access and modify custom launch arguments. + - Update environment variables. + - Retrieve information associated with the ``LaunchSettings`` object. + - The launcher value (LaunchSettings.launcher). + - The derived LaunchArguments child class (LaunchSettings.launch_args). + - The set environment variables (LaunchSettings.env_vars). + """ + def __init__( self, launcher: t.Union[LauncherType, str], launch_args: StringArgument | None = None, env_vars: StringArgument | None = None, ) -> None: + """Initialize a LaunchSettings instance. + + The "launcher" of SmartSim LaunchSettings will determine the + child type assigned to the LaunchSettings.launch_args attribute. 
+ For example, to configure a job for SLURM, assign LaunchSettings.launcher + to "slurm" or LauncherType.Slurm: + + .. highlight:: python + .. code-block:: python + + srun_settings = LaunchSettings(launcher="slurm") + # OR + srun_settings = LaunchSettings(launcher=LauncherType.Slurm) + + This will assign a SlurmLaunchArguments object to ``srun_settings.launch_args``. + Using the object, users may access the child class functions to set + launch configurations. For example: + + .. highlight:: python + .. code-block:: python + + srun_settings.launch_args.set_nodes(5) + srun_settings.launch_args.set_cpus_per_task(2) + + To set customized launch arguments, use the `set()` function provided by + the LaunchArguments child class. For example: + + .. highlight:: python + .. code-block:: python + + srun_settings.launch_args.set(key="nodes", value="6") + + If the key already exists in the existing launch arguments, the value will + be overwritten. + + :param launcher: The type of launcher to initialize (e.g., Dragon, Slurm, + PALS, ALPS, Local, Mpiexec, Mpirun, Orterun, LSF) + :param launch_args: A dictionary of arguments for the launcher, where the keys + are strings and the values can be either strings or None. This argument is optional + and defaults to None. + :param env_vars: Environment variables for the launch settings, where the keys + are strings and the values can be either strings or None. This argument is + also optional and defaults to None. + :raises ValueError: Raises if the launcher provided does not exist. 
+ """ try: self._launcher = LauncherType(launcher) + """The launcher type""" except ValueError: raise ValueError(f"Invalid launcher type: {launcher}") self._arguments = self._get_arguments(launch_args) + """The LaunchSettings child class based on launcher type""" self.env_vars = env_vars or {} + """The environment configuration""" @property def launcher(self) -> str: @@ -89,7 +155,7 @@ def env_vars(self) -> t.Mapping[str, str | None]: :returns: An environment mapping """ - return copy.deepcopy(self._env_vars) + return self._env_vars @env_vars.setter def env_vars(self, value: dict[str, str | None]) -> None: @@ -108,6 +174,7 @@ def _get_arguments(self, launch_args: StringArgument | None) -> LaunchArguments: :param launch_args: A mapping of arguments names to values to be used to initialize the arguments :returns: The appropriate type for the settings instance. + :raises ValueError: An invalid launcher type was provided. """ if self._launcher == LauncherType.Slurm: return SlurmLaunchArguments(launch_args) diff --git a/smartsim/settings/sgeSettings.py b/smartsim/settings/sge_settings.py similarity index 100% rename from smartsim/settings/sgeSettings.py rename to smartsim/settings/sge_settings.py diff --git a/smartsim/wlm/pbs.py b/smartsim/wlm/pbs.py index 5b559c1e6..62f5a69a0 100644 --- a/smartsim/wlm/pbs.py +++ b/smartsim/wlm/pbs.py @@ -31,7 +31,7 @@ from smartsim.error.errors import LauncherError, SmartSimError -from .._core.launcher.pbs.pbsCommands import qstat +from .._core.launcher.pbs.pbs_commands import qstat def get_hosts() -> t.List[str]: diff --git a/smartsim/wlm/slurm.py b/smartsim/wlm/slurm.py index 58893d2ba..e1b24b906 100644 --- a/smartsim/wlm/slurm.py +++ b/smartsim/wlm/slurm.py @@ -29,9 +29,9 @@ import typing as t from shutil import which -from .._core.launcher.slurm.slurmCommands import salloc, scancel, scontrol, sinfo -from .._core.launcher.slurm.slurmParser import parse_salloc, parse_salloc_error -from .._core.launcher.util.launcherUtil import 
ComputeNode, Partition +from .._core.launcher.slurm.slurm_commands import salloc, scancel, scontrol, sinfo +from .._core.launcher.slurm.slurm_parser import parse_salloc, parse_salloc_error +from .._core.launcher.util.launcher_util import ComputeNode, Partition from ..error import ( AllocationError, LauncherError, diff --git a/tests/_legacy/on_wlm/test_dragon.py b/tests/_legacy/on_wlm/test_dragon.py index 56264c1e1..b685b6502 100644 --- a/tests/_legacy/on_wlm/test_dragon.py +++ b/tests/_legacy/on_wlm/test_dragon.py @@ -26,7 +26,7 @@ import pytest from smartsim import Experiment -from smartsim._core.launcher.dragon.dragonLauncher import DragonLauncher +from smartsim._core.launcher.dragon.dragon_launcher import DragonLauncher from smartsim.status import JobStatus # retrieved from pytest fixtures diff --git a/tests/_legacy/on_wlm/test_preview_wlm.py b/tests/_legacy/on_wlm/test_preview_wlm.py index bea865359..66705669e 100644 --- a/tests/_legacy/on_wlm/test_preview_wlm.py +++ b/tests/_legacy/on_wlm/test_preview_wlm.py @@ -31,7 +31,7 @@ from jinja2.filters import FILTERS from smartsim import Experiment -from smartsim._core import Manifest, previewrenderer +from smartsim._core import Manifest, preview_renderer from smartsim._core.config import CONFIG from smartsim.database import FeatureStore from smartsim.settings import QsubBatchSettings, RunSettings @@ -102,7 +102,7 @@ def test_preview_wlm_run_commands_cluster_feature_store_model( preview_manifest = Manifest(feature_store, smartsim_model) # Execute preview method - output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="debug") # Evaluate output if pytest.test_launcher != "dragon": @@ -132,7 +132,7 @@ def test_preview_model_on_wlm(fileutils, test_dir, wlmutils): preview_manifest = Manifest(M1, M2) # Execute preview method - output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + output = 
preview_renderer.render(exp, preview_manifest, verbosity_level="debug") if pytest.test_launcher != "dragon": assert "Run Command" in output @@ -166,7 +166,7 @@ def test_preview_batch_model(fileutils, test_dir, wlmutils): preview_manifest = Manifest(model) # Execute preview method - output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="debug") assert "Batch Launch: True" in output assert "Batch Command" in output @@ -202,7 +202,7 @@ def test_preview_batch_ensemble(fileutils, test_dir, wlmutils): preview_manifest = Manifest(ensemble) # Execute preview method - output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="debug") assert "Batch Launch: True" in output assert "Batch Command" in output @@ -261,7 +261,7 @@ def test_preview_launch_command(test_dir, wlmutils, choose_host): ) # Execute preview method - output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="debug") assert "feature store" in output assert "echo-spam" in output @@ -307,7 +307,7 @@ def test_preview_batch_launch_command(fileutils, test_dir, wlmutils): preview_manifest = Manifest(feature_store, model) # Execute preview method - output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="debug") # Evaluate output assert "Batch Launch: True" in output @@ -354,7 +354,7 @@ def test_ensemble_batch(test_dir, wlmutils): preview_manifest = Manifest(fs, ml_model, ensemble) # Call preview renderer for testing output - output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="debug") # Evaluate output assert "Client Configuration" 
in output @@ -405,7 +405,7 @@ def test_preview_ensemble_fs_script(wlmutils, test_dir): preview_manifest = Manifest(ensemble, feature_store, feature_store_2) # Call preview renderer for testing output - output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="debug") # Evaluate output assert "Torch Script" in output diff --git a/tests/_legacy/on_wlm/test_slurm_commands.py b/tests/_legacy/on_wlm/test_slurm_commands.py index 8411be6e0..b44d30965 100644 --- a/tests/_legacy/on_wlm/test_slurm_commands.py +++ b/tests/_legacy/on_wlm/test_slurm_commands.py @@ -25,7 +25,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import pytest -from smartsim._core.launcher.slurm.slurmCommands import * +from smartsim._core.launcher.slurm.slurm_commands import * from smartsim.error.errors import LauncherError # retrieved from pytest fixtures diff --git a/tests/_legacy/test_collectors.py b/tests/_legacy/test_collectors.py index fdc8f6780..a474632c2 100644 --- a/tests/_legacy/test_collectors.py +++ b/tests/_legacy/test_collectors.py @@ -29,7 +29,7 @@ import pytest -import smartsim._core.entrypoints.telemetrymonitor +import smartsim._core.entrypoints.telemetry_monitor import smartsim._core.utils.telemetry.collector from conftest import MockCollectorEntityFunc, MockSink from smartsim._core.utils.telemetry.collector import ( diff --git a/tests/_legacy/test_controller.py b/tests/_legacy/test_controller.py index 19325c933..ad0c98fe8 100644 --- a/tests/_legacy/test_controller.py +++ b/tests/_legacy/test_controller.py @@ -30,8 +30,8 @@ from smartsim._core.control.controller import Controller from smartsim._core.launcher.step import Step +from smartsim.builders.ensemble import Ensemble from smartsim.database.orchestrator import FeatureStore -from smartsim.entity.ensemble import Ensemble from smartsim.settings.slurmSettings import SbatchSettings, SrunSettings controller = 
Controller() diff --git a/tests/_legacy/test_controller_errors.py b/tests/_legacy/test_controller_errors.py index 60b757f0b..5ae05d70a 100644 --- a/tests/_legacy/test_controller_errors.py +++ b/tests/_legacy/test_controller_errors.py @@ -29,10 +29,10 @@ from smartsim._core.control import Controller, Manifest from smartsim._core.launcher.step import Step -from smartsim._core.launcher.step.dragonStep import DragonStep +from smartsim._core.launcher.step.dragon_step import DragonStep +from smartsim.builders.ensemble import Ensemble from smartsim.database import FeatureStore from smartsim.entity import Application -from smartsim.entity.ensemble import Ensemble from smartsim.error import SmartSimError, SSUnsupportedError from smartsim.error.errors import SSUnsupportedError from smartsim.settings import RunSettings, SrunSettings diff --git a/tests/_legacy/test_dragon_client.py b/tests/_legacy/test_dragon_client.py index 80257b610..054f6f0d1 100644 --- a/tests/_legacy/test_dragon_client.py +++ b/tests/_legacy/test_dragon_client.py @@ -30,7 +30,7 @@ import pytest -from smartsim._core.launcher.step.dragonStep import DragonBatchStep, DragonStep +from smartsim._core.launcher.step.dragon_step import DragonBatchStep, DragonStep from smartsim.settings import DragonRunSettings from smartsim.settings.slurmSettings import SbatchSettings @@ -39,8 +39,8 @@ import smartsim._core.entrypoints.dragon_client as dragon_client -from smartsim._core.schemas.dragonRequests import * -from smartsim._core.schemas.dragonResponses import * +from smartsim._core.schemas.dragon_requests import * +from smartsim._core.schemas.dragon_responses import * @pytest.fixture diff --git a/tests/_legacy/test_dragon_launcher.py b/tests/_legacy/test_dragon_launcher.py index 4fe8bf71b..77f094b7d 100644 --- a/tests/_legacy/test_dragon_launcher.py +++ b/tests/_legacy/test_dragon_launcher.py @@ -39,17 +39,17 @@ import smartsim._core.config from smartsim._core._cli.scripts.dragon_install import create_dotenv from 
smartsim._core.config.config import get_config -from smartsim._core.launcher.dragon.dragonLauncher import ( +from smartsim._core.launcher.dragon.dragon_launcher import ( DragonConnector, DragonLauncher, ) -from smartsim._core.launcher.dragon.dragonSockets import ( +from smartsim._core.launcher.dragon.dragon_sockets import ( get_authenticator, get_secure_socket, ) -from smartsim._core.launcher.step.dragonStep import DragonBatchStep, DragonStep -from smartsim._core.schemas.dragonRequests import DragonBootstrapRequest -from smartsim._core.schemas.dragonResponses import ( +from smartsim._core.launcher.step.dragon_step import DragonBatchStep, DragonStep +from smartsim._core.schemas.dragon_requests import DragonBootstrapRequest +from smartsim._core.schemas.dragon_responses import ( DragonHandshakeResponse, DragonRunResponse, ) diff --git a/tests/_legacy/test_dragon_run_policy.py b/tests/_legacy/test_dragon_run_policy.py index 1d8d069fa..5da84bf30 100644 --- a/tests/_legacy/test_dragon_run_policy.py +++ b/tests/_legacy/test_dragon_run_policy.py @@ -28,7 +28,7 @@ import pytest -from smartsim._core.launcher.step.dragonStep import DragonBatchStep, DragonStep +from smartsim._core.launcher.step.dragon_step import DragonBatchStep, DragonStep from smartsim.settings.dragonRunSettings import DragonRunSettings from smartsim.settings.slurmSettings import SbatchSettings @@ -36,7 +36,7 @@ from dragon.infrastructure.policy import Policy import smartsim._core.entrypoints.dragon as drg - from smartsim._core.launcher.dragon.dragonBackend import DragonBackend + from smartsim._core.launcher.dragon.dragon_backend import DragonBackend dragon_loaded = True except: @@ -45,8 +45,8 @@ # The tests in this file belong to the group_b group pytestmark = pytest.mark.group_b -from smartsim._core.schemas.dragonRequests import * -from smartsim._core.schemas.dragonResponses import * +from smartsim._core.schemas.dragon_requests import * +from smartsim._core.schemas.dragon_responses import * @pytest.fixture 
diff --git a/tests/_legacy/test_dragon_run_request.py b/tests/_legacy/test_dragon_run_request.py index 3228bdee7..f5fdc73a0 100644 --- a/tests/_legacy/test_dragon_run_request.py +++ b/tests/_legacy/test_dragon_run_request.py @@ -44,13 +44,13 @@ dragon_loaded = False from smartsim._core.config import CONFIG -from smartsim._core.schemas.dragonRequests import * -from smartsim._core.schemas.dragonResponses import * +from smartsim._core.schemas.dragon_requests import * +from smartsim._core.schemas.dragon_responses import * from smartsim._core.utils.helpers import create_short_id_str from smartsim.status import TERMINAL_STATUSES, InvalidJobStatus, JobStatus if t.TYPE_CHECKING: - from smartsim._core.launcher.dragon.dragonBackend import ( + from smartsim._core.launcher.dragon.dragon_backend import ( DragonBackend, ProcessGroupInfo, ) @@ -149,7 +149,7 @@ def get_mock_backend( **{"System.return_value": system_mock, "Node.return_value": node_mock} ), ) - from smartsim._core.launcher.dragon.dragonBackend import DragonBackend + from smartsim._core.launcher.dragon.dragon_backend import DragonBackend dragon_backend = DragonBackend(pid=99999) monkeypatch.setattr( @@ -167,7 +167,7 @@ def set_mock_group_infos( process_mock.configure_mock(**{"returncode": 0}) dragon_mock.configure_mock(**{"native.process.Process.return_value": process_mock}) monkeypatch.setitem(sys.modules, "dragon", dragon_mock) - from smartsim._core.launcher.dragon.dragonBackend import ProcessGroupInfo + from smartsim._core.launcher.dragon.dragon_backend import ProcessGroupInfo running_group = MagicMock(status="Running") error_group = MagicMock(status="Error") diff --git a/tests/_legacy/test_dragon_run_request_nowlm.py b/tests/_legacy/test_dragon_run_request_nowlm.py index afd25aa9d..2b5526c69 100644 --- a/tests/_legacy/test_dragon_run_request_nowlm.py +++ b/tests/_legacy/test_dragon_run_request_nowlm.py @@ -30,8 +30,8 @@ # The tests in this file belong to the group_a group pytestmark = pytest.mark.group_a -from 
smartsim._core.schemas.dragonRequests import * -from smartsim._core.schemas.dragonResponses import * +from smartsim._core.schemas.dragon_requests import * +from smartsim._core.schemas.dragon_responses import * def test_run_request_with_null_policy(monkeypatch: pytest.MonkeyPatch) -> None: diff --git a/tests/_legacy/test_dragon_step.py b/tests/_legacy/test_dragon_step.py index 19f408e0b..17279a33c 100644 --- a/tests/_legacy/test_dragon_step.py +++ b/tests/_legacy/test_dragon_step.py @@ -32,7 +32,7 @@ import pytest -from smartsim._core.launcher.step.dragonStep import DragonBatchStep, DragonStep +from smartsim._core.launcher.step.dragon_step import DragonBatchStep, DragonStep from smartsim.settings import DragonRunSettings from smartsim.settings.pbsSettings import QsubBatchSettings from smartsim.settings.slurmSettings import SbatchSettings @@ -41,8 +41,8 @@ pytestmark = pytest.mark.group_a -from smartsim._core.schemas.dragonRequests import * -from smartsim._core.schemas.dragonResponses import * +from smartsim._core.schemas.dragon_requests import * +from smartsim._core.schemas.dragon_responses import * @pytest.fixture diff --git a/tests/_legacy/test_ensemble.py b/tests/_legacy/test_ensemble.py index 86146c8e4..62c7d8d4f 100644 --- a/tests/_legacy/test_ensemble.py +++ b/tests/_legacy/test_ensemble.py @@ -30,7 +30,8 @@ import pytest from smartsim import Experiment -from smartsim.entity import Application, Ensemble +from smartsim.builders import Ensemble +from smartsim.entity import Application from smartsim.error import EntityExistsError, SSUnsupportedError, UserStrategyError from smartsim.settings import RunSettings diff --git a/tests/_legacy/test_lsf_parser.py b/tests/_legacy/test_lsf_parser.py index abd27eb5a..0234ee4e9 100644 --- a/tests/_legacy/test_lsf_parser.py +++ b/tests/_legacy/test_lsf_parser.py @@ -26,7 +26,7 @@ import pytest -from smartsim._core.launcher.lsf import lsfParser +from smartsim._core.launcher.lsf import lsf_parser # The tests in this file belong 
to the group_b group pytestmark = pytest.mark.group_b @@ -37,7 +37,7 @@ def test_parse_bsub(): output = "Job <12345> is submitted to queue ." - step_id = lsfParser.parse_bsub(output) + step_id = lsf_parser.parse_bsub(output) assert step_id == "12345" @@ -57,15 +57,15 @@ def test_parse_bsub_error(): "Not a member of the specified project: . You are currently a member of the following projects:\n" "ABC123" ) - parsed_error = lsfParser.parse_bsub_error(output) + parsed_error = lsf_parser.parse_bsub_error(output) assert error == parsed_error output = "NOT A PARSABLE ERROR\nBUT STILL AN ERROR MESSAGE" - parsed_error = lsfParser.parse_bsub_error(output) + parsed_error = lsf_parser.parse_bsub_error(output) assert output == parsed_error output = " \n" - parsed_error = lsfParser.parse_bsub_error(output) + parsed_error = lsf_parser.parse_bsub_error(output) assert parsed_error == "LSF run error" @@ -79,7 +79,7 @@ def test_parse_bsub_nodes(fileutils): "1234567 smartsim RUN batch login1 batch3:a01n02:a01n02:a01n02:a01n02:a01n02:a01n06:a01n06:a01n06:a01n06:a01n06 SmartSim Jul 24 12:53\n" ) nodes = ["batch3", "a01n02", "a01n06"] - parsed_nodes = lsfParser.parse_bjobs_nodes(output) + parsed_nodes = lsf_parser.parse_bjobs_nodes(output) assert nodes == parsed_nodes @@ -98,7 +98,7 @@ def test_parse_max_step_id(): " 4 0 1 various various 137 Killed\n" " 5 0 3 various various 137 Killed\n" ) - parsed_id = lsfParser.parse_max_step_id_from_jslist(output) + parsed_id = lsf_parser.parse_max_step_id_from_jslist(output) assert parsed_id == "9" @@ -121,6 +121,6 @@ def test_parse_jslist(): " 1 1 4 various various 0 Running\n" " 11 1 1 1 1 1 Running\n" ) - parsed_result = lsfParser.parse_jslist_stepid(output, "1") + parsed_result = lsf_parser.parse_jslist_stepid(output, "1") result = ("Running", "0") assert parsed_result == result diff --git a/tests/_legacy/test_model.py b/tests/_legacy/test_model.py index f32a27a07..5adf8070f 100644 --- a/tests/_legacy/test_model.py +++ 
b/tests/_legacy/test_model.py @@ -31,7 +31,8 @@ from smartsim import Experiment from smartsim._core.control.manifest import LaunchedManifestBuilder from smartsim._core.launcher.step import SbatchStep, SrunStep -from smartsim.entity import Application, Ensemble +from smartsim.builders import Ensemble +from smartsim.entity import Application from smartsim.error import EntityExistsError, SSUnsupportedError from smartsim.settings import RunSettings, SbatchSettings, SrunSettings from smartsim.settings.mpiSettings import _BaseMPISettings diff --git a/tests/_legacy/test_output_files.py b/tests/_legacy/test_output_files.py index 713001feb..55ecfd90a 100644 --- a/tests/_legacy/test_output_files.py +++ b/tests/_legacy/test_output_files.py @@ -33,9 +33,9 @@ from smartsim._core.config import CONFIG from smartsim._core.control.controller import Controller, _AnonymousBatchJob from smartsim._core.launcher.step import Step +from smartsim.builders.ensemble import Ensemble from smartsim.database.orchestrator import FeatureStore from smartsim.entity.application import Application -from smartsim.entity.ensemble import Ensemble from smartsim.settings.base import RunSettings from smartsim.settings.slurmSettings import SbatchSettings, SrunSettings diff --git a/tests/_legacy/test_pals_settings.py b/tests/_legacy/test_pals_settings.py index 8bc23d14d..4fcf7cae3 100644 --- a/tests/_legacy/test_pals_settings.py +++ b/tests/_legacy/test_pals_settings.py @@ -33,7 +33,7 @@ import smartsim._core.config.config from smartsim._core.launcher import PBSLauncher -from smartsim._core.launcher.step.mpiStep import MpiexecStep +from smartsim._core.launcher.step.mpi_step import MpiexecStep from smartsim.error import SSUnsupportedError from smartsim.settings import PalsMpiexecSettings diff --git a/tests/_legacy/test_pbs_parser.py b/tests/_legacy/test_pbs_parser.py index ae01ffb19..b5b708175 100644 --- a/tests/_legacy/test_pbs_parser.py +++ b/tests/_legacy/test_pbs_parser.py @@ -28,7 +28,7 @@ import pytest 
-from smartsim._core.launcher.pbs import pbsParser +from smartsim._core.launcher.pbs import pbs_parser # The tests in this file belong to the group_b group pytestmark = pytest.mark.group_b @@ -39,14 +39,14 @@ def test_parse_qsub(): output = "12345.sdb" - step_id = pbsParser.parse_qsub(output) + step_id = pbs_parser.parse_qsub(output) assert step_id == "12345.sdb" def test_parse_qsub_error(): output = "qsub: Unknown queue" error = "Unknown queue" - parsed_error = pbsParser.parse_qsub_error(output) + parsed_error = pbs_parser.parse_qsub_error(output) assert error == parsed_error @@ -58,7 +58,7 @@ def test_parse_qstat_nodes(fileutils): file_path = fileutils.get_test_conf_path("qstat.json") output = Path(file_path).read_text() nodes = ["server_1", "server_2"] - parsed_nodes = pbsParser.parse_qstat_nodes(output) + parsed_nodes = pbs_parser.parse_qstat_nodes(output) assert nodes == parsed_nodes @@ -70,7 +70,7 @@ def test_parse_qstat_status(): "1289903.sdb jobname username 00:00:00 R queue\n" ) status = "R" - parsed_status = pbsParser.parse_qstat_jobid(output, "1289903.sdb") + parsed_status = pbs_parser.parse_qstat_jobid(output, "1289903.sdb") assert status == parsed_status @@ -80,7 +80,7 @@ def test_parse_qstat_status_not_found(): "---------------- ---------------- ---------------- -------- - -----\n" "1289903.sdb jobname username 00:00:00 R queue\n" ) - parsed_status = pbsParser.parse_qstat_jobid(output, "9999999.sdb") + parsed_status = pbs_parser.parse_qstat_jobid(output, "9999999.sdb") assert parsed_status is None @@ -90,5 +90,5 @@ def test_parse_qstat_status_json(fileutils): file_path = fileutils.get_test_conf_path("qstat.json") output = Path(file_path).read_text() status = "R" - parsed_status = pbsParser.parse_qstat_jobid_json(output, "16705.sdb") + parsed_status = pbs_parser.parse_qstat_jobid_json(output, "16705.sdb") assert status == parsed_status diff --git a/tests/_legacy/test_preview.py b/tests/_legacy/test_preview.py index 9ce46c315..82d443fb3 100644 --- 
a/tests/_legacy/test_preview.py +++ b/tests/_legacy/test_preview.py @@ -36,7 +36,7 @@ import smartsim import smartsim._core._cli.utils as _utils from smartsim import Experiment -from smartsim._core import Manifest, previewrenderer +from smartsim._core import Manifest, preview_renderer from smartsim._core.config import CONFIG from smartsim._core.control.controller import Controller from smartsim._core.control.job import Job @@ -130,7 +130,7 @@ def test_get_ifname_filter(): loader = jinja2.DictLoader(template_dict) env = jinja2.Environment(loader=loader, autoescape=True) - env.filters["get_ifname"] = previewrenderer.get_ifname + env.filters["get_ifname"] = preview_renderer.get_ifname t = env.get_template("ts") @@ -147,7 +147,7 @@ def test_get_fstype_filter(): template_dict = {"ts": template_str} loader = jinja2.DictLoader(template_dict) env = jinja2.Environment(loader=loader, autoescape=True) - env.filters["get_fstype"] = previewrenderer.get_fstype + env.filters["get_fstype"] = preview_renderer.get_fstype t = env.get_template("ts") output = t.render(config=CONFIG.database_cli) @@ -183,7 +183,7 @@ def test_experiment_preview(test_dir, wlmutils): exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) # Execute method for template rendering - output = previewrenderer.render(exp, verbosity_level="debug") + output = preview_renderer.render(exp, verbosity_level="debug") # Evaluate output summary_lines = output.split("\n") @@ -203,7 +203,7 @@ def test_experiment_preview_properties(test_dir, wlmutils): exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) # Execute method for template rendering - output = previewrenderer.render(exp, verbosity_level="debug") + output = preview_renderer.render(exp, verbosity_level="debug") # Evaluate output summary_lines = output.split("\n") @@ -232,7 +232,7 @@ def test_feature_store_preview_render(test_dir, wlmutils, choose_host): preview_manifest = Manifest(feature_store) # Execute method for template rendering 
- output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="debug") # Evaluate output assert "Feature Store Identifier" in output @@ -268,7 +268,7 @@ def test_preview_to_file(test_dir, wlmutils): path = pathlib.Path(test_dir) / filename # Execute preview method exp.preview( - output_format=previewrenderer.Format.PLAINTEXT, + output_format=preview_renderer.Format.PLAINTEXT, output_filename=str(path), verbosity_level="debug", ) @@ -299,7 +299,7 @@ def test_model_preview(test_dir, wlmutils): preview_manifest = Manifest(hello_world_model, spam_eggs_model) # Execute preview method - rendered_preview = previewrenderer.render( + rendered_preview = preview_renderer.render( exp, preview_manifest, verbosity_level="debug" ) @@ -341,7 +341,7 @@ def test_model_preview_properties(test_dir, wlmutils): preview_manifest = Manifest(hello_world_model, spam_eggs_model) # Execute preview method - rendered_preview = previewrenderer.render( + rendered_preview = preview_renderer.render( exp, preview_manifest, verbosity_level="debug" ) @@ -400,7 +400,7 @@ def test_preview_model_tagged_files(fileutils, test_dir, wlmutils): preview_manifest = Manifest(hello_world_model) # Execute preview method - rendered_preview = previewrenderer.render( + rendered_preview = preview_renderer.render( exp, preview_manifest, verbosity_level="debug" ) @@ -431,7 +431,7 @@ def test_model_key_prefixing(test_dir, wlmutils): preview_manifest = Manifest(fs, model) # Execute preview method - output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="debug") # Evaluate output assert "Key Prefix" in output @@ -469,7 +469,7 @@ def test_ensembles_preview(test_dir, wlmutils): ) preview_manifest = Manifest(ensemble) - output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + output = 
preview_renderer.render(exp, preview_manifest, verbosity_level="debug") # Evaluate output assert "Ensemble Name" in output @@ -500,7 +500,7 @@ def test_preview_models_and_ensembles(test_dir, wlmutils): exp.generate(hello_world_model, spam_eggs_model, hello_ensemble) preview_manifest = Manifest(hello_world_model, spam_eggs_model, hello_ensemble) - output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="debug") # Evaluate output assert "Models" in output @@ -541,7 +541,7 @@ def test_ensemble_preview_client_configuration(test_dir, wlmutils): preview_manifest = Manifest(fs, ml_model, ensemble) # Call preview renderer for testing output - output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="debug") # Evaluate output assert "Client Configuration" in output @@ -584,7 +584,7 @@ def test_ensemble_preview_client_configuration_multifs(test_dir, wlmutils): preview_manifest = Manifest(fs1, fs2, ml_model, ensemble) # Call preview renderer for testing output - output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="debug") # Evaluate output assert "Client Configuration" in output @@ -630,7 +630,7 @@ def test_ensemble_preview_attached_files(fileutils, test_dir, wlmutils): preview_manifest = Manifest(ensemble) # Call preview renderer for testing output - output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="debug") # Evaluate output assert "Tagged Files for Model Configuration" in output @@ -745,7 +745,7 @@ def test_preview_colocated_fs_model_ensemble(fileutils, test_dir, wlmutils, mlut preview_manifest = Manifest(colo_ensemble) # Execute preview method - output = 
previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="debug") # Evaluate output assert "Models" in output @@ -859,7 +859,7 @@ def test_preview_colocated_fs_script_ensemble(fileutils, test_dir, wlmutils, mlu preview_manifest = Manifest(colo_ensemble) # Execute preview method - output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="debug") # Evaluate output assert "Torch Scripts" in output @@ -882,7 +882,7 @@ def test_preview_active_infrastructure(wlmutils, test_dir, preview_object): exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) # Execute method for template rendering - output = previewrenderer.render( + output = preview_renderer.render( exp, active_fsjobs=preview_object, verbosity_level="debug" ) @@ -925,7 +925,7 @@ def test_preview_orch_active_infrastructure( preview_manifest = Manifest(feature_store2, feature_store3) # Execute method for template rendering - output = previewrenderer.render( + output = preview_renderer.render( exp, preview_manifest, active_fsjobs=preview_object, verbosity_level="debug" ) @@ -955,7 +955,7 @@ def test_preview_multifs_active_infrastructure( ) # Execute method for template rendering - output = previewrenderer.render( + output = preview_renderer.render( exp, active_fsjobs=preview_object_multifs, verbosity_level="debug" ) @@ -999,7 +999,7 @@ def test_preview_active_infrastructure_feature_store_error( preview_manifest = Manifest(orc) # Execute method for template rendering - output = previewrenderer.render( + output = preview_renderer.render( exp, preview_manifest, active_fsjobs=active_fsjobs, verbosity_level="debug" ) @@ -1059,7 +1059,7 @@ def test_verbosity_info_ensemble(test_dir, wlmutils): exp.generate(hello_world_model, spam_eggs_model, hello_ensemble) preview_manifest = Manifest(hello_world_model, spam_eggs_model, 
hello_ensemble) - output = previewrenderer.render(exp, preview_manifest, verbosity_level="info") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="info") assert "Executable" not in output assert "Executable Arguments" not in output @@ -1163,7 +1163,7 @@ def test_verbosity_info_colocated_fs_model_ensemble( preview_manifest = Manifest(colo_ensemble) # Execute preview method - output = previewrenderer.render(exp, preview_manifest, verbosity_level="info") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="info") assert "Outgoing Key Collision Prevention (Key Prefixing)" not in output assert "Devices Per Node" not in output @@ -1186,7 +1186,7 @@ def test_verbosity_info_feature_store(test_dir, wlmutils, choose_host): preview_manifest = Manifest(feature_store) # Execute method for template rendering - output = previewrenderer.render(exp, preview_manifest, verbosity_level="info") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="info") # Evaluate output assert "Executable" not in output @@ -1220,7 +1220,7 @@ def test_verbosity_info_ensemble(test_dir, wlmutils): preview_manifest = Manifest(fs, ml_model, ensemble) # Call preview renderer for testing output - output = previewrenderer.render(exp, preview_manifest, verbosity_level="info") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="info") # Evaluate output assert "Outgoing Key Collision Prevention (Key Prefixing)" in output @@ -1289,7 +1289,7 @@ def test_preview_colocated_fs_singular_model(wlmutils, test_dir): preview_manifest = Manifest(model_1, model_2) # Call preview renderer for testing output - output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="debug") assert "model_1" in output assert "model_2" in output @@ -1326,7 +1326,7 @@ def test_preview_fs_script(wlmutils, test_dir): preview_manifest = Manifest(model_instance) 
# Call preview renderer for testing output - output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + output = preview_renderer.render(exp, preview_manifest, verbosity_level="debug") # Evaluate output assert "Torch Script" in output diff --git a/tests/_legacy/test_sge_batch_settings.py b/tests/_legacy/test_sge_batch_settings.py index fa40b4b00..f81bee1ea 100644 --- a/tests/_legacy/test_sge_batch_settings.py +++ b/tests/_legacy/test_sge_batch_settings.py @@ -29,7 +29,7 @@ import pytest from smartsim import Experiment -from smartsim._core.launcher.sge.sgeParser import parse_qstat_jobid_xml +from smartsim._core.launcher.sge.sge_parser import parse_qstat_jobid_xml from smartsim.error import SSConfigError from smartsim.settings import SgeQsubBatchSettings from smartsim.settings.mpiSettings import _BaseMPISettings diff --git a/tests/_legacy/test_slurm_parser.py b/tests/_legacy/test_slurm_parser.py index a49d9b198..e73ec7ed7 100644 --- a/tests/_legacy/test_slurm_parser.py +++ b/tests/_legacy/test_slurm_parser.py @@ -26,7 +26,7 @@ import pytest -from smartsim._core.launcher.slurm import slurmParser +from smartsim._core.launcher.slurm import slurm_parser # The tests in this file belong to the group_b group pytestmark = pytest.mark.group_b @@ -41,7 +41,7 @@ def test_parse_salloc(): "salloc: Waiting for resource configuration\n" "salloc: Nodes nid00116 are ready for job" ) - alloc_id = slurmParser.parse_salloc(output) + alloc_id = slurm_parser.parse_salloc(output) assert alloc_id == "118568" @@ -54,7 +54,7 @@ def test_parse_salloc_extra(): "salloc: Waiting for resource configuration\n" "salloc: Nodes prod76-0006 are ready for job\n" ) - alloc_id = slurmParser.parse_salloc(output) + alloc_id = slurm_parser.parse_salloc(output) assert alloc_id == "22942" @@ -64,14 +64,14 @@ def test_parse_salloc_high(): "salloc: Waiting for resource configuration\n" "salloc: Nodes nid00034 are ready for job\n" ) - alloc_id = slurmParser.parse_salloc(output) + alloc_id = 
slurm_parser.parse_salloc(output) assert alloc_id == "29917893" def test_parse_salloc_error(): output = "salloc: error: Job submit/allocate failed: Job dependency problem" error = "Job submit/allocate failed: Job dependency problem" - parsed_error = slurmParser.parse_salloc_error(output) + parsed_error = slurm_parser.parse_salloc_error(output) assert error == parsed_error @@ -81,7 +81,7 @@ def test_parse_salloc_error_2(): "Try 'salloc --help' for more information\n" ) error = "unrecognized option '--no-a-option'" - parsed_error = slurmParser.parse_salloc_error(output) + parsed_error = slurm_parser.parse_salloc_error(output) assert error == parsed_error @@ -93,7 +93,7 @@ def test_parse_salloc_error_3(): "\nsalloc: error: Job submit/allocate failed: Invalid node name specified\n" ) error = "Job submit/allocate failed: Invalid node name specified" - parsed_error = slurmParser.parse_salloc_error(output) + parsed_error = slurm_parser.parse_salloc_error(output) assert error == parsed_error @@ -103,7 +103,7 @@ def test_parse_salloc_error_4(): "salloc: error: Job submit/allocate failed: Unspecified error\n" ) error = "No hardware architecture specified (-C)!" 
- parsed_error = slurmParser.parse_salloc_error(output) + parsed_error = slurm_parser.parse_salloc_error(output) assert error == parsed_error @@ -116,7 +116,7 @@ def test_parse_sstat_nodes(): """ output = "118594.extern|nid00028|38671|\n" "118594.0|nid00028|38703|" nodes = ["nid00028"] - parsed_nodes = slurmParser.parse_sstat_nodes(output, "118594") + parsed_nodes = slurm_parser.parse_sstat_nodes(output, "118594") assert nodes == parsed_nodes @@ -126,7 +126,7 @@ def test_parse_sstat_nodes_1(): """ output = "22942.0|prod76-0006|354345|" nodes = ["prod76-0006"] - parsed_nodes = slurmParser.parse_sstat_nodes(output, "22942.0") + parsed_nodes = slurm_parser.parse_sstat_nodes(output, "22942.0") assert nodes == parsed_nodes @@ -136,7 +136,7 @@ def test_parse_sstat_nodes_2(): """ output = "29917893.extern|nid00034|44860|\n" "29917893.0|nid00034|44887|\n" nodes = ["nid00034"] - parsed_nodes = slurmParser.parse_sstat_nodes(output, "29917893.0") + parsed_nodes = slurm_parser.parse_sstat_nodes(output, "29917893.0") assert nodes == parsed_nodes @@ -152,7 +152,7 @@ def test_parse_sstat_nodes_3(): "29917893.2|nid00034|45174|\n" ) nodes = ["nid00034"] - parsed_nodes = slurmParser.parse_sstat_nodes(output, "29917893.2") + parsed_nodes = slurm_parser.parse_sstat_nodes(output, "29917893.2") assert nodes == parsed_nodes @@ -171,7 +171,7 @@ def test_parse_sstat_nodes_4(): "30000.2|nid00036|45174,32435|\n" ) nodes = set(["nid00034", "nid00035", "nid00036"]) - parsed_nodes = set(slurmParser.parse_sstat_nodes(output, "30000")) + parsed_nodes = set(slurm_parser.parse_sstat_nodes(output, "30000")) assert nodes == parsed_nodes @@ -190,7 +190,7 @@ def test_parse_sstat_nodes_4(): "30000.2|nid00036|45174,32435|\n" ) nodes = set(["nid00034", "nid00035", "nid00036"]) - parsed_nodes = set(slurmParser.parse_sstat_nodes(output, "30000")) + parsed_nodes = set(slurm_parser.parse_sstat_nodes(output, "30000")) assert nodes == parsed_nodes @@ -206,7 +206,7 @@ def test_parse_sstat_nodes_5(): 
"29917893.2|nid00034|45174|\n" ) nodes = ["nid00034"] - parsed_nodes = slurmParser.parse_sstat_nodes(output, "29917893.2") + parsed_nodes = slurm_parser.parse_sstat_nodes(output, "29917893.2") assert nodes == parsed_nodes @@ -221,7 +221,7 @@ def test_parse_sacct_step_id(): "m2-119225.1|119225.1|" ) step_id = "119225.0" - parsed_step_id = slurmParser.parse_step_id_from_sacct(output, "m1-119225.0") + parsed_step_id = slurm_parser.parse_step_id_from_sacct(output, "m1-119225.0") assert step_id == parsed_step_id @@ -235,7 +235,7 @@ def test_parse_sacct_step_id_2(): "n1-119225.3|119225.3|" ) step_id = "119225.2" - parsed_step_id = slurmParser.parse_step_id_from_sacct( + parsed_step_id = slurm_parser.parse_step_id_from_sacct( output, "featurestore_0-119225.2" ) assert step_id == parsed_step_id @@ -251,7 +251,7 @@ def test_parse_sacct_step_id_2(): "cti_dlaunch1.0|962333.3|" ) step_id = "962333.1" - parsed_step_id = slurmParser.parse_step_id_from_sacct(output, "python-962333.1") + parsed_step_id = slurm_parser.parse_step_id_from_sacct(output, "python-962333.1") assert step_id == parsed_step_id @@ -261,7 +261,7 @@ def test_parse_sacct_status(): """ output = "29917893.2|COMPLETED|0:0|\n" status = ("COMPLETED", "0") - parsed_status = slurmParser.parse_sacct(output, "29917893.2") + parsed_status = slurm_parser.parse_sacct(output, "29917893.2") assert status == parsed_status @@ -271,7 +271,7 @@ def test_parse_sacct_status_1(): """ output = "22999.0|FAILED|1:0|\n" status = ("FAILED", "1") - parsed_status = slurmParser.parse_sacct(output, "22999.0") + parsed_status = slurm_parser.parse_sacct(output, "22999.0") assert status == parsed_status @@ -281,5 +281,5 @@ def test_parse_sacct_status_2(): """ output = "22999.10|COMPLETED|0:0|\n22999.1|FAILED|1:0|\n" status = ("FAILED", "1") - parsed_status = slurmParser.parse_sacct(output, "22999.1") + parsed_status = slurm_parser.parse_sacct(output, "22999.1") assert status == parsed_status diff --git a/tests/_legacy/test_slurm_settings.py 
b/tests/_legacy/test_slurm_settings.py index aa915cded..9fd0f5e82 100644 --- a/tests/_legacy/test_slurm_settings.py +++ b/tests/_legacy/test_slurm_settings.py @@ -100,7 +100,7 @@ def test_mpmd_compound_env_exports(): srun_2.env_vars = {"cmp2": "222,333", "norm2": "pqr"} srun.make_mpmd(srun_2) - from smartsim._core.launcher.step.slurmStep import SbatchStep, SrunStep + from smartsim._core.launcher.step.slurm_step import SbatchStep, SrunStep from smartsim.settings.slurmSettings import SbatchSettings step = SrunStep("teststep", "./", srun) @@ -160,7 +160,7 @@ def test_mpmd_non_compound_env_exports(): srun_2.env_vars = {"cmp2": "222", "norm2": "pqr"} srun.make_mpmd(srun_2) - from smartsim._core.launcher.step.slurmStep import SbatchStep, SrunStep + from smartsim._core.launcher.step.slurm_step import SbatchStep, SrunStep from smartsim.settings.slurmSettings import SbatchSettings step = SrunStep("teststep", "./", srun) @@ -220,7 +220,7 @@ def test_mpmd_non_compound_no_exports(): srun_2.env_vars = {} srun.make_mpmd(srun_2) - from smartsim._core.launcher.step.slurmStep import SbatchStep, SrunStep + from smartsim._core.launcher.step.slurm_step import SbatchStep, SrunStep from smartsim.settings.slurmSettings import SbatchSettings step = SrunStep("teststep", "./", srun) diff --git a/tests/_legacy/test_smartredis.py b/tests/_legacy/test_smartredis.py index ca8d1e0fa..f09cc8ca8 100644 --- a/tests/_legacy/test_smartredis.py +++ b/tests/_legacy/test_smartredis.py @@ -29,8 +29,9 @@ from smartsim import Experiment from smartsim._core.utils import installed_redisai_backends +from smartsim.builders import Ensemble from smartsim.database import FeatureStore -from smartsim.entity import Application, Ensemble +from smartsim.entity import Application from smartsim.status import JobStatus # The tests in this file belong to the group_b group diff --git a/tests/_legacy/test_step_info.py b/tests/_legacy/test_step_info.py index b0d736016..06e914b0a 100644 --- a/tests/_legacy/test_step_info.py 
+++ b/tests/_legacy/test_step_info.py @@ -26,7 +26,7 @@ import pytest -from smartsim._core.launcher.stepInfo import * +from smartsim._core.launcher.step_info import * from smartsim.status import JobStatus # The tests in this file belong to the group_b group diff --git a/tests/_legacy/test_symlinking.py b/tests/_legacy/test_symlinking.py index 4447a49d1..95aa187e6 100644 --- a/tests/_legacy/test_symlinking.py +++ b/tests/_legacy/test_symlinking.py @@ -32,9 +32,9 @@ from smartsim import Experiment from smartsim._core.config import CONFIG from smartsim._core.control.controller import Controller, _AnonymousBatchJob +from smartsim.builders.ensemble import Ensemble from smartsim.database.orchestrator import FeatureStore from smartsim.entity.application import Application -from smartsim.entity.ensemble import Ensemble from smartsim.settings.base import RunSettings from smartsim.settings.slurmSettings import SbatchSettings, SrunSettings diff --git a/tests/_legacy/test_telemetry_monitor.py b/tests/_legacy/test_telemetry_monitor.py index bc15f32e5..262f07e1e 100644 --- a/tests/_legacy/test_telemetry_monitor.py +++ b/tests/_legacy/test_telemetry_monitor.py @@ -39,12 +39,12 @@ from conftest import FileUtils, WLMUtils from smartsim import Experiment from smartsim._core.control.job import Job, JobEntity -from smartsim._core.control.jobmanager import JobManager -from smartsim._core.entrypoints.telemetrymonitor import get_parser +from smartsim._core.control.job_manager import JobManager +from smartsim._core.entrypoints.telemetry_monitor import get_parser from smartsim._core.launcher.launcher import WLMLauncher -from smartsim._core.launcher.slurm.slurmLauncher import SlurmLauncher +from smartsim._core.launcher.slurm.slurm_launcher import SlurmLauncher from smartsim._core.launcher.step.step import Step, proxyable_launch_cmd -from smartsim._core.launcher.stepInfo import StepInfo +from smartsim._core.launcher.step_info import StepInfo from smartsim._core.utils import serialize from 
smartsim._core.utils.helpers import get_ts_ms from smartsim._core.utils.telemetry.manifest import Run, RuntimeManifest diff --git a/tests/temp_tests/test_colocatedJobGroup.py b/tests/temp_tests/test_colocatedJobGroup.py index 90a5e254c..d6d17fc8a 100644 --- a/tests/temp_tests/test_colocatedJobGroup.py +++ b/tests/temp_tests/test_colocatedJobGroup.py @@ -27,8 +27,8 @@ import pytest from smartsim.entity.application import Application -from smartsim.launchable.basejob import BaseJob -from smartsim.launchable.colocatedJobGroup import ColocatedJobGroup +from smartsim.launchable.base_job import BaseJob +from smartsim.launchable.colocated_job_group import ColocatedJobGroup from smartsim.launchable.job import Job from smartsim.settings import LaunchSettings diff --git a/tests/temp_tests/test_core/test_commands/test_commandList.py b/tests/temp_tests/test_core/test_commands/test_commandList.py index 79d6f7e78..c6bc8d834 100644 --- a/tests/temp_tests/test_core/test_commands/test_commandList.py +++ b/tests/temp_tests/test_core/test_commands/test_commandList.py @@ -27,8 +27,8 @@ import pytest from smartsim._core.commands.command import Command -from smartsim._core.commands.commandList import CommandList -from smartsim.settings.launchCommand import LauncherType +from smartsim._core.commands.command_list import CommandList +from smartsim.settings.launch_command import LauncherType pytestmark = pytest.mark.group_a diff --git a/tests/temp_tests/test_core/test_commands/test_launchCommands.py b/tests/temp_tests/test_core/test_commands/test_launchCommands.py index 0c5e719cc..60bfe4b27 100644 --- a/tests/temp_tests/test_core/test_commands/test_launchCommands.py +++ b/tests/temp_tests/test_core/test_commands/test_launchCommands.py @@ -27,9 +27,9 @@ import pytest from smartsim._core.commands.command import Command -from smartsim._core.commands.commandList import CommandList -from smartsim._core.commands.launchCommands import LaunchCommands -from smartsim.settings.launchCommand import 
LauncherType +from smartsim._core.commands.command_list import CommandList +from smartsim._core.commands.launch_commands import LaunchCommands +from smartsim.settings.launch_command import LauncherType pytestmark = pytest.mark.group_a diff --git a/tests/temp_tests/test_jobGroup.py b/tests/temp_tests/test_jobGroup.py index 5f27199b5..f73516260 100644 --- a/tests/temp_tests/test_jobGroup.py +++ b/tests/temp_tests/test_jobGroup.py @@ -27,10 +27,10 @@ import pytest from smartsim.entity.application import Application -from smartsim.launchable.basejob import BaseJob +from smartsim.launchable.base_job import BaseJob from smartsim.launchable.job import Job -from smartsim.launchable.jobGroup import JobGroup -from smartsim.settings.launchSettings import LaunchSettings +from smartsim.launchable.job_group import JobGroup +from smartsim.settings.launch_settings import LaunchSettings pytestmark = pytest.mark.group_a diff --git a/tests/temp_tests/test_launchable.py b/tests/temp_tests/test_launchable.py index 9b2adb3e7..e87e68902 100644 --- a/tests/temp_tests/test_launchable.py +++ b/tests/temp_tests/test_launchable.py @@ -33,8 +33,8 @@ from smartsim.error.errors import SSUnsupportedError from smartsim.launchable import Job, Launchable from smartsim.launchable.launchable import SmartSimObject -from smartsim.launchable.mpmdjob import MPMDJob -from smartsim.launchable.mpmdpair import MPMDPair +from smartsim.launchable.mpmd_job import MPMDJob +from smartsim.launchable.mpmd_pair import MPMDPair from smartsim.settings import LaunchSettings pytestmark = pytest.mark.group_a diff --git a/tests/temp_tests/test_settings/conftest.py b/tests/temp_tests/test_settings/conftest.py index 90ffdd141..8697b1510 100644 --- a/tests/temp_tests/test_settings/conftest.py +++ b/tests/temp_tests/test_settings/conftest.py @@ -27,7 +27,7 @@ import pytest from smartsim._core.utils.launcher import LauncherProtocol, create_job_id -from smartsim.settings.arguments import launchArguments as launch +from 
smartsim.settings.arguments import launch_arguments as launch @pytest.fixture diff --git a/tests/temp_tests/test_settings/test_alpsLauncher.py b/tests/temp_tests/test_settings/test_alpsLauncher.py index 1017988c0..5abfbb9c7 100644 --- a/tests/temp_tests/test_settings/test_alpsLauncher.py +++ b/tests/temp_tests/test_settings/test_alpsLauncher.py @@ -29,13 +29,13 @@ import pytest -from smartsim._core.shell.shellLauncher import ShellLauncherCommand +from smartsim._core.shell.shell_launcher import ShellLauncherCommand from smartsim.settings import LaunchSettings from smartsim.settings.arguments.launch.alps import ( AprunLaunchArguments, _as_aprun_command, ) -from smartsim.settings.launchCommand import LauncherType +from smartsim.settings.launch_command import LauncherType pytestmark = pytest.mark.group_a diff --git a/tests/temp_tests/test_settings/test_batchSettings.py b/tests/temp_tests/test_settings/test_batchSettings.py index 38289e0f0..37fd3a33f 100644 --- a/tests/temp_tests/test_settings/test_batchSettings.py +++ b/tests/temp_tests/test_settings/test_batchSettings.py @@ -26,38 +26,46 @@ import pytest from smartsim.settings import BatchSettings -from smartsim.settings.batchCommand import SchedulerType +from smartsim.settings.batch_command import BatchSchedulerType pytestmark = pytest.mark.group_a @pytest.mark.parametrize( - "scheduler_enum", + "scheduler_enum,formatted_batch_args", [ - pytest.param(SchedulerType.Slurm, id="slurm"), - pytest.param(SchedulerType.Pbs, id="dragon"), - pytest.param(SchedulerType.Lsf, id="lsf"), + pytest.param( + BatchSchedulerType.Slurm, ["--launch=var", "--nodes=1"], id="slurm" + ), + pytest.param( + BatchSchedulerType.Pbs, ["-l", "nodes=1", "-launch", "var"], id="pbs" + ), + pytest.param( + BatchSchedulerType.Lsf, ["-launch", "var", "-nnodes", "1"], id="lsf" + ), ], ) -def test_create_scheduler_settings(scheduler_enum): +def test_create_scheduler_settings(scheduler_enum, formatted_batch_args): bs_str = BatchSettings( 
batch_scheduler=scheduler_enum.value, - scheduler_args={"launch": "var"}, + batch_args={"launch": "var"}, env_vars={"ENV": "VAR"}, ) - print(bs_str) + bs_str.batch_args.set_nodes(1) assert bs_str._batch_scheduler == scheduler_enum - # TODO need to test scheduler_args assert bs_str._env_vars == {"ENV": "VAR"} + print(bs_str.format_batch_args()) + assert bs_str.format_batch_args() == formatted_batch_args bs_enum = BatchSettings( batch_scheduler=scheduler_enum, - scheduler_args={"launch": "var"}, + batch_args={"launch": "var"}, env_vars={"ENV": "VAR"}, ) + bs_enum.batch_args.set_nodes(1) assert bs_enum._batch_scheduler == scheduler_enum - # TODO need to test scheduler_args assert bs_enum._env_vars == {"ENV": "VAR"} + assert bs_enum.format_batch_args() == formatted_batch_args def test_launcher_property(): @@ -68,10 +76,5 @@ def test_launcher_property(): def test_env_vars_property(): bs = BatchSettings(batch_scheduler="slurm", env_vars={"ENV": "VAR"}) assert bs.env_vars == {"ENV": "VAR"} - - -def test_env_vars_property_deep_copy(): - bs = BatchSettings(batch_scheduler="slurm", env_vars={"ENV": "VAR"}) - copy_env_vars = bs.env_vars - copy_env_vars.update({"test": "no_update"}) - assert bs.env_vars == {"ENV": "VAR"} + ref = bs.env_vars + assert ref is bs.env_vars diff --git a/tests/temp_tests/test_settings/test_dragonLauncher.py b/tests/temp_tests/test_settings/test_dragonLauncher.py index 1026765cf..a7685e18e 100644 --- a/tests/temp_tests/test_settings/test_dragonLauncher.py +++ b/tests/temp_tests/test_settings/test_dragonLauncher.py @@ -25,13 +25,13 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import pytest -from smartsim._core.launcher.dragon.dragonLauncher import ( +from smartsim._core.launcher.dragon.dragon_launcher import ( _as_run_request_args_and_policy, ) -from smartsim._core.schemas.dragonRequests import DragonRunPolicy, DragonRunRequestView +from smartsim._core.schemas.dragon_requests import DragonRunPolicy, DragonRunRequestView from smartsim.settings import LaunchSettings from smartsim.settings.arguments.launch.dragon import DragonLaunchArguments -from smartsim.settings.launchCommand import LauncherType +from smartsim.settings.launch_command import LauncherType pytestmark = pytest.mark.group_a diff --git a/tests/temp_tests/test_settings/test_launchSettings.py b/tests/temp_tests/test_settings/test_launchSettings.py index 6f2d3ee3a..3fc5e544a 100644 --- a/tests/temp_tests/test_settings/test_launchSettings.py +++ b/tests/temp_tests/test_settings/test_launchSettings.py @@ -29,7 +29,7 @@ import pytest from smartsim.settings import LaunchSettings -from smartsim.settings.launchCommand import LauncherType +from smartsim.settings.launch_command import LauncherType pytestmark = pytest.mark.group_a @@ -64,13 +64,8 @@ def test_launcher_property(): def test_env_vars_property(): ls = LaunchSettings(launcher="local", env_vars={"ENV": "VAR"}) assert ls.env_vars == {"ENV": "VAR"} - - -def test_env_vars_property_deep_copy(): - ls = LaunchSettings(launcher="local", env_vars={"ENV": "VAR"}) - copy_env_vars = ls.env_vars - copy_env_vars.update({"test": "no_update"}) - assert ls.env_vars == {"ENV": "VAR"} + ref = ls.env_vars + assert ref is ls.env_vars def test_update_env_vars(): diff --git a/tests/temp_tests/test_settings/test_localLauncher.py b/tests/temp_tests/test_settings/test_localLauncher.py index f5b40735a..6576b2249 100644 --- a/tests/temp_tests/test_settings/test_localLauncher.py +++ b/tests/temp_tests/test_settings/test_localLauncher.py @@ -29,13 +29,13 @@ import pytest -from smartsim._core.shell.shellLauncher import ShellLauncherCommand +from 
smartsim._core.shell.shell_launcher import ShellLauncherCommand from smartsim.settings import LaunchSettings from smartsim.settings.arguments.launch.local import ( LocalLaunchArguments, _as_local_command, ) -from smartsim.settings.launchCommand import LauncherType +from smartsim.settings.launch_command import LauncherType pytestmark = pytest.mark.group_a diff --git a/tests/temp_tests/test_settings/test_lsfLauncher.py b/tests/temp_tests/test_settings/test_lsfLauncher.py index 6074fc182..549c2483b 100644 --- a/tests/temp_tests/test_settings/test_lsfLauncher.py +++ b/tests/temp_tests/test_settings/test_lsfLauncher.py @@ -32,7 +32,7 @@ JsrunLaunchArguments, _as_jsrun_command, ) -from smartsim.settings.launchCommand import LauncherType +from smartsim.settings.launch_command import LauncherType pytestmark = pytest.mark.group_a diff --git a/tests/temp_tests/test_settings/test_lsfScheduler.py b/tests/temp_tests/test_settings/test_lsfScheduler.py index 59a1e7ccd..5e6b7fd0c 100644 --- a/tests/temp_tests/test_settings/test_lsfScheduler.py +++ b/tests/temp_tests/test_settings/test_lsfScheduler.py @@ -26,15 +26,15 @@ import pytest from smartsim.settings import BatchSettings -from smartsim.settings.batchCommand import SchedulerType +from smartsim.settings.batch_command import BatchSchedulerType pytestmark = pytest.mark.group_a def test_scheduler_str(): """Ensure scheduler_str returns appropriate value""" - bs = BatchSettings(batch_scheduler=SchedulerType.Lsf) - assert bs.scheduler_args.scheduler_str() == SchedulerType.Lsf.value + bs = BatchSettings(batch_scheduler=BatchSchedulerType.Lsf) + assert bs.batch_args.scheduler_str() == BatchSchedulerType.Lsf.value @pytest.mark.parametrize( @@ -60,18 +60,18 @@ def test_scheduler_str(): ], ) def test_update_env_initialized(function, value, flag, result): - lsfScheduler = BatchSettings(batch_scheduler=SchedulerType.Lsf) - getattr(lsfScheduler.scheduler_args, function)(*value) - assert lsfScheduler.scheduler_args._scheduler_args[flag] == 
result + lsfScheduler = BatchSettings(batch_scheduler=BatchSchedulerType.Lsf) + getattr(lsfScheduler.batch_args, function)(*value) + assert lsfScheduler.batch_args._batch_args[flag] == result def test_create_bsub(): batch_args = {"core_isolation": None} lsfScheduler = BatchSettings( - batch_scheduler=SchedulerType.Lsf, scheduler_args=batch_args + batch_scheduler=BatchSchedulerType.Lsf, batch_args=batch_args ) - lsfScheduler.scheduler_args.set_nodes(1) - lsfScheduler.scheduler_args.set_walltime("10:10:10") - lsfScheduler.scheduler_args.set_queue("default") + lsfScheduler.batch_args.set_nodes(1) + lsfScheduler.batch_args.set_walltime("10:10:10") + lsfScheduler.batch_args.set_queue("default") args = lsfScheduler.format_batch_args() assert args == ["-core_isolation", "-nnodes", "1", "-W", "10:10", "-q", "default"] diff --git a/tests/temp_tests/test_settings/test_mpiLauncher.py b/tests/temp_tests/test_settings/test_mpiLauncher.py index 9309674e5..57be23ee2 100644 --- a/tests/temp_tests/test_settings/test_mpiLauncher.py +++ b/tests/temp_tests/test_settings/test_mpiLauncher.py @@ -31,7 +31,7 @@ import pytest -from smartsim._core.shell.shellLauncher import ShellLauncherCommand +from smartsim._core.shell.shell_launcher import ShellLauncherCommand from smartsim.settings import LaunchSettings from smartsim.settings.arguments.launch.mpi import ( MpiexecLaunchArguments, @@ -41,7 +41,7 @@ _as_mpirun_command, _as_orterun_command, ) -from smartsim.settings.launchCommand import LauncherType +from smartsim.settings.launch_command import LauncherType pytestmark = pytest.mark.group_a diff --git a/tests/temp_tests/test_settings/test_palsLauncher.py b/tests/temp_tests/test_settings/test_palsLauncher.py index e1d7ef5c3..d38d1842c 100644 --- a/tests/temp_tests/test_settings/test_palsLauncher.py +++ b/tests/temp_tests/test_settings/test_palsLauncher.py @@ -30,13 +30,13 @@ import pytest -from smartsim._core.shell.shellLauncher import ShellLauncherCommand +from 
smartsim._core.shell.shell_launcher import ShellLauncherCommand from smartsim.settings import LaunchSettings from smartsim.settings.arguments.launch.pals import ( PalsMpiexecLaunchArguments, _as_pals_command, ) -from smartsim.settings.launchCommand import LauncherType +from smartsim.settings.launch_command import LauncherType pytestmark = pytest.mark.group_a diff --git a/tests/temp_tests/test_settings/test_pbsScheduler.py b/tests/temp_tests/test_settings/test_pbsScheduler.py index 1a866c1a1..36fde6776 100644 --- a/tests/temp_tests/test_settings/test_pbsScheduler.py +++ b/tests/temp_tests/test_settings/test_pbsScheduler.py @@ -27,15 +27,15 @@ from smartsim.settings import BatchSettings from smartsim.settings.arguments.batch.pbs import QsubBatchArguments -from smartsim.settings.batchCommand import SchedulerType +from smartsim.settings.batch_command import BatchSchedulerType pytestmark = pytest.mark.group_a def test_scheduler_str(): """Ensure scheduler_str returns appropriate value""" - bs = BatchSettings(batch_scheduler=SchedulerType.Pbs) - assert bs.scheduler_args.scheduler_str() == SchedulerType.Pbs.value + bs = BatchSettings(batch_scheduler=BatchSchedulerType.Pbs) + assert bs.batch_args.scheduler_str() == BatchSchedulerType.Pbs.value @pytest.mark.parametrize( @@ -61,20 +61,20 @@ def test_scheduler_str(): ], ) def test_create_pbs_batch(function, value, flag, result): - pbsScheduler = BatchSettings(batch_scheduler=SchedulerType.Pbs) - assert isinstance(pbsScheduler.scheduler_args, QsubBatchArguments) - getattr(pbsScheduler.scheduler_args, function)(*value) - assert pbsScheduler.scheduler_args._scheduler_args[flag] == result + pbsScheduler = BatchSettings(batch_scheduler=BatchSchedulerType.Pbs) + assert isinstance(pbsScheduler.batch_args, QsubBatchArguments) + getattr(pbsScheduler.batch_args, function)(*value) + assert pbsScheduler.batch_args._batch_args[flag] == result def test_format_pbs_batch_args(): - pbsScheduler = 
BatchSettings(batch_scheduler=SchedulerType.Pbs) - pbsScheduler.scheduler_args.set_nodes(1) - pbsScheduler.scheduler_args.set_walltime("10:00:00") - pbsScheduler.scheduler_args.set_queue("default") - pbsScheduler.scheduler_args.set_account("myproject") - pbsScheduler.scheduler_args.set_ncpus(10) - pbsScheduler.scheduler_args.set_hostlist(["host_a", "host_b", "host_c"]) + pbsScheduler = BatchSettings(batch_scheduler=BatchSchedulerType.Pbs) + pbsScheduler.batch_args.set_nodes(1) + pbsScheduler.batch_args.set_walltime("10:00:00") + pbsScheduler.batch_args.set_queue("default") + pbsScheduler.batch_args.set_account("myproject") + pbsScheduler.batch_args.set_ncpus(10) + pbsScheduler.batch_args.set_hostlist(["host_a", "host_b", "host_c"]) args = pbsScheduler.format_batch_args() assert args == [ "-l", diff --git a/tests/temp_tests/test_settings/test_slurmLauncher.py b/tests/temp_tests/test_settings/test_slurmLauncher.py index e5d9c718f..6be9b5542 100644 --- a/tests/temp_tests/test_settings/test_slurmLauncher.py +++ b/tests/temp_tests/test_settings/test_slurmLauncher.py @@ -27,13 +27,13 @@ import pytest -from smartsim._core.shell.shellLauncher import ShellLauncherCommand +from smartsim._core.shell.shell_launcher import ShellLauncherCommand from smartsim.settings import LaunchSettings from smartsim.settings.arguments.launch.slurm import ( SlurmLaunchArguments, _as_srun_command, ) -from smartsim.settings.launchCommand import LauncherType +from smartsim.settings.launch_command import LauncherType pytestmark = pytest.mark.group_a diff --git a/tests/temp_tests/test_settings/test_slurmScheduler.py b/tests/temp_tests/test_settings/test_slurmScheduler.py index a6afcef16..8ab489cc8 100644 --- a/tests/temp_tests/test_settings/test_slurmScheduler.py +++ b/tests/temp_tests/test_settings/test_slurmScheduler.py @@ -27,15 +27,15 @@ from smartsim.settings import BatchSettings from smartsim.settings.arguments.batch.slurm import SlurmBatchArguments -from smartsim.settings.batchCommand import 
SchedulerType +from smartsim.settings.batch_command import BatchSchedulerType pytestmark = pytest.mark.group_a -def test_scheduler_str(): +def test_batch_scheduler_str(): """Ensure scheduler_str returns appropriate value""" - bs = BatchSettings(batch_scheduler=SchedulerType.Slurm) - assert bs.scheduler_args.scheduler_str() == SchedulerType.Slurm.value + bs = BatchSettings(batch_scheduler=BatchSchedulerType.Slurm) + assert bs.batch_args.scheduler_str() == BatchSchedulerType.Slurm.value @pytest.mark.parametrize( @@ -74,15 +74,15 @@ def test_scheduler_str(): ], ) def test_sbatch_class_methods(function, value, flag, result): - slurmScheduler = BatchSettings(batch_scheduler=SchedulerType.Slurm) - getattr(slurmScheduler.scheduler_args, function)(*value) - assert slurmScheduler.scheduler_args._scheduler_args[flag] == result + slurmScheduler = BatchSettings(batch_scheduler=BatchSchedulerType.Slurm) + getattr(slurmScheduler.batch_args, function)(*value) + assert slurmScheduler.batch_args._batch_args[flag] == result def test_create_sbatch(): batch_args = {"exclusive": None, "oversubscribe": None} slurmScheduler = BatchSettings( - batch_scheduler=SchedulerType.Slurm, scheduler_args=batch_args + batch_scheduler=BatchSchedulerType.Slurm, batch_args=batch_args ) assert isinstance(slurmScheduler._arguments, SlurmBatchArguments) args = slurmScheduler.format_batch_args() @@ -94,32 +94,32 @@ def test_launch_args_input_mutation(): key0, key1, key2 = "arg0", "arg1", "arg2" val0, val1, val2 = "val0", "val1", "val2" - default_scheduler_args = { + default_batch_args = { key0: val0, key1: val1, key2: val2, } slurmScheduler = BatchSettings( - batch_scheduler=SchedulerType.Slurm, scheduler_args=default_scheduler_args + batch_scheduler=BatchSchedulerType.Slurm, batch_args=default_batch_args ) # Confirm initial values are set - assert slurmScheduler.scheduler_args._scheduler_args[key0] == val0 - assert slurmScheduler.scheduler_args._scheduler_args[key1] == val1 - assert 
slurmScheduler.scheduler_args._scheduler_args[key2] == val2 + assert slurmScheduler.batch_args._batch_args[key0] == val0 + assert slurmScheduler.batch_args._batch_args[key1] == val1 + assert slurmScheduler.batch_args._batch_args[key2] == val2 # Update our common run arguments val2_upd = f"not-{val2}" - default_scheduler_args[key2] = val2_upd + default_batch_args[key2] = val2_upd # Confirm previously created run settings are not changed - assert slurmScheduler.scheduler_args._scheduler_args[key2] == val2 + assert slurmScheduler.batch_args._batch_args[key2] == val2 def test_sbatch_settings(): - scheduler_args = {"nodes": 1, "time": "10:00:00", "account": "A3123"} + batch_args = {"nodes": 1, "time": "10:00:00", "account": "A3123"} slurmScheduler = BatchSettings( - batch_scheduler=SchedulerType.Slurm, scheduler_args=scheduler_args + batch_scheduler=BatchSchedulerType.Slurm, batch_args=batch_args ) formatted = slurmScheduler.format_batch_args() result = ["--nodes=1", "--time=10:00:00", "--account=A3123"] @@ -127,10 +127,10 @@ def test_sbatch_settings(): def test_sbatch_manual(): - slurmScheduler = BatchSettings(batch_scheduler=SchedulerType.Slurm) - slurmScheduler.scheduler_args.set_nodes(5) - slurmScheduler.scheduler_args.set_account("A3531") - slurmScheduler.scheduler_args.set_walltime("10:00:00") + slurmScheduler = BatchSettings(batch_scheduler=BatchSchedulerType.Slurm) + slurmScheduler.batch_args.set_nodes(5) + slurmScheduler.batch_args.set_account("A3531") + slurmScheduler.batch_args.set_walltime("10:00:00") formatted = slurmScheduler.format_batch_args() result = ["--nodes=5", "--account=A3531", "--time=10:00:00"] assert formatted == result diff --git a/tests/test_ensemble.py b/tests/test_ensemble.py index 5198681fe..9c9015251 100644 --- a/tests/test_ensemble.py +++ b/tests/test_ensemble.py @@ -31,10 +31,10 @@ import pytest -from smartsim.entity.ensemble import Ensemble +from smartsim.builders.ensemble import Ensemble +from smartsim.builders.utils.strategies import 
ParamSet from smartsim.entity.files import EntityFiles -from smartsim.entity.strategies import ParamSet -from smartsim.settings.launchSettings import LaunchSettings +from smartsim.settings.launch_settings import LaunchSettings pytestmark = pytest.mark.group_a @@ -109,7 +109,7 @@ def test_ensemble_user_created_strategy(mock_launcher_settings, test_dir): "echo", ("hello", "world"), permutation_strategy=user_created_function, - ).as_jobs(mock_launcher_settings) + ).build_jobs(mock_launcher_settings) assert len(jobs) == 1 @@ -125,7 +125,7 @@ def test_ensemble_without_any_members_raises_when_cast_to_jobs( permutation_strategy="random", max_permutations=30, replicas=0, - ).as_jobs(mock_launcher_settings) + ).build_jobs(mock_launcher_settings) def test_strategy_error_raised_if_a_strategy_that_dne_is_requested(test_dir): @@ -208,7 +208,7 @@ def test_all_perm_strategy( permutation_strategy="all_perm", max_permutations=max_perms, replicas=replicas, - ).as_jobs(mock_launcher_settings) + ).build_jobs(mock_launcher_settings) assert len(jobs) == expected_num_jobs @@ -222,7 +222,7 @@ def test_all_perm_strategy_contents(): permutation_strategy="all_perm", max_permutations=16, replicas=1, - ).as_jobs(mock_launcher_settings) + ).build_jobs(mock_launcher_settings) assert len(jobs) == 16 @@ -262,7 +262,7 @@ def test_step_strategy( permutation_strategy="step", max_permutations=max_perms, replicas=replicas, - ).as_jobs(mock_launcher_settings) + ).build_jobs(mock_launcher_settings) assert len(jobs) == expected_num_jobs @@ -301,5 +301,5 @@ def test_random_strategy( permutation_strategy="random", max_permutations=max_perms, replicas=replicas, - ).as_jobs(mock_launcher_settings) + ).build_jobs(mock_launcher_settings) assert len(jobs) == expected_num_jobs diff --git a/tests/test_experiment.py b/tests/test_experiment.py index 8034a1738..0b9914e53 100644 --- a/tests/test_experiment.py +++ b/tests/test_experiment.py @@ -47,8 +47,8 @@ from smartsim.error import errors from smartsim.experiment 
import Experiment from smartsim.launchable import job -from smartsim.settings import launchSettings -from smartsim.settings.arguments import launchArguments +from smartsim.settings import launch_settings +from smartsim.settings.arguments import launch_arguments from smartsim.status import InvalidJobStatus, JobStatus pytestmark = pytest.mark.group_a @@ -99,7 +99,7 @@ def job_maker(monkeypatch): def iter_jobs(): for i in itertools.count(): - settings = launchSettings.LaunchSettings("local") + settings = launch_settings.LaunchSettings("local") monkeypatch.setattr(settings, "_arguments", MockLaunchArgs(i)) yield job.Job(EchoHelloWorldEntity(), settings) @@ -160,7 +160,7 @@ def stop_jobs(self, *ids): @dataclasses.dataclass(frozen=True) class LaunchRecord: - launch_args: launchArguments.LaunchArguments + launch_args: launch_arguments.LaunchArguments entity: entity.SmartSimEntity env: t.Mapping[str, str | None] path: str @@ -186,7 +186,7 @@ def from_job(cls, job: job.Job): return cls(args, entity, env, path, out, err) -class MockLaunchArgs(launchArguments.LaunchArguments): +class MockLaunchArgs(launch_arguments.LaunchArguments): """A `LaunchArguments` subclass that will evaluate as true with another if and only if they were initialized with the same id. In practice this class has no arguments to set. 
diff --git a/tests/test_generator.py b/tests/test_generator.py index ff24018ca..8f5a02f0b 100644 --- a/tests/test_generator.py +++ b/tests/test_generator.py @@ -11,7 +11,8 @@ from smartsim import Experiment from smartsim._core.generation.generator import Generator -from smartsim.entity import Application, Ensemble +from smartsim.builders import Ensemble +from smartsim.entity import Application from smartsim.entity.files import EntityFiles from smartsim.launchable import Job from smartsim.settings import LaunchSettings @@ -226,7 +227,7 @@ def test_exp_private_generate_method_ensemble(test_dir, wlmutils, generator_inst """Test that Job directory was created from Experiment.""" ensemble = Ensemble("ensemble-name", "echo", replicas=2) launch_settings = LaunchSettings(wlmutils.get_test_launcher()) - job_list = ensemble.as_jobs(launch_settings) + job_list = ensemble.build_jobs(launch_settings) exp = Experiment(name="exp_name", exp_path=test_dir) for i, job in enumerate(job_list): job_run_path, _, _ = exp._generate(generator_instance, job, i) @@ -239,7 +240,7 @@ def test_exp_private_generate_method_ensemble(test_dir, wlmutils, generator_inst def test_generate_ensemble_directory(wlmutils, generator_instance): ensemble = Ensemble("ensemble-name", "echo", replicas=2) launch_settings = LaunchSettings(wlmutils.get_test_launcher()) - job_list = ensemble.as_jobs(launch_settings) + job_list = ensemble.build_jobs(launch_settings) for i, job in enumerate(job_list): # Call Generator.generate_job path, _, _ = generator_instance.generate_job(job, i) @@ -263,7 +264,7 @@ def test_generate_ensemble_directory_start(test_dir, wlmutils, monkeypatch): ) ensemble = Ensemble("ensemble-name", "echo", replicas=2) launch_settings = LaunchSettings(wlmutils.get_test_launcher()) - job_list = ensemble.as_jobs(launch_settings) + job_list = ensemble.build_jobs(launch_settings) exp = Experiment(name="exp_name", exp_path=test_dir) exp.start(*job_list) run_dir = listdir(test_dir) @@ -285,7 +286,7 @@ def 
test_generate_ensemble_copy(test_dir, wlmutils, monkeypatch, get_gen_copy_di "ensemble-name", "echo", replicas=2, files=EntityFiles(copy=get_gen_copy_dir) ) launch_settings = LaunchSettings(wlmutils.get_test_launcher()) - job_list = ensemble.as_jobs(launch_settings) + job_list = ensemble.build_jobs(launch_settings) exp = Experiment(name="exp_name", exp_path=test_dir) exp.start(*job_list) run_dir = listdir(test_dir) @@ -310,7 +311,7 @@ def test_generate_ensemble_symlink( files=EntityFiles(symlink=get_gen_symlink_dir), ) launch_settings = LaunchSettings(wlmutils.get_test_launcher()) - job_list = ensemble.as_jobs(launch_settings) + job_list = ensemble.build_jobs(launch_settings) exp = Experiment(name="exp_name", exp_path=test_dir) exp.start(*job_list) run_dir = listdir(test_dir) @@ -341,7 +342,7 @@ def test_generate_ensemble_configure( file_parameters=params, ) launch_settings = LaunchSettings(wlmutils.get_test_launcher()) - job_list = ensemble.as_jobs(launch_settings) + job_list = ensemble.build_jobs(launch_settings) exp = Experiment(name="exp_name", exp_path=test_dir) id = exp.start(*job_list) run_dir = listdir(test_dir) diff --git a/tests/test_permutation_strategies.py b/tests/test_permutation_strategies.py index b14514c99..314c21063 100644 --- a/tests/test_permutation_strategies.py +++ b/tests/test_permutation_strategies.py @@ -28,8 +28,8 @@ import pytest -from smartsim.entity import strategies -from smartsim.entity.strategies import ParamSet +from smartsim.builders.utils import strategies +from smartsim.builders.utils.strategies import ParamSet from smartsim.error import errors pytestmark = pytest.mark.group_a diff --git a/tests/test_shell_launcher.py b/tests/test_shell_launcher.py index 95e884710..f371d793f 100644 --- a/tests/test_shell_launcher.py +++ b/tests/test_shell_launcher.py @@ -37,7 +37,7 @@ import psutil import pytest -from smartsim._core.shell.shellLauncher import ShellLauncher, ShellLauncherCommand, sp +from smartsim._core.shell.shell_launcher import 
ShellLauncher, ShellLauncherCommand, sp from smartsim._core.utils import helpers from smartsim._core.utils.shell import * from smartsim.entity import entity @@ -156,7 +156,7 @@ def test_shell_launcher_start_calls_popen( ): """Test that the process leading up to the shell launcher popen call was correct""" with unittest.mock.patch( - "smartsim._core.shell.shellLauncher.sp.Popen" + "smartsim._core.shell.shell_launcher.sp.Popen" ) as mock_open: _ = shell_launcher.start(shell_cmd) mock_open.assert_called_once() @@ -167,7 +167,7 @@ def test_shell_launcher_start_calls_popen_with_value( ): """Test that popen was called with correct values""" with unittest.mock.patch( - "smartsim._core.shell.shellLauncher.sp.Popen" + "smartsim._core.shell.shell_launcher.sp.Popen" ) as mock_open: _ = shell_launcher.start(shell_cmd) mock_open.assert_called_once_with(