From 216b2dfd068608e33e7283f3669afcd40eb7e6eb Mon Sep 17 00:00:00 2001
From: Matt Drozt
Date: Fri, 1 Nov 2024 15:23:00 -0500
Subject: [PATCH] Do not raise and stop backend on nonzero exit

---
 smartsim/_core/launcher/dragon/dragonBackend.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/smartsim/_core/launcher/dragon/dragonBackend.py b/smartsim/_core/launcher/dragon/dragonBackend.py
index 5a7a28dd8..971c60a6f 100644
--- a/smartsim/_core/launcher/dragon/dragonBackend.py
+++ b/smartsim/_core/launcher/dragon/dragonBackend.py
@@ -409,7 +409,7 @@ def _create_redirect_workers(
         err_file: t.Optional[str],
     ) -> dragon_process_group.ProcessGroup:
         grp_redir = dragon_process_group.ProcessGroup(
-            restart=False, policy=global_policy, pmi_enabled=False
+            restart=False, ignore_error_on_exit=True, policy=global_policy, pmi_enabled=False
         )
         for pol, puid in zip(policies, puids):
             proc = dragon_process.Process(None, ident=puid)
@@ -528,7 +528,7 @@ def _start_steps(self) -> None:
                     host_name=hosts[0],
                 )
                 grp = dragon_process_group.ProcessGroup(
-                    restart=False, pmi_enabled=request.pmi_enabled, policy=global_policy
+                    restart=False, ignore_error_on_exit=True, pmi_enabled=request.pmi_enabled, policy=global_policy
                 )
 
                 policies = []