From df2c464f246fbc5179f090873214df4c7c16a79b Mon Sep 17 00:00:00 2001 From: Jerry Mannil Date: Fri, 10 Jun 2022 00:03:06 -0700 Subject: [PATCH 1/3] fail fast on ssh errors or non-zero return code * Added "-k" option to enable fail-fast behavior * Fail fast on non-zero return code Fixes #139 --- src/pdsh/dsh.c | 2 +- src/pdsh/opt.c | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/pdsh/dsh.c b/src/pdsh/dsh.c index 1957421b..cad8d41d 100644 --- a/src/pdsh/dsh.c +++ b/src/pdsh/dsh.c @@ -753,7 +753,7 @@ static void *_rsh_thread(void *args) a->rc = rv; /* if a single qshell thread fails, terminate whole job */ - if (a->kill_on_fail && a->state == DSH_FAILED) { + if (a->kill_on_fail && ((a->state == DSH_FAILED) || (a->rc > 0))) { _fwd_signal(SIGTERM); errx("%p: terminating all processes\n"); } diff --git a/src/pdsh/opt.c b/src/pdsh/opt.c index 790fa2db..44b85dc2 100644 --- a/src/pdsh/opt.c +++ b/src/pdsh/opt.c @@ -66,7 +66,8 @@ #define OPT_USAGE_DSH "\ Usage: pdsh [-options] command ...\n\ --S return largest of remote command return values\n" +-S return largest of remote command return values\n\ +-k fail fast on connect failure or non-zero return code\n" /* -s option only useful on AIX */ #if HAVE_MAGIC_RSHELL_CLEANUP @@ -113,9 +114,9 @@ Usage: rpdcp [-options] src [src2...] 
dir\n\ /* undocumented "-K" option - keep domain name in output */ #if HAVE_MAGIC_RSHELL_CLEANUP -#define DSH_ARGS "sS" +#define DSH_ARGS "sSk" #else -#define DSH_ARGS "S" +#define DSH_ARGS "Sk" #endif #define PCP_ARGS "pryzZe:" #define GEN_ARGS "hLNKR:M:t:cqf:w:x:l:u:bI:dVT:Q" @@ -691,6 +692,9 @@ void opt_args(opt_t * opt, int argc, char *argv[]) else goto test_module_option; break; + case 'k': + opt->kill_on_fail = true; + break; default: test_module_option: if (mod_process_opt(opt, c, optarg) < 0) _usage(opt); From 36f2f561d336abd07b587baf91a02835d68d7ea7 Mon Sep 17 00:00:00 2001 From: Jerry Mannil Date: Mon, 13 Jun 2022 11:26:39 -0700 Subject: [PATCH 2/3] doc: fast-fail update Add '-k' option for fast-fail --- doc/pdsh.1.in | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/pdsh.1.in b/doc/pdsh.1.in index ccd529d4..d4d7f0d2 100644 --- a/doc/pdsh.1.in +++ b/doc/pdsh.1.in @@ -144,10 +144,13 @@ as if they had been given to \fB\-w\fR and preceeded with the minus `-' character. .SH "Standard pdsh options" -.TP +.TP .I "-S" Return the largest of the remote command return values. .TP +.I "-k" +Fail fast on connect failure or non-zero return code. +.TP .I "-h" Output usage menu and quit. A list of available rcmd modules will also be printed at the end of the usage message. 
From c94c3ca87a9652273700030079b6c27646575837 Mon Sep 17 00:00:00 2001 From: Jerry Mannil Date: Mon, 13 Jun 2022 11:41:11 -0700 Subject: [PATCH 3/3] test: fast-fail update Add tests that exercise '-k' option --- tests/t2001-ssh.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/t2001-ssh.sh b/tests/t2001-ssh.sh index 44efe73c..a9dddb07 100755 --- a/tests/t2001-ssh.sh +++ b/tests/t2001-ssh.sh @@ -197,5 +197,20 @@ test_expect_success 'ssh works with pdsh -S and multiple targets' ' done ' unset PDSH_SSH_ARGS +test_expect_success 'ssh works with pdsh -k' ' + TEST_EXIT_CODE=$(random 254) && + export PDSH_SSH_ARGS="-n%n -i0 -e$TEST_EXIT_CODE" + test_expect_code "1" pdsh -Rssh -k -w foo0 command +' +unset PDSH_SSH_ARGS +test_expect_success 'ssh works with pdsh -k and multiple targets' ' + for n in $(seq 1 24); do + TEST_EXIT_CODE=$(random 254) && + FAILING_RANK=$(random $n) && + export PDSH_SSH_ARGS="-n%n -i$FAILING_RANK -e$TEST_EXIT_CODE" + test_expect_code "1" pdsh -Rssh -k -wfoo[0-$n] command + done +' +unset PDSH_SSH_ARGS test_done