From d9efb4514061f365e5879993a707c2d8c40706a0 Mon Sep 17 00:00:00 2001 From: Sofer Athlan-Guyot Date: Wed, 18 Dec 2024 15:54:14 +0100 Subject: [PATCH] Add configuration variables for control plane testing during update. Add a number of variables to control various aspects of the control plane testing. Using those new variables we can configure control plane testing settings from the job definition. Increased the default time we wait for the last vm to be created and destroyed as 5 minutes (the previous timeout) was slightly too short. It's now 7 minutes. Closes: https://issues.redhat.com/browse/OSPRH-12349 --- roles/update/README.md | 3 +++ roles/update/defaults/main.yml | 3 +++ roles/update/tasks/main.yml | 2 +- roles/update/templates/control_plane_test_stop.sh.j2 | 6 +++--- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/roles/update/README.md b/roles/update/README.md index 73fac13004..a42b8c5e9e 100644 --- a/roles/update/README.md +++ b/roles/update/README.md @@ -12,6 +12,9 @@ Role to run update * `cifmw_update_ping_loss_second` : (Integer) Number of seconds that the ping test is allowed to fail. Default to `0`. Note that 1 packet loss is always accepted to avoid false positive. * `cifmw_update_ping_loss_percent` : (Integer) Maximum percentage of ping loss accepted. Default to `0`. Only relevant when `cifmw_update_ping_loss_second` is not 0. * `cifmw_update_control_plane_check`: (Boolean) Activate a continuous control plane testing. Default to `False` +* `cifmw_update_ctl_plane_max_cons_fail`: (Integer) For continuous control plane testing, maximum number of consecutive failures allowed. Default to 2. +* `cifmw_update_ctl_plane_max_fail`: (Integer) For continuous control plane testing, maximum number of failures allowed. Default to 3. +* `cifmw_update_ctl_plane_max_tries`: (Integer) For continuous control plane testing, number of retries allowed to stop and destroy the last vm created. Each retry is 5 seconds apart. Default to 84, so 7 minutes. 
* `cifmw_update_openstackclient_pod_timeout`: (Integer) Maximum number of seconds to wait for the openstackclient Pod to be available during control plane testing, as it is being restarted during update. Default to `10` seconds. ## Examples diff --git a/roles/update/defaults/main.yml b/roles/update/defaults/main.yml index 527c6361f2..4d72c61a14 100644 --- a/roles/update/defaults/main.yml +++ b/roles/update/defaults/main.yml @@ -47,3 +47,6 @@ cifmw_update_ping_loss_percent: 0 # Control plane Testing cifmw_update_control_plane_check: false cifmw_update_openstackclient_pod_timeout: 10 # in seconds. +cifmw_update_ctl_plane_max_cons_fail: 2 +cifmw_update_ctl_plane_max_fail: 3 +cifmw_update_ctl_plane_max_tries: 84 diff --git a/roles/update/tasks/main.yml b/roles/update/tasks/main.yml index 6fbd74e4a3..93987dd205 100644 --- a/roles/update/tasks/main.yml +++ b/roles/update/tasks/main.yml @@ -72,5 +72,5 @@ when: - cifmw_update_control_plane_check | bool - not cifmw_update_run_dryrun | bool - ansible.builtin.shell: | + ansible.builtin.command: | {{ cifmw_update_artifacts_basedir }}/control_plane_test_stop.sh diff --git a/roles/update/templates/control_plane_test_stop.sh.j2 b/roles/update/templates/control_plane_test_stop.sh.j2 index 9eb3648c2a..47d203e18a 100644 --- a/roles/update/templates/control_plane_test_stop.sh.j2 +++ b/roles/update/templates/control_plane_test_stop.sh.j2 @@ -4,10 +4,10 @@ # Get the pid, kill it and wait for the end of the last run. set -eu -MAX_CONS_FAIL=${1:-2} -MAX_FAIL=${2:-3} +MAX_CONS_FAIL=${1:-{{ cifmw_update_ctl_plane_max_cons_fail }}} +MAX_FAIL=${2:-{{ cifmw_update_ctl_plane_max_fail }}} BASE_DIR="${3:-{{ cifmw_update_artifacts_basedir }}}" -STOP_MAX_TRIES=${4:-60} # 5 seconds x MAX_TRIES = 5 min by default +STOP_MAX_TRIES=${4:-{{ cifmw_update_ctl_plane_max_tries }}} # times 5 seconds pid_file="${BASE_DIR}/control-plane-test.pid"