Skip to content

Commit

Permalink
Introduce control plane testing.
Browse files Browse the repository at this point in the history
Just before starting the update and for the entire update, this
continuously:
1. creates a vm;
2. attaches a volume (optional) and a floating IP to it
3. ssh to it
4. destroy it
5. restart from 1.

This gives a good level of confidence that the OpenStack API is still
reachable during the update.
  • Loading branch information
sathlan authored and openshift-merge-bot[bot] committed Sep 10, 2024
1 parent 85b389a commit c006610
Show file tree
Hide file tree
Showing 8 changed files with 358 additions and 22 deletions.
2 changes: 1 addition & 1 deletion roles/update/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ Role to run update
* `cifmw_update_create_volume`: (Boolean) Attach a volume to the test OS instance when set to true. Default to `False`
* `cifmw_update_ping_loss_second` : (Integer) Number of seconds that the ping test is allowed to fail. Default to `0`. Note that 1 packet loss is always accepted to avoid false positive.
* `cifmw_update_ping_loss_percent` : (Integer) Maximum percentage of ping loss accepted. Default to `0`. Only relevant when `cifmw_update_ping_loss_second` is not 0.

* `cifmw_update_control_plane_check`: (Boolean) Activate continuous control plane testing. Default to `False`
## Examples
2 changes: 2 additions & 0 deletions roles/update/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,5 @@ cifmw_update_ping_test: false
cifmw_update_create_volume: false
cifmw_update_ping_loss_second: 0
cifmw_update_ping_loss_percent: 0

cifmw_update_control_plane_check: false
198 changes: 198 additions & 0 deletions roles/update/files/continuous-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
#!/bin/bash
# Abort on any unhandled command failure (-e) and on the expansion of
# an unset variable (-u).
set -eu
## ---------------------------------------------------------------------
## NAME:
## continuous-test.sh - run a script in a loop and gather the results.
##
## SYNOPSIS
## continuous-test.sh [OPTION] [SCRIPT]
##
## DESCRIPTION
## Run SCRIPT and collect date, time and exit status.
##
## The SCRIPT will be continuously run until we get a SIGUSR1 (or
## SIGTERM) signal. When the signal is caught, we wait for the
## current run to end and then write the done file. The result of
## every run is available in the logfile.
##
## The output of each command will be saved into "ct-<pid>/" under
## the output directory (see -o).
##
## A pid file (see -p) will register the pid of the running
## process.
##
## OPTIONS
## -d Enable debug mode.
## -l <PREFIX> Prefix used for:
## - Logfile: Default to <DIR>/continuous-test-<PID>.log
## - Done file: Default to <DIR>/continuous-test-<PID>.done
##
## The logfile will hold the result of each command run and the
## done file indicates that the last run is finished when we want
## to end the continuous test.
##
## Both those files will have the <PID> added to the prefix so that
## multiple commands can be run in parallel if needed.
##
## The pid can be found in the PIDFILE.
##
## -p <PIDFILE> save the PID to that file.
## Default to <DIR>/continuous-test.pid
##
## -o <DIR> Directory where to save all those files. Default to
## the directory where continuous-test.sh is.
##
## FILES
##
## <DIR>/continuous-test.pid holds the pid of the process (see -p)
## <DIR>/continuous-test-<pid>.log has the result of each run
## <DIR>/continuous-test-<pid>.done marks the end of the last run
## <DIR>/ct-<pid>/<timestamp>.log holds the output of each command.
##
## ENVIRONMENT
## CT_SCRIPT_ARGS A string holding any argument that should
## be passed to SCRIPT.
##
## AUTHOR
## Athlan-Guyot Sofer <[email protected]>
## ---------------------------------------------------------------------
# Name of this script, used to re-execute it from its directory.
# "$0" is quoted so that a path containing spaces is handled.
FILE=$(basename -- "$0")

# Role of the current invocation. The first call acts as the parent;
# the parent and the worker override these through the environment
# before re-executing the script.
CT_PARENT=${CT_PARENT:-true}
CT_CHILD=${CT_CHILD:-false}

# Set to true by the signal handler to end the worker loop.
CT_STOP=false

## ---------------------------------------------------------------------
## Function definitions.
# Signal handler of the worker: report the signal on stderr and flag
# that the test loop must stop.
# Globals: CT_STOP (written)
process_sig() {
  printf '%s\n' "$$: received term signal" 1>&2
  CT_STOP=true
}

# Signal handler of the parent: forward the stop request to the
# worker as SIGUSR1.
# Globals: CT_PID (read) - pid of the background worker.
process_sigterm_parent() {
  echo "$$: Parent received term signal" >&2
  # ${CT_PID:-} keeps the fallback branch reachable under `set -u`.
  if [ -n "${CT_PID:-}" ]; then
    echo "$$: received term signal: killing $CT_PID" >&2
    kill -s USR1 "$CT_PID"
  else
    # Should not happen.
    echo "$$: received term signal: killing group" >&2
    kill -s USR1 0
  fi
}

# Daemonize the process. This will fork a process and detach from the
# console after setting the environment from the options.
#
# Options are turned into exported CT_* variables, then this same
# script is re-executed in a new session as the worker (CT_CHILD=true).
# In debug mode the parent stays attached and waits for the worker;
# otherwise it returns right after starting it.
if "${CT_PARENT}"; then
  export DEBUG=false
  while getopts :p:l:o:d OPT; do
    case $OPT in
      l|+l)
        CT_PREFIX="$OPTARG"
        ;;
      p|+p)
        CT_PIDFILE="$OPTARG"
        ;;
      o|+o)
        CT_DIR="$OPTARG"
        ;;
      d|+d)
        DEBUG=true
        ;;
      *)
        echo "usage: ${0##*/} [-l PREFIX] [-p PIDFILE] [-o DIR] [-d] SCRIPT" >&2
        exit 2
        ;;
    esac
  done
  shift $(( OPTIND - 1 ))
  OPTIND=1
  # Validate SCRIPT while stderr is still the caller's: below it may
  # be redirected to /dev/null and the message would be lost.
  if [ "$#" -eq 0 ]; then
    echo "${0##*/}: SCRIPT cannot be empty." >&2
    exit 1
  fi
  # ${CT_DIR:-} so that the default applies under `set -u` when -o is
  # not given.
  if [ -z "${CT_DIR:-}" ]; then
    CT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
  fi
  export CT_DIR
  if $DEBUG; then
    # Without a terminal, tty fails and prints "not a tty": fall back
    # to /dev/null instead of using that text as a file name.
    CT_TTY=$(tty) || CT_TTY=/dev/null
  else
    CT_TTY=/dev/null
  fi
  export CT_TTY
  exec 2>"$CT_TTY"
  echo "entering parent $$ $FILE" >&2
  export CT_SCRIPT_ARGS=${CT_SCRIPT_ARGS:-""}
  export CT_SCRIPT="$*"
  export CT_PREFIX="${CT_PREFIX:-}"
  export CT_PIDFILE="${CT_PIDFILE:-}"
  export CT_CHILD=true
  export CT_PARENT=false
  setsid "${CT_DIR}/${FILE}" "$@" </dev/null >"$CT_TTY" 2>"$CT_TTY" &
  CT_PID=$!
  if $DEBUG; then
    trap process_sigterm_parent SIGTERM SIGINT
    wait "$CT_PID"
    echo "leaving parent $$ after waiting for $CT_PID/$FILE" >&2
  else
    echo "leaving parent $$ $FILE" >&2
  fi
  sync
  exit 0
fi

# Worker: record its pid, then run the script once more per iteration
# (with both CT_PARENT and CT_CHILD false, which selects the
# single-run section) until a SIGTERM or SIGUSR1 sets CT_STOP.
# On exit a "done" file holding the current date is written.
if "${CT_CHILD}"; then
  # ${CT_TTY:-} keeps the else branch reachable under `set -u`.
  if [ -n "${CT_TTY:-}" ]; then
    exec 2>"${CT_TTY}"
    exec 1>"${CT_TTY}"
  else
    CT_TTY=/dev/null
  fi
  echo "entering child $$ running $FILE" >&2
  # Common stem of the log and done files; both carry the worker pid.
  ct_basename="${CT_PREFIX:-continuous-test}"
  CT_LOGFILE="${CT_DIR}/${ct_basename}-$$.log"
  if [ -z "${CT_PIDFILE:-}" ]; then
    CT_PIDFILE="${CT_DIR}/continuous-test.pid"
  fi
  export CT_LOGFILE
  export CT_PIDFILE
  export CT_CMD_OUT_DIR="${CT_DIR}/ct-$$"
  trap process_sig SIGTERM SIGUSR1
  export CT_CHILD=false
  export CT_PARENT=false
  echo $$ >"${CT_PIDFILE}"
  # Main loop where we eventually run the script.
  while ! $CT_STOP; do
    setsid "${CT_DIR}/${FILE}" "$@" </dev/null 2>"$CT_TTY"
  done
  echo "Leaving child $$ running $FILE" >&2
  CT_ENDFILE="${CT_DIR}/${ct_basename}-$$.done"
  date >"$CT_ENDFILE"
  sync
  exit 0
fi

# Single run of CT_SCRIPT. One line is appended to CT_LOGFILE:
#   <start date> (<start epoch>) <duration>s SUCCESS|FAILED (<rc>)
# The output of the script itself goes to
# ${CT_CMD_OUT_DIR}/<start epoch>.log.
exec >>"$CT_LOGFILE"
mkdir -p "${CT_CMD_OUT_DIR}"
echo "entering loop $$ $CT_SCRIPT" >&2
# The output file is named after the start time in seconds, so two
# runs starting within the same second would share one file.
# sleep 1 prevents this.
sleep 1
start_time="$(date +%s)"
start_time_h="$(date -d "@${start_time}")"
printf '%s (%s) ' "${start_time_h}" "${start_time}"
# The script is allowed to fail: its status is recorded, not fatal.
set +e
# CT_SCRIPT_ARGS is left unquoted on purpose so that it is split into
# separate arguments.
# shellcheck disable=SC2086
"${CT_SCRIPT}" ${CT_SCRIPT_ARGS} &>>"${CT_CMD_OUT_DIR}/${start_time}.log"
RC=$?
set -e
end_time="$(date +%s)"
duration=$((end_time - start_time))
printf '%s ' "${duration}s"

if [ "$RC" -eq 0 ]; then
  echo "SUCCESS (0)"
else
  echo "FAILED (${RC})"
fi
echo "leaving loop $$" >&2
72 changes: 52 additions & 20 deletions roles/update/tasks/create_test_files.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,58 @@
# License for the specific language governing permissions and limitations
# under the License.

- name: Ensure update log directory exists.
ansible.builtin.file:
path: "{{ cifmw_update_artifacts_basedir }}"
state: directory
mode: "0755"
- name: Update testing related files
when: ( cifmw_update_ping_test | bool ) or ( cifmw_update_control_plane_check | bool )
block:
- name: Ensure update log directory exists.
ansible.builtin.file:
path: "{{ cifmw_update_artifacts_basedir }}"
state: directory
mode: "0755"
- name: Create workload launch script
ansible.builtin.template:
src: "workload_launch.sh.j2"
dest: "{{ cifmw_update_workload_launch_script }}"
mode: "0775"

- name: Create workload launch script
ansible.builtin.template:
src: "workload_launch.sh.j2"
dest: "{{ cifmw_update_workload_launch_script }}"
mode: "0775"
- name: Ping test related files
when: cifmw_update_ping_test | bool
block:
- name: Create start l3 agent connectivity check scripts
ansible.builtin.template:
src: "l3_agent_start_ping.sh.j2"
dest: "{{ cifmw_update_ping_start_script }}"
mode: "0775"

- name: Create start l3 agent connectivity check scripts
ansible.builtin.template:
src: "l3_agent_start_ping.sh.j2"
dest: "{{ cifmw_update_ping_start_script }}"
mode: "0775"
- name: Create stop l3 agent connectivity check scripts
ansible.builtin.template:
src: "l3_agent_stop_ping.sh.j2"
dest: "{{ cifmw_update_ping_stop_script }}"
mode: "0775"

- name: Create stop l3 agent connectivity check scripts
ansible.builtin.template:
src: "l3_agent_stop_ping.sh.j2"
dest: "{{ cifmw_update_ping_stop_script }}"
mode: "0775"
# Install the continuous test wrapper and its start/stop/workload
# scripts into the update artifacts directory.
- name: Control plane testing related files
  when: cifmw_update_control_plane_check | bool
  block:
    - name: Create control plane wrapper
      ansible.builtin.copy:
        src: "continuous-test.sh"
        dest: "{{ cifmw_update_artifacts_basedir }}/continuous-test.sh"
        mode: "0775"

    - name: Create control plane start script
      ansible.builtin.template:
        src: "control_plane_test_start.sh.j2"
        dest: "{{ cifmw_update_artifacts_basedir }}/control_plane_test_start.sh"
        mode: "0775"

    - name: Create control plane stop script
      ansible.builtin.template:
        src: "control_plane_test_stop.sh.j2"
        dest: "{{ cifmw_update_artifacts_basedir }}/control_plane_test_stop.sh"
        mode: "0775"

    - name: Create control plane workload launch wrapper
      ansible.builtin.template:
        src: "workload_launch_k8s.sh.j2"
        dest: "{{ cifmw_update_artifacts_basedir }}/workload_launch_k8s.sh"
        mode: "0775"
16 changes: 15 additions & 1 deletion roles/update/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

- name: Create the support files for test
ansible.builtin.include_tasks: create_test_files.yml
when: cifmw_update_ping_test | bool

- name: Trigger the ping test
when:
Expand All @@ -30,6 +29,14 @@
- name: Start ping test
ansible.builtin.include_tasks: l3_agent_connectivity_check_start.yml

# Runs the generated start script directly; no shell features are
# needed, so the command module is used and the path is passed as a
# single argv entry.
- name: Trigger the continuous control plane test
  when:
    - cifmw_update_control_plane_check | bool
    - not cifmw_update_run_dryrun | bool
  ansible.builtin.command:
    argv:
      - "{{ cifmw_update_artifacts_basedir }}/control_plane_test_start.sh"

- name: Set openstack_update_run Makefile environment variables
tags:
- always
Expand Down Expand Up @@ -60,3 +67,10 @@
when:
- cifmw_update_ping_test | bool
- not cifmw_update_run_dryrun | bool

# Runs the generated stop script directly; no shell features are
# needed, so the command module is used and the path is passed as a
# single argv entry.
- name: Stop the continuous control plane test
  when:
    - cifmw_update_control_plane_check | bool
    - not cifmw_update_run_dryrun | bool
  ansible.builtin.command:
    argv:
      - "{{ cifmw_update_artifacts_basedir }}/control_plane_test_stop.sh"
12 changes: 12 additions & 0 deletions roles/update/templates/control_plane_test_start.sh.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
#
# Script to test control plane by creating a vm in a loop during the
# update. Start sequence.
#
# Usage: control_plane_test_start.sh [BASE_DIR]
#   BASE_DIR  directory holding continuous-test.sh and
#             workload_launch_k8s.sh; defaults to the templated
#             artifacts directory.
set -eu
BASE_DIR="${1:-{{ cifmw_update_artifacts_basedir }}}"

continuous_test_wrapper="${BASE_DIR}/continuous-test.sh"

# Nothing is started when the wrapper is not present.
if [ -e "${continuous_test_wrapper}" ]; then
    # Quoted so that a BASE_DIR containing spaces still works.
    "${continuous_test_wrapper}" -o "${BASE_DIR}" -l control-plane-test -p "${BASE_DIR}/control-plane-test.pid" "${BASE_DIR}/workload_launch_k8s.sh"
fi
Loading

0 comments on commit c006610

Please sign in to comment.