From 7f9707b67c0a460bc56aa25b436681fbbdc2d004 Mon Sep 17 00:00:00 2001 From: Thomas Applencourt Date: Tue, 3 Sep 2024 16:22:23 +0000 Subject: [PATCH 1/3] fix race condition --- integration_tests/light_iprof_only_sync.sh | 38 ++++++++++++---------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/integration_tests/light_iprof_only_sync.sh b/integration_tests/light_iprof_only_sync.sh index 5c7a4623..908c7ace 100755 --- a/integration_tests/light_iprof_only_sync.sh +++ b/integration_tests/light_iprof_only_sync.sh @@ -10,11 +10,9 @@ RT_SIGNAL_GLOBAL_BARRIER=$((SIGRTMIN + 1)) RT_SIGNAL_LOCAL_BARRIER=$((SIGRTMIN + 2)) RT_SIGNAL_FINISH=$((SIGRTMIN + 3)) -# Initialize a variable to track signal reception -SIGNAL_RECEIVED="false" # Signal handler for capturing signals handle_signal() { - echo "$PARENT_PID | Received signal $1 from mpi_daemon" + echo "$PARENT_PID $(date) | Received signal $1 from mpi_daemon" if [ "$1" == "RT_SIGNAL_READY" ]; then SIGNAL_RECEIVED="true" fi @@ -25,36 +23,40 @@ trap 'handle_signal RT_SIGNAL_READY' $RT_SIGNAL_READY # Function to wait for RT_SIGNAL_READY wait_for_signal() { - SIGNAL_RECEIVED="false" while [[ "$SIGNAL_RECEIVED" == "false" ]]; do sleep 0.1 # Small sleep to prevent busy looping done } -# Function to send signals, using adjusted SIGRTMIN corresponding to MPI signal daemon defines +# To avoid race condition, `SIGNAL_RECEIVED` need to be set +# before spawning or signaling the daemon +spawn_daemon_blocking() { + local parent_pid=$$ + SIGNAL_RECEIVED="false" + "${THAPI_BIN_DIR}"/sync_daemon_"${THAPI_SYNC_DAEMON}" parent_pid & + DAEMON_PID=$! + wait_for_signal +} + send_signal_blocking() { - kill -$1 $DAEMON_PID + SIGNAL_RECEIVED="false" + kill -"$1" $DAEMON_PID wait_for_signal } -# Get the PID of this script -PARENT_PID=$$ -# Start sync daemon in the background -${THAPI_BIN_DIR}/sync_daemon_${THAPI_SYNC_DAEMON} $PARENT_PID & -DAEMON_PID=$! -echo "$PARENT_PID | Wait for daemon to be ready" -wait_for_signal -echo "$PARENT_PID | Send Local Barrier signal" +echo "$PARENT_PID $(date) | Spawn Daemon" +spawn_daemon_blocking +echo "$PARENT_PID $(date) | Send Local Barrier signal" send_signal_blocking $RT_SIGNAL_LOCAL_BARRIER # Run test program "$@" # Final synchronization after mpi_hello_world execution -echo "$PARENT_PID | Send Local Barrier signal" +echo "$PARENT_PID $(date) | Send Local Barrier signal" send_signal_blocking $RT_SIGNAL_LOCAL_BARRIER -echo "$PARENT_PID | Send Global Barrier signal" +echo "$PARENT_PID $(date) | Send Global Barrier signal" send_signal_blocking $RT_SIGNAL_GLOBAL_BARRIER -echo "$PARENT_PID | Send Termination signal" +echo "$PARENT_PID $(date) | Send Termination signal" send_signal_blocking $RT_SIGNAL_FINISH -echo "$PARENT_PID | Wait for daemon to quit" +echo "$PARENT_PID $(date) | Wait for daemon to quit" wait $DAEMON_PID From a41f3164b00a3a100c3f732f6484a835c62cbb3a Mon Sep 17 00:00:00 2001 From: Thomas Applencourt Date: Tue, 3 Sep 2024 16:31:15 +0000 Subject: [PATCH 2/3] PARENT_PID --- integration_tests/light_iprof_only_sync.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/integration_tests/light_iprof_only_sync.sh b/integration_tests/light_iprof_only_sync.sh index 908c7ace..20683ee2 100755 --- a/integration_tests/light_iprof_only_sync.sh +++ b/integration_tests/light_iprof_only_sync.sh @@ -1,5 +1,7 @@ #!/bin/bash set -euo pipefail +# For loging and Daemon to send signal to us +PARENT_PID=$$ # Get base real-time signal number SIGRTMIN=$(kill -l SIGRTMIN) @@ -31,9 +33,8 @@ wait_for_signal() { # To avoid race condition, `SIGNAL_RECEIVED` need to be set # before spawning or signaling the daemon spawn_daemon_blocking() { - local parent_pid=$$ SIGNAL_RECEIVED="false" - "${THAPI_BIN_DIR}"/sync_daemon_"${THAPI_SYNC_DAEMON}" parent_pid & + "${THAPI_BIN_DIR}"/sync_daemon_"${THAPI_SYNC_DAEMON}" PARENT_PID & DAEMON_PID=$! wait_for_signal } From 3821b3a5d808c0f569a62de78eeda8c1e241ec17 Mon Sep 17 00:00:00 2001 From: Thomas Applencourt Date: Tue, 3 Sep 2024 18:58:22 +0000 Subject: [PATCH 3/3] fix PARENT_PID --- integration_tests/light_iprof_only_sync.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/integration_tests/light_iprof_only_sync.sh b/integration_tests/light_iprof_only_sync.sh index 20683ee2..2e5bd8d1 100755 --- a/integration_tests/light_iprof_only_sync.sh +++ b/integration_tests/light_iprof_only_sync.sh @@ -1,5 +1,6 @@ #!/bin/bash set -euo pipefail + # For loging and Daemon to send signal to us PARENT_PID=$$ @@ -14,7 +15,7 @@ RT_SIGNAL_FINISH=$((SIGRTMIN + 3)) # Signal handler for capturing signals handle_signal() { - echo "$PARENT_PID $(date) | Received signal $1 from mpi_daemon" + echo "$PARENT_PID $(date) | Received signal $1 from sync_daemon" if [ "$1" == "RT_SIGNAL_READY" ]; then SIGNAL_RECEIVED="true" fi @@ -34,7 +35,7 @@ wait_for_signal() { # before spawning or signaling the daemon spawn_daemon_blocking() { SIGNAL_RECEIVED="false" - "${THAPI_BIN_DIR}"/sync_daemon_"${THAPI_SYNC_DAEMON}" PARENT_PID & + "${THAPI_BIN_DIR}"/sync_daemon_"${THAPI_SYNC_DAEMON}" $PARENT_PID & DAEMON_PID=$! wait_for_signal }