From 7f9707b67c0a460bc56aa25b436681fbbdc2d004 Mon Sep 17 00:00:00 2001 From: Thomas Applencourt Date: Tue, 3 Sep 2024 16:22:23 +0000 Subject: [PATCH] fix race condition --- integration_tests/light_iprof_only_sync.sh | 38 ++++++++++++---------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/integration_tests/light_iprof_only_sync.sh b/integration_tests/light_iprof_only_sync.sh index 5c7a4623..908c7ace 100755 --- a/integration_tests/light_iprof_only_sync.sh +++ b/integration_tests/light_iprof_only_sync.sh @@ -10,11 +10,9 @@ RT_SIGNAL_GLOBAL_BARRIER=$((SIGRTMIN + 1)) RT_SIGNAL_LOCAL_BARRIER=$((SIGRTMIN + 2)) RT_SIGNAL_FINISH=$((SIGRTMIN + 3)) -# Initialize a variable to track signal reception -SIGNAL_RECEIVED="false" # Signal handler for capturing signals handle_signal() { - echo "$PARENT_PID | Received signal $1 from mpi_daemon" + echo "$PARENT_PID $(date) | Received signal $1 from mpi_daemon" if [ "$1" == "RT_SIGNAL_READY" ]; then SIGNAL_RECEIVED="true" fi @@ -25,36 +23,40 @@ trap 'handle_signal RT_SIGNAL_READY' $RT_SIGNAL_READY # Function to wait for RT_SIGNAL_READY wait_for_signal() { - SIGNAL_RECEIVED="false" while [[ "$SIGNAL_RECEIVED" == "false" ]]; do sleep 0.1 # Small sleep to prevent busy looping done } -# Function to send signals, using adjusted SIGRTMIN corresponding to MPI signal daemon defines +# To avoid a race condition, `SIGNAL_RECEIVED` needs to be set +# before spawning or signaling the daemon +spawn_daemon_blocking() { + local parent_pid=$$ + SIGNAL_RECEIVED="false" + "${THAPI_BIN_DIR}"/sync_daemon_"${THAPI_SYNC_DAEMON}" "$parent_pid" & + DAEMON_PID=$! + wait_for_signal +} + send_signal_blocking() { - kill -$1 $DAEMON_PID + SIGNAL_RECEIVED="false" + kill -"$1" $DAEMON_PID wait_for_signal } -# Get the PID of this script -PARENT_PID=$$ -# Start sync daemon in the background -${THAPI_BIN_DIR}/sync_daemon_${THAPI_SYNC_DAEMON} $PARENT_PID & -DAEMON_PID=$! 
-echo "$PARENT_PID | Wait for daemon to be ready" -wait_for_signal -echo "$PARENT_PID | Send Local Barrier signal" +echo "$PARENT_PID $(date) | Spawn Daemon" +spawn_daemon_blocking +echo "$PARENT_PID $(date) | Send Local Barrier signal" send_signal_blocking $RT_SIGNAL_LOCAL_BARRIER # Run test program "$@" # Final synchronization after mpi_hello_world execution -echo "$PARENT_PID | Send Local Barrier signal" +echo "$PARENT_PID $(date) | Send Local Barrier signal" send_signal_blocking $RT_SIGNAL_LOCAL_BARRIER -echo "$PARENT_PID | Send Global Barrier signal" +echo "$PARENT_PID $(date) | Send Global Barrier signal" send_signal_blocking $RT_SIGNAL_GLOBAL_BARRIER -echo "$PARENT_PID | Send Termination signal" +echo "$PARENT_PID $(date) | Send Termination signal" send_signal_blocking $RT_SIGNAL_FINISH -echo "$PARENT_PID | Wait for daemon to quit" +echo "$PARENT_PID $(date) | Wait for daemon to quit" wait $DAEMON_PID