Skip to content

Commit

Permalink
fix race condition
Browse files Browse the repository at this point in the history
  • Loading branch information
Thomas Applencourt committed Sep 3, 2024
1 parent ceaabfc commit 7f9707b
Showing 1 changed file with 20 additions and 18 deletions.
38 changes: 20 additions & 18 deletions integration_tests/light_iprof_only_sync.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,9 @@ RT_SIGNAL_GLOBAL_BARRIER=$((SIGRTMIN + 1))
RT_SIGNAL_LOCAL_BARRIER=$((SIGRTMIN + 2))
RT_SIGNAL_FINISH=$((SIGRTMIN + 3))

# Initialize a variable to track signal reception
SIGNAL_RECEIVED="false"
# Signal handler for capturing signals
handle_signal() {
echo "$PARENT_PID | Received signal $1 from mpi_daemon"
echo "$PARENT_PID $(date) | Received signal $1 from mpi_daemon"
if [ "$1" == "RT_SIGNAL_READY" ]; then
SIGNAL_RECEIVED="true"
fi
Expand All @@ -25,36 +23,40 @@ trap 'handle_signal RT_SIGNAL_READY' $RT_SIGNAL_READY

# Block until the signal handler has recorded RT_SIGNAL_READY.
# Globals:
#   SIGNAL_RECEIVED (read) - set to "true" by handle_signal on RT_SIGNAL_READY.
# The flag is deliberately NOT reset here. The caller must set it to "false"
# before spawning or signalling the daemon; resetting it at this point could
# erase a reply that was delivered before this function was entered, and the
# loop would then never terminate.
wait_for_signal() {
  while [[ "$SIGNAL_RECEIVED" == "false" ]]; do
    sleep 0.1 # Small sleep to prevent busy looping
  done
}

# Helpers that talk to the sync daemon. Signals are sent using the adjusted
# SIGRTMIN-based numbers that correspond to the MPI signal daemon defines.
#
# Start the sync daemon in the background and block until it reports ready.
# To avoid a race condition, SIGNAL_RECEIVED must be reset *before* the
# daemon is spawned or signalled, so a fast reply cannot be overwritten.
# Globals:
#   THAPI_BIN_DIR, THAPI_SYNC_DAEMON (read) - locate the daemon executable.
#   SIGNAL_RECEIVED (written)               - reset to "false" before the spawn.
#   DAEMON_PID (written)                    - PID of the background daemon.
spawn_daemon_blocking() {
  local parent_pid=$$
  SIGNAL_RECEIVED="false"
  # Pass the PID value itself; the bare word `parent_pid` would hand the
  # daemon a literal string instead of this script's process ID.
  "${THAPI_BIN_DIR}/sync_daemon_${THAPI_SYNC_DAEMON}" "$parent_pid" &
  DAEMON_PID=$!
  wait_for_signal
}

# Send one signal to the sync daemon and block until wait_for_signal returns.
# Arguments:
#   $1 - signal number to deliver (passed to `kill` as -<number>).
# Globals:
#   DAEMON_PID (read)         - PID of the daemon to signal.
#   SIGNAL_RECEIVED (written) - reset to "false" before the signal is sent.
send_signal_blocking() {
  # Reset the flag first and signal exactly once: a reply that arrives
  # immediately after `kill` must not be overwritten by a later reset.
  SIGNAL_RECEIVED="false"
  kill -"$1" "$DAEMON_PID"
  wait_for_signal
}

# Get the PID of this script
PARENT_PID=$$
# Start sync daemon in the background
# NOTE(review): the daemon is launched directly here and again through
# spawn_daemon_blocking a few lines below, and every log line in this section
# appears in two variants (with and without $(date)). This looks like the
# pre-change and post-change lines of a diff shown together - confirm which
# set should remain before running this as a script.
${THAPI_BIN_DIR}/sync_daemon_${THAPI_SYNC_DAEMON} $PARENT_PID &
DAEMON_PID=$!
echo "$PARENT_PID | Wait for daemon to be ready"
wait_for_signal
echo "$PARENT_PID | Send Local Barrier signal"
echo "$PARENT_PID $(date) | Spawn Daemon"
spawn_daemon_blocking
echo "$PARENT_PID $(date) | Send Local Barrier signal"
# Local barrier before the test program starts; returns once wait_for_signal does.
send_signal_blocking $RT_SIGNAL_LOCAL_BARRIER
# Run test program (the command line passed to this script)
"$@"

# Final synchronization after mpi_hello_world execution
# Order: local barrier, then global barrier, then the finish signal; each call
# blocks in wait_for_signal before the next one is sent.
echo "$PARENT_PID | Send Local Barrier signal"
echo "$PARENT_PID $(date) | Send Local Barrier signal"
send_signal_blocking $RT_SIGNAL_LOCAL_BARRIER
echo "$PARENT_PID | Send Global Barrier signal"
echo "$PARENT_PID $(date) | Send Global Barrier signal"
send_signal_blocking $RT_SIGNAL_GLOBAL_BARRIER
echo "$PARENT_PID | Send Termination signal"
echo "$PARENT_PID $(date) | Send Termination signal"
send_signal_blocking $RT_SIGNAL_FINISH
echo "$PARENT_PID | Wait for daemon to quit"
echo "$PARENT_PID $(date) | Wait for daemon to quit"
# Reap the background daemon (the most recently assigned DAEMON_PID).
wait $DAEMON_PID

0 comments on commit 7f9707b

Please sign in to comment.