Skip to content

Commit

Permalink
simplify barrier
Browse files: browse the repository at this point in the history
  • Loading branch information
TApplencourt committed Aug 21, 2024
1 parent 9e07869 commit 8b8cd50
Showing 1 changed file with 7 additions and 35 deletions.
42 changes: 7 additions & 35 deletions xprof/sync_daemon_mpi.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -78,62 +78,34 @@ int MPIX_Init_Session(MPI_Session *lib_shandle, MPI_Comm *lib_comm) {
}

int signal_loop(int parent_pid, MPI_Comm MPI_COMM_WORLD_THAPI, MPI_Comm MPI_COMM_NODE) {
// Required MPI info
int global_rank;
MPI_Comm_rank(MPI_COMM_WORLD_THAPI, &global_rank);
int global_size;
MPI_Comm_size(MPI_COMM_WORLD_THAPI, &global_size);
int local_rank;
MPI_Comm_rank(MPI_COMM_NODE, &local_rank);
int local_size;
MPI_Comm_size(MPI_COMM_NODE, &local_size);

// Initialize signal set and add signals
sigset_t signal_set;
sigemptyset(&signal_set);
sigaddset(&signal_set, RT_SIGNAL_GLOBAL_BARRIER);
sigaddset(&signal_set, RT_SIGNAL_LOCAL_BARRIER);
sigaddset(&signal_set, RT_SIGNAL_FINISH);

sigprocmask(SIG_BLOCK, &signal_set, NULL);

// Send ready to parent
kill(parent_pid, RT_SIGNAL_READY);
// Main loop
// Non blocked signal will be handled as usual

// Processing loop:
// Should be only exited when receiving RT_SIGNAL_FINISH
while (true) {
int signum;
sigwait(&signal_set, &signum);
if (signum == RT_SIGNAL_FINISH) {
// Ready signal will be sent after cleaning
return 0;
} else if (signum == RT_SIGNAL_LOCAL_BARRIER) {
MPI_Barrier(MPI_COMM_NODE);
goto next_iteration;
} else if (signum == RT_SIGNAL_GLOBAL_BARRIER) {
// Local master who are not the global master, send a message
if (global_rank != 0 && local_rank == 0) {
MPI_Send(&local_size, 1, MPI_INT, 0, MPI_TAG_GLOBAL_BARRIER, MPI_COMM_WORLD_THAPI);
// Global master receive messages from local masters
} else if (global_rank == 0) {
// Global master may or may not be a local master
int sum_local_size_recv = 0;
if (local_rank == 0)
sum_local_size_recv = local_size;
while (sum_local_size_recv != global_size) {
int local_size_recv;
MPI_Recv(&local_size_recv, 1, MPI_INT, MPI_ANY_SOURCE, MPI_TAG_GLOBAL_BARRIER,
MPI_COMM_WORLD_THAPI, MPI_STATUS_IGNORE);
sum_local_size_recv += local_size_recv;
}
}
goto next_iteration;
MPI_Barrier(MPI_COMM_WORLD_THAPI);
} else {
fprintf(stderr, "Wrong signal rreseved %d. Exiting", signum);
return 1;
}
next_iteration:
kill(parent_pid, RT_SIGNAL_READY);
kill(parent_pid, RT_SIGNAL_READY);
}

// Unreachable
fprintf(stderr, "Wrong signal_loop exit");
return 1;
Expand Down

0 comments on commit 8b8cd50

Please sign in to comment.