Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Apibreak #201

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions doc/rst/users/quick.rst
Original file line number Diff line number Diff line change
Expand Up @@ -200,14 +200,28 @@ other programs in the examples directory.
SCR_Route_file(file, scr_file);

/* Use the new file name to perform your checkpoint I/O */
int valid = 1;
FILE* fs = fopen(scr_file, "w");
if (fs != NULL) {
fwrite(state, ..., fs);
fclose(fs);
size_t nwritten = fwrite(state, size, count, fs);
if (nwritten < count) {
/* write failed, tell SCR this process failed */
valid = 0;
}

int close_rc = fclose(fs);
if (close_rc != 0) {
/* failed to close file, tell SCR this process failed */
valid = 0;
}
} else {
/* failed to open file, tell SCR this process failed */
valid = 0;
}

/* Tell SCR that you are done with your checkpoint phase */
SCR_Complete_output(1);
int allvalid;
SCR_Complete_output(valid, &allvalid);

return;
}
Expand Down
13 changes: 5 additions & 8 deletions examples/test_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ double getbw(char* name, char* buf, size_t size, int times)
/* using scr, start our output */
scr_retval = SCR_Start_output(label, flags);
if (scr_retval != SCR_SUCCESS) {
printf("%d: failed calling SCR_Start_checkpoint(): %d: @%s:%d\n",
printf("%d: failed calling SCR_Start_output(): %d: @%s:%d\n",
rank, scr_retval, __FILE__, __LINE__
);
}
Expand Down Expand Up @@ -301,7 +301,8 @@ double getbw(char* name, char* buf, size_t size, int times)

/* mark this checkpoint as complete */
if (use_scr) {
scr_retval = SCR_Complete_output(valid);
int allvalid;
scr_retval = SCR_Complete_output(valid, &allvalid);
if (scr_retval != SCR_SUCCESS) {
printf("%d: failed calling SCR_Complete_output: %d: @%s:%d\n",
rank, scr_retval, __FILE__, __LINE__
Expand Down Expand Up @@ -510,12 +511,8 @@ int main (int argc, char* argv[])
}

/* indicate to the library that we're done with restart, and tell it whether we read our data ok */
scr_retval = SCR_Complete_restart(found_checkpoint);
if (scr_retval == SCR_SUCCESS) {
/* all procs succeeded in reading their checkpoint file,
* we've successfully restarted */
restarted = 1;
} else {
scr_retval = SCR_Complete_restart(found_checkpoint, &restarted);
if (scr_retval != SCR_SUCCESS) {
printf("%d: failed calling SCR_Complete_restart: %d: @%s:%d\n",
rank, scr_retval, __FILE__, __LINE__
);
Expand Down
9 changes: 5 additions & 4 deletions examples/test_api_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ double getbw(char* name, char* buf, size_t size, int times)
*/

/* instruct SCR we are starting the next checkpoint */
scr_retval = SCR_Start_checkpoint();
scr_retval = SCR_Start_output(NULL, SCR_FLAG_CHECKPOINT);
if (scr_retval != SCR_SUCCESS) {
printf("%d: failed calling SCR_Start_checkpoint(): %d: @%s:%d\n",
printf("%d: failed calling SCR_Start_output(): %d: @%s:%d\n",
rank, scr_retval, __FILE__, __LINE__
);
}
Expand Down Expand Up @@ -107,9 +107,10 @@ double getbw(char* name, char* buf, size_t size, int times)
*/

/* mark this checkpoint as complete */
scr_retval = SCR_Complete_checkpoint(valid);
int allvalid;
scr_retval = SCR_Complete_output(valid, &allvalid);
if (scr_retval != SCR_SUCCESS) {
printf("%d: failed calling SCR_Complete_checkpoint: %d: @%s:%d\n",
printf("%d: failed calling SCR_Complete_output: %d: @%s:%d\n",
rank, scr_retval, __FILE__, __LINE__
);
}
Expand Down
28 changes: 12 additions & 16 deletions examples/test_api_multiple.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,10 @@ int main (int argc, char* argv[])
}

// done reading our checkpoint
SCR_Complete_restart(valid);
int all_valid = 0;
SCR_Complete_restart(valid, &all_valid);

// check that everyone found their checkpoint files ok
int all_valid = 0;
MPI_Allreduce(&valid, &all_valid, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD);
if (!all_valid && rank == 0) {
printf("At least one rank (perhaps all) did not find its checkpoint\n");
}
Expand Down Expand Up @@ -183,7 +182,7 @@ int main (int argc, char* argv[])
int t;
for(t=0; t < 1; t++) {
int rc;
int all_valid = 1;
int valid = 1;

// define a name for this checkpoint
sprintf(ckptname, "timestep.%d", timestep);
Expand All @@ -198,8 +197,6 @@ int main (int argc, char* argv[])

// write out each of our checkpoint files
for (i=0; i < num_files; i++) {
int valid = 0;

// define path to checkpoint file
char origpath[1024];
sprintf(origpath, "%s/%s", ckptname, files[i]);
Expand All @@ -216,8 +213,6 @@ int main (int argc, char* argv[])
// open file and write checkpoint
int fd_me = open(file, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
if (fd_me > 0) {
valid = 1;

// write the checkpoint
rc = write_checkpoint(fd_me, timestep, bufs[i], filesizes[i]);
if (rc < 0) { valid = 0; }
Expand All @@ -228,12 +223,14 @@ int main (int argc, char* argv[])
// make sure the file closes without error
rc = close(fd_me);
if (rc < 0) { valid = 0; }
} else {
valid = 0;
}
if (!valid) { all_valid = 0; }
}

// complete the checkpoint
scr_retval = SCR_Complete_output(all_valid);
int allvalid;
scr_retval = SCR_Complete_output(valid, &allvalid);
if (scr_retval != SCR_SUCCESS) {
printf("%d: failed calling SCR_Complete_output(): %d: @%s:%d\n",
rank, scr_retval, __FILE__, __LINE__
Expand All @@ -250,7 +247,7 @@ int main (int argc, char* argv[])
double time_start = MPI_Wtime();
for(t=0; t < times; t++) {
int rc;
int all_valid = 1;
int valid = 1;

// define a name for this checkpoint
sprintf(ckptname, "timestep.%d", timestep);
Expand All @@ -265,8 +262,6 @@ int main (int argc, char* argv[])

// write out each of our checkpoint files
for (i=0; i < num_files; i++) {
int valid = 0;

// define path to checkpoint file
char origpath[1024];
sprintf(origpath, "%s/%s", ckptname, files[i]);
Expand All @@ -284,7 +279,6 @@ int main (int argc, char* argv[])
int fd_me = open(file, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
if (fd_me > 0) {
count++;
valid = 1;

// write the checkpoint
rc = write_checkpoint(fd_me, timestep, bufs[i], filesizes[i]);
Expand All @@ -296,12 +290,14 @@ int main (int argc, char* argv[])
// make sure the file closes without error
rc = close(fd_me);
if (rc < 0) { valid = 0; }
} else {
valid = 0;
}
if (!valid) { all_valid = 0; }
}

// complete the checkpoint
scr_retval = SCR_Complete_output(all_valid);
int allvalid;
scr_retval = SCR_Complete_output(valid, &allvalid);
if (scr_retval != SCR_SUCCESS) {
printf("%d: failed calling SCR_Complete_output(): %d: @%s:%d\n",
rank, scr_retval, __FILE__, __LINE__
Expand Down
33 changes: 16 additions & 17 deletions examples/test_api_multiple_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -170,15 +170,14 @@ int main (int argc, char* argv[])
int t;
for(t=0; t < 1; t++) {
int rc;
int all_valid = 1;
scr_retval = SCR_Start_checkpoint();
int valid = 1;
scr_retval = SCR_Start_output(NULL, SCR_FLAG_CHECKPOINT);
if (scr_retval != SCR_SUCCESS) {
printf("%d: failed calling SCR_Start_checkpoint(): %d: @%s:%d\n",
printf("%d: failed calling SCR_Start_output(): %d: @%s:%d\n",
rank, scr_retval, __FILE__, __LINE__
);
}
for (i=0; i < num_files; i++) {
int valid = 0;
char file[2094];
scr_retval = SCR_Route_file(files[i], file);
if (scr_retval != SCR_SUCCESS) {
Expand All @@ -188,8 +187,6 @@ int main (int argc, char* argv[])
}
int fd_me = open(file, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
if (fd_me > 0) {
valid = 1;

// write the checkpoint
rc = write_checkpoint(fd_me, timestep, bufs[i], filesizes[i]);
if (rc < 0) { valid = 0; }
Expand All @@ -200,12 +197,14 @@ int main (int argc, char* argv[])
// make sure the file closes without error
rc = close(fd_me);
if (rc < 0) { valid = 0; }
} else {
valid = 0;
}
if (!valid) { all_valid = 0; }
}
scr_retval = SCR_Complete_checkpoint(all_valid);
int allvalid;
scr_retval = SCR_Complete_output(valid, &allvalid);
if (scr_retval != SCR_SUCCESS) {
printf("%d: failed calling SCR_Complete_checkpoint(): %d: @%s:%d\n",
printf("%d: failed calling SCR_Complete_output(): %d: @%s:%d\n",
rank, scr_retval, __FILE__, __LINE__
);
}
Expand All @@ -220,15 +219,14 @@ int main (int argc, char* argv[])
double time_start = MPI_Wtime();
for(t=0; t < times; t++) {
int rc;
int all_valid = 1;
scr_retval = SCR_Start_checkpoint();
int valid = 1;
scr_retval = SCR_Start_output(NULL, SCR_FLAG_CHECKPOINT);
if (scr_retval != SCR_SUCCESS) {
printf("%d: failed calling SCR_Start_checkpoint(): %d: @%s:%d\n",
printf("%d: failed calling SCR_Start_output(): %d: @%s:%d\n",
rank, scr_retval, __FILE__, __LINE__
);
}
for (i=0; i < num_files; i++) {
int valid = 0;
char file[2094];
scr_retval = SCR_Route_file(files[i], file);
if (scr_retval != SCR_SUCCESS) {
Expand All @@ -239,7 +237,6 @@ int main (int argc, char* argv[])
int fd_me = open(file, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
if (fd_me > 0) {
count++;
valid = 1;

// write the checkpoint
rc = write_checkpoint(fd_me, timestep, bufs[i], filesizes[i]);
Expand All @@ -251,12 +248,14 @@ int main (int argc, char* argv[])
// make sure the file closes without error
rc = close(fd_me);
if (rc < 0) { valid = 0; }
} else {
valid = 0;
}
if (!valid) { all_valid = 0; }
}
scr_retval = SCR_Complete_checkpoint(all_valid);
int allvalid;
scr_retval = SCR_Complete_output(valid, &allvalid);
if (scr_retval != SCR_SUCCESS) {
printf("%d: failed calling SCR_Complete_checkpoint(): %d: @%s:%d\n",
printf("%d: failed calling SCR_Complete_output(): %d: @%s:%d\n",
rank, scr_retval, __FILE__, __LINE__
);
}
Expand Down
24 changes: 2 additions & 22 deletions examples/test_ckpt.F
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ program test_ckpt_F
read(readunit,iostat=ios) R1
close(readunit)

call SCR_COMPLETE_RESTART(valid, ierr)
call SCR_COMPLETE_RESTART(valid, flag, ierr)
endif

if (mynod == 0) then
Expand All @@ -72,26 +72,6 @@ program test_ckpt_F
forall(i=1:ni,j=1:nj,k=1:nk) W1(i,j,k) =
+ nodeoff*mynod+i+ni*(j-1+nj*(k-1))

! test checkpoint interface
call SCR_START_CHECKPOINT(ierr)

write(file_suffix, '(i5.5)') loop
ckptname = "ckpt_" // trim(file_suffix)

writeunit = mynod
write(file_suffix, '(i5.5)') writeunit
fname = trim(ckptname) // "/" //
+ trim(basefname) // trim(file_suffix) // ".ckpt"
call SCR_ROUTE_FILE(fname, fname_scr, ierr)

valid = 1
open(unit=writeunit,file=fname_scr,form='unformatted',
+ action='write')
write(writeunit,iostat=ios) W1
close(writeunit)

call SCR_COMPLETE_CHECKPOINT(valid, ierr)

! test output interface
write(file_suffix, '(i5.5)') loop
ckptname = "output_" // trim(file_suffix)
Expand All @@ -110,7 +90,7 @@ program test_ckpt_F
write(writeunit,iostat=ios) W1
close(writeunit)

call SCR_COMPLETE_OUTPUT(valid, ierr)
call SCR_COMPLETE_OUTPUT(valid, flag, ierr)

call MPI_BARRIER(MPI_COMM_WORLD, ierr)

Expand Down
5 changes: 3 additions & 2 deletions examples/test_ckpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ int checkpoint(int size_mb)
MPI_Comm_rank(MPI_COMM_WORLD, &rank);

/* Inform SCR that we are starting a new checkpoint */
SCR_Start_checkpoint();
SCR_Start_output(NULL, SCR_FLAG_CHECKPOINT);

/* Build the filename for our checkpoint file */
sprintf(tmp, "rank_%d", rank);
Expand All @@ -50,7 +50,8 @@ int checkpoint(int size_mb)
cout << "Out: " << file << "\n";

/* Tell SCR whether this process wrote its checkpoint files successfully */
SCR_Complete_checkpoint(1);
int allvalid;
SCR_Complete_output(1, &allvalid);

return 0;
}
Expand Down
Loading