diff --git a/iowrite.cpp b/iowrite.cpp index 583e6ab7a..f25e38534 100644 --- a/iowrite.cpp +++ b/iowrite.cpp @@ -1522,7 +1522,7 @@ bool writeRestart( const std::string& versionInfo, const std::string& configInfo, DataReducer& dataReducer, - const string& name, + string& name, const uint& fileIndex, const int& stripe) { @@ -1557,6 +1557,7 @@ bool writeRestart( fname.width(7); fname.fill('0'); fname << fileIndex << "." << currentDate << ".vlsv"; + name = fname.str(); phiprof::Timer openTimer {"open"}; //Open the file with vlsvWriter: @@ -1588,7 +1589,9 @@ bool writeRestart( MPI_Info_set(MPIinfo, factor, stripeChar); } - if( vlsvWriter.open( fname.str(), MPI_COMM_WORLD, masterProcessId, MPIinfo ) == false) return false; + if (vlsvWriter.open( fname.str(), MPI_COMM_WORLD, masterProcessId, MPIinfo ) == false) { + return false; + } if( MPIinfo != MPI_INFO_NULL ) { MPI_Info_free(&MPIinfo); diff --git a/iowrite.h b/iowrite.h index e0d23885c..dfe9b45e1 100644 --- a/iowrite.h +++ b/iowrite.h @@ -85,7 +85,7 @@ bool writeRestart( const std::string& versionInfo, const std::string& configInfo, DataReducer& dataReducer, - const std::string& name, + std::string& name, const uint& fileIndex, const int& stripe ); diff --git a/vlasiator.cpp b/vlasiator.cpp index d8175d691..e9bf8cfa0 100644 --- a/vlasiator.cpp +++ b/vlasiator.cpp @@ -590,7 +590,8 @@ int main(int argn,char* args[]) { writeGhosts ) == false ) { - cerr << "FAILED TO WRITE GRID AT " << __FILE__ << " " << __LINE__ << endl; + // TODO make this std::format when we get C++20 + abort_mpi(std::string(__FILE__) + ":" + std::to_string(__LINE__) + ": FAILED TO WRITE GRID", 1); } phiprof::stop("Initialization"); @@ -729,7 +730,8 @@ int main(int argn,char* args[]) { writeGhosts ) == false ) { - cerr << "FAILED TO WRITE GRID AT " << __FILE__ << " " << __LINE__ << endl; + // TODO make this std::format when we get C++20 + abort_mpi(std::string(__FILE__) + ":" + std::to_string(__LINE__) + ": FAILED TO WRITE GRID", 1); } P::systemWriteDistributionWriteStride.pop_back(); @@ -946,7 +948,8 @@ int main(int argn,char* args[]) { writeGhosts ) == false ) { - cerr << "FAILED TO WRITE GRID AT" << __FILE__ << " " << __LINE__ << endl; + // TODO make this std::format when we get C++20 + abort_mpi(std::string(__FILE__) + ":" + std::to_string(__LINE__) + ": FAILED TO WRITE GRID", 1); } P::systemWrites[i]++; // Special case for large timesteps @@ -1015,25 +1018,22 @@ int main(int argn,char* args[]) { if (myRank == MASTER_RANK) logFile << "(IO): Writing restart data to disk, tstep = " << P::tstep << " t = " << P::t << endl << writeVerbose; //Write the restart: - if( writeRestart(mpiGrid, - perBGrid, // TODO: Merge all the fsgrids passed here into one meta-object - EGrid, - EHallGrid, - EGradPeGrid, - momentsGrid, - dPerBGrid, - dMomentsGrid, - BgBGrid, - volGrid, - technicalGrid, - version, - config, - outputReducer,"restart",(uint)P::t,P::restartStripeFactor) == false ) { - logFile << "(IO): ERROR Failed to write restart!" << endl << writeVerbose; - cerr << "FAILED TO WRITE RESTART" << endl; - } + // TODO: Merge all the fsgrids passed here into one meta-object + std::string restartFilename {"restart"}; + bool restartSuccess {writeRestart(mpiGrid, perBGrid, EGrid, EHallGrid, EGradPeGrid, momentsGrid, dPerBGrid, dMomentsGrid, BgBGrid, volGrid, technicalGrid, version, config, outputReducer, restartFilename, (uint)P::t,P::restartStripeFactor)}; + MPI_Reduce(myRank == MASTER_RANK ? MPI_IN_PLACE : &restartSuccess, &restartSuccess, 1, MPI_CXX_BOOL, MPI_LAND, MASTER_RANK, MPI_COMM_WORLD); if (myRank == MASTER_RANK) { - logFile << "(IO): .... done!"<< endl << writeVerbose; + if(!restartSuccess) { + // If restart write fails, remove the malformed file and hope a human clears space soon + // Sanity check, this should be set before writeRestart returns + if (restartFilename != "restart") { + std::remove(restartFilename.c_str()); + } + logFile << "(IO): ERROR Failed to write restart!" << endl << writeVerbose; + cerr << "FAILED TO WRITE RESTART" << endl; + } else { + logFile << "(IO): .... done!"<< endl << writeVerbose; + } } timer.stop(); }