Skip to content

Commit

Permalink
more work on load checkpoint EOD
Browse files Browse the repository at this point in the history
  • Loading branch information
cnpetra committed Sep 3, 2024
1 parent a14a445 commit d497955
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 44 deletions.
133 changes: 103 additions & 30 deletions src/Optimization/hiopAlgFilterIPM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -987,8 +987,29 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run()

nlp->runStats.tmOptimizTotal.start();

startingProcedure(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d); //this also evaluates the nlp
_mu=mu0;
//
// starting point:
// - user provided (with slack adjustments and lsq eq. duals initialization)
// or
// - loaded checkpoint
//
if(nlp->options->GetString("checkpoint_load_on_start") != "yes") {
//this also evaluates the nlp
startingProcedure(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d);
_mu=mu0;
} else {
//
//checkpoint load
//
//load from file: will populate it_curr, _Hess_lagr, and algorithmic parameters
load_state_from_file(nlp->options->GetString("checkpoint_file"));
//additionally: need to evaluate the nlp
if(!this->evalNlp_noHess(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d)) {
nlp->log->printf(hovError, "Failure in evaluating user NLP functions at loaded checkpoint.");
return Error_In_User_Function;
}
solver_status_ = NlpSolve_SolveNotCalled;
}

//update log bar
logbar->updateWithNlpInfo(*it_curr, _mu, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d);
Expand Down Expand Up @@ -1503,13 +1524,8 @@ void hiopAlgFilterIPMQuasiNewton::outputIteration(int lsStatus, int lsNum, int u

#ifdef HIOP_USE_AXOM

//declaration required by C++14, not anymore by C++17 or after
constexpr char hiopAlgFilterIPMQuasiNewton::default_state_filename[];

void hiopAlgFilterIPMQuasiNewton::save_state_to_file(const ::std::string& path_in)
void hiopAlgFilterIPMQuasiNewton::save_state_to_file(const ::std::string& path)
{
auto path = path_in=="" ? default_state_filename : path_in;

sidre::DataStore* ds = new sidre::DataStore();

this->save_state_to_data_store(ds);
Expand All @@ -1522,41 +1538,90 @@ void hiopAlgFilterIPMQuasiNewton::save_state_to_file(const ::std::string& path_i
delete ds;
}

void hiopAlgFilterIPMQuasiNewton::load_state_from_file(const ::std::string& path_in)
/**
 * @brief Load the state of the algorithm from a checkpoint file.
 *
 * @param path the name of the file to load from
 *
 * @details
 * The file is read into a temporary sidre::DataStore by a sidre::IOManager built on the
 * communicator of the NLP; the data store is then handed to load_state_from_data_store().
 */
void hiopAlgFilterIPMQuasiNewton::load_state_from_file(const ::std::string& path)
{
  // Automatic storage instead of new/delete: the data store is released on every exit
  // path, including when the read or the load below throws.
  sidre::DataStore ds;

  sidre::IOManager reader(this->get_nlp()->get_comm());
  // NOTE(review): the last argument is kept as 'false' exactly as before; presumably it is
  // sidre's "preserve contents" flag -- confirm against the IOManager::read documentation.
  reader.read(ds.getRoot(), path, false);

  this->load_state_from_data_store(&ds);
}

void hiopAlgFilterIPMQuasiNewton::save_state_to_data_store(::axom::sidre::DataStore* ds)
/**
 * @brief Copy the local entries of a HiOp vector into a new view of a sidre group.
 *
 * @param name      name of the view created in nlp_group
 * @param vec       vector whose local (host) entries are copied
 * @param nlp_group group in which the view is created and allocated
 *
 * @details
 * The view is allocated with sidre::DOUBLE_ID and as many elements as the local size of
 * the vector. The stride reported by the view is honored when writing the entries.
 */
void hiopAlgFilterIPMQuasiNewton::
copy_vec_to_new_view(const ::std::string& name, const hiopVector* vec, sidre::Group* nlp_group)
{
  using IndType = sidre::IndexType;
  const IndType size = vec->get_local_size();
  sidre::View* dest = nlp_group->createViewAndAllocate(name, sidre::DOUBLE_ID, size);

  const auto stride(dest->getStride());
  double *const dest_ptr(dest->getArray());
  const double* arr = vec->local_data_host_const();
  if(1==stride) {
    // contiguous destination: bulk copy
    ::std::copy(arr, arr+size, dest_ptr);
  } else {
    // strided destination: entry i of the vector goes to position i*stride
    for(IndType i=0; i<size; ++i) {
      dest_ptr[i*stride] = arr[i];
    }
  }
}
/**
 * @brief Copy the content of the named sidre view into a HiOp vector.
 *
 * @param name      name of the view to read from nlp_group
 * @param vec       destination vector; its local (host) entries are overwritten
 * @param nlp_group group expected to contain the view
 *
 * @details
 * If the view is not found a warning is logged and vec is left untouched. If the number of
 * elements of the view differs from the local size of vec an error is logged and vec is
 * left untouched as well. The stride reported by the view is honored when reading.
 */
void hiopAlgFilterIPMQuasiNewton::
copy_vec_from_view(const ::std::string& name, hiopVector* vec, const sidre::Group* nlp_group)
{
  const sidre::View* view = nlp_group->getView(name);
  if(nullptr == view) {
    nlp->log->printf(hovWarning, "Could not find view '%s' in checkpointing file.\n", name.c_str());
    return;
  }

  using IndType = sidre::IndexType;
  const IndType size = vec->get_local_size();
  const IndType num_elems = view->getNumElements();
  if(num_elems != size) {
    // refuse to copy rather than read or write past the end of either buffer
    nlp->log->printf(hovError,
                     "View '%s' in checkpointing file has %lld elements but %lld were expected.\n",
                     name.c_str(),
                     static_cast<long long>(num_elems),
                     static_cast<long long>(size));
    return;
  }

  // NOTE(review): the array accessor is used through a non-const View, mirroring how the
  // save path obtains its pointer; the data itself is only read here. Confirm whether sidre
  // offers a const accessor that would make the const_cast unnecessary.
  sidre::View* view_nc = const_cast<sidre::View*>(view);
  double *const src_ptr(view_nc->getArray());
  const auto stride(view->getStride());

  double* arr = vec->local_data_host();
  // entry i of the vector is stored at position i*stride of the view (stride is 1 when contiguous)
  for(IndType i=0; i<size; ++i) {
    arr[i] = src_ptr[i*stride];
  }
}

void hiopAlgFilterIPMQuasiNewton::load_state_from_data_store(const sidre::DataStore* ds)
/**
 * @brief Save the current iterate to a sidre data store.
 *
 * @param ds data store that receives the state
 *
 * @details
 * Creates the group "hiop solver" under the root of ds and adds one view for each vector
 * of the current iterate (x, d, sxl, sxu, sdl, sdu, yc, zl, zu, vl, vu).
 */
void hiopAlgFilterIPMQuasiNewton::save_state_to_data_store(::axom::sidre::DataStore* ds)
{
  sidre::Group* nlp_group = ds->getRoot()->createGroup("hiop solver");

  //create views for each member that needs to be saved
  copy_vec_to_new_view("x", it_curr->get_x(), nlp_group);
  copy_vec_to_new_view("d", it_curr->get_d(), nlp_group);
  copy_vec_to_new_view("sxl", it_curr->get_sxl(), nlp_group);
  copy_vec_to_new_view("sxu", it_curr->get_sxu(), nlp_group);
  copy_vec_to_new_view("sdl", it_curr->get_sdl(), nlp_group);
  copy_vec_to_new_view("sdu", it_curr->get_sdu(), nlp_group);
  copy_vec_to_new_view("yc", it_curr->get_yc(), nlp_group);
  copy_vec_to_new_view("zl", it_curr->get_zl(), nlp_group);
  copy_vec_to_new_view("zu", it_curr->get_zu(), nlp_group);
  copy_vec_to_new_view("vl", it_curr->get_vl(), nlp_group);
  copy_vec_to_new_view("vu", it_curr->get_vu(), nlp_group);

  //TODO: quasi-Newton Hessian approximation is not saved yet

  //TODO: algorithmic parameters for this state (mu, iteration number) are not saved yet
}

/**
 * @brief Load the current iterate from a sidre data store.
 *
 * @param ds data store to read from
 *
 * @details
 * Looks up the group "hiop solver" under the root of ds and copies the views x, d, sxl,
 * sxu, sdl, sdu, yc, zl, zu, vl, vu into the corresponding vectors of the current iterate.
 * If the group is missing an error is logged and the iterate is left untouched.
 */
void hiopAlgFilterIPMQuasiNewton::load_state_from_data_store(const sidre::DataStore* ds)
{
  const sidre::Group* nlp_group = ds->getRoot()->getGroup("hiop solver");
  if(nullptr == nlp_group) {
    // without this guard the group pointer would be dereferenced by copy_vec_from_view
    nlp->log->printf(hovError, "Could not find group 'hiop solver' in checkpointing file.\n");
    return;
  }

  copy_vec_from_view("x", it_curr->get_x(), nlp_group);
  copy_vec_from_view("d", it_curr->get_d(), nlp_group);
  copy_vec_from_view("sxl", it_curr->get_sxl(), nlp_group);
  copy_vec_from_view("sxu", it_curr->get_sxu(), nlp_group);
  copy_vec_from_view("sdl", it_curr->get_sdl(), nlp_group);
  copy_vec_from_view("sdu", it_curr->get_sdu(), nlp_group);
  copy_vec_from_view("yc", it_curr->get_yc(), nlp_group);
  copy_vec_from_view("zl", it_curr->get_zl(), nlp_group);
  copy_vec_from_view("zu", it_curr->get_zu(), nlp_group);
  copy_vec_from_view("vl", it_curr->get_vl(), nlp_group);
  copy_vec_from_view("vu", it_curr->get_vu(), nlp_group);
}

void hiopAlgFilterIPMQuasiNewton::checkpointing_stuff()
Expand All @@ -1566,12 +1631,20 @@ void hiopAlgFilterIPMQuasiNewton::checkpointing_stuff()
}
int chk_every_N = nlp->options->GetInteger("checkpoint_save_every_N_iter");
//check iteration
::std::string path = nlp->options->GetString("checkpoint_file");

if(iter_num>0 && iter_num % chk_every_N==0) {
using ::std::string;
// replace "#" in checkpointing file with iteration number
string path = nlp->options->GetString("checkpoint_file");
auto pos = path.find("#");
if(string::npos != pos) {
auto s_it_num = ::std::to_string(iter_num);
path.replace(pos, 1, s_it_num);
}


// replace #

nlp->log->printf(hovSummary, "Saving checkpoint at iter %d in '%s'.\n", iter_num, path.c_str());
//actual checkpointing via axom::sidre
save_state_to_file(path);
}
}
#endif // HIOP_USE_AXOM

Expand Down
31 changes: 18 additions & 13 deletions src/Optimization/hiopAlgFilterIPM.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,38 +352,43 @@ class hiopAlgFilterIPMQuasiNewton : public hiopAlgFilterIPMBase
virtual void save_state_to_data_store(::axom::sidre::DataStore* data_store);
virtual void load_state_from_data_store(const ::axom::sidre::DataStore* data_store);

static constexpr char default_state_filename[] = "hiop_qn_state.sidre";

/**
* @brief save the state of the algorithm to the file
* @brief Save the state of the algorithm to the file
* @param path the name of the file
*
* @details
* Internally, HiOp uses axom::sidre::DataStore, which is saved to the file. If argument is the
* empty string, HiOp will attempt saving the state to the path specified by default_state_filename
* static member.
* Internally, HiOp uses axom::sidre::DataStore and sidre's scalable IO.
*/
void save_state_to_file(const ::std::string& path="");
void save_state_to_file(const ::std::string& path);

/**
* @brief load the state of the algorithm from file
* @param path the name of the file to load from
*
* @details
* The file should contains a axom::sidre::DataStore that was previously saved using save_state_to_file().
* If argument is the empty string, HiOp will attempt loading state from the path specified by
* default_state_filename static member.
*
* The file should contain an axom::sidre::DataStore that was previously saved using
* save_state_to_file().
*/
void load_state_from_file(const ::std::string& path="");
void load_state_from_file(const ::std::string& path);
#endif // HIOP_USE_AXOM
private:
virtual void outputIteration(int lsStatus, int lsNum, int use_soc = 0, int use_fr = 0);

#ifdef HIOP_USE_AXOM
///@brief The options-based logic for saving checkpoint and the call to save_state_to_file().
void checkpointing_stuff();
#endif

/**
* @brief Copy HiOp vector to a (new) axom::sidre::View.
*
* @details A new view is created/allocated within the sidre group. Pointer is managed by the DataStore.
*/
void copy_vec_to_new_view(const ::std::string& name, const hiopVector* vec, ::axom::sidre::Group* nlp_group);

/// Copy content of the named sidre view into HiOp Vector.
void copy_vec_from_view(const ::std::string& name, hiopVector* vec, const axom::sidre::Group* nlp_group);
#endif // HIOP_USE_AXOM

private:
hiopNlpDenseConstraints* nlpdc;
private:
Expand Down
2 changes: 1 addition & 1 deletion src/Utils/hiopOptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1301,7 +1301,7 @@ void hiopOptionsNLP::register_options()
register_int_option("checkpoint_save_every_N_iter", 10, 1, 1e+6, msgcsN);

constexpr char msgcf[] = "Path to checkpoint file. If present character '#' will be replaced "
"with iteration number.";
"with the iteration number.";
register_str_option("checkpoint_file", "hiop_state_#.chk", msgcf);

constexpr char msgclos[] = "On (re)start the NLP solver will load checkpoint file "
Expand Down

0 comments on commit d497955

Please sign in to comment.