From 3dc19d6fdf4085a8d63ef0d60f2be90d1e41d947 Mon Sep 17 00:00:00 2001 From: Andreas Singraber Date: Sun, 6 Sep 2020 00:42:01 +0200 Subject: [PATCH 1/9] Add keyword to select decoupling type. --- src/libnnp/Settings.cpp | 1 + src/libnnptrain/Training.cpp | 42 ++++++++++++++++++++++++++++++++++++ src/libnnptrain/Training.h | 17 +++++++++++++++ 3 files changed, 60 insertions(+) diff --git a/src/libnnp/Settings.cpp b/src/libnnp/Settings.cpp index 68c0638d8..7f6a2c2ca 100644 --- a/src/libnnp/Settings.cpp +++ b/src/libnnp/Settings.cpp @@ -73,6 +73,7 @@ map const createKnownKeywordsMap() m["jacobian_mode" ] = ""; m["update_strategy" ] = ""; m["selection_mode" ] = ""; + m["decoupling_type" ] = ""; m["task_batch_size_energy" ] = ""; m["task_batch_size_force" ] = ""; m["gradient_type" ] = ""; diff --git a/src/libnnptrain/Training.cpp b/src/libnnptrain/Training.cpp index 5cd08a80e..94a5acc92 100644 --- a/src/libnnptrain/Training.cpp +++ b/src/libnnptrain/Training.cpp @@ -40,6 +40,7 @@ Training::Training() : Dataset(), jacobianMode (JM_SUM ), updateStrategy (US_COMBINED ), selectionMode (SM_RANDOM ), + decouplingType (DT_GLOBAL ), hasUpdaters (false ), hasStructures (false ), useForces (false ), @@ -991,6 +992,47 @@ void Training::setupTraining() { kalmanType = (KalmanFilter::KalmanType) atoi(settings["kalman_type"].c_str()); + decouplingType = (DecouplingType) + atoi(settings["decoupling_type"].c_str()); + if (decouplingType == DT_GLOBAL) + { + log << strpr("No decoupling (GEKF) selected: DT_GLOBAL (%d).\n", + decouplingType); + } + else if (decouplingType == DT_ELEMENT) + { + log << strpr("Per-element decoupling (ED-GEKF) selected: " + "DT_ELEMENT (%d).\n", decouplingType); + } + else if (decouplingType == DT_LAYER) + { + log << strpr("Per-layer decoupling selected: " + "DT_LAYER (%d).\n", decouplingType); + } + else if (decouplingType == DT_NODE) + { + log << strpr("Per-node decoupling (NDEKF) selected: " + "DT_NODE (%d).\n", decouplingType); + } + else if (decouplingType == DT_FULL) + { + log << strpr("Full (per-weight) decoupling selected: " + "DT_FULL (%d).\n", decouplingType); + } + else + { + throw runtime_error("ERROR: Unknown Kalman filter decoupling " + "type.\n"); + } + if (decouplingType != DT_GLOBAL && + (parallelMode != PM_TRAIN_ALL || updateStrategy != US_COMBINED)) + { + throw runtime_error(strpr("ERROR: Kalman filter decoupling works " + "only in conjunction with" + "\"parallel_mode %d\" and " + "\"update_strategy %d\".\n", + PM_TRAIN_ALL, US_COMBINED)); + } } for (size_t i = 0; i < numUpdaters; ++i) diff --git a/src/libnnptrain/Training.h b/src/libnnptrain/Training.h index 1ae678255..93022ebc8 100644 --- a/src/libnnptrain/Training.h +++ b/src/libnnptrain/Training.h @@ -110,6 +110,21 @@ class Training : public Dataset SM_THRESHOLD }; + /// Kalman filter decoupling type (requires UT_KF and US_COMBINED). + enum DecouplingType + { + /// No decoupling, global extended Kalman filter (GEKF). + DT_GLOBAL, + /// Per-element decoupling, ED-GEKF (doi.org/10.1021/acs.jctc.5b00211). + DT_ELEMENT, + /// Layer decoupling. + DT_LAYER, + /// Node decoupling, NDEKF. + DT_NODE, + /// Full decoupling, FDEKF. + DT_FULL + }; + /// Constructor. Training(); /// Destructor, updater vector needs to be cleaned. @@ -309,6 +324,8 @@ class Training : public Dataset UpdateStrategy updateStrategy; /// Selection mode for update candidates. SelectionMode selectionMode; + /// Kalman filter decoupling type if KF training selected. + DecouplingType decouplingType; /// If this rank performs weight updates. bool hasUpdaters; /// If this rank holds structure information. From dce6dd9130c7f9bb74e489945e03e62e6f5c1fad Mon Sep 17 00:00:00 2001 From: Andreas Singraber Date: Sun, 6 Sep 2020 00:55:31 +0200 Subject: [PATCH 2/9] Fix examples with new keyword. --- examples/input.nn.recommended | 1 + examples/nnp-train/Cu2S_PBE/input.nn | 1 + examples/nnp-train/H2O_RPBE-D3/input.nn | 1 + examples/nnp-train/LJ/input.nn | 1 + 4 files changed, 4 insertions(+) diff --git a/examples/input.nn.recommended b/examples/input.nn.recommended index e53934e0f..a0030bf52 100644 --- a/examples/input.nn.recommended +++ b/examples/input.nn.recommended @@ -35,6 +35,7 @@ parallel_mode 0 # Training parallelization used ( jacobian_mode 1 # Jacobian computation mode (0 = Summation to single gradient, 1 = Per-task summed gradient, 2 = Full Jacobian). update_strategy 0 # Update strategy (0 = Combined, 1 = Per-element). selection_mode 2 # Update candidate selection mode (0 = Random, 1 = Sort, 2 = Threshold). +decoupling_type 0 # Kalman filter decoupling type (0 = GEKF, 1 = ED-GEKF, 2 = Layer, 3 = NDEKF, 4 = FDEKF). task_batch_size_energy 1 # Number of energy update candidates prepared per task for each update (0 = Entire training set). task_batch_size_force 1 # Number of force update candidates prepared per task for each update (0 = Entire training set). memorize_symfunc_results # Keep symmetry function results in memory. diff --git a/examples/nnp-train/Cu2S_PBE/input.nn b/examples/nnp-train/Cu2S_PBE/input.nn index b4aa9333e..23eccf6db 100644 --- a/examples/nnp-train/Cu2S_PBE/input.nn +++ b/examples/nnp-train/Cu2S_PBE/input.nn @@ -44,6 +44,7 @@ parallel_mode 0 # Training parallelization used ( jacobian_mode 1 # Jacobian computation mode (0 = Summation to single gradient, 1 = Per-task summed gradient, 2 = Full Jacobian). update_strategy 0 # Update strategy (0 = Combined, 1 = Per-element). selection_mode 2 # Update candidate selection mode (0 = Random, 1 = Sort, 2 = Threshold). +decoupling_type 0 # Kalman filter decoupling type (0 = GEKF, 1 = ED-GEKF, 2 = Layer, 3 = NDEKF, 4 = FDEKF). task_batch_size_energy 1 # Number of energy update candidates prepared per task for each update (0 = Entire training set). task_batch_size_force 1 # Number of force update candidates prepared per task for each update (0 = Entire training set). memorize_symfunc_results # Keep symmetry function results in memory. diff --git a/examples/nnp-train/H2O_RPBE-D3/input.nn b/examples/nnp-train/H2O_RPBE-D3/input.nn index b2666eecd..05b3fcacb 100644 --- a/examples/nnp-train/H2O_RPBE-D3/input.nn +++ b/examples/nnp-train/H2O_RPBE-D3/input.nn @@ -44,6 +44,7 @@ parallel_mode 0 # Training parallelization used ( jacobian_mode 1 # Jacobian computation mode (0 = Summation to single gradient, 1 = Per-task summed gradient, 2 = Full Jacobian). update_strategy 0 # Update strategy (0 = Combined, 1 = Per-element). selection_mode 2 # Update candidate selection mode (0 = Random, 1 = Sort, 2 = Threshold). +decoupling_type 0 # Kalman filter decoupling type (0 = GEKF, 1 = ED-GEKF, 2 = Layer, 3 = NDEKF, 4 = FDEKF). task_batch_size_energy 1 # Number of energy update candidates prepared per task for each update (0 = Entire training set). task_batch_size_force 1 # Number of force update candidates prepared per task for each update (0 = Entire training set). memorize_symfunc_results # Keep symmetry function results in memory. diff --git a/examples/nnp-train/LJ/input.nn b/examples/nnp-train/LJ/input.nn index d42118f4b..2a1812341 100644 --- a/examples/nnp-train/LJ/input.nn +++ b/examples/nnp-train/LJ/input.nn @@ -34,6 +34,7 @@ parallel_mode 0 # Training parallelization used ( jacobian_mode 1 # Jacobian computation mode (0 = Summation to single gradient, 1 = Per-task summed gradient, 2 = Full Jacobian). update_strategy 0 # Update strategy (0 = Combined, 1 = Per-element). selection_mode 2 # Update candidate selection mode (0 = Random, 1 = Sort, 2 = Threshold). +decoupling_type 0 # Kalman filter decoupling type (0 = GEKF, 1 = ED-GEKF, 2 = Layer, 3 = NDEKF, 4 = FDEKF). task_batch_size_energy 1 # Number of energy update candidates prepared per task for each update (0 = Entire training set). task_batch_size_force 1 # Number of force update candidates prepared per task for each update (0 = Entire training set). memorize_symfunc_results # Keep symmetry function results in memory. From d0a6af3bc6585718085362717342e42959f5bf52 Mon Sep 17 00:00:00 2001 From: Andreas Singraber Date: Tue, 8 Sep 2020 00:07:21 +0200 Subject: [PATCH 3/9] Added MPI communicator and decoupling group mask --- src/libnnptrain/KalmanFilter.cpp | 50 ++++++++++++++++++++++++++++++-- src/libnnptrain/KalmanFilter.h | 20 +++++++++++++ src/libnnptrain/Training.cpp | 24 +++++++++++++++ 3 files changed, 92 insertions(+), 2 deletions(-) diff --git a/src/libnnptrain/KalmanFilter.cpp b/src/libnnptrain/KalmanFilter.cpp index ab55a8419..48c7f4101 100644 --- a/src/libnnptrain/KalmanFilter.cpp +++ b/src/libnnptrain/KalmanFilter.cpp @@ -19,6 +19,7 @@ #include #include #include +#include // std::map using namespace Eigen; using namespace std; @@ -27,6 +28,8 @@ using namespace nnp; KalmanFilter::KalmanFilter(size_t const sizeState, KalmanType const type) : Updater(sizeState), + myRank (0 ), + numProcs (0 ), sizeObservation(0 ), numUpdates (0 ), epsilon (0.0 ), @@ -67,12 +70,47 @@ KalmanFilter::KalmanFilter(size_t const sizeState, // Prevent problems with unallocated K when log starts. K.resize(sizeState, sizeObservation); K.setZero(); + + // Set default decoupling group mask. + groupMask = new Map(0, sizeState); } KalmanFilter::~KalmanFilter() { } +void KalmanFilter::setupMPI(MPI_Comm* communicator) +{ + MPI_Comm_dup(*communicator, &comm); + MPI_Comm_rank(comm, &myRank); + MPI_Comm_size(comm, &numProcs); + + return; +} + +void KalmanFilter::setupDecoupling(int const* const mask) +{ + new (groupMask) Map(mask, sizeState); + + for (long i = 0; i < groupMask->size(); ++i) + { + groupMap[(*groupMask)(i)].push_back(i); + } + + // Check consistency of group map. + for (size_t i = 0; i < groupMap.size(); ++i) + { + if (groupMap.find(i) == groupMap.end()) + { + throw runtime_error(strpr("ERROR: Kalman filter decoupling group " + "mask is inconsistent, group %zu not " + "present.\n", i)); + } + } + + return; +} + void KalmanFilter::setSizeObservation(size_t const size) { sizeObservation = size; @@ -317,8 +355,8 @@ vector KalmanFilter::info() const if (type == KT_STANDARD) { v.push_back(strpr("KalmanType::KT_STANDARD (%d)\n", type)); - v.push_back(strpr("sizeState = %zu\n", sizeState)); - v.push_back(strpr("sizeObservation = %zu\n", sizeObservation)); + v.push_back(strpr("myRank = %d\n", myRank)); + v.push_back(strpr("numProcs = %d\n", numProcs)); v.push_back(strpr("epsilon = %12.4E\n", epsilon)); v.push_back(strpr("q0 = %12.4E\n", q0 )); v.push_back(strpr("qtau = %12.4E\n", qtau )); @@ -339,7 +377,15 @@ vector KalmanFilter::info() const v.push_back(strpr("lambda = %12.4E\n", lambda)); v.push_back(strpr("nu = %12.4E\n", nu )); } + v.push_back(strpr("sizeState = %zu\n", sizeState)); + v.push_back(strpr("sizeObservation = %zu\n", sizeObservation)); v.push_back(strpr("OpenMP threads used: %d\n", nbThreads())); + v.push_back(strpr("Number of decoupling groups: %zu\n", groupMap.size())); + for (size_t i = 0; i < groupMap.size(); ++i) + { + v.push_back(strpr(" - group %5zu size: %zu\n", + i, groupMap.at(i).size())); + } return v; } diff --git a/src/libnnptrain/KalmanFilter.h b/src/libnnptrain/KalmanFilter.h index 356cee949..003445649 100644 --- a/src/libnnptrain/KalmanFilter.h +++ b/src/libnnptrain/KalmanFilter.h @@ -50,6 +50,16 @@ class KalmanFilter : public Updater /** Destructor. */ virtual ~KalmanFilter(); + /** Basic MPI setup. + * + * @param[in] communicator Input communicator of calling program. + */ + void setupMPI(MPI_Comm* communicator); + /** Set decoupling group mask. + * + * @param[in] mask Input group mask provided via array of integers. + */ + void setupDecoupling(int const* const mask); /** Set observation vector size. * * @param[in] size Size of the observation vector. @@ -194,6 +204,10 @@ class KalmanFilter : public Updater private: /// Kalman filter type. KalmanType type; + /// My process ID. + int myRank; + /// Total number of MPI processors. + int numProcs; /// Size of observation (measurement) vector. std::size_t sizeObservation; /// Total number of updates performed. @@ -222,6 +236,10 @@ class KalmanFilter : public Updater double nu; /// Forgetting gain factor gamma for fading memory Kalman filter. double gamma; + /// Decoupling group map. + std::map> groupMap; + /// Decoupling group mask vector. + Eigen::Map* groupMask; /// State vector. Eigen::Map* w; /// Error vector. @@ -234,6 +252,8 @@ class KalmanFilter : public Updater Eigen::MatrixXd K; /// Intermediate result X = P . H. Eigen::MatrixXd X; + /// Global MPI communicator. + MPI_Comm comm; }; ////////////////////////////////// diff --git a/src/libnnptrain/Training.cpp b/src/libnnptrain/Training.cpp index 94a5acc92..106a9bb55 100644 --- a/src/libnnptrain/Training.cpp +++ b/src/libnnptrain/Training.cpp @@ -1050,6 +1050,30 @@ void Training::setupTraining() updaters.push_back( (Updater*)new KalmanFilter(numWeightsPerUpdater.at(i), kalmanType)); + KalmanFilter* u = dynamic_cast(updaters.back()); + u->setupMPI(&comm); + vector groupMask(weights.at(i).size(), 0); + if (decouplingType == DT_ELEMENT) + { + for (size_t i = 0; i < numElements; ++i) + { + fill(groupMask.begin() + weightsOffset.at(i), + groupMask.begin() + weightsOffset.at(i) + + elements.at(i). + neuralNetwork->getNumConnections(), + i); + } + } + else if (decouplingType == DT_LAYER) + { + } + else if (decouplingType == DT_NODE) + { + } + else if (decouplingType == DT_FULL) + { + } + u->setupDecoupling(groupMask.data()); } updaters.back()->setState(&(weights.at(i).front())); } From 31bcd583732bce1fd48d1fce227bfa62baa2d72e Mon Sep 17 00:00:00 2001 From: Andreas Singraber Date: Tue, 8 Sep 2020 17:35:13 +0200 Subject: [PATCH 4/9] Started with group mask for layer decoupling, WIP! --- src/libnnp/NeuralNetwork.cpp | 15 +++++++++++++++ src/libnnp/NeuralNetwork.h | 11 +++++++++++ src/libnnptrain/Training.cpp | 20 ++++++++++++++++++++ 3 files changed, 46 insertions(+) diff --git a/src/libnnp/NeuralNetwork.cpp b/src/libnnp/NeuralNetwork.cpp index e7b7808c6..be44be604 100644 --- a/src/libnnp/NeuralNetwork.cpp +++ b/src/libnnp/NeuralNetwork.cpp @@ -921,6 +921,21 @@ void NeuralNetwork::getNeuronStatistics(long* count, return; } +vector> NeuralNetwork::getLayerBoundaries() const +{ + vector> boundaries; + + for (int i = 0; i < numLayers - 1; ++i) + { + boundaries.push_back(make_pair(weightOffset[i], + weightOffset[i+1] - 1)); + } + boundaries.push_back(make_pair(weightOffset[numLayers - 1], + numConnections - 1)); + + return boundaries; +} + /* void NeuralNetwork::writeStatus(int element, int epoch) { diff --git a/src/libnnp/NeuralNetwork.h b/src/libnnp/NeuralNetwork.h index c56c58e9f..c9c499734 100644 --- a/src/libnnp/NeuralNetwork.h +++ b/src/libnnp/NeuralNetwork.h @@ -17,8 +17,10 @@ #ifndef NEURALNETWORK_H #define NEURALNETWORK_H +#include // std::size_t #include // std::ofstream #include // std::string +#include // std::pair #include // std::vector namespace nnp @@ -333,6 +335,15 @@ class NeuralNetwork * Counters and summation variables for neuron statistics are reset. */ void resetNeuronStatistics(); + /** Get layer boundaries in combined weight vector (for Kalman filter + * decoupling setup). + * + * @return Vector with layer boundaries as pair (begin, end), + * (numLayers + 1) points. + */ + std::vector> getLayerBoundaries() const; //void writeStatus(int, int); long getMemoryUsage(); /** Print neural network architecture. diff --git a/src/libnnptrain/Training.cpp b/src/libnnptrain/Training.cpp index 106a9bb55..396465336 100644 --- a/src/libnnptrain/Training.cpp +++ b/src/libnnptrain/Training.cpp @@ -1066,6 +1066,26 @@ void Training::setupTraining() } else if (decouplingType == DT_LAYER) { + size_t index = 0; + for (size_t i = 0; i < numElements; ++i) + { + for (auto b : elements.at(i).neuralNetwork + ->getLayerBoundaries()) + { + log << strpr("%zu %zu\n", b.first, b.second); + fill(groupMask.begin() + + weightsOffset.at(i) + b.first, + groupMask.begin() + + weightsOffset.at(i) + b.second, + index); + index++; + } + } + index = 0; + for (auto m : groupMask) + { + log << strpr("%zu %zu\n", index, m); + } } else if (decouplingType == DT_NODE) { From 33719c4dfa0625050dab1483924f5e4cb3e9f0d7 Mon Sep 17 00:00:00 2001 From: Andreas Singraber Date: Wed, 9 Sep 2020 00:30:30 +0200 Subject: [PATCH 5/9] All decoupling group masks look correct --- src/libnnp/NeuralNetwork.cpp | 34 ++++++++++++++++++++++++++--- src/libnnp/NeuralNetwork.h | 8 +++++++ src/libnnptrain/Training.cpp | 42 +++++++++++++++++++++++++++++------- 3 files changed, 73 insertions(+), 11 deletions(-) diff --git a/src/libnnp/NeuralNetwork.cpp b/src/libnnp/NeuralNetwork.cpp index be44be604..42d4ac390 100644 --- a/src/libnnp/NeuralNetwork.cpp +++ b/src/libnnp/NeuralNetwork.cpp @@ -21,6 +21,8 @@ #include // fprintf, stderr #include // exit, EXIT_FAILURE, rand, srand #include // std::numeric_limits +#include // std::iota +#include // std::valarray, std::slice #define EXP_LIMIT 35.0 @@ -925,17 +927,43 @@ vector> NeuralNetwork::getLayerBoundaries() const { vector> boundaries; - for (int i = 0; i < numLayers - 1; ++i) + for (int i = 0; i < numLayers - 2; ++i) { boundaries.push_back(make_pair(weightOffset[i], - weightOffset[i+1] - 1)); + weightOffset[i + 1] - 1)); } - boundaries.push_back(make_pair(weightOffset[numLayers - 1], + boundaries.push_back(make_pair(weightOffset[numLayers - 2], numConnections - 1)); return boundaries; } +vector> NeuralNetwork::getNodeWeightIndices() const +{ + vector> indices; + valarray wlist(numConnections); + + // Fill array with increasing numbers from 0 to numConnections - 1. + iota(begin(wlist), end(wlist), 0); + + for (int i = 1; i < numLayers; i++) + { + for (int j = 0; j < layers[i].numNeurons; j++) + { + indices.push_back(vector()); + // Use slice function to get the right weight indices. + valarray v = wlist[slice(weightOffset[i - 1] + j, + layers[i].numNeuronsPrevLayer, + layers[i].numNeurons)]; + indices.back().assign(begin(v), end(v)); + // Add bias weight. + indices.back().push_back(biasOffset[i - 1] + j); + } + } + + return indices; +} + /* void NeuralNetwork::writeStatus(int element, int epoch) { diff --git a/src/libnnp/NeuralNetwork.h b/src/libnnp/NeuralNetwork.h index c9c499734..61f30ce09 100644 --- a/src/libnnp/NeuralNetwork.h +++ b/src/libnnp/NeuralNetwork.h @@ -344,6 +344,14 @@ class NeuralNetwork std::vector> getLayerBoundaries() const; + /** Get indices of weights associated with nodes (for Kalman filter + * decoupling setup). + * + * @return Vector of each nodes weight indices. + */ + std::vector< + std::vector< + std::size_t>> getNodeWeightIndices() const; //void writeStatus(int, int); long getMemoryUsage(); /** Print neural network architecture. diff --git a/src/libnnptrain/Training.cpp b/src/libnnptrain/Training.cpp index 396465336..2ed36e190 100644 --- a/src/libnnptrain/Training.cpp +++ b/src/libnnptrain/Training.cpp @@ -1052,8 +1052,13 @@ void Training::setupTraining() kalmanType)); KalmanFilter* u = dynamic_cast(updaters.back()); u->setupMPI(&comm); - vector groupMask(weights.at(i).size(), 0); - if (decouplingType == DT_ELEMENT) + vector groupMask(weights.at(i).size(), + numeric_limits::max()); + if (decouplingType == DT_GLOBAL) + { + fill(groupMask.begin(), groupMask.end(), 0); + } + else if (decouplingType == DT_ELEMENT) { for (size_t i = 0; i < numElements; ++i) { @@ -1076,23 +1081,44 @@ void Training::setupTraining() fill(groupMask.begin() + weightsOffset.at(i) + b.first, groupMask.begin() - + weightsOffset.at(i) + b.second, + + weightsOffset.at(i) + b.second + 1, index); index++; } } - index = 0; - for (auto m : groupMask) - { - log << strpr("%zu %zu\n", index, m); - } } else if (decouplingType == DT_NODE) { + size_t index = 0; + for (size_t i = 0; i < numElements; ++i) + { + vector> v = elements.at(i) + .neuralNetwork->getNodeWeightIndices(); + for (auto n : v) + { + for (auto w : n) + { + groupMask.at(weightsOffset.at(i) + w) = index; + } + index++; + } + } } else if (decouplingType == DT_FULL) { + size_t index = 0; + for (auto& m : groupMask) + { + m = index; + index++; + } } + //size_t index = 0; + //for (auto m : groupMask) + //{ + // log << strpr("%zu %zu\n", index, m); + // index++; + //} u->setupDecoupling(groupMask.data()); } updaters.back()->setState(&(weights.at(i).front())); From 5c540dba27cb1006bd87f2c286ff65bfbe49e812 Mon Sep 17 00:00:00 2001 From: Andreas Singraber Date: Tue, 15 Sep 2020 23:03:17 +0200 Subject: [PATCH 6/9] Introduced new NN weight memory layout - Weights and biases grouped by neuron. - Simplifies node decoupling implementation => block submatrices. - Old weight ordering available via compile flag (-DALTERNATIVE_WEIGHT_ORDERING) - Weight file output remains unchanged. --- src/libnnp/Mode.cpp | 4 +- src/libnnp/NeuralNetwork.cpp | 275 +++++++++++++++++++++++++++++-- src/libnnp/NeuralNetwork.h | 98 +++++++++-- src/libnnptrain/KalmanFilter.cpp | 27 +++ src/libnnptrain/KalmanFilter.h | 6 + src/libnnptrain/Training.cpp | 73 ++++++-- src/makefile.gnu | 6 +- src/makefile.intel | 3 + src/makefile.llvm | 3 + 9 files changed, 443 insertions(+), 52 deletions(-) diff --git a/src/libnnp/Mode.cpp b/src/libnnp/Mode.cpp index 475ec8ff3..bd7359734 100644 --- a/src/libnnp/Mode.cpp +++ b/src/libnnp/Mode.cpp @@ -884,7 +884,9 @@ void Mode::setupNeuralNetworkWeights(string const& fileNameFormat) weights.push_back(atof(splitLine.at(0).c_str())); } } - it->neuralNetwork->setConnections(&(weights.front())); + // Attention: need alternative (old) ordering scheme here for + // backward compatibility! + it->neuralNetwork->setConnectionsAO(&(weights.front())); file.close(); } diff --git a/src/libnnp/NeuralNetwork.cpp b/src/libnnp/NeuralNetwork.cpp index 42d4ac390..748835b4f 100644 --- a/src/libnnp/NeuralNetwork.cpp +++ b/src/libnnp/NeuralNetwork.cpp @@ -82,12 +82,26 @@ NeuralNetwork(int numLayers, weightOffset[i] = weightOffset[i-1] + (layers[i-1].numNeurons + 1) * layers[i].numNeurons; } +#ifndef ALTERNATIVE_WEIGHT_ORDERING + biasOffset = new int*[numLayers-1]; + for (int i = 0; i < numLayers-1; i++) + { + biasOffset[i] = new int[layers[i+1].numNeurons]; + for (int j = 0; j < layers[i+1].numNeurons; j++) + { + biasOffset[i][j] = weightOffset[i] + + (layers[i+1].numNeuronsPrevLayer + 1) + * (j + 1) - 1; + } + } +#else biasOffset = new int[numLayers-1]; for (int i = 0; i < numLayers-1; i++) { biasOffset[i] = weightOffset[i] + layers[i+1].numNeurons * layers[i].numNeurons; } +#endif biasOnlyOffset = new int[numLayers-1]; biasOnlyOffset[0] = 0; for (int i = 1; i < numLayers-1; i++) @@ -108,7 +122,15 @@ NeuralNetwork::~NeuralNetwork() } delete[] layers; delete[] weightOffset; +#ifndef ALTERNATIVE_WEIGHT_ORDERING + for (int i = 0; i < numLayers - 1; i++) + { + delete[] biasOffset[i]; + } + delete[] biasOffset; +#else delete[] biasOffset; +#endif delete[] biasOnlyOffset; } @@ -150,6 +172,27 @@ void NeuralNetwork::setConnections(double const* const& connections) { int count = 0; + for (int i = 1; i < numLayers; i++) + { + for (int j = 0; j < layers[i].numNeurons; j++) + { + for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++) + { + layers[i].neurons[j].weights[k] = connections[count]; + count++; + } + layers[i].neurons[j].bias = connections[count]; + count++; + } + } + + return; +} + +void NeuralNetwork::setConnectionsAO(double const* const& connections) +{ + int count = 0; + for (int i = 1; i < numLayers; i++) { for (int j = 0; j < layers[i].numNeuronsPrevLayer; j++) @@ -174,6 +217,27 @@ void NeuralNetwork::getConnections(double* connections) const { int count = 0; + for (int i = 1; i < numLayers; i++) + { + for (int j = 0; j < layers[i].numNeurons; j++) + { + for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++) + { + connections[count] = layers[i].neurons[j].weights[k] ; + count++; + } + connections[count] = layers[i].neurons[j].bias; + count++; + } + } + + return; +} + +void NeuralNetwork::getConnectionsAO(double* connections) const +{ + int count = 0; + for (int i = 1; i < numLayers; i++) { for (int j = 0; j < layers[i].numNeuronsPrevLayer; j++) @@ -430,6 +494,52 @@ void NeuralNetwork::calculateDEdG(double *dEdG) const void NeuralNetwork::calculateDEdc(double* dEdc) const { +#ifndef ALTERNATIVE_WEIGHT_ORDERING + int count = 0; + + for (int i = 0; i < numConnections; i++) + { + dEdc[i] = 0.0; + } + + for (int i = 0; i < outputLayer->numNeurons; i++) + { + dEdc[biasOffset[numLayers-2][i]] = outputLayer->neurons[i].dfdx; + if (normalizeNeurons) + { + dEdc[biasOffset[numLayers-2][i]] /= + outputLayer->numNeuronsPrevLayer; + } + } + + for (int i = numLayers - 2; i >= 0; i--) + { + count = 0; + for (int j = 0; j < layers[i+1].numNeurons; j++) + { + for (int k = 0; k < layers[i].numNeurons; k++) + { + dEdc[weightOffset[i]+count] = dEdc[biasOffset[i][j]] + * layers[i].neurons[k].value; + count++; + if (i >= 1) + { + dEdc[biasOffset[i-1][k]] += dEdc[biasOffset[i][j]] + * layers[i+1].neurons[j].weights[k] + * layers[i].neurons[k].dfdx; + } + } + count++; + } + if (normalizeNeurons && i >= 1) + { + for (int k = 0; k < layers[i].numNeurons; k++) + { + dEdc[biasOffset[i-1][k]] /= layers[i].numNeuronsPrevLayer; + } + } + } +#else int count = 0; for (int i = 0; i < numConnections; i++) @@ -470,6 +580,7 @@ void NeuralNetwork::calculateDEdc(double* dEdc) const } } } +#endif return; } @@ -514,6 +625,67 @@ void NeuralNetwork::calculateDFdc(double* dFdc, } void NeuralNetwork::writeConnections(std::ofstream& file) const +{ + // File header. + vector title; + vector colName; + vector colInfo; + vector colSize; + title.push_back("Neural network connection values (weights and biases)."); + colSize.push_back(24); + colName.push_back("connection"); + colInfo.push_back("Neural network connection value."); + colSize.push_back(1); + colName.push_back("t"); + colInfo.push_back("Connection type (a = weight, b = bias)."); + colSize.push_back(9); + colName.push_back("index"); + colInfo.push_back("Index enumerating weights."); + colSize.push_back(5); + colName.push_back("l_s"); + colInfo.push_back("Starting point layer (end point layer for biases)."); + colSize.push_back(5); + colName.push_back("n_s"); + colInfo.push_back("Starting point neuron in starting layer (end point " + "neuron for biases)."); + colSize.push_back(5); + colName.push_back("l_e"); + colInfo.push_back("End point layer."); + colSize.push_back(5); + colName.push_back("n_e"); + colInfo.push_back("End point neuron in end layer."); + appendLinesToFile(file, + createFileHeader(title, colSize, colName, colInfo)); + + int count = 0; + for (int i = 1; i < numLayers; i++) + { + for (int j = 0; j < layers[i].numNeurons; j++) + { + for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++) + { + count++; + file << strpr("%24.16E a %9d %5d %5d %5d %5d\n", + layers[i].neurons[j].weights[k], + count, + i - 1, + k + 1, + i, + j + 1); + } + count++; + file << strpr("%24.16E b %9d %5d %5d\n", + layers[i].neurons[j].bias, + count, + i, + j + 1); + } + } + + return; +} + +void NeuralNetwork::writeConnectionsAO(std::ofstream& file) const { // File header. vector title; @@ -647,6 +819,67 @@ void NeuralNetwork::calculateD2EdGdc(int index, double const* const& dEdb, double* d2EdGdc) const { +#ifndef ALTERNATIVE_WEIGHT_ORDERING + int count = 0; + + for (int i = 0; i < outputLayer->numNeurons; i++) + { + d2EdGdc[biasOffset[numLayers-2][i]] = outputLayer->neurons[i].d2fdx2 + * outputLayer->neurons[i].dxdG; + if (normalizeNeurons) + { + d2EdGdc[biasOffset[numLayers-2][i]] /= + outputLayer->numNeuronsPrevLayer; + } + } + + for (int i = numLayers-2; i >= 0; i--) + { + count = 0; + for (int j = 0; j < layers[i+1].numNeurons; j++) + { + for (int k = 0; k < layers[i].numNeurons; k++) + { + if (i == 0) + { + d2EdGdc[weightOffset[i]+count] = d2EdGdc[biasOffset[i][j]] + * layers[i].neurons[k].value; + if (k == index) + { + d2EdGdc[weightOffset[i]+count] += + dEdb[biasOnlyOffset[i]+j]; + } + } + else + { + d2EdGdc[weightOffset[i]+count] = d2EdGdc[biasOffset[i][j]] + * layers[i].neurons[k].value + + dEdb[biasOnlyOffset[i]+j] * layers[i].neurons[k].dfdx + * layers[i].neurons[k].dxdG; + } + count++; + if (i >= 1) + { + d2EdGdc[biasOffset[i-1][k]] += + layers[i+1].neurons[j].weights[k] + * (d2EdGdc[biasOffset[i][j]] + * layers[i].neurons[k].dfdx + + dEdb[biasOnlyOffset[i]+j] + * layers[i].neurons[k].d2fdx2 + * layers[i].neurons[k].dxdG); + } + } + count++; + } + for (int k = 0; k < layers[i].numNeurons; k++) + { + if (normalizeNeurons && i >= 1) + { + d2EdGdc[biasOffset[i-1][k]] /= layers[i].numNeuronsPrevLayer; + } + } + } +#else int count = 0; for (int i = 0; i < outputLayer->numNeurons; i++) @@ -701,6 +934,7 @@ void NeuralNetwork::calculateD2EdGdc(int index, } } } +#endif return; } @@ -938,30 +1172,29 @@ vector> NeuralNetwork::getLayerBoundaries() const return boundaries; } -vector> NeuralNetwork::getNodeWeightIndices() const +vector> NeuralNetwork::getNeuronBoundaries() const { - vector> indices; - valarray wlist(numConnections); - - // Fill array with increasing numbers from 0 to numConnections - 1. - iota(begin(wlist), end(wlist), 0); +#ifndef ALTERNATIVE_WEIGHT_ORDERING + vector> boundaries; - for (int i = 1; i < numLayers; i++) + for (int i = 0; i < numLayers - 1; ++i) { - for (int j = 0; j < layers[i].numNeurons; j++) + boundaries.push_back(make_pair(weightOffset[i], + biasOffset[i][0])); + for (int j = 0; j < layers[i+1].numNeurons - 1; ++j) { - indices.push_back(vector()); - // Use slice function to get the right weight indices. - valarray v = wlist[slice(weightOffset[i - 1] + j, - layers[i].numNeuronsPrevLayer, - layers[i].numNeurons)]; - indices.back().assign(begin(v), end(v)); - // Add bias weight. - indices.back().push_back(biasOffset[i - 1] + j); + boundaries.push_back(make_pair(biasOffset[i][j] + 1, + biasOffset[i][j+1])); + } } +#else + throw runtime_error("ERROR: Neuron boundaries not available with " + "alternative (old) weight memory layout, recompile " + "without -DALTERNATIVE_WEIGHT_ORDERING.\n"); +#endif - return indices; + return boundaries; } /* @@ -1012,7 +1245,15 @@ long NeuralNetwork::getMemoryUsage() int numNeurons = getNumNeurons(); mem += (numLayers - 1) * sizeof(int); // weightOffset +#ifndef ALTERNATIVE_WEIGHT_ORDERING + mem += (numLayers - 1) * sizeof(int*); // biasOffset + for (int i = 0; i < numLayers - 1; i++) + { + mem += layers[i].numNeurons * sizeof(int); + } +#else mem += (numLayers - 1) * sizeof(int); // biasOffset +#endif mem += (numLayers - 1) * sizeof(int); // biasOnlyOffset mem += numLayers * sizeof(Layer); // layers mem += numNeurons * sizeof(Neuron); // neurons diff --git a/src/libnnp/NeuralNetwork.h b/src/libnnp/NeuralNetwork.h index 61f30ce09..1e5284e3d 100644 --- a/src/libnnp/NeuralNetwork.h +++ b/src/libnnp/NeuralNetwork.h @@ -154,29 +154,70 @@ class NeuralNetwork * @f[ * \underbrace{ * \overbrace{ - * a^{01}_{00}, \ldots, a^{01}_{0m_1}, - * a^{01}_{10}, \ldots, a^{01}_{1m_1}, + * a^{01}_{00}, \ldots, a^{01}_{n_00}, b^{1}_{0} + * }^{\text{Neuron}^{1}_{0}} + * \overbrace{ + * a^{01}_{01}, \ldots, a^{01}_{n_01}, b^{1}_{1} + * }^{\text{Neuron}^{1}_{1}} * \ldots, - * a^{01}_{n_00}, \ldots, a^{01}_{n_0m_1}, + * \overbrace{ + * a^{01}_{0n_1}, \ldots, a^{01}_{n_0n_1}, b^{1}_{n_1} + * }^{\text{Neuron}^{1}_{n_1}} + * }_{\text{Layer } 0 \rightarrow 1}, + * \underbrace{ + * a^{12}_{00}, \ldots, b^{2}_{n_2} + * }_{\text{Layer } 1 \rightarrow 2}, + * \ldots, + * \underbrace{ + * a^{p-1,p}_{00}, \ldots, b^{p}_{n_p} + * }_{\text{Layer } p-1 \rightarrow p} + * @f] + * where @f$a^{i-1, i}_{jk}@f$ is the weight connecting neuron + * @f$j@f$ in layer @f$i-1@f$ to neuron @f$k@f$ in layer + * @f$i@f$ and @f$b^{i}_{k}@f$ is the bias assigned to neuron + * @f$k@f$ in layer @f$i@f$. + * + * This ordering scheme is used internally to store weights in memory. + * Because all weights and biases connected to a neuron are aligned in a + * continuous block this memory layout simplifies the implementation of + * node-decoupled Kalman filter training (NDEKF). However, for backward + * compatibility reasons this layout is NOT used for storing weights on + * disk (see setConnectionsAO()). + */ + void setConnections(double const* const& connections); + /** Set neural network weights and biases (alternative ordering) + * + * @param[in] connections One-dimensional array with neural network + * connections in the following order: + * @f[ + * \underbrace{ + * \overbrace{ + * a^{01}_{00}, \ldots, a^{01}_{0n_1}, + * a^{01}_{10}, \ldots, a^{01}_{1n_1}, + * \ldots, + * a^{01}_{n_00}, \ldots, a^{01}_{n_0n_1}, * }^{\text{Weights}} * \overbrace{ - * b^{1}_{0}, \ldots, b^{1}_{m_1} + * b^{1}_{0}, \ldots, b^{1}_{n_1} * }^{\text{Biases}} * }_{\text{Layer } 0 \rightarrow 1}, * \underbrace{ - * a^{12}_{00}, \ldots, b^{2}_{m_2} + * a^{12}_{00}, \ldots, b^{2}_{n_2} * }_{\text{Layer } 1 \rightarrow 2}, * \ldots, * \underbrace{ - * a^{p-1,p}_{00}, \ldots, b^{p}_{m_p} + * a^{p-1,p}_{00}, \ldots, b^{p}_{n_p} * }_{\text{Layer } p-1 \rightarrow p} * @f] * where @f$a^{i-1, i}_{jk}@f$ is the weight connecting neuron * @f$j@f$ in layer @f$i-1@f$ to neuron @f$k@f$ in layer * @f$i@f$ and @f$b^{i}_{k}@f$ is the bias assigned to neuron * @f$k@f$ in layer @f$i@f$. + * + * This memory layout is used when storing weights on disk. */ - void setConnections(double const* const& connections); + void setConnectionsAO( + double const* const& connections); /** Get neural network weights and biases. * * @param[out] connections One-dimensional array with neural network @@ -184,6 +225,13 @@ class NeuralNetwork * #setConnections()) */ void getConnections(double* connections) const; + /** Get neural network weights and biases (alternative ordering). + * + * @param[out] connections One-dimensional array with neural network + * connections (same order as described in + * #setConnectionsAO()) + */ + void getConnectionsAO(double* connections) const; /** Initialize connections with random numbers. * * @param[in] seed Random number generator seed. @@ -213,14 +261,14 @@ class NeuralNetwork ModificationScheme modificationScheme, double parameter1, double parameter2); - /** Set neural network input layer node values. + /** Set neural network input layer neuron values. * - * @param[in] input Input layer node values. + * @param[in] input Input layer neuron values. */ void setInput(double const* const& input) const; - /** Get neural network output layer node values. + /** Get neural network output layer neuron values. * - * @param[out] output Output layer node values. + * @param[out] output Output layer neuron values. */ void getOutput(double* output) const; /** Propagate input information through all layers. @@ -297,10 +345,20 @@ class NeuralNetwork void calculateDFdc(double* dFdc, double const* const& dGdxyz) const; /** Write connections to file. + * + * __CAUTION__: For compatibility reasons this format is NOT used for + * storing NN weights to disk! * * @param[in,out] file File stream to write to. */ void writeConnections(std::ofstream& file) const; + /** Write connections to file (alternative ordering). + * + * This NN weights ordering layout is used when writing weights to disk. + * + * @param[in,out] file File stream to write to. + */ + void writeConnectionsAO(std::ofstream& file) const; /** Return gathered neuron statistics. * * @param[out] count Number of neuron output value calculations. @@ -339,19 +397,20 @@ class NeuralNetwork * decoupling setup). * * @return Vector with layer boundaries as pair (begin, end), - * (numLayers + 1) points. + * (numLayers - 1) points. */ std::vector> getLayerBoundaries() const; - /** Get indices of weights associated with nodes (for Kalman filter + /** Get neuron boundaries in combined weight vector (for Kalman filter * decoupling setup). * - * @return Vector of each nodes weight indices. + * @return Vector with neuron boundaries as pair (begin, end), + * (numNeurons - inputLayer->numNeurons) points. */ - std::vector< - std::vector< - std::size_t>> getNodeWeightIndices() const; + std::vector> getNeuronBoundaries() const; //void writeStatus(int, int); long getMemoryUsage(); /** Print neural network architecture. @@ -423,8 +482,13 @@ class NeuralNetwork int numHiddenLayers; /// Offset adress of weights per layer in combined weights+bias array. int* weightOffset; +#ifndef ALTERNATIVE_WEIGHT_ORDERING + /// Offset adress of each neuron per layer in combined weights+bias array. + int** biasOffset; +#else /// Offset adress of biases per layer in combined weights+bias array. int* biasOffset; +#endif /// Offset adress of biases per layer in bias only array. int* biasOnlyOffset; /// Pointer to input layer. diff --git a/src/libnnptrain/KalmanFilter.cpp b/src/libnnptrain/KalmanFilter.cpp index 48c7f4101..345116072 100644 --- a/src/libnnptrain/KalmanFilter.cpp +++ b/src/libnnptrain/KalmanFilter.cpp @@ -108,6 +108,26 @@ void KalmanFilter::setupDecoupling(int const* const mask) } } + numGroupsPerProc.resize(numProcs, 0); + groupOffsetPerProc.resize(numProcs, 0); + int quotient = groupMap.size() / numProcs; + int remainder = groupMap.size() % numProcs; + int groupSum = 0; + for (int i = 0; i < numProcs; i++) + { + numGroupsPerProc.at(i) = quotient; + if (remainder > 0 && i < remainder) + { + numGroupsPerProc.at(i)++; + } + groupOffsetPerProc.at(i) = groupSum; + groupSum += numGroupsPerProc.at(i); + } + for (size_t i = 0; i < numGroupsPerProc.at(myRank); ++i) + { + myGroups.push_back(groupOffsetPerProc.at(myRank) + i); + } + return; } @@ -386,6 +406,13 @@ vector KalmanFilter::info() const v.push_back(strpr(" - group %5zu size: %zu\n", i, groupMap.at(i).size())); } + v.push_back(strpr("Number of decoupling groups of proc %d: %zu\n", + myRank, myGroups.size())); + for (size_t i = 0; i < myGroups.size(); ++i) + { + v.push_back(strpr(" - group %5zu size: %zu\n", + i, groupMap.at(myGroups.at(i)).size())); + } return v; } diff --git a/src/libnnptrain/KalmanFilter.h b/src/libnnptrain/KalmanFilter.h index 003445649..e1c1f2723 100644 --- a/src/libnnptrain/KalmanFilter.h +++ b/src/libnnptrain/KalmanFilter.h @@ -236,6 +236,12 @@ class KalmanFilter : public Updater double nu; /// Forgetting gain factor gamma for fading memory Kalman filter. double gamma; + /// List of group indices of this processor. + std::vector myGroups; + /// Number of groups for each processor. + std::vector numGroupsPerProc; + /// Offset in terms of groups for each processor. + std::vector groupOffsetPerProc; /// Decoupling group map. std::map> groupMap; /// Decoupling group mask vector. diff --git a/src/libnnptrain/Training.cpp b/src/libnnptrain/Training.cpp index 2ed36e190..80feeb754 100644 --- a/src/libnnptrain/Training.cpp +++ b/src/libnnptrain/Training.cpp @@ -301,7 +301,12 @@ void Training::initializeWeights() w.at(i).at(j) = minWeights + gsl_rng_uniform(rngGlobal) * (maxWeights - minWeights); } - elements.at(i).neuralNetwork->setConnections(&(w.at(i).front())); +#ifndef ALTERNATIVE_WEIGHT_ORDERING + // FIX!!! + elements.at(i).neuralNetwork->setConnectionsAO(&(w.at(i).front())); +#else + elements.at(i).neuralNetwork->setConnectionsAO(&(w.at(i).front())); +#endif } if (settings.keywordExists("nguyen_widrow_weights_short")) { @@ -1077,7 +1082,6 @@ void Training::setupTraining() for (auto b : elements.at(i).neuralNetwork ->getLayerBoundaries()) { - log << strpr("%zu %zu\n", b.first, b.second); fill(groupMask.begin() + weightsOffset.at(i) + b.first, groupMask.begin() @@ -1089,20 +1093,29 @@ void Training::setupTraining() } else if (decouplingType == DT_NODE) { +#ifndef ALTERNATIVE_WEIGHT_ORDERING size_t index = 0; for (size_t i = 0; i < numElements; ++i) { - vector> v = elements.at(i) - .neuralNetwork->getNodeWeightIndices(); - for (auto n : v) + for (auto b : elements.at(i).neuralNetwork + ->getNeuronBoundaries()) { - for (auto w : n) - { - groupMask.at(weightsOffset.at(i) + w) = index; - } + log << strpr("%zu %zu %zu\n", i, b.first, b.second); + fill(groupMask.begin() + + weightsOffset.at(i) + b.first, + groupMask.begin() + + weightsOffset.at(i) + b.second + 1, + index); index++; } } +#else + throw runtime_error("ERROR: Node-decoupled Kalman filter " + "(NDEFK) training not possible with " + "alternative (old) weight memory " + "layout, recompile without " + "-DALTERNATIVE_WEIGHT_ORDERING.\n"); +#endif } else if (decouplingType == DT_FULL) { @@ -1113,12 +1126,6 @@ void Training::setupTraining() index++; } } - //size_t index = 0; - //for (auto m : groupMask) - //{ - // log << strpr("%zu %zu\n", index, m); - // index++; - //} u->setupDecoupling(groupMask.data()); } updaters.back()->setState(&(weights.at(i).front())); @@ -1564,7 +1571,9 @@ void Training::writeWeights(string const fileNameFormat) const string fileName = strpr(fileNameFormat.c_str(), elements.at(i).getAtomicNumber()); file.open(fileName.c_str()); - elements.at(i).neuralNetwork->writeConnections(file); + // Attention: need alternative (old) ordering scheme here for + // backward compatibility! + elements.at(i).neuralNetwork->writeConnectionsAO(file); file.close(); } @@ -2392,6 +2401,22 @@ void Training::update(bool force) else { nn->calculateDEdc(&(dXdc.at(i).front())); + //vector temp(dXdc.at(i).size(), 0.0); + //vector tempAO(dXdc.at(i).size(), 0.0); + //nn->calculateDEdc(&(temp.front())); + //ofstream tf("dedc.out"); + //size_t count; + //count = 0; + //sort(temp.begin(), temp.end()); + //for (auto i : temp) tf << strpr("%zu %16.8E\n", count++, i); + //tf.close(); + //nn->calculateDEdcAO(&(tempAO.front())); + //tf.open("dedc.out.ao"); + //count = 0; + //sort(tempAO.begin(), tempAO.end()); + //for (auto i : tempAO) tf << strpr("%zu %16.8E\n", count++, i); + //tf.close(); + //throw runtime_error("END HERE"); } // Finally sum up Jacobian. if (updateStrategy == US_ELEMENT) iu = i; @@ -2827,7 +2852,11 @@ void Training::getWeights() for (size_t i = 0; i < numElements; ++i) { NeuralNetwork const* const& nn = elements.at(i).neuralNetwork; +#ifndef ALTERNATIVE_WEIGHT_ORDERING nn->getConnections(&(weights.at(0).at(pos))); +#else + nn->getConnectionsAO(&(weights.at(0).at(pos))); +#endif pos += nn->getNumConnections(); } } @@ -2836,7 +2865,11 @@ void Training::getWeights() for (size_t i = 0; i < numElements; ++i) { NeuralNetwork const* const& nn = elements.at(i).neuralNetwork; +#ifndef ALTERNATIVE_WEIGHT_ORDERING nn->getConnections(&(weights.at(i).front())); +#else + nn->getConnectionsAO(&(weights.at(i).front())); +#endif } } @@ -2851,7 +2884,11 @@ void Training::setWeights() for (size_t i = 0; i < numElements; ++i) { NeuralNetwork* const& nn = elements.at(i).neuralNetwork; +#ifndef ALTERNATIVE_WEIGHT_ORDERING nn->setConnections(&(weights.at(0).at(pos))); +#else + nn->setConnectionsAO(&(weights.at(0).at(pos))); +#endif pos += nn->getNumConnections(); } } @@ -2860,7 +2897,11 @@ void Training::setWeights() for (size_t i = 0; i < numElements; ++i) { NeuralNetwork* const& nn = elements.at(i).neuralNetwork; +#ifndef ALTERNATIVE_WEIGHT_ORDERING nn->setConnections(&(weights.at(i).front())); +#else + nn->setConnectionsAO(&(weights.at(i).front())); +#endif } } diff --git a/src/makefile.gnu b/src/makefile.gnu index 223a03485..f35aeb0b6 100644 --- a/src/makefile.gnu +++ b/src/makefile.gnu @@ -14,7 +14,8 @@ PROJECT_EIGEN=/usr/include/eigen3/ PROJECT_CC=g++ PROJECT_MPICC=mpic++ # OpenMP parallelization is disabled by default, add flag "-fopenmp" to enable. -PROJECT_CFLAGS=-O3 -march=native -std=c++11 +PROJECT_CFLAGS=-O0 +#3 -march=native -std=c++11 PROJECT_CFLAGS_MPI=-Wno-long-long PROJECT_DEBUG=-g -pedantic-errors -Wall -Wextra PROJECT_TEST=--coverage -fno-default-inline -fno-inline -fno-inline-small-functions -fno-elide-constructors @@ -56,3 +57,6 @@ PROJECT_OPTIONS+= -DEIGEN_DONT_PARALLELIZE # Use improved memory layout for symmetry function derivatives. PROJECT_OPTIONS+= -DIMPROVED_SFD_MEMORY + +# Use alternative (old) ordering scheme of weights (NDEKF not possible). +#PROJECT_OPTIONS+= -DALTERNATIVE_WEIGHT_ORDERING diff --git a/src/makefile.intel b/src/makefile.intel index 17e61b7a0..4a9a0e94c 100644 --- a/src/makefile.intel +++ b/src/makefile.intel @@ -57,3 +57,6 @@ PROJECT_OPTIONS+= -DEIGEN_DONT_PARALLELIZE # Use improved memory layout for symmetry function derivatives. PROJECT_OPTIONS+= -DIMPROVED_SFD_MEMORY + +# Use alternative (old) ordering scheme of weights (NDEKF not possible). +#PROJECT_OPTIONS+= -DALTERNATIVE_WEIGHT_ORDERING diff --git a/src/makefile.llvm b/src/makefile.llvm index b748ce08b..30deb9a03 100644 --- a/src/makefile.llvm +++ b/src/makefile.llvm @@ -58,3 +58,6 @@ PROJECT_OPTIONS+= -DEIGEN_DONT_PARALLELIZE # Use improved memory layout for symmetry function derivatives. PROJECT_OPTIONS+= -DIMPROVED_SFD_MEMORY + +# Use alternative (old) ordering scheme of weights (NDEKF not possible). +#PROJECT_OPTIONS+= -DALTERNATIVE_WEIGHT_ORDERING From 0235ce50e739559d283168302a0cbcebe3d47891 Mon Sep 17 00:00:00 2001 From: Andreas Singraber Date: Wed, 16 Sep 2020 17:11:35 +0200 Subject: [PATCH 7/9] Changed decoupling setup - Works now with limits instead of a group mask. - Groups are assumed to be contiguous. - Implemented rank 0 only update via new communicator. --- src/libnnp/NeuralNetwork.cpp | 23 ++--- src/libnnp/NeuralNetwork.h | 12 +-- src/libnnptrain/KalmanFilter.cpp | 159 ++++++++++++++++++++++--------- src/libnnptrain/KalmanFilter.h | 32 ++++--- src/libnnptrain/Training.cpp | 95 +++++++++--------- test/cpp/Example_nnp_train.h | 14 +-- 6 files changed, 205 insertions(+), 130 deletions(-) diff --git a/src/libnnp/NeuralNetwork.cpp b/src/libnnp/NeuralNetwork.cpp index 748835b4f..ea812a336 100644 --- a/src/libnnp/NeuralNetwork.cpp +++ b/src/libnnp/NeuralNetwork.cpp @@ -21,8 +21,6 @@ #include // fprintf, stderr #include // exit, EXIT_FAILURE, rand, srand #include // std::numeric_limits -#include // std::iota -#include // std::valarray, std::slice #define EXP_LIMIT 35.0 @@ -1157,33 +1155,32 @@ void NeuralNetwork::getNeuronStatistics(long* count, return; } -vector> NeuralNetwork::getLayerBoundaries() const +vector> NeuralNetwork::getLayerLimits() const { - vector> boundaries; + vector> limits; for (int i = 0; i < numLayers - 2; ++i) { - boundaries.push_back(make_pair(weightOffset[i], + limits.push_back(make_pair(weightOffset[i], weightOffset[i + 1] - 1)); } - boundaries.push_back(make_pair(weightOffset[numLayers - 2], + limits.push_back(make_pair(weightOffset[numLayers - 2], numConnections - 1)); - return boundaries; + return limits; } -vector> NeuralNetwork::getNeuronBoundaries() const +vector> NeuralNetwork::getNeuronLimits() const { + vector> limits; #ifndef ALTERNATIVE_WEIGHT_ORDERING - vector> boundaries; - for (int i = 0; i < numLayers - 1; ++i) { - boundaries.push_back(make_pair(weightOffset[i], + limits.push_back(make_pair(weightOffset[i], biasOffset[i][0])); for (int j = 0; j < layers[i+1].numNeurons - 1; ++j) { - boundaries.push_back(make_pair(biasOffset[i][j] + 1, + limits.push_back(make_pair(biasOffset[i][j] + 1, biasOffset[i][j+1])); } @@ -1194,7 +1191,7 @@ vector> NeuralNetwork::getNeuronBoundaries() const "without -DALTERNATIVE_WEIGHT_ORDERING.\n"); #endif - return boundaries; + return limits; } /* diff --git a/src/libnnp/NeuralNetwork.h b/src/libnnp/NeuralNetwork.h index 1e5284e3d..8cbef0cfd 100644 --- a/src/libnnp/NeuralNetwork.h +++ b/src/libnnp/NeuralNetwork.h @@ -393,24 +393,24 @@ class NeuralNetwork * Counters and summation variables for neuron statistics are reset. */ void resetNeuronStatistics(); - /** Get layer boundaries in combined weight vector (for Kalman filter + /** Get layer limits in combined weight vector (for Kalman filter * decoupling setup). * - * @return Vector with layer boundaries as pair (begin, end), + * @return Vector with layer limits as pair (begin, end), * (numLayers - 1) points. */ std::vector> getLayerBoundaries() const; - /** Get neuron boundaries in combined weight vector (for Kalman filter + std::size_t>> getLayerLimits() const; + /** Get neuron limits in combined weight vector (for Kalman filter * decoupling setup). * - * @return Vector with neuron boundaries as pair (begin, end), + * @return Vector with neuron limits as pair (begin, end), * (numNeurons - inputLayer->numNeurons) points. */ std::vector> getNeuronBoundaries() const; + std::size_t>> getNeuronLimits() const; //void writeStatus(int, int); long getMemoryUsage(); /** Print neural network architecture. diff --git a/src/libnnptrain/KalmanFilter.cpp b/src/libnnptrain/KalmanFilter.cpp index 345116072..221f82806 100644 --- a/src/libnnptrain/KalmanFilter.cpp +++ b/src/libnnptrain/KalmanFilter.cpp @@ -16,6 +16,7 @@ #include "KalmanFilter.h" #include "utility.h" +#include "mpi-extra.h" #include #include #include @@ -32,6 +33,7 @@ KalmanFilter::KalmanFilter(size_t const sizeState, numProcs (0 ), sizeObservation(0 ), numUpdates (0 ), + sizeP (0 ), epsilon (0.0 ), q (0.0 ), q0 (0.0 ), @@ -65,14 +67,6 @@ KalmanFilter::KalmanFilter(size_t const sizeState, w = new Map(0, sizeState); xi = new Map(0, sizeObservation); H = new Map(0, sizeState, sizeObservation); - P.resize(sizeState, sizeState); - P.setIdentity(); - // Prevent problems with unallocated K when log starts. - K.resize(sizeState, sizeObservation); - K.setZero(); - - // Set default decoupling group mask. - groupMask = new Map(0, sizeState); } KalmanFilter::~KalmanFilter() @@ -88,30 +82,59 @@ void KalmanFilter::setupMPI(MPI_Comm* communicator) return; } -void KalmanFilter::setupDecoupling(int const* const mask) +void KalmanFilter::setupDecoupling(vector> groupLimits) { - new (groupMask) Map(mask, sizeState); + this->groupLimits = groupLimits; - for (long i = 0; i < groupMask->size(); ++i) + // Check consistency of decoupling group limits. + if (groupLimits.at(0).first != 0) { - groupMap[(*groupMask)(i)].push_back(i); - } + auto const& l = groupLimits.at(0); + throw runtime_error(strpr("ERROR: Inconsistent decoupling group " + "limits, first group must start with index " + "0 (is %zu).\n", + l.first)); - // Check consistency of group map. - for (size_t i = 0; i < groupMap.size(); ++i) + } + for (size_t i = 0; i < groupLimits.size(); ++i) { - if (groupMap.find(i) == groupMap.end()) + auto const& l = groupLimits.at(i); + if (l.first > l.second) { - throw runtime_error(strpr("ERROR: Kalman filter decoupling group " - "mask is inconsistent, group %zu not " - "present.\n", i)); + throw runtime_error( + strpr("ERROR: Inconsistent decoupling group limits, " + "group %zu: start %zu > end %zu.\n", + i, l.first, l.second)); } } + for (size_t i = 0; i < groupLimits.size() - 1; ++i) + { + auto const& l = groupLimits.at(i); + auto const& r = groupLimits.at(i + 1); + if (l.second + 1 != r.first) + { + throw runtime_error( + strpr("ERROR: Inconsistent decoupling group limits, " + "group %zu end %zu + 1 != group %zu start %zu.\n", + i, l.second, i + 1, r.first)); + } + } + if (groupLimits.back().second != sizeState - 1) + { + auto const& l = groupLimits.back(); + throw runtime_error(strpr("ERROR: Inconsistent decoupling group " + "limits, last group must end with index " + "%zu (is %zu).\n", + sizeState - 1, + l.second)); + + } + // Distribute groups to MPI procs. numGroupsPerProc.resize(numProcs, 0); groupOffsetPerProc.resize(numProcs, 0); - int quotient = groupMap.size() / numProcs; - int remainder = groupMap.size() % numProcs; + int quotient = groupLimits.size() / numProcs; + int remainder = groupLimits.size() % numProcs; int groupSum = 0; for (int i = 0; i < numProcs; i++) { @@ -125,9 +148,29 @@ void KalmanFilter::setupDecoupling(int const* const mask) } for (size_t i = 0; i < numGroupsPerProc.at(myRank); ++i) { - myGroups.push_back(groupOffsetPerProc.at(myRank) + i); + size_t index = groupOffsetPerProc.at(myRank) + i; + size_t size = groupLimits.at(index).second + - groupLimits.at(index).first + 1; + myGroups.push_back(make_pair(index, size)); } + // Allocate per-processor matrices for all groups. + for (auto g : myGroups) + { + P.push_back(Eigen::MatrixXd(g.second, g.second)); + P.back().setIdentity(); + + X.push_back(Eigen::MatrixXd(g.second, sizeObservation)); + + // Prevent problems with unallocated K when log starts. + K.push_back(Eigen::MatrixXd(g.second, sizeObservation)); + K.back().setZero(); + + sizeP += g.second * g.second; + } + + MPI_Allreduce(MPI_IN_PLACE, &sizeP, 1, MPI_SIZE_T, MPI_SUM, comm); + return; } @@ -183,15 +226,15 @@ void KalmanFilter::update() void KalmanFilter::update(size_t const sizeObservation) { - X.resize(sizeState, sizeObservation); + X.at(0).resize(sizeState, sizeObservation); // Calculate temporary result. // X = P . H - X = P.selfadjointView() * (*H); + X.at(0) = P.at(0).selfadjointView() * (*H); // Calculate scaling matrix. // A = H^T . X - MatrixXd A = H->transpose() * X; + MatrixXd A = H->transpose() * X.at(0); // Increase learning rate. // eta(n) = eta(0) * exp(n * tau) @@ -210,25 +253,25 @@ void KalmanFilter::update(size_t const sizeObservation) // Calculate Kalman gain matrix. // K = X . A^-1 - K.resize(sizeState, sizeObservation); - K = X * A.inverse(); + K.at(0).resize(sizeState, sizeObservation); + K.at(0) = X.at(0) * A.inverse(); // Update error covariance matrix. // P = P - K . X^T - P.noalias() -= K * X.transpose(); + P.at(0).noalias() -= K.at(0) * X.at(0).transpose(); // Apply forgetting factor. if (type == KT_FADINGMEMORY) { - P *= 1.0 / lambda; + P.at(0) *= 1.0 / lambda; } // Add process noise. // P = P + Q - P.diagonal() += VectorXd::Constant(sizeState, q); + P.at(0).diagonal() += VectorXd::Constant(sizeState, q); // Update state vector. // w = w + K . xi - (*w) += K * (*xi); + (*w) += K.at(0) * (*xi); // Anneal process noise. // q(n) = q(0) * exp(-n * tau) @@ -264,7 +307,10 @@ void KalmanFilter::setParametersStandard(double const epsilon, q = q0; eta = eta0; - P /= epsilon; + for (auto& p : P) + { + p /= epsilon; + } return; } @@ -284,7 +330,10 @@ void KalmanFilter::setParametersFadingMemory(double const epsilon, this->nu = nu ; q = q0; - P /= epsilon; + for (auto& p : P) + { + p /= epsilon; + } gamma = 1.0; return; @@ -293,12 +342,30 @@ void KalmanFilter::setParametersFadingMemory(double const epsilon, string KalmanFilter::status(size_t epoch) const { - double Pasym = 0.5 * (P - P.transpose()).array().abs().mean(); - double Pdiag = P.diagonal().array().abs().sum(); - double Poffdiag = (P.array().abs().sum() - Pdiag) - / (sizeState * (sizeState - 1)); + double Pasym = 0.0; + double Pdiag = 0.0; + double Poffdiag = 0.0; + double Kmean = 0.0; + + for (size_t i = 0; i < myGroups.size(); ++i) + { + auto const& p = P.at(i); + auto const& k = K.at(i); + Pasym += 0.5 * (p - p.transpose()).array().abs().sum(); + double pdiag = p.diagonal().array().abs().sum(); + Pdiag += pdiag; + Poffdiag += p.array().abs().sum() - pdiag; + Kmean += k.array().abs().sum(); + } + + MPI_Allreduce(MPI_IN_PLACE, &Pasym , 1, MPI_DOUBLE, MPI_SUM, comm); + MPI_Allreduce(MPI_IN_PLACE, &Pdiag , 1, MPI_DOUBLE, MPI_SUM, comm); + MPI_Allreduce(MPI_IN_PLACE, &Poffdiag, 1, MPI_DOUBLE, MPI_SUM, comm); + MPI_Allreduce(MPI_IN_PLACE, &Kmean , 1, MPI_DOUBLE, MPI_SUM, comm); + Pasym /= (sizeP - sizeState); Pdiag /= sizeState; - double Kmean = K.array().abs().mean(); + Poffdiag /= (sizeP - sizeState); + Kmean /= (sizeState * sizeObservation); string s = strpr("%10zu %10zu %16.8E %16.8E %16.8E %16.8E %16.8E", epoch, numUpdates, Pdiag, Poffdiag, Pasym, Kmean, q); @@ -400,18 +467,22 @@ vector KalmanFilter::info() const v.push_back(strpr("sizeState = %zu\n", sizeState)); v.push_back(strpr("sizeObservation = %zu\n", sizeObservation)); v.push_back(strpr("OpenMP threads used: %d\n", nbThreads())); - v.push_back(strpr("Number of decoupling groups: %zu\n", groupMap.size())); - for (size_t i = 0; i < groupMap.size(); ++i) + v.push_back(strpr("Number of decoupling groups: %zu\n", + groupLimits.size())); + v.push_back(strpr("P matrix size ratio (compared to GEKF): %6.2f\n", + 100.0 * sizeP / (sizeState * sizeState))); + for (size_t i = 0; i < groupLimits.size(); ++i) { v.push_back(strpr(" - group %5zu size: %zu\n", - i, groupMap.at(i).size())); + i, + groupLimits.at(i).second + - groupLimits.at(i).first + 1)); } v.push_back(strpr("Number of decoupling groups of proc %d: %zu\n", myRank, myGroups.size())); - for (size_t i = 0; i < myGroups.size(); ++i) + for (auto g : myGroups) { - v.push_back(strpr(" - group %5zu size: %zu\n", - i, groupMap.at(myGroups.at(i)).size())); + v.push_back(strpr(" - group %5zu size: %zu\n", g.first, g.second)); } return v; diff --git a/src/libnnptrain/KalmanFilter.h b/src/libnnptrain/KalmanFilter.h index e1c1f2723..75e926983 100644 --- a/src/libnnptrain/KalmanFilter.h +++ b/src/libnnptrain/KalmanFilter.h @@ -22,6 +22,7 @@ #include #include #include +#include #include namespace nnp @@ -55,11 +56,14 @@ class KalmanFilter : public Updater * @param[in] communicator Input communicator of calling program. */ void setupMPI(MPI_Comm* communicator); - /** Set decoupling group mask. + /** Set up Kalman filter decoupling. * - * @param[in] mask Input group mask provided via array of integers. + * @param[in] groupLimits Vector containing starting and ending indices of + * each decoupling group. */ - void setupDecoupling(int const* const mask); + void setupDecoupling( + std::vector> groupLimits); /** Set observation vector size. * * @param[in] size Size of the observation vector. @@ -212,6 +216,8 @@ class KalmanFilter : public Updater std::size_t sizeObservation; /// Total number of updates performed. std::size_t numUpdates; + /// Number of covariance matrix entries in memory (sum over all groups). + std::size_t sizeP; /// Error covariance initialization parameter @f$\epsilon@f$. double epsilon; /// Process noise @f$q@f$. @@ -236,28 +242,28 @@ class KalmanFilter : public Updater double nu; /// Forgetting gain factor gamma for fading memory Kalman filter. double gamma; - /// List of group indices of this processor. - std::vector myGroups; + /// List of (group indices, group sizes) of this processor. + std::vector> myGroups; /// Number of groups for each processor. std::vector numGroupsPerProc; /// Offset in terms of groups for each processor. std::vector groupOffsetPerProc; - /// Decoupling group map. - std::map> groupMap; - /// Decoupling group mask vector. - Eigen::Map* groupMask; + /// Decoupling group limits, i.e. starting end ending indices. + std::vector> groupLimits; /// State vector. Eigen::Map* w; /// Error vector. Eigen::Map* xi; /// Derivative matrix. Eigen::Map* H; - /// Error covariance matrix. - Eigen::MatrixXd P; + /// Error covariance matrices for all groups. + std::vector P; /// Kalman gain matrix. - Eigen::MatrixXd K; + std::vector K; /// Intermediate result X = P . H. - Eigen::MatrixXd X; + std::vector X; /// Global MPI communicator. MPI_Comm comm; }; diff --git a/src/libnnptrain/Training.cpp b/src/libnnptrain/Training.cpp index 80feeb754..c6a780889 100644 --- a/src/libnnptrain/Training.cpp +++ b/src/libnnptrain/Training.cpp @@ -302,7 +302,8 @@ void Training::initializeWeights() * (maxWeights - minWeights); } #ifndef ALTERNATIVE_WEIGHT_ORDERING - // FIX!!! + // To be consistent this should actually be the non-AO version, + // but this way results stay comparable. elements.at(i).neuralNetwork->setConnectionsAO(&(w.at(i).front())); #else elements.at(i).neuralNetwork->setConnectionsAO(&(w.at(i).front())); @@ -1029,14 +1030,11 @@ void Training::setupTraining() throw runtime_error("ERROR: Unknown Kalman filter decoupling " "type.\n"); } - if (decouplingType != DT_GLOBAL && - (parallelMode != PM_TRAIN_ALL || updateStrategy != US_COMBINED)) + if (decouplingType != DT_GLOBAL && updateStrategy != US_COMBINED) { throw runtime_error(strpr("ERROR: Kalman filter decoupling works " - "only in conjunction with" - "\"parallel_mode %d\" and " - "\"update_strategy %d\".\n", - PM_TRAIN_ALL, US_COMBINED)); + "only in conjunction with " + "update_strategy %d\".\n", US_COMBINED)); } } @@ -1056,57 +1054,62 @@ void Training::setupTraining() (Updater*)new KalmanFilter(numWeightsPerUpdater.at(i), kalmanType)); KalmanFilter* u = dynamic_cast(updaters.back()); - u->setupMPI(&comm); - vector groupMask(weights.at(i).size(), - numeric_limits::max()); + if (parallelMode == PM_TRAIN_ALL) + { + u->setupMPI(&comm); + } + else + { + MPI_Group groupWorld; + MPI_Comm_group(comm, &groupWorld); + int const size0 = 1; + int const rank0[1] = {0}; + MPI_Group groupRank0; + MPI_Group_incl(groupWorld, size0, rank0, &groupRank0); + MPI_Comm commRank0; + MPI_Comm_create_group(comm, groupRank0, 0, &commRank0); + u->setupMPI(&commRank0); + } + vector> limits; if (decouplingType == DT_GLOBAL) { - fill(groupMask.begin(), groupMask.end(), 0); + limits.push_back( + make_pair(0, numWeightsPerUpdater.at(i) - 1)); } else if (decouplingType == DT_ELEMENT) { - for (size_t i = 0; i < numElements; ++i) + for (size_t j = 0; j < numElements; ++j) { - fill(groupMask.begin() + weightsOffset.at(i), - groupMask.begin() + weightsOffset.at(i) - + elements.at(i). - neuralNetwork->getNumConnections(), - i); + limits.push_back(make_pair( + weightsOffset.at(j), + weightsOffset.at(j) + elements.at(j). + neuralNetwork->getNumConnections() - 1)); } } else if (decouplingType == DT_LAYER) { - size_t index = 0; - for (size_t i = 0; i < numElements; ++i) + for (size_t j = 0; j < numElements; ++j) { - for (auto b : elements.at(i).neuralNetwork - ->getLayerBoundaries()) + for (auto l : elements.at(j).neuralNetwork + ->getLayerLimits()) { - fill(groupMask.begin() - + weightsOffset.at(i) + b.first, - groupMask.begin() - + weightsOffset.at(i) + b.second + 1, - index); - index++; + limits.push_back(make_pair( + weightsOffset.at(j) + l.first, + weightsOffset.at(j) + l.second)); } } } else if (decouplingType == DT_NODE) { #ifndef ALTERNATIVE_WEIGHT_ORDERING - size_t index = 0; - for (size_t i = 0; i < numElements; ++i) + for (size_t j = 0; j < numElements; ++j) { - for (auto b : elements.at(i).neuralNetwork - ->getNeuronBoundaries()) + for (auto l : elements.at(j).neuralNetwork + ->getNeuronLimits()) { - log << strpr("%zu %zu %zu\n", i, b.first, b.second); - fill(groupMask.begin() - + weightsOffset.at(i) + b.first, - groupMask.begin() - + weightsOffset.at(i) + b.second + 1, - index); - index++; + limits.push_back(make_pair( + weightsOffset.at(j) + l.first, + weightsOffset.at(j) + l.second)); } } #else @@ -1119,14 +1122,12 @@ void Training::setupTraining() } else if (decouplingType == DT_FULL) { - size_t index = 0; - for (auto& m : groupMask) + for (size_t j = 0; j < numWeightsPerUpdater.at(i); ++j) { - m = index; - index++; + limits.push_back(make_pair(j, j)); } } - u->setupDecoupling(groupMask.data()); + u->setupDecoupling(limits); } updaters.back()->setState(&(weights.at(i).front())); } @@ -1551,7 +1552,7 @@ void Training::calculateErrorEpoch() fileNameForcesTest = strpr("testforces.%06zu.out", epoch); } - // Calculate RMSE and write comparison files. + // Calculate error and write comparison files. calculateError(true, identifier, fileNameEnergiesTrain, @@ -1750,7 +1751,7 @@ void Training::writeNeuronStatistics(string const fileName) const vector colInfo; vector colSize; title.push_back("Statistics for individual neurons gathered during " - "RMSE calculation."); + "error calculation."); colSize.push_back(10); colName.push_back("element"); colInfo.push_back("Element index."); @@ -2005,7 +2006,7 @@ void Training::loop() else if (errorMetricForces == 1) metric = "MAE"; if (errorMetricEnergies % 2 == 0) peratom = "per atom "; - log << "The training loop output covers different RMSEs, update and\n"; + log << "The training loop output covers different errors, update and\n"; log << "timing information. The following quantities are organized\n"; log << "according to the matrix scheme below:\n"; log << "-------------------------------------------------------------------\n"; @@ -2032,7 +2033,7 @@ void Training::loop() log << "Abbreviations:\n"; log << " p. u. = physical units.\n"; log << " i. u. = internal units.\n"; - log << "Note: RMSEs in internal units (columns 5 + 6) are only present \n"; + log << "Note: Errors in internal units (columns 5 + 6) are only present \n"; log << " if data set normalization is used.\n"; log << "-------------------------------------------------------------------\n"; log << " 1 2 3 4 5 6\n"; diff --git a/test/cpp/Example_nnp_train.h b/test/cpp/Example_nnp_train.h index fe4aa3d22..88ed24a76 100644 --- a/test/cpp/Example_nnp_train.h +++ b/test/cpp/Example_nnp_train.h @@ -28,13 +28,13 @@ void BoostDataContainer::setup() e->rmseForcesTrain = 3.77302629E-02; e->rmseForcesTest = 2.25135617E-01; - //examples.push_back(Example_nnp_train("H2O_RPBE-D3")); - //e = &(examples.back()); - //e->lastEpoch = 10; - //e->rmseEnergyTrain = 1.02312914E-04; - //e->rmseEnergyTest = 5.28837597E-04; - //e->rmseForcesTrain = 1.67069652E-02; - //e->rmseForcesTest = 1.07708383E-02; + examples.push_back(Example_nnp_train("H2O_RPBE-D3")); + e = &(examples.back()); + e->lastEpoch = 5; + e->rmseEnergyTrain = 9.19395999E-04; + e->rmseEnergyTest = 7.42520683E-04; + e->rmseForcesTrain = 2.40682415E-02; + e->rmseForcesTest = 1.44776060E-02; return; } From 0a95ce8882b929ab67a911ac63fcd0254c1474c3 Mon Sep 17 00:00:00 2001 From: Andreas Singraber Date: Wed, 16 Sep 2020 18:33:28 +0200 Subject: [PATCH 8/9] Decoupling on single processor works --- src/libnnptrain/KalmanFilter.cpp | 97 +++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 33 deletions(-) diff --git a/src/libnnptrain/KalmanFilter.cpp b/src/libnnptrain/KalmanFilter.cpp index 221f82806..fd4d18224 100644 --- a/src/libnnptrain/KalmanFilter.cpp +++ b/src/libnnptrain/KalmanFilter.cpp @@ -226,15 +226,32 @@ void KalmanFilter::update() void KalmanFilter::update(size_t const sizeObservation) { - X.at(0).resize(sizeState, sizeObservation); + MatrixXd A(sizeObservation, sizeObservation); + A.setZero(); - // Calculate temporary result. - // X = P . H - X.at(0) = P.at(0).selfadjointView() * (*H); + for (size_t i = 0; i < myGroups.size(); ++i) + { + size_t const& j = myGroups.at(i).first; // group index + size_t const& n = myGroups.at(i).second; // group size + size_t const& s = groupLimits.at(j).first; // group weight start index + + auto& x = X.at(i); + auto& p = P.at(i); + + x.resize(n, sizeObservation); + + // Calculate temporary result. + // X = P . H + x = p.selfadjointView() * H->block(s, 0, n, sizeObservation); + + // Calculate scaling matrix. + // A = H^T . X + // For decoupling summarize scaling matrix here (locally). + A += H->block(s, 0, n, sizeObservation).transpose() * x; + } - // Calculate scaling matrix. - // A = H^T . X - MatrixXd A = H->transpose() * X.at(0); + // Summarize scaling matrix globally. + MPI_Allreduce(MPI_IN_PLACE, A.data(), A.size(), MPI_DOUBLE, MPI_SUM, comm); // Increase learning rate. // eta(n) = eta(0) * exp(n * tau) @@ -251,39 +268,53 @@ void KalmanFilter::update(size_t const sizeObservation) A.diagonal() += VectorXd::Constant(sizeObservation, lambda); } - // Calculate Kalman gain matrix. - // K = X . A^-1 - K.at(0).resize(sizeState, sizeObservation); - K.at(0) = X.at(0) * A.inverse(); + for (size_t i = 0; i < myGroups.size(); ++i) + { + size_t const& j = myGroups.at(i).first; // group index + size_t const& n = myGroups.at(i).second; // group size + size_t const& s = groupLimits.at(j).first; // group weight start index - // Update error covariance matrix. - // P = P - K . X^T - P.at(0).noalias() -= K.at(0) * X.at(0).transpose(); + auto& x = X.at(i); + auto& p = P.at(i); + auto& k = K.at(i); - // Apply forgetting factor. - if (type == KT_FADINGMEMORY) - { - P.at(0) *= 1.0 / lambda; - } - // Add process noise. - // P = P + Q - P.at(0).diagonal() += VectorXd::Constant(sizeState, q); + // Calculate Kalman gain matrix. + // K = X . A^-1 + k.resize(n, sizeObservation); + k = x * A.inverse(); - // Update state vector. - // w = w + K . xi - (*w) += K.at(0) * (*xi); + // Update error covariance matrix. + // P = P - K . X^T + p.noalias() -= k * x.transpose(); - // Anneal process noise. - // q(n) = q(0) * exp(-n * tau) - if (q > qmin) q *= exp(-qtau); + // Apply forgetting factor. + if (type == KT_FADINGMEMORY) + { + p *= 1.0 / lambda; + } + // Add process noise. + // P = P + Q + p.diagonal() += VectorXd::Constant(n, q); - // Update forgetting factor. - if (type == KT_FADINGMEMORY) - { - lambda = nu * lambda + 1.0 - nu; - gamma = 1.0 / (1.0 + lambda / gamma); + // Update state vector. + // w = w + K . xi + w->segment(s, n) += k * (*xi); + + // Anneal process noise. + // q(n) = q(0) * exp(-n * tau) + if (q > qmin) q *= exp(-qtau); + + // Update forgetting factor. + if (type == KT_FADINGMEMORY) + { + lambda = nu * lambda + 1.0 - nu; + gamma = 1.0 / (1.0 + lambda / gamma); + } } + // Communicate new weight segments. + // ???? + numUpdates++; return; From 11dc11641080a07fae1d98f79a4387fec60d3b1f Mon Sep 17 00:00:00 2001 From: Andreas Singraber Date: Thu, 17 Sep 2020 14:10:20 +0200 Subject: [PATCH 9/9] Communicate weight segments correctly --- src/libnnptrain/KalmanFilter.cpp | 71 ++++++++++++++++++++------------ src/libnnptrain/KalmanFilter.h | 4 ++ src/libnnptrain/Training.cpp | 42 +++++++++++-------- src/makefile.gnu | 3 +- 4 files changed, 75 insertions(+), 45 deletions(-) diff --git a/src/libnnptrain/KalmanFilter.cpp b/src/libnnptrain/KalmanFilter.cpp index fd4d18224..5d02b44c2 100644 --- a/src/libnnptrain/KalmanFilter.cpp +++ b/src/libnnptrain/KalmanFilter.cpp @@ -136,7 +136,7 @@ void KalmanFilter::setupDecoupling(vector> groupLimits) int quotient = groupLimits.size() / numProcs; int remainder = groupLimits.size() % numProcs; int groupSum = 0; - for (int i = 0; i < numProcs; i++) + for (int i = 0; i < numProcs; ++i) { numGroupsPerProc.at(i) = quotient; if (remainder > 0 && i < remainder) @@ -146,6 +146,23 @@ void KalmanFilter::setupDecoupling(vector> groupLimits) groupOffsetPerProc.at(i) = groupSum; groupSum += numGroupsPerProc.at(i); } + + // Compute state vector segment length and offset for each processor. + for (int i = 0; i < numProcs - 1; ++i) + { + sizeStatePerProc.push_back( + groupLimits.at(groupOffsetPerProc.at(i+1)).first - + groupLimits.at(groupOffsetPerProc.at(i)).first); + offsetStatePerProc.push_back( + groupLimits.at(groupOffsetPerProc.at(i)).first); + } + sizeStatePerProc.push_back( + groupLimits.back().second + 1 - + groupLimits.at(groupOffsetPerProc.at(numProcs - 1)).first); + offsetStatePerProc.push_back( + groupLimits.at(groupOffsetPerProc.at(numProcs - 1)).first); + + // Store group index and segment length for this processor. for (size_t i = 0; i < numGroupsPerProc.at(myRank); ++i) { size_t index = groupOffsetPerProc.at(myRank) + i; @@ -313,7 +330,7 @@ void KalmanFilter::update(size_t const sizeObservation) } // Communicate new weight segments. - // ???? + MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DOUBLE, w->data(), sizeStatePerProc.data(), offsetStatePerProc.data(), MPI_DOUBLE, comm); numUpdates++; @@ -470,11 +487,24 @@ vector KalmanFilter::info() const { vector v; + v.push_back("Extended Kalman Filter (EKF) optimizer:\n"); + v.push_back(strpr("sizeState = %zu\n", sizeState)); + v.push_back(strpr("sizeObservation = %zu\n", sizeObservation)); + v.push_back(strpr("myRank = %d\n", myRank)); + v.push_back(strpr("numProcs = %d\n", numProcs)); + v.push_back(strpr("OpenMP threads used : %d\n", + nbThreads())); + v.push_back(strpr("Number of decoupling groups : %zu\n", + groupLimits.size())); + v.push_back(strpr("Number of decoupling groups of proc %3d: %zu\n", + myRank, myGroups.size())); + v.push_back(strpr("P matrix size ratio (compared to GEKF) : %6.2f %%\n", + 100.0 * sizeP / (sizeState * sizeState))); + v.push_back("-----------------------------------------" + "--------------------------------------\n"); if (type == KT_STANDARD) { v.push_back(strpr("KalmanType::KT_STANDARD (%d)\n", type)); - v.push_back(strpr("myRank = %d\n", myRank)); - v.push_back(strpr("numProcs = %d\n", numProcs)); v.push_back(strpr("epsilon = %12.4E\n", epsilon)); v.push_back(strpr("q0 = %12.4E\n", q0 )); v.push_back(strpr("qtau = %12.4E\n", qtau )); @@ -486,8 +516,6 @@ vector KalmanFilter::info() const else if (type == KT_FADINGMEMORY) { v.push_back(strpr("KalmanType::KT_FADINGMEMORY (%d)\n", type)); - v.push_back(strpr("sizeState = %zu\n", sizeState)); - v.push_back(strpr("sizeObservation = %zu\n", sizeObservation)); v.push_back(strpr("epsilon = %12.4E\n", epsilon)); v.push_back(strpr("q0 = %12.4E\n", q0 )); v.push_back(strpr("qtau = %12.4E\n", qtau )); @@ -495,26 +523,17 @@ vector KalmanFilter::info() const v.push_back(strpr("lambda = %12.4E\n", lambda)); v.push_back(strpr("nu = %12.4E\n", nu )); } - v.push_back(strpr("sizeState = %zu\n", sizeState)); - v.push_back(strpr("sizeObservation = %zu\n", sizeObservation)); - v.push_back(strpr("OpenMP threads used: %d\n", nbThreads())); - v.push_back(strpr("Number of decoupling groups: %zu\n", - groupLimits.size())); - v.push_back(strpr("P matrix size ratio (compared to GEKF): %6.2f\n", - 100.0 * sizeP / (sizeState * sizeState))); - for (size_t i = 0; i < groupLimits.size(); ++i) - { - v.push_back(strpr(" - group %5zu size: %zu\n", - i, - groupLimits.at(i).second - - groupLimits.at(i).first + 1)); - } - v.push_back(strpr("Number of decoupling groups of proc %d: %zu\n", - myRank, myGroups.size())); - for (auto g : myGroups) - { - v.push_back(strpr(" - group %5zu size: %zu\n", g.first, g.second)); - } + //for (size_t i = 0; i < groupLimits.size(); ++i) + //{ + // v.push_back(strpr(" - group %5zu size: %zu\n", + // i, + // groupLimits.at(i).second + // - groupLimits.at(i).first + 1)); + //} + //for (auto g : myGroups) + //{ + // v.push_back(strpr(" - group %5zu size: %zu\n", g.first, g.second)); + //} return v; } diff --git a/src/libnnptrain/KalmanFilter.h b/src/libnnptrain/KalmanFilter.h index 75e926983..0d409ac1c 100644 --- a/src/libnnptrain/KalmanFilter.h +++ b/src/libnnptrain/KalmanFilter.h @@ -242,6 +242,10 @@ class KalmanFilter : public Updater double nu; /// Forgetting gain factor gamma for fading memory Kalman filter. double gamma; + /// Size of state vector segment handled by each processor. + std::vector sizeStatePerProc; + /// Offset of state vector segment handled by each processor. + std::vector offsetStatePerProc; /// List of (group indices, group sizes) of this processor. std::vector> myGroups; diff --git a/src/libnnptrain/Training.cpp b/src/libnnptrain/Training.cpp index c6a780889..990754450 100644 --- a/src/libnnptrain/Training.cpp +++ b/src/libnnptrain/Training.cpp @@ -1865,24 +1865,31 @@ void Training::writeUpdaterStatus(bool append, for (size_t i = 0; i < numUpdaters; ++i) { - string fileName; - if (updateStrategy == US_COMBINED) - { - fileName = strpr(fileNameFormat.c_str(), 0); - } - else if (updateStrategy == US_ELEMENT) + if (myRank == 0) { - fileName = strpr(fileNameFormat.c_str(), - elementMap.atomicNumber(i)); + string fileName; + if (updateStrategy == US_COMBINED) + { + fileName = strpr(fileNameFormat.c_str(), 0); + } + else if (updateStrategy == US_ELEMENT) + { + fileName = strpr(fileNameFormat.c_str(), + elementMap.atomicNumber(i)); + } + if (append) file.open(fileName.c_str(), ofstream::app); + else + { + file.open(fileName.c_str()); + appendLinesToFile(file, updaters.at(i)->statusHeader()); + } + file << updaters.at(i)->status(epoch); + file.close(); } - if (append) file.open(fileName.c_str(), ofstream::app); - else + else if (parallelMode == PM_TRAIN_ALL) { - file.open(fileName.c_str()); - appendLinesToFile(file, updaters.at(i)->statusHeader()); + updaters.at(i)->status(epoch); } - file << updaters.at(i)->status(epoch); - file.close(); } return; @@ -2062,8 +2069,9 @@ void Training::loop() // Write learning curve. if (myRank == 0) writeLearningCurve(false); - // Write updater status to file. - if (myRank == 0) writeUpdaterStatus(false); + // Write updater status to file (decoupled KF requires all tasks to execute + // the status() function. + writeUpdaterStatus(false); // Write neuron statistics. writeNeuronStatisticsEpoch(); @@ -2135,7 +2143,7 @@ void Training::loop() if (myRank == 0) writeLearningCurve(true); // Write updater status to file. - if (myRank == 0) writeUpdaterStatus(true); + writeUpdaterStatus(true); // Write neuron statistics. writeNeuronStatisticsEpoch(); diff --git a/src/makefile.gnu b/src/makefile.gnu index f35aeb0b6..ece0147ce 100644 --- a/src/makefile.gnu +++ b/src/makefile.gnu @@ -14,8 +14,7 @@ PROJECT_EIGEN=/usr/include/eigen3/ PROJECT_CC=g++ PROJECT_MPICC=mpic++ # OpenMP parallelization is disabled by default, add flag "-fopenmp" to enable. -PROJECT_CFLAGS=-O0 -#3 -march=native -std=c++11 +PROJECT_CFLAGS=-O3 -march=native -std=c++11 PROJECT_CFLAGS_MPI=-Wno-long-long PROJECT_DEBUG=-g -pedantic-errors -Wall -Wextra PROJECT_TEST=--coverage -fno-default-inline -fno-inline -fno-inline-small-functions -fno-elide-constructors