[FSU] Update layer weight load logic for FSU
In the FSU (swap) case, the layer's weight load is not needed.
- add a swap parameter to LayerNode::read()
- when swap is enabled, the weight load is skipped
- update the SimpleFC application

**Self evaluation:**
1. Build test:	 [X]Passed [ ]Failed [ ]Skipped
2. Run test:	 [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghak PARK <[email protected]>
DonghakPark committed Jan 3, 2025
1 parent 56f0146 commit 08e32b3
Showing 4 changed files with 28 additions and 26 deletions.
15 changes: 9 additions & 6 deletions Applications/SimpleFC/jni/main.cpp
@@ -12,10 +12,10 @@
#include <array>
#include <chrono>
#include <ctime>
+#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
-#include <fstream>
#include <vector>

#include <layer.h>
@@ -30,7 +30,6 @@
using LayerHandle = std::shared_ptr<ml::train::Layer>;
using ModelHandle = std::unique_ptr<ml::train::Model>;

-
/**
* @brief make "key=value" from key and value
*
@@ -79,10 +78,10 @@ std::vector<LayerHandle> createGraph() {
createLayer("input", {withKey("name", "input0"),
withKey("input_shape", "1:1024:1440")}));

-for (int i = 0; i < 5; i++) {
+for (int i = 0; i < 30; i++) {
layers.push_back(createLayer(
"fully_connected",
-{withKey("unit", 1440), withKey("weight_initializer", "xavier_uniform"),
+{withKey("unit", 10000), withKey("weight_initializer", "xavier_uniform"),
withKey("bias_initializer", "zeros")}));
}
layers.push_back(createLayer("fully_connected",
@@ -158,12 +157,16 @@ void createAndRun(unsigned int epochs, unsigned int batch_size,
std::string filePath = "./simplefc_weight_fp16_fp16_100.bin";
if (access(filePath.c_str(), F_OK) == 0) {
model->load(filePath);

+auto load_end = std::chrono::system_clock::now();
+std::chrono::duration<double> load_elapsed_seconds = load_end - start;
+std::time_t load_end_time = std::chrono::system_clock::to_time_t(load_end);
+std::cout << "Load finished computation at " << std::ctime(&load_end_time)
+<< "elapsed time: " << load_elapsed_seconds.count() << "s\n";
} else {
model->save(filePath, ml::train::ModelFormat::MODEL_FORMAT_BIN);
model->load(filePath);
}

exit(0);
// model->summarize(std::cout, ML_TRAIN_SUMMARY_MODEL);

answer = model->inference(1, in, l);
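For reference, the graph above is assembled from "key=value" property strings. A minimal sketch of a helper like withKey, which the file's own doc comment describes as making "key=value" from a key and a value, could look as follows; this is illustrative only, and the real helper in main.cpp may differ (for example, with overloads for lists of values).

#include <sstream>
#include <string>

// Illustrative sketch only -- not the exact withKey from main.cpp.
// Formats a property as "key=value", e.g. withKey("unit", 10000) -> "unit=10000".
template <typename T>
static std::string withKey(const std::string &key, const T &value) {
  std::ostringstream os;
  os << key << "=" << value;
  return os.str();
}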
33 changes: 15 additions & 18 deletions nntrainer/layers/layer_node.cpp
@@ -499,7 +499,7 @@ void LayerNode::exportTo(Exporter &exporter,
}

void LayerNode::read(std::ifstream &file, bool opt_var,
-ml::train::ExecutionMode mode) {
+ml::train::ExecutionMode mode, bool swap) {
NNTR_THROW_IF(!run_context, std::runtime_error)
<< __func__ << " layer needs to be finalized first!";

@@ -518,24 +518,21 @@ void LayerNode::read(std::ifstream &file, bool opt_var,
/// @note shared weights are only be read at the first acecss
// if (run_context->isGradientLastAccess(i)) {
if (run_context->isGradientFirstAccess(i)) {
-if (layer->getType() == BatchNormalizationLayer::type) {
-if ((mode == ml::train::ExecutionMode::TRAIN) &&
-(this->getWeightDataType() != TensorDim::DataType::FP32)) {
-
-/** @note for batch normalization layer, we do need full precision
-* for training. but weight can be saved with other type. for
-* training, bn weight type is fixed with full precsion */
-
-TensorDim dim = run_context->getWeight(i).getDim();
-dim.setDataType(this->getWeightDataType());
-Tensor T_read(dim, true);
-T_read.read(file);
-run_context->getWeight(i).copyData(T_read);
-} else {
-run_context->getWeight(i).read(file);
-}
+if (layer->getType() == BatchNormalizationLayer::type &&
+mode == ml::train::ExecutionMode::TRAIN &&
+(this->getWeightDataType() != TensorDim::DataType::FP32)) {
+/** @note for batch normalization layer, we do need full precision
+* for training. but weight can be saved with other type. for
+* training, bn weight type is fixed with full precsion */
+
+TensorDim dim = run_context->getWeight(i).getDim();
+dim.setDataType(this->getWeightDataType());
+Tensor T_read(dim, true);
+T_read.read(file);
+run_context->getWeight(i).copyData(T_read);
} else {
-run_context->getWeight(i).read(file);
+if (!swap)
+run_context->getWeight(i).read(file);
}

if (run_context->isMixedPrecision(i) && getTrainable() &&
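Since the flattened hunk above is dense, the weight-read branch after this change reads roughly as follows, reconstructed from the added lines (indentation approximated; surrounding loop and mixed-precision handling omitted).

// Reconstructed from the '+' lines above for readability; not a verbatim copy.
if (run_context->isGradientFirstAccess(i)) {
  if (layer->getType() == BatchNormalizationLayer::type &&
      mode == ml::train::ExecutionMode::TRAIN &&
      (this->getWeightDataType() != TensorDim::DataType::FP32)) {
    // batch normalization needs full precision for training, even if the
    // weight was saved with another data type
    TensorDim dim = run_context->getWeight(i).getDim();
    dim.setDataType(this->getWeightDataType());
    Tensor T_read(dim, true);
    T_read.read(file);
    run_context->getWeight(i).copyData(T_read);
  } else {
    if (!swap) // with swap (FSU) enabled, skip reading the weight here
      run_context->getWeight(i).read(file);
  }
  // ... mixed-precision handling follows
}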
3 changes: 2 additions & 1 deletion nntrainer/layers/layer_node.h
@@ -753,7 +753,8 @@ class LayerNode final : public ml::train::Layer, public GraphNode {
* @param bool read optimizer variables
*/
void read(std::ifstream &file, bool opt_var = false,
-ml::train::ExecutionMode mode = ml::train::ExecutionMode::TRAIN);
+ml::train::ExecutionMode mode = ml::train::ExecutionMode::TRAIN,
+bool swap = false);

/**
* @brief save layer Weight & Bias data from file
3 changes: 2 additions & 1 deletion nntrainer/models/neuralnet.cpp
@@ -651,6 +651,7 @@ void NeuralNetwork::load(const std::string &file_path,
ml::train::ModelFormat format) {
/// @todo this switch case should be delegating the function call only. It's
/// not delegating for now as required logics are manageable for now.
+bool swap_mode = std::get<props::MemorySwap>(model_flex_props);
switch (format) {
case ml::train::ModelFormat::MODEL_FORMAT_BIN: {
NNTR_THROW_IF(!initialized, std::runtime_error)
@@ -660,7 +661,7 @@
auto model_file = checkedOpenStream<std::ifstream>(
file_path, std::ios::in | std::ios::binary);
for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
-(*iter)->read(model_file, false, exec_mode);
+(*iter)->read(model_file, false, exec_mode, swap_mode);
}
try {
/// this is assuming that the failure is allowed at the end of the file
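For context, swap_mode here comes from the model's memory-swap (FSU) property, so the effect of this commit only shows when that property is enabled. Below is a minimal caller-side sketch using the public ml::train API; the property key "memory_swap" is an assumption based on props::MemorySwap and is not part of this diff.

#include <model.h> // ml::train C++ API (ccapi)

// Hypothetical usage sketch: with memory swap (FSU) enabled,
// NeuralNetwork::load() passes swap_mode = true down to LayerNode::read(),
// so the per-layer weight read from the .bin file is skipped.
void loadWithSwapEnabled() {
  auto model = ml::train::createModel(ml::train::ModelType::NEURAL_NET);
  model->setProperty({"memory_swap=true"}); // assumed property key
  // ... add layers, then model->compile() and model->initialize() ...
  model->load("./simplefc_weight_fp16_fp16_100.bin",
              ml::train::ModelFormat::MODEL_FORMAT_BIN);
}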
