From 08e32b322eb1009571f5b56192d4dbc5bfa6d8e5 Mon Sep 17 00:00:00 2001
From: Donghak PARK
Date: Fri, 3 Jan 2025 15:43:18 +0900
Subject: [PATCH] [FSU] Update layer weight load logic for FSU

In the FSU case, the layer's weight load is not needed:
- add a swap parameter
- when swap is enabled, the load is skipped
- update the SimpleFC application

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghak PARK
---
 Applications/SimpleFC/jni/main.cpp | 15 ++++++++------
 nntrainer/layers/layer_node.cpp    | 33 ++++++++++++++----------------
 nntrainer/layers/layer_node.h      |  3 ++-
 nntrainer/models/neuralnet.cpp     |  3 ++-
 4 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/Applications/SimpleFC/jni/main.cpp b/Applications/SimpleFC/jni/main.cpp
index c6bbfa9e7..1667eb3e4 100644
--- a/Applications/SimpleFC/jni/main.cpp
+++ b/Applications/SimpleFC/jni/main.cpp
@@ -12,10 +12,10 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
@@ -30,7 +30,6 @@
 
 using LayerHandle = std::shared_ptr<ml::train::Layer>;
 using ModelHandle = std::unique_ptr<ml::train::Model>;
-
 /**
  * @brief make "key=value" from key and value
  *
@@ -79,10 +78,10 @@ std::vector<LayerHandle> createGraph() {
     createLayer("input", {withKey("name", "input0"),
                           withKey("input_shape", "1:1024:1440")}));
 
-  for (int i = 0; i < 5; i++) {
+  for (int i = 0; i < 30; i++) {
     layers.push_back(createLayer(
       "fully_connected",
-      {withKey("unit", 1440), withKey("weight_initializer", "xavier_uniform"),
+      {withKey("unit", 10000), withKey("weight_initializer", "xavier_uniform"),
       withKey("bias_initializer", "zeros")}));
   }
   layers.push_back(createLayer("fully_connected",
@@ -158,12 +157,16 @@ void createAndRun(unsigned int epochs, unsigned int batch_size,
   std::string filePath = "./simplefc_weight_fp16_fp16_100.bin";
   if (access(filePath.c_str(), F_OK) == 0) {
     model->load(filePath);
-
+    auto load_end = std::chrono::system_clock::now();
+    std::chrono::duration<double> load_elapsed_seconds = load_end - start;
+    std::time_t load_end_time = std::chrono::system_clock::to_time_t(load_end);
+    std::cout << "Load finished computation at " << std::ctime(&load_end_time)
+              << "elapsed time: " << load_elapsed_seconds.count() << "s\n";
   } else {
     model->save(filePath, ml::train::ModelFormat::MODEL_FORMAT_BIN);
     model->load(filePath);
   }
-
+  exit(0);
   // model->summarize(std::cout, ML_TRAIN_SUMMARY_MODEL);
 
   answer = model->inference(1, in, l);
diff --git a/nntrainer/layers/layer_node.cpp b/nntrainer/layers/layer_node.cpp
index a045c8c7c..c9c05f9f5 100644
--- a/nntrainer/layers/layer_node.cpp
+++ b/nntrainer/layers/layer_node.cpp
@@ -499,7 +499,7 @@ void LayerNode::exportTo(Exporter &exporter,
 }
 
 void LayerNode::read(std::ifstream &file, bool opt_var,
-                     ml::train::ExecutionMode mode) {
+                     ml::train::ExecutionMode mode, bool swap) {
   NNTR_THROW_IF(!run_context, std::runtime_error)
     << __func__ << " layer needs to be finalized first!";
@@ -518,24 +518,21 @@
     /// @note shared weights are only read at the first access
     // if (run_context->isGradientLastAccess(i)) {
     if (run_context->isGradientFirstAccess(i)) {
-      if (layer->getType() == BatchNormalizationLayer::type) {
-        if ((mode == ml::train::ExecutionMode::TRAIN) &&
-            (this->getWeightDataType() != TensorDim::DataType::FP32)) {
-
-          /** @note for batch normalization layer, we do need full precision
-           * for training. but weight can be saved with other type. for
-           * training, bn weight type is fixed with full precsion */
-
-          TensorDim dim = run_context->getWeight(i).getDim();
-          dim.setDataType(this->getWeightDataType());
-          Tensor T_read(dim, true);
-          T_read.read(file);
-          run_context->getWeight(i).copyData(T_read);
-        } else {
-          run_context->getWeight(i).read(file);
-        }
+      if (layer->getType() == BatchNormalizationLayer::type &&
+          mode == ml::train::ExecutionMode::TRAIN &&
+          (this->getWeightDataType() != TensorDim::DataType::FP32)) {
+        /** @note batch normalization layers need full precision for
+         * training, but the weight can be saved in another type; for
+         * training, the BN weight type is fixed to full precision */
+
+        TensorDim dim = run_context->getWeight(i).getDim();
+        dim.setDataType(this->getWeightDataType());
+        Tensor T_read(dim, true);
+        T_read.read(file);
+        run_context->getWeight(i).copyData(T_read);
       } else {
-        run_context->getWeight(i).read(file);
+        if (!swap)
+          run_context->getWeight(i).read(file);
       }
 
       if (run_context->isMixedPrecision(i) && getTrainable() &&
diff --git a/nntrainer/layers/layer_node.h b/nntrainer/layers/layer_node.h
index 6e6084968..00db03e9b 100644
--- a/nntrainer/layers/layer_node.h
+++ b/nntrainer/layers/layer_node.h
@@ -753,7 +753,8 @@ class LayerNode final : public ml::train::Layer, public GraphNode {
    * @param bool read optimizer variables
    */
   void read(std::ifstream &file, bool opt_var = false,
-            ml::train::ExecutionMode mode = ml::train::ExecutionMode::TRAIN);
+            ml::train::ExecutionMode mode = ml::train::ExecutionMode::TRAIN,
+            bool swap = false);
 
   /**
    * @brief save layer Weight & Bias data from file
diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp
index 6128cf060..ed0b579f8 100644
--- a/nntrainer/models/neuralnet.cpp
+++ b/nntrainer/models/neuralnet.cpp
@@ -651,6 +651,7 @@ void NeuralNetwork::load(const std::string &file_path,
                          ml::train::ModelFormat format) {
   /// @todo this switch case should be delegating the function call only. It's
   /// not delegating for now as required logics are manageable for now.
+  bool swap_mode = std::get<props::MemorySwap>(model_flex_props);
   switch (format) {
   case ml::train::ModelFormat::MODEL_FORMAT_BIN: {
     NNTR_THROW_IF(!initialized, std::runtime_error)
     auto model_file = checkedOpenStream<std::ifstream>(
       file_path, std::ios::in | std::ios::binary);
     for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
-      (*iter)->read(model_file, false, exec_mode);
+      (*iter)->read(model_file, false, exec_mode, swap_mode);
     }
     try {
       /// this is assuming that the failure is allowed at the end of the file
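
Reviewer note: below is a minimal sketch of how the new swap path is exercised end to end. It assumes the public ml::train API as used in SimpleFC's main.cpp and the `memory_swap` model property (the key behind `props::MemorySwap`, which `NeuralNetwork::load()` now reads into `swap_mode`); it illustrates the intended flow and is not part of the patch itself.

```cpp
// Sketch, assuming the ml::train API and the memory_swap property name.
#include <model.h>

#include <memory>

int main() {
  // Creating the model with memory swap (FSU) enabled means load() will
  // forward swap=true down to every LayerNode::read().
  std::unique_ptr<ml::train::Model> model = ml::train::createModel(
    ml::train::ModelType::NEURAL_NET, {"memory_swap=true"});

  // ... add layers, model->compile(), model->initialize() as in main.cpp ...

  // With swap enabled, LayerNode::read() skips the eager
  // run_context->getWeight(i).read(file) for non-BN weights, so this call
  // returns without pulling the whole weight file into memory; FSU fetches
  // weights on demand during inference instead.
  model->load("./simplefc_weight_fp16_fp16_100.bin",
              ml::train::ModelFormat::MODEL_FORMAT_BIN);
  return 0;
}
```

This is also what the SimpleFC timing change measures: with swap on, the "Load finished" elapsed time reflects only the skipped-read walk over the file rather than a full weight load.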