From 08e32b322eb1009571f5b56192d4dbc5bfa6d8e5 Mon Sep 17 00:00:00 2001
From: Donghak PARK
Date: Fri, 3 Jan 2025 15:43:18 +0900
Subject: [PATCH] [FSU] Update layer weight load logic for FSU

In the FSU case, the layer's weight load is not needed:
- add a swap parameter
- when swap is enabled, the load is skipped
- update the SimpleFC application

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghak PARK
---
 Applications/SimpleFC/jni/main.cpp | 15 ++++++++------
 nntrainer/layers/layer_node.cpp    | 33 ++++++++++++++----------------
 nntrainer/layers/layer_node.h      |  3 ++-
 nntrainer/models/neuralnet.cpp     |  3 ++-
 4 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/Applications/SimpleFC/jni/main.cpp b/Applications/SimpleFC/jni/main.cpp
index c6bbfa9e7..1667eb3e4 100644
--- a/Applications/SimpleFC/jni/main.cpp
+++ b/Applications/SimpleFC/jni/main.cpp
@@ -12,10 +12,10 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
@@ -30,7 +30,6 @@
 
 using LayerHandle = std::shared_ptr<ml::train::Layer>;
 using ModelHandle = std::unique_ptr<ml::train::Model>;
-
 /**
  * @brief make "key=value" from key and value
  *
@@ -79,10 +78,10 @@ std::vector<LayerHandle> createGraph() {
     createLayer("input", {withKey("name", "input0"),
                           withKey("input_shape", "1:1024:1440")}));
 
-  for (int i = 0; i < 5; i++) {
+  for (int i = 0; i < 30; i++) {
     layers.push_back(createLayer(
       "fully_connected",
-      {withKey("unit", 1440), withKey("weight_initializer", "xavier_uniform"),
+      {withKey("unit", 10000), withKey("weight_initializer", "xavier_uniform"),
       withKey("bias_initializer", "zeros")}));
   }
   layers.push_back(createLayer("fully_connected",
@@ -158,12 +157,16 @@ void createAndRun(unsigned int epochs, unsigned int batch_size,
   std::string filePath = "./simplefc_weight_fp16_fp16_100.bin";
   if (access(filePath.c_str(), F_OK) == 0) {
     model->load(filePath);
-
+    auto load_end = std::chrono::system_clock::now();
+    std::chrono::duration<double> load_elapsed_seconds = load_end - start;
+    std::time_t load_end_time = std::chrono::system_clock::to_time_t(load_end);
+    std::cout << "Load finished computation at " << std::ctime(&load_end_time)
+              << "elapsed time: " << load_elapsed_seconds.count() << "s\n";
   } else {
     model->save(filePath, ml::train::ModelFormat::MODEL_FORMAT_BIN);
     model->load(filePath);
   }
-
+  exit(0);
   // model->summarize(std::cout, ML_TRAIN_SUMMARY_MODEL);
 
   answer = model->inference(1, in, l);
diff --git a/nntrainer/layers/layer_node.cpp b/nntrainer/layers/layer_node.cpp
index a045c8c7c..c9c05f9f5 100644
--- a/nntrainer/layers/layer_node.cpp
+++ b/nntrainer/layers/layer_node.cpp
@@ -499,7 +499,7 @@ void LayerNode::exportTo(Exporter &exporter,
 }
 
 void LayerNode::read(std::ifstream &file, bool opt_var,
-                     ml::train::ExecutionMode mode) {
+                     ml::train::ExecutionMode mode, bool swap) {
   NNTR_THROW_IF(!run_context, std::runtime_error)
     << __func__ << " layer needs to be finalized first!";
@@ -518,24 +518,21 @@
     /// @note shared weights are only read at the first access
     // if (run_context->isGradientLastAccess(i)) {
     if (run_context->isGradientFirstAccess(i)) {
-      if (layer->getType() == BatchNormalizationLayer::type) {
-        if ((mode == ml::train::ExecutionMode::TRAIN) &&
-            (this->getWeightDataType() != TensorDim::DataType::FP32)) {
-
-          /** @note for batch normalization layer, we do need full precision
-           * for training. but weight can be saved with other type. for
-           * training, bn weight type is fixed with full precsion */
-
-          TensorDim dim = run_context->getWeight(i).getDim();
-          dim.setDataType(this->getWeightDataType());
-          Tensor T_read(dim, true);
-          T_read.read(file);
-          run_context->getWeight(i).copyData(T_read);
-        } else {
-          run_context->getWeight(i).read(file);
-        }
+      if (layer->getType() == BatchNormalizationLayer::type &&
+          mode == ml::train::ExecutionMode::TRAIN &&
+          (this->getWeightDataType() != TensorDim::DataType::FP32)) {
+        /** @note batch normalization layers need full precision for
+         * training, but the weight can be saved in another type; for
+         * training, the BN weight type is fixed to full precision */
+
+        TensorDim dim = run_context->getWeight(i).getDim();
+        dim.setDataType(this->getWeightDataType());
+        Tensor T_read(dim, true);
+        T_read.read(file);
+        run_context->getWeight(i).copyData(T_read);
       } else {
-        run_context->getWeight(i).read(file);
+        if (!swap)
+          run_context->getWeight(i).read(file);
       }
 
       if (run_context->isMixedPrecision(i) && getTrainable() &&
diff --git a/nntrainer/layers/layer_node.h b/nntrainer/layers/layer_node.h
index 6e6084968..00db03e9b 100644
--- a/nntrainer/layers/layer_node.h
+++ b/nntrainer/layers/layer_node.h
@@ -753,7 +753,8 @@ class LayerNode final : public ml::train::Layer, public GraphNode {
    * @param bool read optimizer variables
    */
   void read(std::ifstream &file, bool opt_var = false,
-            ml::train::ExecutionMode mode = ml::train::ExecutionMode::TRAIN);
+            ml::train::ExecutionMode mode = ml::train::ExecutionMode::TRAIN,
+            bool swap = false);
 
   /**
    * @brief save layer Weight & Bias data from file
diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp
index 6128cf060..ed0b579f8 100644
--- a/nntrainer/models/neuralnet.cpp
+++ b/nntrainer/models/neuralnet.cpp
@@ -651,6 +651,7 @@ void NeuralNetwork::load(const std::string &file_path,
                          ml::train::ModelFormat format) {
   /// @todo this switch case should be delegating the function call only. It's
   /// not delegating for now as required logics are manageable for now.
+  bool swap_mode = std::get<props::MemorySwap>(model_flex_props);
   switch (format) {
   case ml::train::ModelFormat::MODEL_FORMAT_BIN: {
     NNTR_THROW_IF(!initialized, std::runtime_error)
     auto model_file = checkedOpenStream<std::ifstream>(
       file_path, std::ios::in | std::ios::binary);
     for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
-      (*iter)->read(model_file, false, exec_mode);
+      (*iter)->read(model_file, false, exec_mode, swap_mode);
     }
     try {
       /// this is assuming that the failure is allowed at the end of the file
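
Reviewer note: below is a minimal sketch of how the new swap path is exercised end to end. It assumes the public ml::train API as used in SimpleFC's main.cpp and the `memory_swap` model property (the key behind `props::MemorySwap`, which `NeuralNetwork::load()` now reads into `swap_mode`); it illustrates the intended flow and is not part of the patch itself.

```cpp
// Sketch, assuming the ml::train API and the memory_swap property name.
#include <model.h>

#include <memory>

int main() {
  // Creating the model with memory swap (FSU) enabled means load() will
  // forward swap=true down to every LayerNode::read().
  std::unique_ptr<ml::train::Model> model = ml::train::createModel(
    ml::train::ModelType::NEURAL_NET, {"memory_swap=true"});

  // ... add layers, model->compile(), model->initialize() as in main.cpp ...

  // With swap enabled, LayerNode::read() skips the eager
  // run_context->getWeight(i).read(file) for non-BN weights, so this call
  // returns without pulling the whole weight file into memory; FSU fetches
  // weights on demand during inference instead.
  model->load("./simplefc_weight_fp16_fp16_100.bin",
              ml::train::ModelFormat::MODEL_FORMAT_BIN);
  return 0;
}
```

This is also what the SimpleFC timing change measures: with swap on, the "Load finished" elapsed time reflects only the skipped-read walk over the file rather than a full weight load.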