[FSU] Update layer weight load logic for FSU
In the FSU (swap) case, the layer's weight load is not needed.
- add a swap parameter to LayerNode::read()
- when swap is enabled, the weight load is skipped
- update the SimpleFC application

**Self evaluation:**
1. Build test:	 [X]Passed [ ]Failed [ ]Skipped
2. Run test:	 [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghak PARK <[email protected]>
DonghakPark committed Jan 3, 2025
1 parent 56f0146 commit 08e32b3
Showing 4 changed files with 28 additions and 26 deletions.
15 changes: 9 additions & 6 deletions Applications/SimpleFC/jni/main.cpp
@@ -12,10 +12,10 @@
#include <array>
#include <chrono>
#include <ctime>
+#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
-#include <fstream>
#include <vector>

#include <layer.h>
@@ -30,7 +30,6 @@
using LayerHandle = std::shared_ptr<ml::train::Layer>;
using ModelHandle = std::unique_ptr<ml::train::Model>;

-
/**
* @brief make "key=value" from key and value
*
@@ -79,10 +78,10 @@ std::vector<LayerHandle> createGraph() {
createLayer("input", {withKey("name", "input0"),
withKey("input_shape", "1:1024:1440")}));

-for (int i = 0; i < 5; i++) {
+for (int i = 0; i < 30; i++) {
layers.push_back(createLayer(
"fully_connected",
-{withKey("unit", 1440), withKey("weight_initializer", "xavier_uniform"),
+{withKey("unit", 10000), withKey("weight_initializer", "xavier_uniform"),
withKey("bias_initializer", "zeros")}));
}
layers.push_back(createLayer("fully_connected",
@@ -158,12 +157,16 @@ void createAndRun(unsigned int epochs, unsigned int batch_size,
std::string filePath = "./simplefc_weight_fp16_fp16_100.bin";
if (access(filePath.c_str(), F_OK) == 0) {
model->load(filePath);

+auto load_end = std::chrono::system_clock::now();
+std::chrono::duration<double> load_elapsed_seconds = load_end - start;
+std::time_t load_end_time = std::chrono::system_clock::to_time_t(load_end);
+std::cout << "Load finished computation at " << std::ctime(&load_end_time)
+<< "elapsed time: " << load_elapsed_seconds.count() << "s\n";
} else {
model->save(filePath, ml::train::ModelFormat::MODEL_FORMAT_BIN);
model->load(filePath);
}

exit(0);
// model->summarize(std::cout, ML_TRAIN_SUMMARY_MODEL);

answer = model->inference(1, in, l);
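For reference, the graph above is assembled from "key=value" property strings. A minimal sketch of a helper like withKey, which the file's own doc comment describes as making "key=value" from a key and a value, could look as follows; this is illustrative only, and the real helper in main.cpp may differ (for example, with overloads for lists of values).

#include <sstream>
#include <string>

// Illustrative sketch only -- not the exact withKey from main.cpp.
// Formats a property as "key=value", e.g. withKey("unit", 10000) -> "unit=10000".
template <typename T>
static std::string withKey(const std::string &key, const T &value) {
  std::ostringstream os;
  os << key << "=" << value;
  return os.str();
}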
33 changes: 15 additions & 18 deletions nntrainer/layers/layer_node.cpp
@@ -499,7 +499,7 @@ void LayerNode::exportTo(Exporter &exporter,
}

void LayerNode::read(std::ifstream &file, bool opt_var,
-ml::train::ExecutionMode mode) {
+ml::train::ExecutionMode mode, bool swap) {
NNTR_THROW_IF(!run_context, std::runtime_error)
<< __func__ << " layer needs to be finalized first!";

@@ -518,24 +518,21 @@ void LayerNode::read(std::ifstream &file, bool opt_var,
/// @note shared weights are only be read at the first acecss
// if (run_context->isGradientLastAccess(i)) {
if (run_context->isGradientFirstAccess(i)) {
-if (layer->getType() == BatchNormalizationLayer::type) {
-if ((mode == ml::train::ExecutionMode::TRAIN) &&
-(this->getWeightDataType() != TensorDim::DataType::FP32)) {
-
-/** @note for batch normalization layer, we do need full precision
-* for training. but weight can be saved with other type. for
-* training, bn weight type is fixed with full precsion */
-
-TensorDim dim = run_context->getWeight(i).getDim();
-dim.setDataType(this->getWeightDataType());
-Tensor T_read(dim, true);
-T_read.read(file);
-run_context->getWeight(i).copyData(T_read);
-} else {
-run_context->getWeight(i).read(file);
-}
+if (layer->getType() == BatchNormalizationLayer::type &&
+mode == ml::train::ExecutionMode::TRAIN &&
+(this->getWeightDataType() != TensorDim::DataType::FP32)) {
+/** @note for batch normalization layer, we do need full precision
+* for training. but weight can be saved with other type. for
+* training, bn weight type is fixed with full precsion */
+
+TensorDim dim = run_context->getWeight(i).getDim();
+dim.setDataType(this->getWeightDataType());
+Tensor T_read(dim, true);
+T_read.read(file);
+run_context->getWeight(i).copyData(T_read);
} else {
-run_context->getWeight(i).read(file);
+if (!swap)
+run_context->getWeight(i).read(file);
}

if (run_context->isMixedPrecision(i) && getTrainable() &&
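Since the flattened hunk above is dense, the weight-read branch after this change reads roughly as follows, reconstructed from the added lines (indentation approximated; surrounding loop and mixed-precision handling omitted).

// Reconstructed from the '+' lines above for readability; not a verbatim copy.
if (run_context->isGradientFirstAccess(i)) {
  if (layer->getType() == BatchNormalizationLayer::type &&
      mode == ml::train::ExecutionMode::TRAIN &&
      (this->getWeightDataType() != TensorDim::DataType::FP32)) {
    // batch normalization needs full precision for training, even if the
    // weight was saved with another data type
    TensorDim dim = run_context->getWeight(i).getDim();
    dim.setDataType(this->getWeightDataType());
    Tensor T_read(dim, true);
    T_read.read(file);
    run_context->getWeight(i).copyData(T_read);
  } else {
    if (!swap) // with swap (FSU) enabled, skip reading the weight here
      run_context->getWeight(i).read(file);
  }
  // ... mixed-precision handling follows
}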
3 changes: 2 additions & 1 deletion nntrainer/layers/layer_node.h
@@ -753,7 +753,8 @@ class LayerNode final : public ml::train::Layer, public GraphNode {
* @param bool read optimizer variables
*/
void read(std::ifstream &file, bool opt_var = false,
-ml::train::ExecutionMode mode = ml::train::ExecutionMode::TRAIN);
+ml::train::ExecutionMode mode = ml::train::ExecutionMode::TRAIN,
+bool swap = false);

/**
* @brief save layer Weight & Bias data from file
3 changes: 2 additions & 1 deletion nntrainer/models/neuralnet.cpp
@@ -651,6 +651,7 @@ void NeuralNetwork::load(const std::string &file_path,
ml::train::ModelFormat format) {
/// @todo this switch case should be delegating the function call only. It's
/// not delegating for now as required logics are manageable for now.
+bool swap_mode = std::get<props::MemorySwap>(model_flex_props);
switch (format) {
case ml::train::ModelFormat::MODEL_FORMAT_BIN: {
NNTR_THROW_IF(!initialized, std::runtime_error)
@@ -660,7 +661,7 @@
auto model_file = checkedOpenStream<std::ifstream>(
file_path, std::ios::in | std::ios::binary);
for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
-(*iter)->read(model_file, false, exec_mode);
+(*iter)->read(model_file, false, exec_mode, swap_mode);
}
try {
/// this is assuming that the failure is allowed at the end of the file
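For context, swap_mode here comes from the model's memory-swap (FSU) property, so the effect of this commit only shows when that property is enabled. Below is a minimal caller-side sketch using the public ml::train API; the property key "memory_swap" is an assumption based on props::MemorySwap and is not part of this diff.

#include <model.h> // ml::train C++ API (ccapi)

// Hypothetical usage sketch: with memory swap (FSU) enabled,
// NeuralNetwork::load() passes swap_mode = true down to LayerNode::read(),
// so the per-layer weight read from the .bin file is skipped.
void loadWithSwapEnabled() {
  auto model = ml::train::createModel(ml::train::ModelType::NEURAL_NET);
  model->setProperty({"memory_swap=true"}); // assumed property key
  // ... add layers, then model->compile() and model->initialize() ...
  model->load("./simplefc_weight_fp16_fp16_100.bin",
              ml::train::ModelFormat::MODEL_FORMAT_BIN);
}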
