From 06784701d8358d94280bd9f08be2967b20e0bf24 Mon Sep 17 00:00:00 2001
From: "jijoong.moon"
Date: Tue, 7 Jan 2025 10:00:59 +0900
Subject: [PATCH] [ Layer ] Move the Weight Read Function to Layer object

It is more flexible to move the weight read function to the Layer object
itself, rather than keeping it in layer_node. Sometimes it is necessary
to change how weights are read depending on the layer type, as with the
batch normalization layer. This PR moves the general read implementation
to layer_devel, and a layer is able to override it as needed.

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon
---
 nntrainer/layers/bn_layer.cpp   | 45 +++++++++++++++++++++++++++++++++
 nntrainer/layers/bn_layer.h     |  9 +++++++
 nntrainer/layers/layer_devel.h  | 40 ++++++++++++++++++++++++++++-
 nntrainer/layers/layer_node.cpp | 45 +++------------------------------
 4 files changed, 96 insertions(+), 43 deletions(-)

diff --git a/nntrainer/layers/bn_layer.cpp b/nntrainer/layers/bn_layer.cpp
index bd0b2f7fce..8334a9a1c3 100644
--- a/nntrainer/layers/bn_layer.cpp
+++ b/nntrainer/layers/bn_layer.cpp
@@ -376,4 +376,49 @@ void BatchNormalizationLayer::setBatch(RunLayerContext &context,
   }
 }
 
+void BatchNormalizationLayer::read(std::ifstream &file,
+                                   RunLayerContext &run_context, bool opt_var,
+                                   ml::train::ExecutionMode mode,
+                                   bool trainable,
+                                   TensorDim::DataType definedWeightDataType) {
+  if (opt_var) {
+    for (unsigned int i = 0; i < run_context.getNumWeights(); ++i) {
+      if (run_context.isGradientLastAccess(i) && trainable) {
+        /// @note read optimizer variables
+        for (unsigned int j = 0; j < run_context.getNumWeightOptVar(i); ++j) {
+          run_context.getWeightOptVar(i, j).read(file);
+        }
+      }
+    }
+  } else {
+    for (unsigned int i = 0; i < run_context.getNumWeights(); ++i) {
+      /// @note shared weights are only read at the first access
+      // if (run_context->isGradientLastAccess(i)) {
+      if (run_context.isGradientFirstAccess(i)) {
+        if ((mode == ml::train::ExecutionMode::TRAIN) &&
+            (definedWeightDataType != TensorDim::DataType::FP32)) {
+
+          /** @note the batch normalization layer needs full precision for
+           * training, but the weight can be saved with another type; for
+           * training, the bn weight type is fixed to full precision. */
+
+          TensorDim dim = run_context.getWeight(i).getDim();
+          dim.setDataType(definedWeightDataType);
+          Tensor T_read(dim, true);
+          T_read.read(file);
+          run_context.getWeight(i).copyData(T_read);
+        } else {
+          run_context.getWeight(i).read(file);
+        }
+
+        if (run_context.isMixedPrecision(i) && trainable &&
+            !run_context.getWeightFP32(i).empty()) {
+          run_context.getWeightFP32(i).copyData(run_context.getWeight(i));
+        }
+      }
+    }
+  }
+}
+
 } /* namespace nntrainer */
diff --git a/nntrainer/layers/bn_layer.h b/nntrainer/layers/bn_layer.h
index d61370d187..4d9692e7b0 100644
--- a/nntrainer/layers/bn_layer.h
+++ b/nntrainer/layers/bn_layer.h
@@ -124,6 +124,15 @@ class BatchNormalizationLayer : public Layer {
 
   inline static const std::string type = "batch_normalization";
 
+  /**
+   * @copydoc Layer::read(std::ifstream &file, RunLayerContext &context, bool
+   * opt_var, ml::train::ExecutionMode mode, bool trainable,
+   * TensorDim::DataType definedWeightDataType)
+   */
+  void read(std::ifstream &file, RunLayerContext &context, bool opt_var,
+            ml::train::ExecutionMode mode, bool trainable,
+            TensorDim::DataType definedWeightDataType) override;
+
 private:
   float divider; /**< size of the axes of the reduced */
 
diff --git a/nntrainer/layers/layer_devel.h b/nntrainer/layers/layer_devel.h
index ef0f181708..2dcfd9e6d4 100644
--- a/nntrainer/layers/layer_devel.h
+++ b/nntrainer/layers/layer_devel.h
@@ -29,6 +29,7 @@
 #include <string>
 #include <vector>
 
+#include <layer_context.h>
 #include <tensor_dim.h>
 
 namespace ml::train {
@@ -38,7 +39,6 @@ class Layer;
 namespace nntrainer {
 
 class InitLayerContext;
-class RunLayerContext;
 class Exporter;
 
 /**
@@ -315,6 +315,44 @@ class Layer {
    */
   virtual bool supportBackwarding() const = 0;
 
+  /**
+   * @brief read layer Weight & Bias data from file
+   * @param file input file stream
+   * @param run_context run context of this layer
+   * @param opt_var if true, read optimizer variables instead of weights
+   * @param mode execution mode
+   * @param trainable whether this layer is trainable
+   * @param definedWeightDataType weight tensor data type required by the
+   * network
+   */
+  virtual void read(std::ifstream &file, RunLayerContext &run_context,
+                    bool opt_var, ml::train::ExecutionMode mode, bool trainable,
+                    TensorDim::DataType definedWeightDataType) {
+    if (opt_var) {
+      for (unsigned int i = 0; i < run_context.getNumWeights(); ++i) {
+        if (run_context.isGradientLastAccess(i) && trainable) {
+          /// @note read optimizer variables
+          for (unsigned int j = 0; j < run_context.getNumWeightOptVar(i); ++j) {
+            run_context.getWeightOptVar(i, j).read(file);
+          }
+        }
+      }
+    } else {
+      for (unsigned int i = 0; i < run_context.getNumWeights(); ++i) {
+        /// @note shared weights are only read at the first access
+        if (run_context.isGradientFirstAccess(i)) {
+          run_context.getWeight(i).read(file);
+
+          if (run_context.isMixedPrecision(i) && trainable &&
+              !run_context.getWeightFP32(i).empty()) {
+            run_context.getWeightFP32(i).copyData(run_context.getWeight(i));
+          }
+        }
+      }
+    }
+  }
+
 protected:
   bool is_inplace = false; /**< whether this layer is in-place or not */
 };
diff --git a/nntrainer/layers/layer_node.cpp b/nntrainer/layers/layer_node.cpp
index a045c8c7c7..0bbb3dd804 100644
--- a/nntrainer/layers/layer_node.cpp
+++ b/nntrainer/layers/layer_node.cpp
@@ -500,51 +500,12 @@ void LayerNode::exportTo(Exporter &exporter,
 
 void LayerNode::read(std::ifstream &file, bool opt_var,
                      ml::train::ExecutionMode mode) {
+
   NNTR_THROW_IF(!run_context, std::runtime_error)
     << __func__ << " layer needs to be finalized first!";
-  if (opt_var) {
-    for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
-      if (run_context->isGradientLastAccess(i) && getTrainable()) {
-        /// @note read optimizer variables
-        for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i); ++j) {
-          run_context->getWeightOptVar(i, j).read(file);
-        }
-      }
-    }
-  } else {
-
-    for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
-      /// @note shared weights are only be read at the first acecss
-      // if (run_context->isGradientLastAccess(i)) {
-      if (run_context->isGradientFirstAccess(i)) {
-        if (layer->getType() == BatchNormalizationLayer::type) {
-          if ((mode == ml::train::ExecutionMode::TRAIN) &&
-              (this->getWeightDataType() != TensorDim::DataType::FP32)) {
-
-            /** @note for batch normalization layer, we do need full precision
-             * for training. but weight can be saved with other type. for
-             * training, bn weight type is fixed with full precsion */
-
-            TensorDim dim = run_context->getWeight(i).getDim();
-            dim.setDataType(this->getWeightDataType());
-            Tensor T_read(dim, true);
-            T_read.read(file);
-            run_context->getWeight(i).copyData(T_read);
-          } else {
-            run_context->getWeight(i).read(file);
-          }
-        } else {
-          run_context->getWeight(i).read(file);
-        }
-
-        if (run_context->isMixedPrecision(i) && getTrainable() &&
-            !run_context->getWeightFP32(i).empty()) {
-          run_context->getWeightFP32(i).copyData(run_context->getWeight(i));
-        }
-      }
-    }
-  }
+  getLayer()->read(file, *run_context, opt_var, mode, getTrainable(),
+                   getWeightDataType());
 }
 
 void LayerNode::save(std::ofstream &file, bool opt_var,
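
Reviewer note: to illustrate the extension point this patch introduces, here is a minimal sketch of a third-party layer overriding the new virtual `Layer::read`. The class name `CustomScaleLayer` and its behavior are hypothetical assumptions, not part of this patch; only the `read` signature and the `RunLayerContext` calls mirror the code above.

```cpp
// Sketch only: a hypothetical layer that customizes weight deserialization
// via the virtual Layer::read introduced by this patch.
#include <fstream>

#include <layer_context.h>
#include <layer_devel.h>
#include <tensor.h>
#include <tensor_dim.h>

namespace nntrainer {

class CustomScaleLayer : public Layer {
public:
  inline static const std::string type = "custom_scale"; // hypothetical

  const std::string getType() const override { return type; }
  bool supportBackwarding() const override { return false; }

  // Remaining pure-virtual members of Layer (finalize, forwarding,
  // calcDerivative, ...) are omitted for brevity; a real layer must
  // implement all of them.

  /// Override read() to force full-precision loading during training,
  /// following the same pattern as BatchNormalizationLayer::read above.
  void read(std::ifstream &file, RunLayerContext &run_context, bool opt_var,
            ml::train::ExecutionMode mode, bool trainable,
            TensorDim::DataType definedWeightDataType) override {
    if (!opt_var && mode == ml::train::ExecutionMode::TRAIN &&
        definedWeightDataType != TensorDim::DataType::FP32) {
      for (unsigned int i = 0; i < run_context.getNumWeights(); ++i) {
        if (run_context.isGradientFirstAccess(i)) {
          // Read the tensor with its on-disk data type, then copy it into
          // the full-precision weight held by the run context.
          TensorDim dim = run_context.getWeight(i).getDim();
          dim.setDataType(definedWeightDataType);
          Tensor on_disk(dim, true);
          on_disk.read(file);
          run_context.getWeight(i).copyData(on_disk);
        }
      }
      return;
    }
    // Everything else (optimizer variables, FP32 weights) goes through the
    // shared default implementation in layer_devel.h.
    Layer::read(file, run_context, opt_var, mode, trainable,
                definedWeightDataType);
  }
};

} // namespace nntrainer
```

This keeps `LayerNode::read` a thin dispatcher while type-specific read policies live with the layer that needs them, which is the stated motivation of the patch.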