diff --git a/nntrainer/tensor/char_tensor.cpp b/nntrainer/tensor/char_tensor.cpp
index 0d42ccded..29a67e9b9 100644
--- a/nntrainer/tensor/char_tensor.cpp
+++ b/nntrainer/tensor/char_tensor.cpp
@@ -17,18 +17,18 @@
 namespace nntrainer {
 
-CharTensor::CharTensor(std::string name_, Tformat fm) :
+CharTensor::CharTensor(std::string name_, Tformat fm, QScheme qscheme_) :
   TensorBase(name_, fm, Tdatatype::QINT8) {}
 
 CharTensor::CharTensor(const TensorDim &d, bool alloc_now, Initializer init,
-                       std::string name) :
-  TensorBase(d, alloc_now, init, name) {
+                       std::string name, QScheme qscheme_) :
+  TensorBase(d, alloc_now, init, name), qscheme(qscheme_) {
   if (alloc_now)
     allocate();
 }
 
-CharTensor::CharTensor(const TensorDim &d, const void *buf) :
-  CharTensor(d, true) {
+CharTensor::CharTensor(const TensorDim &d, const void *buf, QScheme qscheme_) :
+  CharTensor(d, true, Initializer::NONE, "", qscheme_) {
   if (d.getDataLen() != 0) {
     if (buf != nullptr)
       copy(buf);
@@ -37,7 +37,7 @@ CharTensor::CharTensor(const TensorDim &d, const void *buf) :
 
 CharTensor::CharTensor(
   std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
-  Tformat fm) {
+  std::vector<float> const &scales, Tformat fm, QScheme qscheme_) {
   if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) {
     throw std::out_of_range(
       "[Tensor] trying to initialize CharTensor from empty vector");
@@ -59,9 +59,14 @@ CharTensor::CharTensor(
   strides = dim.computeStrides();
   contiguous = true;
   initializer = Initializer::NONE;
+  qscheme = qscheme_;
 
-  MemoryData *mem_data =
-    new MemoryData((void *)(new int8_t[dim.getDataLen()]()));
+  NNTR_THROW_IF(scales.size() != scale_size(), std::invalid_argument)
+    << "invalid scale factor size " << scales.size();
+
+  /// @note the extra sizeof(float) * scale_size() bytes assume scale factors
+  /// are stored as full-precision floats.
+  MemoryData *mem_data = new MemoryData(
+    (void *)(new int8_t[dim.getDataLen() + sizeof(float) * scale_size()]()));
   data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
     delete[] mem_data->getAddr<int8_t>();
   });
@@ -84,9 +89,16 @@ CharTensor::CharTensor(
       for (unsigned int l = 0; l < channel(); ++l)
         this->setValue(i, l, j, k, d[i][j][k][l]);
   }
+
+  // copy scale factors
+  scopy(scale_size(), scales.data(), 1, (float *)getScale(), 1);
 }
 
 bool CharTensor::operator==(const CharTensor &rhs) const {
+  if (qscheme != rhs.qscheme)
+    return false;
+
+  // compare quantized data
   const int8_t *_data = (int8_t *)getData();
   const int8_t *_rdata = (int8_t *)rhs.getData();
   for (size_t i = 0; i < size(); ++i) {
@@ -94,6 +106,14 @@ bool CharTensor::operator==(const CharTensor &rhs) const {
       return false;
   }
 
+  // compare scale factors
+  const float *_scales = (float *)getScale();
+  const float *_rscales = (float *)rhs.getScale();
+  for (size_t i = 0; i < scale_size(); ++i) {
+    if (std::fabs(_scales[i] - _rscales[i]) > 1e-5)
+      return false;
+  }
+
   return true;
 }
 
@@ -109,7 +129,8 @@ void CharTensor::allocate() {
 
     /// allocate new memory for the tensor data
     MemoryData *mem_data;
-    mem_data = new MemoryData((void *)(new int8_t[dim.getDataLen()]{}));
+    mem_data = new MemoryData(
+      (void *)(new int8_t[dim.getDataLen() + sizeof(float) * scale_size()]{}));
     data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
       delete[] mem_data->template getAddr<int8_t>();
       delete mem_data;
@@ -141,6 +162,25 @@ void *CharTensor::getData(size_t idx) const {
   return data->getAddr<int8_t>() + offset + idx;
 }
 
+void *CharTensor::getScale() const {
+  if (!data)
+    return nullptr;
+
+  data->validate();
+  return ((int8_t *)getData()) + size();
+}
+
+void *CharTensor::getScale(size_t idx) const {
+  NNTR_THROW_IF(idx > scale_size(), std::invalid_argument)
+    << "Tensor::getScale() index is not valid";
+
+  if (!data)
+    return nullptr;
+
+  data->validate();
+  return ((float *)getScale()) + idx;
+}
+
 void *CharTensor::getAddress(unsigned int i) {
   size_t index = getIndex(batch(), channel(), height(), width());
   if (i > index) {
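The accessors above rely on a single packed allocation: size() int8 values followed immediately by scale_size() floats. A minimal standalone sketch of that layout (names here are illustrative, not nntrainer API):

```cpp
// Packed QINT8 layout: data_len int8 values, then scale_len floats.
#include <cstdint>
#include <cstring>
#include <vector>

int main() {
  const size_t data_len = 16; // size()
  const size_t scale_len = 1; // scale_size() under PER_TENSOR_AFFINE

  // Single allocation covering both regions, as in CharTensor::allocate().
  std::vector<int8_t> buf(data_len + sizeof(float) * scale_len);

  // getScale() == getData() + size(): scales start right after the data.
  float scale = 0.00363567f;
  std::memcpy(buf.data() + data_len, &scale, sizeof(float));

  float readback = 0.0f;
  std::memcpy(&readback, buf.data() + data_len, sizeof(float));
  return readback == scale ? 0 : 1;
}
```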
" << q_scales[len - 3] << ' ' + << q_scales[len - 2] << ' ' << q_scales[len - 1] << ']' << std::endl; + return; + } + + out << "Scale factors: "; + for (unsigned i = 0; i < scale_size(); ++i) { + out << q_scales[i] << " "; + } + out << std::endl; +} + +size_t CharTensor::scale_size() const { + switch (qscheme) { + case QScheme::PER_TENSOR_AFFINE: + return 1; + break; + case QScheme::PER_CHANNEL_AFFINE: + return width(); + break; + default: + break; + } + return 0; } +QScheme CharTensor::q_scheme() const { return qscheme; } + void CharTensor::copy(const void *buf) { NNTR_THROW_IF(!contiguous, std::invalid_argument) << getName() << " is not contiguous, cannot copy."; @@ -360,17 +430,19 @@ void CharTensor::copy(const void *buf) { return; } - /// @todo need to optimize scopy(size(), (int8_t *)buf, 1, (int8_t *)getData(), 1); + + float *scales = (float *)(((int8_t *)buf) + size()); + scopy(scale_size(), scales, 1, (float *)getScale(), 1); } void CharTensor::save_quantization_info(std::ostream &file) { - checkedWrite(file, (char *)&axis, sizeof(uint8_t), + checkedWrite(file, (char *)&qscheme, sizeof(uint8_t), "[CharTensor::save] failed to write quantization information"); } void CharTensor::read_quantization_info(std::ifstream &file) { - checkedRead(file, (char *)&axis, sizeof(uint8_t), + checkedRead(file, (char *)&qscheme, sizeof(uint8_t), "[CharTensor::read] failed to read quantization information"); } diff --git a/nntrainer/tensor/char_tensor.h b/nntrainer/tensor/char_tensor.h index f46bb2298..cfa7b0589 100644 --- a/nntrainer/tensor/char_tensor.h +++ b/nntrainer/tensor/char_tensor.h @@ -12,6 +12,7 @@ #define __CHAR_TENSOR_H__ #ifdef __cplusplus +#include <quantizer.h> #include <tensor_base.h> namespace nntrainer { @@ -25,7 +26,8 @@ class CharTensor : public TensorBase { /** * @brief Basic Constructor of Tensor */ - CharTensor(std::string name_ = "", Tformat fm = Tformat::NCHW); + CharTensor(std::string name_ = "", Tformat fm = Tformat::NCHW, + QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE); /** * @brief Construct a new CharTensor object @@ -34,27 +36,33 @@ class CharTensor : public TensorBase { * @param alloc_now Allocate memory to this tensor or not * @param init Initializer for the tensor * @param name Name of the tensor + * @param qscheme_ Quantization scheme of the tensor */ CharTensor(const TensorDim &d, bool alloc_now, - Initializer init = Initializer::NONE, std::string name = ""); + Initializer init = Initializer::NONE, std::string name = "", + QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE); /** * @brief Construct a new CharTensor object * * @param d Tensor dim for this tensor * @param buf buffer + * @param qscheme_ quantization scheme of the tensor */ - CharTensor(const TensorDim &d, const void *buf = nullptr); + CharTensor(const TensorDim &d, const void *buf = nullptr, + QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE); /** * @brief Construct a new CharTensor object * * @param d data for the Tensor + * @param scales scale factors for the Tensor * @param fm format for the Tensor + * @param qscheme_ quantization scheme of the tensor */ CharTensor( std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d, - Tformat fm); + std::vector<float> const &scales, Tformat fm, QScheme qscheme_); /** * @brief Construct a new CharTensor object @@ -101,6 +109,16 @@ class CharTensor : public TensorBase { */ void *getData(size_t idx) const override; + /** + * @copydoc Tensor::getScale() + */ + void *getScale() const override; + + /** + * @copydoc Tensor::getScale(size_t idx) + */ + void 
diff --git a/nntrainer/tensor/char_tensor.h b/nntrainer/tensor/char_tensor.h
index f46bb2298..cfa7b0589 100644
--- a/nntrainer/tensor/char_tensor.h
+++ b/nntrainer/tensor/char_tensor.h
@@ -12,6 +12,7 @@
 #define __CHAR_TENSOR_H__
 #ifdef __cplusplus
 
+#include <quantizer.h>
 #include <tensor_base.h>
 
 namespace nntrainer {
@@ -25,7 +26,8 @@ class CharTensor : public TensorBase {
   /**
    * @brief     Basic Constructor of Tensor
    */
-  CharTensor(std::string name_ = "", Tformat fm = Tformat::NCHW);
+  CharTensor(std::string name_ = "", Tformat fm = Tformat::NCHW,
+             QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
   /**
    * @brief Construct a new CharTensor object
@@ -34,27 +36,33 @@
    *
    * @param d Tensor dim for this tensor
    * @param alloc_now Allocate memory to this tensor or not
    * @param init Initializer for the tensor
    * @param name Name of the tensor
+   * @param qscheme_ Quantization scheme of the tensor
    */
   CharTensor(const TensorDim &d, bool alloc_now,
-             Initializer init = Initializer::NONE, std::string name = "");
+             Initializer init = Initializer::NONE, std::string name = "",
+             QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
   /**
    * @brief Construct a new CharTensor object
    *
    * @param d Tensor dim for this tensor
    * @param buf buffer
+   * @param qscheme_ quantization scheme of the tensor
    */
-  CharTensor(const TensorDim &d, const void *buf = nullptr);
+  CharTensor(const TensorDim &d, const void *buf = nullptr,
+             QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
   /**
    * @brief Construct a new CharTensor object
    *
    * @param d data for the Tensor
+   * @param scales scale factors for the Tensor
    * @param fm format for the Tensor
+   * @param qscheme_ quantization scheme of the tensor
    */
   CharTensor(
     std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
-    Tformat fm);
+    std::vector<float> const &scales, Tformat fm, QScheme qscheme_);
 
   /**
    * @brief Construct a new CharTensor object
@@ -101,6 +109,16 @@
    */
   void *getData(size_t idx) const override;
 
+  /**
+   * @copydoc Tensor::getScale()
+   */
+  void *getScale() const override;
+
+  /**
+   * @copydoc Tensor::getScale(size_t idx)
+   */
+  void *getScale(size_t idx) const override;
+
   /**
    * @brief     i data index
    * @retval    address of ith data
@@ -227,11 +245,21 @@
    */
   void read_quantization_info(std::ifstream &file) override;
 
+  /**
+   * @copydoc Tensor::scale_size()
+   */
+  size_t scale_size() const override;
+
+  /**
+   * @copydoc Tensor::q_scheme()
+   */
+  QScheme q_scheme() const;
+
 private:
   /**
-   * @brief quantization axis
+   * @brief quantization scheme
    */
-  uint8_t axis;
+  QScheme qscheme;
 
   /**
    * @brief  copy a buffer to @a this, the caller has to ensure that @a this is
diff --git a/nntrainer/tensor/quantizer.cpp b/nntrainer/tensor/quantizer.cpp
index 08ea039fc..b2beb6602 100644
--- a/nntrainer/tensor/quantizer.cpp
+++ b/nntrainer/tensor/quantizer.cpp
@@ -10,6 +10,7 @@
 
 #include <math.h>
 #include <quantizer.h>
+#include <tensor.h>
 
 namespace nntrainer {
 
@@ -64,6 +65,8 @@ Tensor PerTensorAffineQuantizer::quantize(const Tensor &input,
     }
   }
 
+  *output.getScale<float>() = scale;
+
   return output;
 }
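With quantize() now persisting the factor via *output.getScale<float>(), dequantization can recover it from the tensor itself. The affine arithmetic, checked against values from the updated per_tensor_affine_03_p test further below (zero point omitted, matching the symmetric case):

```cpp
#include <cmath>
#include <cstdint>

int main() {
  const float input = 0.44718724f; // from per_tensor_affine_03_p
  const float scale = 0.00363567f; // value written by quantize()

  int8_t q = static_cast<int8_t>(std::lround(input / scale)); // 123
  float restored = q * scale; // ~0.44718741, the test's expected output

  return std::fabs(restored - input) <= scale ? 0 : 1;
}
```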
diff --git a/nntrainer/tensor/quantizer.h b/nntrainer/tensor/quantizer.h
index 8ec075c84..f8cd3bcde 100644
--- a/nntrainer/tensor/quantizer.h
+++ b/nntrainer/tensor/quantizer.h
@@ -12,11 +12,16 @@
 #define __QUANTIZER_H__
 #ifdef __cplusplus
 
-#include <tensor.h>
+#include <memory>
+#include <stdexcept>
 #include <unordered_map>
 
+#include <tensor_dim.h>
+
 namespace nntrainer {
 
+class Tensor;
+
 /**
  * @brief defines the quantization scheme
  * @details NNTrainer provides basic quantization schemes (e.g., Per tensor
@@ -73,7 +78,8 @@ class Quantizer {
    * @param input Input tensor
    * @param qtype quantized data type
    */
-  virtual void calculateQParams(const Tensor &input, Tdatatype qtype) = 0;
+  virtual void calculateQParams(const Tensor &input,
+                                ml::train::TensorDim::DataType qtype) = 0;
 
 public:
   /**
@@ -112,14 +118,16 @@ class Quantizer {
    * @param[in] input Floating point tensor to quantize
    * @return Tensor quantized tensor
    */
-  virtual Tensor quantize(const Tensor &input, Tdatatype qtype) = 0;
+  virtual Tensor quantize(const Tensor &input,
+                          ml::train::TensorDim::DataType qtype) = 0;
 
   /**
    * @brief Dequantize a quantized tensor into a tensor.
    * @param[in] input Quantized tensor to dequantize
    * @return Tensor dequantized tensor
    */
-  virtual Tensor dequantize(const Tensor &input, Tdatatype qtype) = 0;
+  virtual Tensor dequantize(const Tensor &input,
+                            ml::train::TensorDim::DataType qtype) = 0;
 
   /**
    * @brief Get quantization Scheme type.
@@ -172,12 +180,14 @@ class PerTensorAffineQuantizer : public UniformQuantizer {
   /**
    * @copydoc Quantizer::quantize(const Tensor &input)
    */
-  Tensor quantize(const Tensor &input, Tdatatype qtype) override;
+  Tensor quantize(const Tensor &input,
+                  ml::train::TensorDim::DataType qtype) override;
 
   /**
    * @copydoc Quantizer::dequantize(const Tensor &input)
    */
-  Tensor dequantize(const Tensor &input, Tdatatype dtype) override;
+  Tensor dequantize(const Tensor &input,
+                    ml::train::TensorDim::DataType dtype) override;
 
   /**
    * @copydoc Quantizer::qscheme()
@@ -191,9 +201,11 @@ class PerTensorAffineQuantizer : public UniformQuantizer {
   long int quant_max;
 
   /**
-   * @copydoc Quantizer::calculateQParams(const Tensor &input, Tdatatype qtype)
+   * @copydoc Quantizer::calculateQParams(const Tensor &input,
+   * ml::train::TensorDim::DataType qtype)
    */
-  void calculateQParams(const Tensor &input, Tdatatype qtype) override;
+  void calculateQParams(const Tensor &input,
+                        ml::train::TensorDim::DataType qtype) override;
 };
 
 /**
@@ -220,12 +232,14 @@ class PerChannelAffineQuantizer : public UniformQuantizer {
   /**
    * @copydoc Quantizer::quantize(const Tensor &input)
    */
-  Tensor quantize(const Tensor &input, Tdatatype qtype) override;
+  Tensor quantize(const Tensor &input,
+                  ml::train::TensorDim::DataType qtype) override;
 
   /**
    * @copydoc Quantizer::dequantize(const Tensor &input)
    */
-  Tensor dequantize(const Tensor &input, Tdatatype dtype) override;
+  Tensor dequantize(const Tensor &input,
+                    ml::train::TensorDim::DataType dtype) override;
 
   /**
    * @copydoc Quantizer::qscheme()
@@ -239,9 +253,11 @@ class PerChannelAffineQuantizer : public UniformQuantizer {
   long int quant_max;
 
   /**
-   * @copydoc Quantizer::calculateQParams(const Tensor &input, Tdatatype qtype)
+   * @copydoc Quantizer::calculateQParams(const Tensor &input,
+   * ml::train::TensorDim::DataType qtype)
    */
-  void calculateQParams(const Tensor &input, Tdatatype qtype) override {}
+  void calculateQParams(const Tensor &input,
+                        ml::train::TensorDim::DataType qtype) override {}
 };
 
 /**
@@ -265,12 +281,14 @@ class BinaryCodeBasedQuantizer : public NonUniformQuantizer {
   /**
    * @copydoc Quantizer::quantize(const Tensor &input)
    */
-  Tensor quantize(const Tensor &input, Tdatatype qtype) override;
+  Tensor quantize(const Tensor &input,
+                  ml::train::TensorDim::DataType qtype) override;
 
   /**
    * @copydoc Quantizer::dequantize(const Tensor &input)
    */
-  Tensor dequantize(const Tensor &input, Tdatatype dtype) override;
+  Tensor dequantize(const Tensor &input,
+                    ml::train::TensorDim::DataType dtype) override;
 
   /**
    * @copydoc Quantizer::qscheme()
@@ -279,9 +297,11 @@ class BinaryCodeBasedQuantizer : public NonUniformQuantizer {
 
 private:
   /**
-   * @copydoc Quantizer::calculateQParams(const Tensor &input, Tdatatype qtype)
+   * @copydoc Quantizer::calculateQParams(const Tensor &input,
+   * ml::train::TensorDim::DataType qtype)
    */
-  void calculateQParams(const Tensor &input, Tdatatype qtype) override {}
+  void calculateQParams(const Tensor &input,
+                        ml::train::TensorDim::DataType qtype) override {}
 };
 
 /**
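A hedged usage sketch of the interface above; the Quantization::createQuantizer factory name is an assumption here (it is how the unit tests obtain a concrete quantizer), so treat it as illustrative rather than definitive:

```cpp
#include <quantizer.h>
#include <tensor.h>

using namespace nntrainer;

void roundTrip(const Tensor &fp32_input) {
  // Hypothetical factory call; the concrete quantizer implements the
  // pure virtuals declared above.
  std::unique_ptr<Quantizer> q =
    Quantization::createQuantizer(QScheme::PER_TENSOR_AFFINE);

  Tensor quantized = q->quantize(fp32_input, Tdatatype::QINT8);
  Tensor restored = q->dequantize(quantized, Tdatatype::FP32);
  (void)restored;
}
```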
diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp
index b0cbae110..af39cb641 100644
--- a/nntrainer/tensor/tensor.cpp
+++ b/nntrainer/tensor/tensor.cpp
@@ -27,9 +27,11 @@ namespace nntrainer {
 
 Tensor::Tensor(
   std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
-  ml::train::TensorDim::TensorType t_type) {
-  itensor = std::shared_ptr<CharTensor>(new CharTensor(d, t_type.format),
-                                        std::default_delete<CharTensor>());
+  std::vector<float> const &scales, ml::train::TensorDim::TensorType t_type,
+  QScheme qscheme_) {
+  itensor = std::shared_ptr<CharTensor>(
+    new CharTensor(d, scales, t_type.format, qscheme_),
+    std::default_delete<CharTensor>());
 }
 
 Tensor::Tensor(
@@ -102,7 +104,7 @@ Tensor::Tensor(std::string name_, Tformat fm, Tdatatype d_type) {
 }
 
 Tensor::Tensor(const TensorDim &d, bool alloc_now, Initializer init,
-               std::string name) {
+               std::string name, QScheme qscheme) {
   itensor = nullptr;
 
   if (d.getDataType() == Tdatatype::FP32) {
@@ -130,9 +132,9 @@ Tensor::Tensor(const TensorDim &d, bool alloc_now, Initializer init,
       std::shared_ptr<UInt32Tensor>(new UInt32Tensor(d, alloc_now, init, name),
                                     std::default_delete<UInt32Tensor>());
   } else if (d.getDataType() == Tdatatype::QINT8) {
-    itensor =
-      std::shared_ptr<CharTensor>(new CharTensor(d, alloc_now, init, name),
-                                  std::default_delete<CharTensor>());
+    itensor = std::shared_ptr<CharTensor>(
+      new CharTensor(d, alloc_now, init, name, qscheme),
+      std::default_delete<CharTensor>());
   } else if (d.getDataType() == Tdatatype::BCQ) {
 #ifdef ENABLE_BIQGEMM
     itensor =
@@ -150,7 +152,7 @@ Tensor::Tensor(const TensorDim &d, bool alloc_now, Initializer init,
   }
 }
 
-Tensor::Tensor(const TensorDim &d, const void *buf) {
+Tensor::Tensor(const TensorDim &d, const void *buf, QScheme qscheme) {
   itensor = nullptr;
 
   if (d.getDataType() == Tdatatype::FP32) {
@@ -173,7 +175,7 @@ Tensor::Tensor(const TensorDim &d, const void *buf) {
     itensor = std::shared_ptr<UInt32Tensor>(
       new UInt32Tensor(d, buf), std::default_delete<UInt32Tensor>());
   } else if (d.getDataType() == Tdatatype::QINT8) {
-    itensor = std::shared_ptr<CharTensor>(new CharTensor(d, buf),
+    itensor = std::shared_ptr<CharTensor>(new CharTensor(d, buf, qscheme),
                                           std::default_delete<CharTensor>());
   } else if (d.getDataType() == Tdatatype::BCQ) {
 #ifdef ENABLE_BIQGEMM
@@ -1038,6 +1040,7 @@ void Tensor::copy(const Tensor &from) {
   }
 
   if (from.size() != 0 && size() == from.size() &&
+      scale_size() == from.scale_size() &&
       getDataType() == from.getDataType()) {
     // if tensor size and data type match, copy data
     itensor->copy(from);
@@ -1253,6 +1256,8 @@ size_t Tensor::width() const { return itensor->width(); }
 
 size_t Tensor::scale_size() const { return itensor->scale_size(); }
 
+QScheme Tensor::q_scheme() const { return itensor->q_scheme(); }
+
 void Tensor::mergeAxis(unsigned int axis1, unsigned int axis2) {
   NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
     << getName() << " is not contiguous, cannot merge axis";
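The new qscheme argument only reaches the CharTensor branch; every other dtype ignores it. A small sketch of the two schemes at the Tensor level, using the constructor form the tests use (values illustrative):

```cpp
#include <tensor.h>

using namespace nntrainer;

void example() {
  // One scale factor for the whole tensor.
  Tensor per_tensor({1, 1, 4, 4, Tformat::NCHW, Tdatatype::QINT8}, nullptr,
                    QScheme::PER_TENSOR_AFFINE); // scale_size() == 1

  // One scale factor per width index, per CharTensor::scale_size().
  Tensor per_channel({1, 1, 4, 4, Tformat::NCHW, Tdatatype::QINT8}, nullptr,
                     QScheme::PER_CHANNEL_AFFINE); // scale_size() == 4
}
```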
diff --git a/nntrainer/tensor/tensor.h b/nntrainer/tensor/tensor.h
index 0e79ff10e..3b025ea61 100644
--- a/nntrainer/tensor/tensor.h
+++ b/nntrainer/tensor/tensor.h
@@ -63,17 +63,21 @@ class Tensor {
    * @param alloc_now If the memory of the tensor must be allocated
    * @param init Initializer for the tensor
    * @param name Name of the tensor
+   * @param qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(const TensorDim &d, bool alloc_now,
-         Initializer init = Initializer::NONE, std::string name = "");
+         Initializer init = Initializer::NONE, std::string name = "",
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
   /**
    * @brief     Constructor of Tensor with dimension/buf
    * @param d Tensor dim for this tensor
    * @param buf buffer
+   * @param qscheme_ Quantization scheme (only applies to Quantized Tensor)
    * @note Memory for this tensor is instantaneously allocated
    */
-  Tensor(const TensorDim &d, const void *buf = nullptr);
+  Tensor(const TensorDim &d, const void *buf = nullptr,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
   /**
    * @brief     Constructor of Tensor
@@ -83,10 +87,12 @@
    * @param[in] d1 Channel
    * @param[in] d2 Height
    * @param[in] d3 Width
    * @param[in] fm Tensor Format
    * @param[in] d_type Tensor Data Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(size_t d0, size_t d1, size_t d2, size_t d3, Tformat fm = Tformat::NCHW,
-         Tdatatype d_type = Tdatatype::FP32) :
-    Tensor(TensorDim(d0, d1, d2, d3, fm, d_type), nullptr){};
+         Tdatatype d_type = Tdatatype::FP32,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(TensorDim(d0, d1, d2, d3, fm, d_type), nullptr, qscheme_){};
 
   /**
    * @brief     Constructor of Tensor
@@ -95,10 +101,12 @@
    * @param[in] d2 Height
    * @param[in] d3 Width
    * @param[in] fm Tensor Format
    * @param[in] d_type Tensor Data Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(size_t d1, size_t d2, size_t d3, Tformat fm = Tformat::NCHW,
-         Tdatatype d_type = Tdatatype::FP32) :
-    Tensor(1, d1, d2, d3, fm, d_type){};
+         Tdatatype d_type = Tdatatype::FP32,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(1, d1, d2, d3, fm, d_type, qscheme_){};
 
   /**
    * @brief     Constructor of Tensor with batch size one and d1 size one
@@ -106,20 +114,24 @@
    * @param[in] d2 Height (NCHW) or Width (NHWC)
    * @param[in] d3 Width (NCHW) or Channel (NHWC)
    * @param[in] fm Tensor Format
    * @param[in] d_type Tensor Data Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(size_t d2, size_t d3, Tformat fm = Tformat::NCHW,
-         Tdatatype d_type = Tdatatype::FP32) :
-    Tensor(1, 1, d2, d3, fm, d_type){};
+         Tdatatype d_type = Tdatatype::FP32,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(1, 1, d2, d3, fm, d_type, qscheme_){};
 
   /**
    * @brief     Constructor of Tensor with just Width or Channel
    * @param[in] d3 Width (NCHW) or Channel (NHWC)
    * @param[in] fm Tensor Format
    * @param[in] d_type Tensor Data Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   explicit Tensor(size_t d3, Tformat fm = Tformat::NCHW,
-                  Tdatatype d_type = Tdatatype::FP32) :
-    Tensor(1, 1, 1, d3, fm, d_type){};
+                  Tdatatype d_type = Tdatatype::FP32,
+                  QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(1, 1, 1, d3, fm, d_type, qscheme_){};
 
   /**
    * @brief     Constructor of Tensor
@@ -128,10 +140,12 @@
    * @param[in] d1 Channel (NCHW) or Height (NHWC)
    * @param[in] d2 Height (NCHW) or Width (NHWC)
    * @param[in] d3 Width (NCHW) or Channel (NHWC)
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(size_t d0, size_t d1, size_t d2, size_t d3,
-         ml::train::TensorDim::TensorType t_type) :
-    Tensor(TensorDim(d0, d1, d2, d3, t_type), nullptr){};
+         ml::train::TensorDim::TensorType t_type,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(TensorDim(d0, d1, d2, d3, t_type), nullptr, qscheme_){};
 
   /**
    * @brief     Constructor of Tensor
@@ -139,9 +153,11 @@
    * @param[in] d1 Channel
    * @param[in] d2 Height
    * @param[in] d3 Width
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(size_t d1, size_t d2, size_t d3,
-         ml::train::TensorDim::TensorType t_type) :
-    Tensor(1, d1, d2, d3, t_type){};
+         ml::train::TensorDim::TensorType t_type,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(1, d1, d2, d3, t_type, qscheme_){};
 
   /**
    * @brief     Constructor of Tensor with batch size one and d1 size one
@@ -149,19 +165,23 @@
    * @param[in] d2 Height (NCHW) or Width (NHWC)
    * @param[in] d3 Width (NCHW) or Channel (NHWC)
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
-  Tensor(size_t d2, size_t d3, ml::train::TensorDim::TensorType t_type) :
+  Tensor(size_t d2, size_t d3, ml::train::TensorDim::TensorType t_type,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
     Tensor(1, (t_type.format == Tformat::NCHW) ? 1 : d3,
            (t_type.format == Tformat::NCHW) ? d2 : 1,
-           (t_type.format == Tformat::NCHW) ? d3 : d2, t_type){};
+           (t_type.format == Tformat::NCHW) ? d3 : d2, t_type, qscheme_){};
 
   /**
    * @brief     Constructor of Tensor with just Width or Channel
    * @param[in] d3 Width (NCHW) or Channel (NHWC)
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
-  explicit Tensor(size_t d3, ml::train::TensorDim::TensorType t_type) :
+  explicit Tensor(size_t d3, ml::train::TensorDim::TensorType t_type,
+                  QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
     Tensor(1, (t_type.format == Tformat::NCHW) ? 1 : d3, 1,
-           (t_type.format == Tformat::NCHW) ? d3 : 1, t_type){};
+           (t_type.format == Tformat::NCHW) ? d3 : 1, t_type, qscheme_){};
 
   /**
    * @brief     Constructor of Tensor
@@ -312,32 +332,43 @@ class Tensor {
     Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
 
   /**
-   * @brief     Constructor of Tensor
+   * @brief     Constructor of CharTensor (QINT8)
    * @param[in] d data for the Tensor. It needs to set format properly.
+   * @param[in] scales scale factors for the Tensor.
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
-         ml::train::TensorDim::TensorType t_type);
+         std::vector<float> const &scales,
+         ml::train::TensorDim::TensorType t_type, QScheme qscheme_);
 
   /**
-   * @brief     Constructor of Tensor
+   * @brief     Constructor of CharTensor (QINT8)
    * @note      This constructor copies vector again. needs refactoring
    * @param[in] d data for the Tensor. It needs to set format properly.
+   * @param[in] scales scale factors for the Tensor.
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(std::vector<std::vector<std::vector<int8_t>>> const &d,
-         ml::train::TensorDim::TensorType t_type) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
+         std::vector<float> const &scales,
+         ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, t_type,
+           qscheme_){};
 
   /**
-   * @brief     Constructor of Tensor
+   * @brief     Constructor of CharTensor (QINT8)
    * @note      This constructor copies vector again. needs refactoring
    * @param[in] d data for the Tensor with batch size one
+   * @param[in] scales scale factors for the Tensor.
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(std::vector<std::vector<int8_t>> const &d,
-         ml::train::TensorDim::TensorType t_type) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
+         std::vector<float> const &scales,
+         ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, t_type,
+           qscheme_){};
 
   /**
    * @brief     Constructor of Tensor by directly assigning TensorBase.
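Example of the vector-plus-scales constructors introduced above, in the spirit of the updated Tensor_04_p test further below (values illustrative):

```cpp
#include <tensor.h>

using namespace nntrainer;

void example() {
  // 1 batch, 1 channel, 2x2 data (3D form, batch size one).
  std::vector<std::vector<std::vector<int8_t>>> d = {{{1, 2}, {3, 4}}};
  std::vector<float> scales = {0.5f}; // PER_TENSOR_AFFINE -> one factor

  Tensor t(d, scales, {Tformat::NCHW, Tdatatype::QINT8},
           QScheme::PER_TENSOR_AFFINE);
}
```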
@@ -1617,6 +1648,12 @@ class Tensor {
    */
   size_t scale_size() const;
 
+  /**
+   * @brief     return Tensor quantization scheme
+   * @retval    QScheme qscheme
+   */
+  QScheme q_scheme() const;
+
   /**
    * @brief Merge the given two axis for tensor at second axis inplace
    *
diff --git a/nntrainer/tensor/tensor_base.h b/nntrainer/tensor/tensor_base.h
index cc6ad0c2b..53af87de0 100644
--- a/nntrainer/tensor/tensor_base.h
+++ b/nntrainer/tensor/tensor_base.h
@@ -18,6 +18,7 @@
 
 #include <memory_data.h>
 #include <nntrainer_error.h>
+#include <quantizer.h>
 #include <tensor_dim.h>
 #include <util_func.h>
 
@@ -641,6 +642,17 @@ class TensorBase {
    */
   virtual size_t scale_size() const { return 0; }
 
+  /**
+   * @brief     return Tensor quantization scheme
+   * @retval    QScheme qscheme
+   * @note      Override this for quantized tensors
+   */
+  virtual QScheme q_scheme() const {
+    throw std::invalid_argument(
+      "Tensor::q_scheme() is not supported in tensor data type " +
+      getStringDataType());
+  }
+
   /**
    * @brief Merge the given two axis for tensor at second axis inplace
    *
diff --git a/test/unittest/unittest_nntrainer_quantizer.cpp b/test/unittest/unittest_nntrainer_quantizer.cpp
index ab4a1b6fd..6b6d98d1c 100644
--- a/test/unittest/unittest_nntrainer_quantizer.cpp
+++ b/test/unittest/unittest_nntrainer_quantizer.cpp
@@ -52,10 +52,16 @@ TEST(nntrainer_Quantizer, per_tensor_affine_03_p) {
                         -0.07760239, -0.28348053, -0.37242615, 0.30941701};
   nntrainer::Tensor input({1, 1, 4, 4}, input_data);
 
-  int8_t qdata[] = {-47, -28, 87,  -1,  123, -42, 39,   -22,
-                    -59, -97, 127, -96, -21, -78, -102, 85};
+  std::vector<int8_t> qdata = {-47, -28, 87,  -1,  123, -42, 39,   -22,
+                               -59, -97, 127, -96, -21, -78, -102, 85};
+  float qscale = 0.00363567f;
+  int8_t *scale_array = reinterpret_cast<int8_t *>(&qscale);
+  for (unsigned int i = 0; i < 4; ++i) {
+    qdata.push_back(scale_array[i]);
+  }
   nntrainer::Tensor quant_answer(
-    {1, 1, 4, 4, nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}, qdata);
+    {1, 1, 4, 4, nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8},
+    qdata.data());
 
   float output_data[] = {-0.17087643, -0.10179872, 0.31630316,  -0.00363567,
                          0.44718724,  -0.15269808, 0.14179108,  -0.07998471,
@@ -96,14 +102,20 @@ TEST(nntrainer_Quantizer, per_tensor_affine_04_p) {
                         -0.20489319, 0.33036807, 0.27226517, -0.25207010};
   nntrainer::Tensor input({1, 1, 8, 8}, input_data);
 
-  int8_t qdata[] = {-109, 9,   16,  14,   66,  16,  56,  -58, -29, 127,  61,
-                    -35,  -104, 121, -92, -122, 51,  68,  -97, 114, 31,  -33,
-                    33,   -110, -98, -60, -69, -118, 25,  -18, 62,  8,   39,
-                    -107, 60,  -33, 91,   -99, 61,  85,  -58, -86, 98,  -41,
-                    -76,  110, 89,  -33,  82,  120, -38, 12,  91,  102, 12,
-                    -1,   103, -90, -71,  -96, -76, 122, 101, -93};
+  std::vector<int8_t> qdata = {
+    -109, 9,    16,   14,  66,   16,  56,  -58,  -29, 127, 61,  -35, -104,
+    121,  -92,  -122, 51,  68,   -97, 114, 31,   -33, 33,  -110, -98, -60,
+    -69,  -118, 25,   -18, 62,   8,   39,  -107, 60,  -33, 91,  -99, 61,
+    85,   -58,  -86,  98,  -41,  -76, 110, 89,   -33, 82,  120, -38, 12,
+    91,   102,  12,   -1,  103,  -90, -71, -96,  -76, 122, 101, -93};
+  float qscale = 0.00270727f;
+  int8_t *scale_array = reinterpret_cast<int8_t *>(&qscale);
+  for (unsigned int i = 0; i < 4; ++i) {
+    qdata.push_back(scale_array[i]);
+  }
   nntrainer::Tensor quant_answer(
-    {1, 1, 8, 8, nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}, qdata);
+    {1, 1, 8, 8, nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8},
+    qdata.data());
 
   float output_data[] = {
     -0.29509223, 0.02436541, 0.04331629, 0.03790175, 0.17867969,
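The tests build the reference buffer by appending the scale's four bytes after the quantized values. The same trick as a standalone sketch, using memcpy rather than reinterpret_cast to sidestep aliasing concerns:

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

int main() {
  std::vector<int8_t> qdata = {-47, -28, 87, -1}; // quantized values
  const float qscale = 0.00363567f;

  // Copy the float's object representation and append it to the buffer,
  // matching the packed data-then-scale layout CharTensor expects.
  int8_t bytes[sizeof(float)];
  std::memcpy(bytes, &qscale, sizeof(float));
  qdata.insert(qdata.end(), bytes, bytes + sizeof(float));

  return qdata.size() == 4 + sizeof(float) ? 0 : 1;
}
```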
diff --git a/test/unittest/unittest_nntrainer_tensor.cpp b/test/unittest/unittest_nntrainer_tensor.cpp
index 73cf6bd46..4202d164a 100644
--- a/test/unittest/unittest_nntrainer_tensor.cpp
+++ b/test/unittest/unittest_nntrainer_tensor.cpp
@@ -200,12 +200,23 @@ TEST(nntrainer_Tensor, Tensor_04_p) {
     in.push_back(ttv);
   }
 
+  std::vector<float> scales = {1.349f, 3.135f, 6.196f, 2.105f, 6.125f,
+                               4.106f, 0.916f, 7.014f, 9.814f, 5.556f};
+
   nntrainer::Tensor tensor = nntrainer::Tensor(
-    in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8});
+    in, scales, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8},
+    nntrainer::QScheme::PER_CHANNEL_AFFINE);
   ASSERT_NE(nullptr, tensor.getData<int8_t>(0));
   if (tensor.getValue<int8_t>(0, 0, 0, 1) != 1)
     status = ML_ERROR_INVALID_PARAMETER;
+
+  float *scale_data = tensor.getScale<float>();
+
+  for (unsigned int idx = 0; idx < scales.size(); ++idx) {
+    ASSERT_FLOAT_EQ(scale_data[idx], scales[idx]);
+  }
+
   EXPECT_EQ(status, ML_ERROR_NONE);
 }
 
@@ -335,9 +346,11 @@ TEST(nntrainer_Tensor, Tensor_08_n) {
     in.push_back(ttv);
   }
 
-  EXPECT_THROW(nntrainer::Tensor(
-                 in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}),
-               std::out_of_range);
+  EXPECT_THROW(
+    nntrainer::Tensor(in, {3.561f},
+                      {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8},
+                      nntrainer::QScheme::PER_TENSOR_AFFINE),
+    std::out_of_range);
 }
 
 TEST(nntrainer_Tensor, Tensor_09_n) {
@@ -3815,7 +3828,7 @@ TEST(nntrainer_Tensor, print_small_size_02) {
            << " 1 1 \n"
            << " 1 1 \n"
            << "\n"
-           << "-------\n";
+           << "-------\nScale factors: 0 \n";
 
   EXPECT_EQ(ss.str(), expected.str());
 }
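The print_small_size_02 expectation changes because CharTensor::print() now always appends a scale-factor line; a freshly allocated QINT8 tensor zero-initializes the scale region, hence the literal "Scale factors: 0". A sketch, assuming the stream operator declared in tensor.h:

```cpp
#include <iostream>
#include <tensor.h>

using namespace nntrainer;

int main() {
  // Default QScheme is PER_TENSOR_AFFINE, so scale_size() == 1; the
  // value-initialized allocation covers the scale region, which prints as 0.
  Tensor t({1, 1, 2, 2, Tformat::NCHW, Tdatatype::QINT8}, nullptr);
  std::cout << t; // output ends with "Scale factors: 0 "
  return 0;
}
```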