diff --git a/nntrainer/tensor/char_tensor.cpp b/nntrainer/tensor/char_tensor.cpp
index 0d42ccded..29a67e9b9 100644
--- a/nntrainer/tensor/char_tensor.cpp
+++ b/nntrainer/tensor/char_tensor.cpp
@@ -17,18 +17,18 @@
 
 namespace nntrainer {
 
-CharTensor::CharTensor(std::string name_, Tformat fm) :
-  TensorBase(name_, fm, Tdatatype::QINT8) {}
+CharTensor::CharTensor(std::string name_, Tformat fm, QScheme qscheme_) :
+  TensorBase(name_, fm, Tdatatype::QINT8), qscheme(qscheme_) {}
 
 CharTensor::CharTensor(const TensorDim &d, bool alloc_now, Initializer init,
-                       std::string name) :
-  TensorBase(d, alloc_now, init, name) {
+                       std::string name, QScheme qscheme_) :
+  TensorBase(d, alloc_now, init, name), qscheme(qscheme_) {
   if (alloc_now)
     allocate();
 }
 
-CharTensor::CharTensor(const TensorDim &d, const void *buf) :
-  CharTensor(d, true) {
+CharTensor::CharTensor(const TensorDim &d, const void *buf, QScheme qscheme_) :
+  CharTensor(d, true, Initializer::NONE, "", qscheme_) {
   if (d.getDataLen() != 0) {
     if (buf != nullptr)
       copy(buf);
@@ -37,7 +37,7 @@ CharTensor::CharTensor(const TensorDim &d, const void *buf) :
 
 CharTensor::CharTensor(
   std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
-  Tformat fm) {
+  std::vector<float> const &scales, Tformat fm, QScheme qscheme_) {
   if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) {
     throw std::out_of_range(
       "[Tensor] trying to initialize CharTensor from empty vector");
@@ -59,9 +59,14 @@ CharTensor::CharTensor(
   strides = dim.computeStrides();
   contiguous = true;
   initializer = Initializer::NONE;
+  qscheme = qscheme_;
 
-  MemoryData *mem_data =
-    new MemoryData((void *)(new int8_t[dim.getDataLen()]()));
+  NNTR_THROW_IF(scales.size() != scale_size(), std::invalid_argument)
+    << "invalid scale factor size " << scales.size();
+
+  /// @note sizeof(float) * scale_size() assumes scale factors are fp32.
+  MemoryData *mem_data = new MemoryData(
+    (void *)(new int8_t[dim.getDataLen() + sizeof(float) * scale_size()]()));
   data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
     delete[] mem_data->getAddr<int8_t>();
   });
@@ -84,9 +89,16 @@ CharTensor::CharTensor(
           for (unsigned int l = 0; l < channel(); ++l)
             this->setValue(i, l, j, k, d[i][j][k][l]);
   }
+
+  // copy scale factors
+  scopy(scale_size(), scales.data(), 1, (float *)getScale(), 1);
 }
 
 bool CharTensor::operator==(const CharTensor &rhs) const {
+  if (qscheme != rhs.qscheme)
+    return false;
+
+  // compare quantized data
   const int8_t *_data = (int8_t *)getData();
   const int8_t *_rdata = (int8_t *)rhs.getData();
   for (size_t i = 0; i < size(); ++i) {
@@ -94,6 +106,14 @@ bool CharTensor::operator==(const CharTensor &rhs) const {
       return false;
   }
 
+  // compare scale factors
+  const float *_scales = (float *)getScale();
+  const float *_rscales = (float *)rhs.getScale();
+  for (size_t i = 0; i < scale_size(); ++i) {
+    if (std::fabs(_scales[i] - _rscales[i]) > 1e-5)
+      return false;
+  }
+
   return true;
 }
 
@@ -109,7 +129,8 @@ void CharTensor::allocate() {
     /// allocate new memory for the tensor data
     MemoryData *mem_data;
 
-    mem_data = new MemoryData((void *)(new int8_t[dim.getDataLen()]{}));
+    mem_data = new MemoryData(
+      (void *)(new int8_t[dim.getDataLen() + sizeof(float) * scale_size()]{}));
     data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
       delete[] mem_data->template getAddr<int8_t>();
       delete mem_data;
@@ -141,6 +162,25 @@ void *CharTensor::getData(size_t idx) const {
   return data->getAddr<int8_t>() + offset + idx;
 }
 
+void *CharTensor::getScale() const {
+  if (!data)
+    return nullptr;
+
+  data->validate();
+  return ((int8_t *)getData()) + size();
+}
+
+void *CharTensor::getScale(size_t idx) const {
+  NNTR_THROW_IF(idx >= scale_size(), std::invalid_argument)
+    << "Tensor::getScale() index is not valid";
+
+  if (!data)
+    return nullptr;
+
+  data->validate();
+  return ((float *)getScale()) + idx;
+}
+
 void *CharTensor::getAddress(unsigned int i) {
   size_t index = getIndex(batch(), channel(), height(), width());
   if (i > index) {
@@ -349,9 +389,38 @@ void CharTensor::print(std::ostream &out) const {
     out.copyfmt(init);
   }
 
-  /// @todo print quantization information
+  /// print quantization information
+  const float *q_scales = (float *)getScale();
+
+  if (scale_size() > 50) {
+    out << "Scale factors: [" << q_scales[0] << ' ' << q_scales[1] << ' '
+        << q_scales[2] << " ... " << q_scales[scale_size() - 3] << ' '
+        << q_scales[scale_size() - 2] << ' ' << q_scales[scale_size() - 1]
+        << ']' << std::endl;
+    return;
+  }
+
+  out << "Scale factors: ";
+  for (unsigned int i = 0; i < scale_size(); ++i) {
+    out << q_scales[i] << " ";
+  }
+  out << std::endl;
+}
+
+size_t CharTensor::scale_size() const {
+  switch (qscheme) {
+  case QScheme::PER_TENSOR_AFFINE:
+    return 1;
+  case QScheme::PER_CHANNEL_AFFINE:
+    return width();
+  default:
+    break;
+  }
+  return 0;
+}
 
+QScheme CharTensor::q_scheme() const { return qscheme; }
+
 void CharTensor::copy(const void *buf) {
   NNTR_THROW_IF(!contiguous, std::invalid_argument)
     << getName() << " is not contiguous, cannot copy.";
@@ -360,17 +429,19 @@
     return;
   }
 
-  /// @todo need to optimize
   scopy(size(), (int8_t *)buf, 1, (int8_t *)getData(), 1);
+
+  float *scales = (float *)(((int8_t *)buf) + size());
+  scopy(scale_size(), scales, 1, (float *)getScale(), 1);
 }
 
 void CharTensor::save_quantization_info(std::ostream &file) {
-  checkedWrite(file, (char *)&axis, sizeof(uint8_t),
+  checkedWrite(file, (char *)&qscheme, sizeof(uint8_t),
                "[CharTensor::save] failed to write quantization information");
 }
 
 void CharTensor::read_quantization_info(std::ifstream &file) {
-  checkedRead(file, (char *)&axis, sizeof(uint8_t),
+  checkedRead(file, (char *)&qscheme, sizeof(uint8_t),
               "[CharTensor::read] failed to read quantization information");
 }
 
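Note: the char_tensor.cpp changes above pack the quantized values and their scale factors into a single allocation: `dim.getDataLen()` int8 elements followed by `scale_size()` floats, with `getScale()` returning the address just past the data. A minimal standalone sketch of that layout (plain C++ with illustrative names, not nntrainer code):

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const std::size_t data_len = 16; // int8 elements, as in dim.getDataLen()
  const std::size_t scale_len = 1; // PER_TENSOR_AFFINE -> one scale factor

  // Mirrors CharTensor::allocate():
  // new int8_t[dim.getDataLen() + sizeof(float) * scale_size()]
  int8_t *buf = new int8_t[data_len + sizeof(float) * scale_len]();

  // Mirrors getScale(): the scale block starts right past the int8 data.
  float scale = 0.00363567f;
  std::memcpy(buf + data_len, &scale, sizeof(float)); // memcpy avoids
                                                      // misaligned access
  float readback;
  std::memcpy(&readback, buf + data_len, sizeof(float));
  std::printf("scale[0] = %f\n", readback);

  delete[] buf;
  return 0;
}
```

The patch itself reads the scale region through `(float *)` casts, which assumes the int8 payload length keeps that block float-aligned.
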
diff --git a/nntrainer/tensor/char_tensor.h b/nntrainer/tensor/char_tensor.h
index f46bb2298..cfa7b0589 100644
--- a/nntrainer/tensor/char_tensor.h
+++ b/nntrainer/tensor/char_tensor.h
@@ -12,6 +12,7 @@
 #define __CHAR_TENSOR_H__
 #ifdef __cplusplus
 
+#include <quantizer.h>
 #include <tensor_base.h>
 
 namespace nntrainer {
@@ -25,7 +26,8 @@ class CharTensor : public TensorBase {
   /**
    * @brief     Basic Constructor of Tensor
    */
-  CharTensor(std::string name_ = "", Tformat fm = Tformat::NCHW);
+  CharTensor(std::string name_ = "", Tformat fm = Tformat::NCHW,
+             QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
   /**
    * @brief Construct a new CharTensor object
@@ -34,27 +36,33 @@ class CharTensor : public TensorBase {
    * @param alloc_now Allocate memory to this tensor or not
    * @param init Initializer for the tensor
    * @param name Name of the tensor
+   * @param qscheme_ Quantization scheme of the tensor
    */
   CharTensor(const TensorDim &d, bool alloc_now,
-             Initializer init = Initializer::NONE, std::string name = "");
+             Initializer init = Initializer::NONE, std::string name = "",
+             QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
   /**
    * @brief Construct a new CharTensor object
    *
    * @param d Tensor dim for this tensor
    * @param buf buffer
+   * @param qscheme_ quantization scheme of the tensor
    */
-  CharTensor(const TensorDim &d, const void *buf = nullptr);
+  CharTensor(const TensorDim &d, const void *buf = nullptr,
+             QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
   /**
    * @brief Construct a new CharTensor object
    *
    * @param d data for the Tensor
+   * @param scales scale factors for the Tensor
    * @param fm format for the Tensor
+   * @param qscheme_ quantization scheme of the tensor
    */
   CharTensor(
     std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
-    Tformat fm);
+    std::vector<float> const &scales, Tformat fm, QScheme qscheme_);
 
   /**
    * @brief Construct a new CharTensor object
@@ -101,6 +109,16 @@ class CharTensor : public TensorBase {
    */
   void *getData(size_t idx) const override;
 
+  /**
+   * @copydoc Tensor::getScale()
+   */
+  void *getScale() const override;
+
+  /**
+   * @copydoc Tensor::getScale(size_t idx)
+   */
+  void *getScale(size_t idx) const override;
+
   /**
    * @brief     i data index
    * @retval    address of ith data
@@ -227,11 +245,21 @@ class CharTensor : public TensorBase {
    */
   void read_quantization_info(std::ifstream &file) override;
 
+  /**
+   * @copydoc Tensor::scale_size()
+   */
+  size_t scale_size() const override;
+
+  /**
+   * @copydoc Tensor::q_scheme()
+   */
+  QScheme q_scheme() const;
+
 private:
   /**
-   * @brief quantization axis
+   * @brief quantization scheme
    */
-  uint8_t axis;
+  QScheme qscheme;
 
   /**
    * @brief copy a buffer to @a this, the caller has to ensure that @a this is
diff --git a/nntrainer/tensor/quantizer.cpp b/nntrainer/tensor/quantizer.cpp
index 08ea039fc..b2beb6602 100644
--- a/nntrainer/tensor/quantizer.cpp
+++ b/nntrainer/tensor/quantizer.cpp
@@ -10,6 +10,7 @@
 
 #include <math.h>
 #include <quantizer.h>
+#include <tensor.h>
 
 namespace nntrainer {
 
@@ -64,6 +65,8 @@ Tensor PerTensorAffineQuantizer::quantize(const Tensor &input,
     }
   }
 
+  *output.getScale<float>() = scale;
+
   return output;
 }
 
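For reference, the values in the updated unit tests are consistent with symmetric per-tensor affine quantization: scale = max|x| / 127, q = clamp(round(x / scale), -128, 127), dequantized x' = q * scale. The sketch below is inferred from the test vectors rather than copied from the quantizer internals; it reproduces the last four entries of qdata in per_tensor_affine_03_p:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  // Last four inputs of per_tensor_affine_03_p and its expected scale.
  const std::vector<float> x = {-0.07760239f, -0.28348053f, -0.37242615f,
                                0.30941701f};
  const float scale = 0.00363567f; // ~= max|x| over the full tensor / 127

  for (float v : x) {
    long q = std::lround(v / scale);
    q = std::max(-128L, std::min(127L, q)); // clamp to the int8 range
    std::printf("x=% .8f  q=%4ld  x'=% .8f\n", v, q, q * scale);
  }
  return 0; // prints q = -21, -78, -102, 85
}
```
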
diff --git a/nntrainer/tensor/quantizer.h b/nntrainer/tensor/quantizer.h
index 8ec075c84..f8cd3bcde 100644
--- a/nntrainer/tensor/quantizer.h
+++ b/nntrainer/tensor/quantizer.h
@@ -12,11 +12,16 @@
 #define __QUANTIZER_H__
 #ifdef __cplusplus
 
-#include <tensor.h>
+#include <memory>
+#include <stdexcept>
 #include <unordered_map>
 
+#include <tensor_dim.h>
+
 namespace nntrainer {
 
+class Tensor;
+
 /**
  * @brief defines the quantization scheme
  * @details NNTrainer provides basic quantization schemes (e.g., Per tensor
@@ -73,7 +78,8 @@ class Quantizer {
    * @param input Input tensor
    * @param qtype quantized data type
    */
-  virtual void calculateQParams(const Tensor &input, Tdatatype qtype) = 0;
+  virtual void calculateQParams(const Tensor &input,
+                                ml::train::TensorDim::DataType qtype) = 0;
 
 public:
   /**
@@ -112,14 +118,16 @@ class Quantizer {
    * @param[in] input Floating point tensor to quantize
    * @return Tensor quantized tensor
    */
-  virtual Tensor quantize(const Tensor &input, Tdatatype qtype) = 0;
+  virtual Tensor quantize(const Tensor &input,
+                          ml::train::TensorDim::DataType qtype) = 0;
 
   /**
    * @brief Dequantize a quantized tensor into a tensor.
    * @param[in] input Quantized tensor to dequantize
    * @return Tensor dequantized tensor
    */
-  virtual Tensor dequantize(const Tensor &input, Tdatatype qtype) = 0;
+  virtual Tensor dequantize(const Tensor &input,
+                            ml::train::TensorDim::DataType qtype) = 0;
 
   /**
    * @brief Get quantization Scheme type.
@@ -172,12 +180,14 @@ class PerTensorAffineQuantizer : public UniformQuantizer {
   /**
    * @copydoc Quantizer::quantize(const Tensor &input)
    */
-  Tensor quantize(const Tensor &input, Tdatatype qtype) override;
+  Tensor quantize(const Tensor &input,
+                  ml::train::TensorDim::DataType qtype) override;
 
   /**
    * @copydoc Quantizer::dequantize(const Tensor &input)
    */
-  Tensor dequantize(const Tensor &input, Tdatatype dtype) override;
+  Tensor dequantize(const Tensor &input,
+                    ml::train::TensorDim::DataType dtype) override;
 
   /**
    * @copydoc Quantizer::qscheme()
@@ -191,9 +201,11 @@ class PerTensorAffineQuantizer : public UniformQuantizer {
   long int quant_max;
 
   /**
-   * @copydoc Quantizer::calculateQParams(const Tensor &input, Tdatatype qtype)
+   * @copydoc Quantizer::calculateQParams(const Tensor &input,
+   * ml::train::TensorDim::DataType qtype)
    */
-  void calculateQParams(const Tensor &input, Tdatatype qtype) override;
+  void calculateQParams(const Tensor &input,
+                        ml::train::TensorDim::DataType qtype) override;
 };
 
 /**
@@ -220,12 +232,14 @@ class PerChannelAffineQuantizer : public UniformQuantizer {
   /**
    * @copydoc Quantizer::quantize(const Tensor &input)
    */
-  Tensor quantize(const Tensor &input, Tdatatype qtype) override;
+  Tensor quantize(const Tensor &input,
+                  ml::train::TensorDim::DataType qtype) override;
 
   /**
    * @copydoc Quantizer::dequantize(const Tensor &input)
    */
-  Tensor dequantize(const Tensor &input, Tdatatype dtype) override;
+  Tensor dequantize(const Tensor &input,
+                    ml::train::TensorDim::DataType dtype) override;
 
   /**
    * @copydoc Quantizer::qscheme()
@@ -239,9 +253,11 @@ class PerChannelAffineQuantizer : public UniformQuantizer {
   long int quant_max;
 
   /**
-   * @copydoc Quantizer::calculateQParams(const Tensor &input, Tdatatype qtype)
+   * @copydoc Quantizer::calculateQParams(const Tensor &input,
+   * ml::train::TensorDim::DataType qtype)
    */
-  void calculateQParams(const Tensor &input, Tdatatype qtype) override {}
+  void calculateQParams(const Tensor &input,
+                        ml::train::TensorDim::DataType qtype) override {}
 };
 
 /**
@@ -265,12 +281,14 @@ class BinaryCodeBasedQuantizer : public NonUniformQuantizer {
   /**
    * @copydoc Quantizer::quantize(const Tensor &input)
    */
-  Tensor quantize(const Tensor &input, Tdatatype qtype) override;
+  Tensor quantize(const Tensor &input,
+                  ml::train::TensorDim::DataType qtype) override;
 
   /**
    * @copydoc Quantizer::dequantize(const Tensor &input)
    */
-  Tensor dequantize(const Tensor &input, Tdatatype dtype) override;
+  Tensor dequantize(const Tensor &input,
+                    ml::train::TensorDim::DataType dtype) override;
 
   /**
    * @copydoc Quantizer::qscheme()
@@ -279,9 +297,11 @@ class BinaryCodeBasedQuantizer : public NonUniformQuantizer {
 
 private:
   /**
-   * @copydoc Quantizer::calculateQParams(const Tensor &input, Tdatatype qtype)
+   * @copydoc Quantizer::calculateQParams(const Tensor &input,
+   * ml::train::TensorDim::DataType qtype)
    */
-  void calculateQParams(const Tensor &input, Tdatatype qtype) override {}
+  void calculateQParams(const Tensor &input,
+                        ml::train::TensorDim::DataType qtype) override {}
 };
 
 /**
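The header now builds without <tensor.h> because every use of Tensor in these declarations is a reference or a by-value return type, and C++ accepts incomplete types in both positions until a function is defined or called. A minimal sketch of the pattern (illustrative, not the actual nntrainer declarations):

```cpp
class Tensor; // forward declaration, as quantizer.h now does

class QuantizerSketch {
public:
  virtual ~QuantizerSketch() = default;
  // Incomplete parameter and return types are fine in a pure declaration;
  // the definition in the .cpp must see the full Tensor (hence the new
  // #include <tensor.h> in quantizer.cpp).
  virtual Tensor quantize(const Tensor &input) = 0;
};
```
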
diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp
index b0cbae110..af39cb641 100644
--- a/nntrainer/tensor/tensor.cpp
+++ b/nntrainer/tensor/tensor.cpp
@@ -27,9 +27,11 @@ namespace nntrainer {
 
 Tensor::Tensor(
   std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
-  ml::train::TensorDim::TensorType t_type) {
-  itensor = std::shared_ptr<CharTensor>(new CharTensor(d, t_type.format),
-                                        std::default_delete<CharTensor>());
+  std::vector<float> const &scales, ml::train::TensorDim::TensorType t_type,
+  QScheme qscheme_) {
+  itensor = std::shared_ptr<CharTensor>(
+    new CharTensor(d, scales, t_type.format, qscheme_),
+    std::default_delete<CharTensor>());
 }
 
 Tensor::Tensor(
@@ -102,7 +104,7 @@ Tensor::Tensor(std::string name_, Tformat fm, Tdatatype d_type) {
 }
 
 Tensor::Tensor(const TensorDim &d, bool alloc_now, Initializer init,
-               std::string name) {
+               std::string name, QScheme qscheme) {
   itensor = nullptr;
 
   if (d.getDataType() == Tdatatype::FP32) {
@@ -130,9 +132,9 @@ Tensor::Tensor(const TensorDim &d, bool alloc_now, Initializer init,
       std::shared_ptr<UInt32Tensor>(new UInt32Tensor(d, alloc_now, init, name),
                                     std::default_delete<UInt32Tensor>());
   } else if (d.getDataType() == Tdatatype::QINT8) {
-    itensor =
-      std::shared_ptr<CharTensor>(new CharTensor(d, alloc_now, init, name),
-                                  std::default_delete<CharTensor>());
+    itensor = std::shared_ptr<CharTensor>(
+      new CharTensor(d, alloc_now, init, name, qscheme),
+      std::default_delete<CharTensor>());
   } else if (d.getDataType() == Tdatatype::BCQ) {
 #ifdef ENABLE_BIQGEMM
     itensor =
@@ -150,7 +152,7 @@ Tensor::Tensor(const TensorDim &d, bool alloc_now, Initializer init,
   }
 }
 
-Tensor::Tensor(const TensorDim &d, const void *buf) {
+Tensor::Tensor(const TensorDim &d, const void *buf, QScheme qscheme) {
   itensor = nullptr;
 
   if (d.getDataType() == Tdatatype::FP32) {
@@ -173,7 +175,7 @@ Tensor::Tensor(const TensorDim &d, const void *buf) {
     itensor = std::shared_ptr<UInt32Tensor>(
       new UInt32Tensor(d, buf), std::default_delete<UInt32Tensor>());
   } else if (d.getDataType() == Tdatatype::QINT8) {
-    itensor = std::shared_ptr<CharTensor>(new CharTensor(d, buf),
+    itensor = std::shared_ptr<CharTensor>(new CharTensor(d, buf, qscheme),
                                           std::default_delete<CharTensor>());
   } else if (d.getDataType() == Tdatatype::BCQ) {
 #ifdef ENABLE_BIQGEMM
@@ -1038,6 +1040,7 @@ void Tensor::copy(const Tensor &from) {
   }
 
   if (from.size() != 0 && size() == from.size() &&
+      scale_size() == from.scale_size() &&
       getDataType() == from.getDataType()) {
     // if tensor size and data type match, copy data
     itensor->copy(from);
@@ -1253,6 +1256,8 @@ size_t Tensor::width() const { return itensor->width(); }
 
 size_t Tensor::scale_size() const { return itensor->scale_size(); }
 
+QScheme Tensor::q_scheme() const { return itensor->q_scheme(); }
+
 void Tensor::mergeAxis(unsigned int axis1, unsigned int axis2) {
   NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
     << getName() << " is not contiguous, cannot merge axis";
diff --git a/nntrainer/tensor/tensor.h b/nntrainer/tensor/tensor.h
index 0e79ff10e..3b025ea61 100644
--- a/nntrainer/tensor/tensor.h
+++ b/nntrainer/tensor/tensor.h
@@ -63,17 +63,21 @@ class Tensor {
    * @param alloc_now If the memory of the tensor must be allocated
    * @param init Initializer for the tensor
    * @param name Name of the tensor
+   * @param qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(const TensorDim &d, bool alloc_now,
-         Initializer init = Initializer::NONE, std::string name = "");
+         Initializer init = Initializer::NONE, std::string name = "",
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
   /**
    * @brief     Constructor of Tensor with dimension/buf
    * @param d Tensor dim for this tensor
    * @param buf buffer
+   * @param qscheme_ Quantization scheme (only applies to Quantized Tensor)
    * @note Memory for this tensor is instantaneously allocated
    */
-  Tensor(const TensorDim &d, const void *buf = nullptr);
+  Tensor(const TensorDim &d, const void *buf = nullptr,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
   /**
    * @brief     Constructor of Tensor
@@ -83,10 +87,12 @@ class Tensor {
    * @param[in] d3 Width
    * @param[in] fm Tensor Format
    * @param[in] d_type Tensor Data Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(size_t d0, size_t d1, size_t d2, size_t d3, Tformat fm = Tformat::NCHW,
-         Tdatatype d_type = Tdatatype::FP32) :
-    Tensor(TensorDim(d0, d1, d2, d3, fm, d_type), nullptr){};
+         Tdatatype d_type = Tdatatype::FP32,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(TensorDim(d0, d1, d2, d3, fm, d_type), nullptr, qscheme_){};
 
   /**
    * @brief     Constructor of Tensor
@@ -95,10 +101,12 @@ class Tensor {
    * @param[in] d3 Width
    * @param[in] fm Tensor Format
    * @param[in] d_type Tensor Data Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(size_t d1, size_t d2, size_t d3, Tformat fm = Tformat::NCHW,
-         Tdatatype d_type = Tdatatype::FP32) :
-    Tensor(1, d1, d2, d3, fm, d_type){};
+         Tdatatype d_type = Tdatatype::FP32,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(1, d1, d2, d3, fm, d_type, qscheme_){};
 
   /**
    * @brief     Constructor of Tensor with batch size one and d1 size one
@@ -106,20 +114,24 @@ class Tensor {
    * @param[in] d3 Width (NCHW) or Channel (NHWC)
    * @param[in] fm Tensor Format
    * @param[in] d_type Tensor Data Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(size_t d2, size_t d3, Tformat fm = Tformat::NCHW,
-         Tdatatype d_type = Tdatatype::FP32) :
-    Tensor(1, 1, d2, d3, fm, d_type){};
+         Tdatatype d_type = Tdatatype::FP32,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(1, 1, d2, d3, fm, d_type, qscheme_){};
 
   /**
    * @brief     Constructor of Tensor with just Width or Channel
    * @param[in] d3 Width (NCHW) or Channel (NHWC)
    * @param[in] fm Tensor Format
    * @param[in] d_type Tensor Data Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   explicit Tensor(size_t d3, Tformat fm = Tformat::NCHW,
-                  Tdatatype d_type = Tdatatype::FP32) :
-    Tensor(1, 1, 1, d3, fm, d_type){};
+                  Tdatatype d_type = Tdatatype::FP32,
+                  QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(1, 1, 1, d3, fm, d_type, qscheme_){};
 
   /**
    * @brief     Constructor of Tensor
@@ -128,10 +140,12 @@ class Tensor {
    * @param[in] d2 Height (NCHW) or Width (NHWC)
    * @param[in] d3 Width (NCHW) or Channel (NHWC)
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(size_t d0, size_t d1, size_t d2, size_t d3,
-         ml::train::TensorDim::TensorType t_type) :
-    Tensor(TensorDim(d0, d1, d2, d3, t_type), nullptr){};
+         ml::train::TensorDim::TensorType t_type,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
+    Tensor(TensorDim(d0, d1, d2, d3, t_type), nullptr, qscheme_){};
 
   /**
    * @brief     Constructor of Tensor
@@ -139,9 +153,11 @@ class Tensor {
    * @param[in] d2 Height
    * @param[in] d3 Width
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(size_t d1, size_t d2, size_t d3,
-         ml::train::TensorDim::TensorType t_type) :
+         ml::train::TensorDim::TensorType t_type,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
-    Tensor(1, d1, d2, d3, t_type){};
+    Tensor(1, d1, d2, d3, t_type, qscheme_){};
 
   /**
@@ -149,19 +165,23 @@ class Tensor {
    * @param[in] d2 Height (NCHW) or Width (NHWC)
    * @param[in] d3 Width (NCHW) or Channel (NHWC)
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
-  Tensor(size_t d2, size_t d3, ml::train::TensorDim::TensorType t_type) :
+  Tensor(size_t d2, size_t d3, ml::train::TensorDim::TensorType t_type,
+         QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
     Tensor(1, (t_type.format == Tformat::NCHW) ? 1 : d3,
            (t_type.format == Tformat::NCHW) ? d2 : 1,
-           (t_type.format == Tformat::NCHW) ? d3 : d2, t_type){};
+           (t_type.format == Tformat::NCHW) ? d3 : d2, t_type, qscheme_){};
   /**
    * @brief     Constructor of Tensor with just Width or Channel
    * @param[in] d3 Width (NCHW) or Channel (NHWC)
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
-  explicit Tensor(size_t d3, ml::train::TensorDim::TensorType t_type) :
+  explicit Tensor(size_t d3, ml::train::TensorDim::TensorType t_type,
+                  QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE) :
     Tensor(1, (t_type.format == Tformat::NCHW) ? 1 : d3, 1,
-           (t_type.format == Tformat::NCHW) ? d3 : 1, t_type){};
+           (t_type.format == Tformat::NCHW) ? d3 : 1, t_type, qscheme_){};
 
   /**
    * @brief     Constructor of Tensor
@@ -312,32 +332,43 @@ class Tensor {
     Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
 
   /**
-   * @brief     Constructor of Tensor
+   * @brief     Constructor of CharTensor (QINT8)
    * @param[in] d data for the Tensor. It needs to set format properly.
+   * @param[in] scales scale factors for the Tensor.
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
-         ml::train::TensorDim::TensorType t_type);
+         std::vector<float> const &scales,
+         ml::train::TensorDim::TensorType t_type, QScheme qscheme_);
 
   /**
-   * @brief     Constructor of Tensor
+   * @brief     Constructor of CharTensor (QINT8)
    * @note      This constructor copies vector again. needs refactoring
    * @param[in] d data for the Tensor. It needs to set format properly.
+   * @param[in] scales scale factors for the Tensor.
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(std::vector<std::vector<std::vector<int8_t>>> const &d,
-         ml::train::TensorDim::TensorType t_type) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
+         std::vector<float> const &scales,
+         ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, t_type,
+           qscheme_){};
 
   /**
-   * @brief     Constructor of Tensor
+   * @brief     Constructor of CharTensor (QINT8)
    * @note      This constructor copies vector again. needs refactoring
    * @param[in] d data for the Tensor with batch size one
+   * @param[in] scales scale factors for the Tensor.
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ Quantization scheme (only applies to Quantized Tensor)
    */
   Tensor(std::vector<std::vector<int8_t>> const &d,
-         ml::train::TensorDim::TensorType t_type) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
+         std::vector<float> const &scales,
+         ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, t_type,
+           qscheme_){};
 
   /**
    *  @brief  Constructor of Tensor by directly assigning TensorBase.
@@ -1617,6 +1648,12 @@ class Tensor {
    */
   size_t scale_size() const;
 
+  /**
+   * @brief     return Tensor quantization scheme
+   * @retval    QScheme qscheme
+   */
+  QScheme q_scheme() const;
+
   /**
    * @brief Merge the given two axis for tensor at second axis inplace
    *
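Taken together with the tensor.cpp changes, callers can now build a quantized tensor and its scale factors in one expression, as the updated Tensor_04_p test below does. A usage sketch, assuming the nested data is laid out [batch][channel][height][width] for NCHW:

```cpp
#include <tensor.h>
#include <vector>

void quantized_tensor_example() {
  // 1x1x1x2 (NCHW) int8 data; PER_CHANNEL_AFFINE requires scale_size()
  // == width() == 2 scale factors per the new CharTensor::scale_size().
  std::vector<std::vector<std::vector<std::vector<int8_t>>>> data = {
    {{{-47, 87}}}};
  std::vector<float> scales = {0.0036f, 0.0041f};

  nntrainer::Tensor t(data, scales,
                      {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8},
                      nntrainer::QScheme::PER_CHANNEL_AFFINE);

  int8_t q = t.getValue<int8_t>(0, 0, 0, 1); // 87
  float s = t.getScale<float>()[1];          // 0.0041f
  (void)q;
  (void)s;
}
```
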
diff --git a/nntrainer/tensor/tensor_base.h b/nntrainer/tensor/tensor_base.h
index cc6ad0c2b..53af87de0 100644
--- a/nntrainer/tensor/tensor_base.h
+++ b/nntrainer/tensor/tensor_base.h
@@ -18,6 +18,7 @@
 
 #include <memory_data.h>
 #include <nntrainer_error.h>
+#include <quantizer.h>
 #include <tensor_dim.h>
 #include <util_func.h>
 
@@ -641,6 +642,17 @@ class TensorBase {
    */
   virtual size_t scale_size() const { return 0; }
 
+  /**
+   * @brief     return Tensor quantization scheme
+   * @retval    QScheme qscheme
+   * @note      Override for quantized tensors
+   */
+  virtual QScheme q_scheme() const {
+    throw std::invalid_argument(
+      "Tensor::q_scheme() is not supported in tensor data type " +
+      getStringDataType());
+  }
+
   /**
    * @brief Merge the given two axis for tensor at second axis inplace
    *
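The TensorBase default above makes q_scheme() an opt-in API: quantized tensor classes override it, and every other data type throws. A small sketch of the resulting caller-side behavior:

```cpp
#include <stdexcept>
#include <tensor.h>

nntrainer::QScheme scheme_or_default(const nntrainer::Tensor &t) {
  try {
    return t.q_scheme(); // CharTensor override returns the stored scheme
  } catch (const std::invalid_argument &) {
    // non-quantized tensors (e.g., FP32) hit the TensorBase default
    return nntrainer::QScheme::PER_TENSOR_AFFINE;
  }
}
```
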
diff --git a/test/unittest/unittest_nntrainer_quantizer.cpp b/test/unittest/unittest_nntrainer_quantizer.cpp
index ab4a1b6fd..6b6d98d1c 100644
--- a/test/unittest/unittest_nntrainer_quantizer.cpp
+++ b/test/unittest/unittest_nntrainer_quantizer.cpp
@@ -52,10 +52,16 @@ TEST(nntrainer_Quantizer, per_tensor_affine_03_p) {
                         -0.07760239, -0.28348053, -0.37242615, 0.30941701};
   nntrainer::Tensor input({1, 1, 4, 4}, input_data);
 
-  int8_t qdata[] = {-47, -28, 87,  -1,  123, -42, 39,   -22,
-                    -59, -97, 127, -96, -21, -78, -102, 85};
+  std::vector<int8_t> qdata = {-47, -28, 87,  -1,  123, -42, 39,   -22,
+                               -59, -97, 127, -96, -21, -78, -102, 85};
+  float qscale = 0.00363567f;
+  int8_t *scale_array = reinterpret_cast<int8_t *>(&qscale);
+  for (unsigned int i = 0; i < 4; ++i) {
+    qdata.push_back(scale_array[i]);
+  }
   nntrainer::Tensor quant_answer(
-    {1, 1, 4, 4, nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}, qdata);
+    {1, 1, 4, 4, nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8},
+    qdata.data());
 
   float output_data[] = {-0.17087643, -0.10179872, 0.31630316,  -0.00363567,
                          0.44718724,  -0.15269808, 0.14179108,  -0.07998471,
@@ -96,14 +102,20 @@ TEST(nntrainer_Quantizer, per_tensor_affine_04_p) {
     -0.20489319, 0.33036807,  0.27226517,  -0.25207010};
   nntrainer::Tensor input({1, 1, 8, 8}, input_data);
 
-  int8_t qdata[] = {-109, 9,    16,  14,  66,   16,   56,  -58, -29, 127, 61,
-                    -35,  -104, 121, -92, -122, 51,   68,  -97, 114, 31,  -33,
-                    33,   -110, -98, -60, -69,  -118, 25,  -18, 62,  8,   39,
-                    -107, 60,   -33, 91,  -99,  61,   85,  -58, -86, 98,  -41,
-                    -76,  110,  89,  -33, 82,   120,  -38, 12,  91,  102, 12,
-                    -1,   103,  -90, -71, -96,  -76,  122, 101, -93};
+  std::vector<int8_t> qdata = {
+    -109, 9,    16,   14,  66,  16,  56,  -58,  -29, 127, 61,   -35, -104,
+    121,  -92,  -122, 51,  68,  -97, 114, 31,   -33, 33,  -110, -98, -60,
+    -69,  -118, 25,   -18, 62,  8,   39,  -107, 60,  -33, 91,   -99, 61,
+    85,   -58,  -86,  98,  -41, -76, 110, 89,   -33, 82,  120,  -38, 12,
+    91,   102,  12,   -1,  103, -90, -71, -96,  -76, 122, 101,  -93};
+  float qscale = 0.00270727f;
+  int8_t *scale_array = reinterpret_cast<int8_t *>(&qscale);
+  for (unsigned int i = 0; i < 4; ++i) {
+    qdata.push_back(scale_array[i]);
+  }
   nntrainer::Tensor quant_answer(
-    {1, 1, 8, 8, nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}, qdata);
+    {1, 1, 8, 8, nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8},
+    qdata.data());
 
   float output_data[] = {
     -0.29509223, 0.02436541,  0.04331629,  0.03790175,  0.17867969,
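The test edits above spell out the new contract of Tensor(dim, buf) for QINT8: the raw buffer must carry the scale-factor bytes immediately after the int8 payload, since CharTensor::copy() now reads both. A helper sketch of that packing (illustrative, not an existing test utility):

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

// Append the raw bytes of each scale factor after the quantized data,
// matching the layout CharTensor::copy() expects.
std::vector<int8_t> pack_qint8_buffer(const std::vector<int8_t> &qdata,
                                      const std::vector<float> &scales) {
  std::vector<int8_t> buf(qdata);
  for (float s : scales) {
    int8_t bytes[sizeof(float)];
    std::memcpy(bytes, &s, sizeof(float));
    buf.insert(buf.end(), bytes, bytes + sizeof(float));
  }
  return buf;
}
```
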
diff --git a/test/unittest/unittest_nntrainer_tensor.cpp b/test/unittest/unittest_nntrainer_tensor.cpp
index 73cf6bd46..4202d164a 100644
--- a/test/unittest/unittest_nntrainer_tensor.cpp
+++ b/test/unittest/unittest_nntrainer_tensor.cpp
@@ -200,12 +200,23 @@ TEST(nntrainer_Tensor, Tensor_04_p) {
     in.push_back(ttv);
   }
 
+  std::vector<float> scales = {1.349f, 3.135f, 6.196f, 2.105f, 6.125f,
+                               4.106f, 0.916f, 7.014f, 9.814f, 5.556f};
+
   nntrainer::Tensor tensor = nntrainer::Tensor(
-    in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8});
+    in, scales, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8},
+    nntrainer::QScheme::PER_CHANNEL_AFFINE);
   ASSERT_NE(nullptr, tensor.getData<int8_t>(0));
 
   if (tensor.getValue<int8_t>(0, 0, 0, 1) != 1)
     status = ML_ERROR_INVALID_PARAMETER;
+
+  float *scale_data = tensor.getScale<float>();
+
+  for (unsigned int idx = 0; idx < scales.size(); ++idx) {
+    ASSERT_FLOAT_EQ(scale_data[idx], scales[idx]);
+  }
+
   EXPECT_EQ(status, ML_ERROR_NONE);
 }
 
@@ -335,9 +346,11 @@ TEST(nntrainer_Tensor, Tensor_08_n) {
     in.push_back(ttv);
   }
 
-  EXPECT_THROW(nntrainer::Tensor(
-                 in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}),
-               std::out_of_range);
+  EXPECT_THROW(
+    nntrainer::Tensor(in, {3.561f},
+                      {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8},
+                      nntrainer::QScheme::PER_TENSOR_AFFINE),
+    std::out_of_range);
 }
 
 TEST(nntrainer_Tensor, Tensor_09_n) {
@@ -3815,7 +3828,7 @@ TEST(nntrainer_Tensor, print_small_size_02) {
            << "         1          1 \n"
            << "         1          1 \n"
            << "\n"
-           << "-------\n";
+           << "-------\nScale factors: 0 \n";
 
   EXPECT_EQ(ss.str(), expected.str());
 }