[onert] Support dconv2d hybrid (Samsung#11479)
It supports hybrid dconv2d - weights: int8 symmetric, in/out: float.

ONE-DCO-1.0-Signed-off-by: Sanggyu Lee <[email protected]>
glistening authored Sep 5, 2023
1 parent a836fa5 commit 82de9c2
Showing 4 changed files with 159 additions and 2 deletions.
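
Reviewer note: "hybrid" here means the weights stay int8 (symmetric, per-channel scales) while activations stay float. At run time each batch entry of the input is quantized to int8 with its own asymmetric scale/offset, products are accumulated in int32, and the accumulator is rescaled to float by input_scale * per_channel_weight_scale. The sketch below is a minimal illustration of that arithmetic, not the cker implementation; AsymmetricQuantize and HybridAccumulate are made-up names (the real entry points are nnfw::cker::PortableAsymmetricQuantizeFloats and nnfw::cker::reference_integer_ops::DepthwiseConvHybridPerChannel).

#include <algorithm>
#include <cmath>
#include <cstdint>

// Quantize a float span to int8 with one asymmetric scale/offset
// (done once per batch entry before the kernel runs).
void AsymmetricQuantize(const float *values, int size, int8_t *quantized,
                        float *scaling_factor, int32_t *offset)
{
  const auto [min_it, max_it] = std::minmax_element(values, values + size);
  const float rmin = std::min(0.0f, *min_it);
  const float rmax = std::max(0.0f, *max_it);
  *scaling_factor = rmax > rmin ? (rmax - rmin) / 255.0f : 1.0f;
  *offset = static_cast<int32_t>(std::round(-128.0f - rmin / *scaling_factor));
  for (int i = 0; i < size; ++i)
  {
    const int32_t q = static_cast<int32_t>(std::round(values[i] / *scaling_factor)) + *offset;
    quantized[i] = static_cast<int8_t>(std::clamp<int32_t>(q, -128, 127));
  }
}

// One output element of the hybrid convolution: int32 accumulation of
// symmetric int8 weights against the offset-corrected int8 input, then a
// single float rescale by input_scale * per-channel weight scale.
float HybridAccumulate(const int8_t *input, const int8_t *weights, int len,
                       float input_scale, int32_t input_offset,
                       float weight_scale, float bias)
{
  int32_t acc = 0;
  for (int i = 0; i < len; ++i)
    acc += (static_cast<int32_t>(input[i]) - input_offset) * weights[i];
  return static_cast<float>(acc) * input_scale * weight_scale + bias;
}

The per-batch scale/offset is why prepareQ8iHybridPerChannel below sizes _input_scaling_factors and _input_offsets with batch_size.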
1 change: 1 addition & 0 deletions compute/cker/include/cker/operation/DepthwiseConv.h
@@ -26,6 +26,7 @@
#include "cker/operation/optimized/DepthwiseConvUint8.h"
#include "cker/operation/optimized/integer_ops/DepthwiseConvInt8.h"
#include "cker/operation/reference/integer_ops/DepthwiseConvUInt8.h"
#include "cker/operation/reference/integer_ops/DepthwiseConvHybrid.h"
#include "cker/CpuBackendThreadpool.h"

namespace nnfw
86 changes: 84 additions & 2 deletions runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
@@ -16,6 +16,7 @@

#include "DepthwiseConvolutionLayer.h"

#include "cker/PortableTensorUtils.h"
#include <cker/operation/DepthwiseConv.h>

namespace onert
@@ -147,6 +148,50 @@ void DepthwiseConvolutionLayer::convQ8i()
_external_context->ruy_context());
}

void DepthwiseConvolutionLayer::convQ8iHybridPerChannel()
{
if (!_prepared)
{
prepareQ8iHybridPerChannel();
_prepared = true;
}

float output_activation_min = 0, output_activation_max = 0;
CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);

auto input_shape = getShape(_input);
const int batch_size = input_shape.Dims(0);
const int input_size = input_shape.FlatSize() / batch_size;

auto scaling_factors_ptr = _input_scaling_factors.data();
auto input_offsets_ptr = _input_offsets.data();

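// Quantize the float activations one batch entry at a time; the per-batch
// scale and offset recorded here are what the hybrid kernel later uses to
// rescale the int32 accumulator back to float.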
for (int b = 0; b < batch_size; ++b)
{
const int offset = b * input_size;
nnfw::cker::PortableAsymmetricQuantizeFloats(getBuffer<float>(_input) + offset, input_size,
_input_quantized.data() + offset,
&scaling_factors_ptr[b], &input_offsets_ptr[b]);
}

nnfw::cker::DepthwiseConvParams op_params;
op_params.padding_values.width = _paddingLeft;
op_params.padding_values.height = _paddingTop;
op_params.depth_multiplier = _multiplier;
op_params.stride_width = _strideWidth;
op_params.stride_height = _strideHeight;
op_params.dilation_width_factor = _dilationWidth;
op_params.dilation_height_factor = _dilationHeight;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;

nnfw::cker::reference_integer_ops::DepthwiseConvHybridPerChannel(
op_params, _input_scaling_factors.data(), getShape(_input), _input_quantized.data(),
getShape(_kernel), getBuffer<int8_t>(_kernel), getShape(_bias), getBuffer<float>(_bias),
getShape(_output), getBuffer<float>(_output), _kernel->data_scales().data(),
_input_offsets.data());
}

void DepthwiseConvolutionLayer::prepareQ8i()
{
GetQuantizedConvolutionMultipliersAndShifts(
@@ -163,6 +208,31 @@ void DepthwiseConvolutionLayer::prepareQ8uPerChannel()
_per_channel_output_shift);
}

void DepthwiseConvolutionLayer::prepareQ8iHybridPerChannel()
{
// Allocate memory for activation quantization:
// - quantized values (int8_t, same shape as the original input)
// - quantization params (one scale/zero-point per batch entry)
auto input_shape = getShape(_input);
const int batch_size = input_shape.Dims(0);
const int input_size = input_shape.FlatSize() / batch_size;
_input_quantized.resize(batch_size * input_size); // whole input, not just one batch entry
// TODO: Optimize the case of batch_size = 1
_input_scaling_factors.resize(batch_size);
_input_offsets.resize(batch_size);
}

void DepthwiseConvolutionLayer::ensureQ8iHybridPerChannel()
{
// Ensure the weight is per-channel quantized.
int32_t kernel_channel_cnt = getShape(_kernel).Dims(3);
// Per-channel scales come from a flatbuffer vector, so their count is within uint32_t range.
size_t kernel_scale_cnt = _kernel->data_scales().size();
// Promote both to int64_t to compare the signed and unsigned counts safely.
if ((int64_t)kernel_channel_cnt != (int64_t)kernel_scale_cnt)
  throw std::runtime_error{"DConv2D hybrid supports only per-channel quantized weight."};
}
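// A per-tensor (single-scale) weight is rejected above because the hybrid
// kernel reads one weight scale per output channel, so a single-element
// scale vector would be indexed out of range (exercised by the
// neg_OneOp_DepthwiseConv2D_I8_Hybrid_PerTensor test below).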

void DepthwiseConvolutionLayer::configure(
const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
@@ -186,8 +256,16 @@ void DepthwiseConvolutionLayer::configure(
_activation = activation;
_output = output;
_external_context = external_context;
_is_hybrid = _input->data_type() == OperandType::FLOAT32 &&
_kernel->data_type() == OperandType::QUANT_INT8_SYMM;

if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
if (_is_hybrid)
{
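// Hybrid preparation is cheap and shape-driven: check the per-channel
// weight layout once and size the scratch buffers; the activations
// themselves are quantized on every run().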
ensureQ8iHybridPerChannel();
prepareQ8iHybridPerChannel();
_prepared = true;
}
else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
{
if (_kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic())
{
@@ -209,7 +287,11 @@

void DepthwiseConvolutionLayer::run()
{
if (_input->data_type() == OperandType::FLOAT32)
if (_is_hybrid)
{
convQ8iHybridPerChannel();
}
else if (_input->data_type() == OperandType::FLOAT32)
{
convFloat32();
}
9 changes: 9 additions & 0 deletions runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
@@ -44,6 +44,7 @@ class DepthwiseConvolutionLayer : public ::onert::exec::IFunction
void convQ8uPerChannel();

void convQ8i();
void convQ8iHybridPerChannel();

void configure(const IPortableTensor *input, const IPortableTensor *kernel,
const IPortableTensor *bias, const uint32_t paddingLeft,
@@ -58,6 +59,8 @@
private:
void prepareQ8i();
void prepareQ8uPerChannel();
void prepareQ8iHybridPerChannel();
void ensureQ8iHybridPerChannel();

private:
const IPortableTensor *_input{nullptr};
@@ -87,6 +90,12 @@ class DepthwiseConvolutionLayer : public ::onert::exec::IFunction
// Per channel output multiplier and shift.
std::vector<int32_t> _per_channel_output_multiplier;
std::vector<int> _per_channel_output_shift;

// For hybrid
bool _is_hybrid{false};
std::vector<int8_t> _input_quantized;
std::vector<float> _input_scaling_factors;
std::vector<int32_t> _input_offsets;
};

} // namespace ops
65 changes: 65 additions & 0 deletions tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.test.cc
@@ -203,6 +203,45 @@ TEST_F(GenModelTest, OneOp_DepthwiseConv2D_U8_PerChannel)
SUCCEED();
}

TEST_F(GenModelTest, OneOp_DepthwiseConv2D_I8_Hybrid_PerChannel)
{
CircleGen cgen;
// weight
// clang-format off
std::vector<int8_t> weight_data{1, 2, 1, 2, -9, 10, -9, 10,
5, 6, 5, 6, 13, -14, 13, -14};
// clang-format on
uint32_t weight_buf = cgen.addBuffer(weight_data);
std::vector<float> weight_scales = {1, 1, 1, 1};
std::vector<int64_t> weight_zeropoints = {0, 0, 0, 0};
int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_INT8, weight_buf},
weight_scales, weight_zeropoints);
// bias
std::vector<float> bias_data{0, 1, 2, 3};
uint32_t bias_buf = cgen.addBuffer(bias_data);
int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});

// in and out
int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32});
int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32});

cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
circle::ActivationFunctionType_NONE);
cgen.setInputsAndOutputs({in}, {out});

_context = std::make_unique<GenModelTestContext>(cgen.finish());
// clang-format off
_context->addTestCase(uniformTCD<float>({{0, 1, 2, 3,
0, 1, 2, 3,
0, 1, 2, 3}},
{{8, -7, 20, -1,
8, -7, 20, -1}}));
// clang-format on
_context->setBackends({"cpu"});

SUCCEED();
}
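// Sanity check for the expected values above (weight scales are all 1, so the
// weights dequantize exactly and only the on-the-fly input quantization adds
// a small error): output channel 0 at the first output position accumulates
// 0*1 + 2*(-9) + 0*5 + 2*13 = 8 with bias 0, and channel 1 gives
// 0*2 + 2*10 + 0*6 + 2*(-14) + 1 = -7, matching {8, -7, 20, -1}.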

TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Stride)
{
CircleGen cgen;
@@ -500,3 +539,29 @@ TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_I8_NonZero_ZeroPoints)

SUCCEED();
}

TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_I8_Hybrid_PerTensor)
{
// PerTensor Quantized Weight is not supported
CircleGen cgen;
std::vector<int8_t> weight_data{1, 2, 3};
uint32_t weight_buf = cgen.addBuffer(weight_data);
std::vector<float> bias_data{0, 2, 4};
uint32_t bias_buf = cgen.addBuffer(bias_data);
int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
// Hybrid does not support per-tensor.
std::vector<float> weight_scales = {0.5};
std::vector<int64_t> weight_zeropoints = {0};
int weight = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
weight_scales, weight_zeropoints);
int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32, bias_buf});
int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
/* depth_multiplier */ 1, circle::ActivationFunctionType_NONE);
cgen.setInputsAndOutputs({in}, {out});

_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->expectFailCompile();
_context->setBackends({"cpu"});
SUCCEED();
}
