[onert-micro] Add cmsis-nn Pooling kernels (#11570)
This commit adds cmsis-nn pooling kernels: AveragePool2D and MaxPool2D.

ONE-DCO-1.0-Signed-off-by: Artem Balyshev <[email protected]>

Co-authored-by: Artem Balyshev <[email protected]>
BalyshevArtem and Artem Balyshev authored Sep 26, 2023
1 parent 53195e7 commit c3fb7e8
Showing 8 changed files with 256 additions and 77 deletions.
2 changes: 2 additions & 0 deletions onert-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
@@ -1,5 +1,6 @@
REGISTER_KERNEL(ABS, Abs)
REGISTER_KERNEL(ADD, Add)
REGISTER_KERNEL(AVERAGE_POOL_2D, AveragePool2D)
REGISTER_KERNEL(ARG_MAX, ArgMax)
REGISTER_KERNEL(ARG_MIN, ArgMin)
REGISTER_KERNEL(DIV, Div)
@@ -29,6 +30,7 @@ REGISTER_KERNEL(LOGICAL_AND, LogicalAnd)
REGISTER_KERNEL(LOGICAL_OR, LogicalOr)
REGISTER_KERNEL(LEAKY_RELU, LeakyRelu)
REGISTER_KERNEL(MUL, Mul)
REGISTER_KERNEL(MAX_POOL_2D, MaxPool2D)
REGISTER_KERNEL(CONCATENATION, Concatenation)
REGISTER_KERNEL(SHAPE, Shape)
REGISTER_KERNEL(NOT_EQUAL, NotEqual)
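Registering AVERAGE_POOL_2D and MAX_POOL_2D in KernelsToBuild.lst is what typically pulls the new cmsis-nn implementations into the build: a list like this is usually consumed as an X-macro, where the interpreter defines REGISTER_KERNEL and then includes the file. The self-contained sketch below only illustrates that pattern; the KERNEL_LIST macro, the KernelEntry struct and the kKernels table are stand-ins, not the real onert-micro code.

#include <cstdio>

// Illustrative stand-in for "#include KernelsToBuild.lst"; the real list file
// supplies one REGISTER_KERNEL(...) line per enabled kernel.
#define KERNEL_LIST                               \
  REGISTER_KERNEL(AVERAGE_POOL_2D, AveragePool2D) \
  REGISTER_KERNEL(MAX_POOL_2D, MaxPool2D)

struct KernelEntry
{
  const char *op;
  const char *impl;
};

// Expand the list into a table by giving REGISTER_KERNEL a concrete definition.
#define REGISTER_KERNEL(builtin_operator, name) {#builtin_operator, #name},
static const KernelEntry kKernels[] = {KERNEL_LIST};
#undef REGISTER_KERNEL

int main()
{
  for (const auto &k : kKernels)
    std::printf("%s -> %s\n", k.op, k.impl);
  return 0;
}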
93 changes: 93 additions & 0 deletions onert-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2D.h
@@ -0,0 +1,93 @@
/*
* Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef LUCI_INTERPRETER_PAL_CMSIS_NN_AVERAGE_POOL_2D_H
#define LUCI_INTERPRETER_PAL_CMSIS_NN_AVERAGE_POOL_2D_H

#include "PALAveragePool2DCommon.h"

#include <arm_nnfunctions.h>

namespace luci_interpreter_pal
{
inline void AveragePool(const PoolParams &params, const luci_interpreter::RuntimeShape &input_shape,
                        const uint8_t *input_data,
                        const luci_interpreter::RuntimeShape &output_shape, uint8_t *output_data,
                        luci_interpreter::DataType data_type)
{
  cmsis_nn_dims input_dims;
  cmsis_nn_dims output_dims;
  cmsis_nn_pool_params pool_params;
  cmsis_nn_dims filter_dims;
  cmsis_nn_context ctx;

  const int depth = input_shape.dims(3);
  const int output_width = output_shape.dims(2);

  input_dims.n = 1;
  input_dims.h = input_shape.dims(1);
  input_dims.w = input_shape.dims(2);
  input_dims.c = depth;

  output_dims.n = 1;
  output_dims.h = output_shape.dims(1);
  output_dims.w = output_width;
  output_dims.c = depth;

  pool_params.stride.h = params.stride_height;
  pool_params.stride.w = params.stride_width;
  pool_params.padding.h = params.padding_values.height;
  pool_params.padding.w = params.padding_values.width;
  pool_params.activation.min = params.quantized_activation_min;
  pool_params.activation.max = params.quantized_activation_max;

  filter_dims.n = 1;
  filter_dims.h = params.filter_height;
  filter_dims.w = params.filter_width;
  filter_dims.c = 1;

  const int32_t buffer_size = data_type == luci_interpreter::DataType::S16
                                ? arm_avgpool_s16_get_buffer_size(output_width, depth)
                                : arm_avgpool_s8_get_buffer_size(output_width, depth);
  int8_t *buffer = nullptr;
  if (buffer_size > 0)
  {
    buffer = new int8_t[buffer_size];
  }

  ctx.buf = buffer;
  ctx.size = buffer_size;

  if (data_type == luci_interpreter::DataType::S8)
  {
    arm_avgpool_s8(&ctx, &pool_params, &input_dims,
                   luci_interpreter::kernels::getTensorData<int8_t>(input_data), &filter_dims,
                   &output_dims, luci_interpreter::kernels::getTensorData<int8_t>(output_data));
  }
  else
  {
    arm_avgpool_s16(&ctx, &pool_params, &input_dims,
                    luci_interpreter::kernels::getTensorData<int16_t>(input_data), &filter_dims,
                    &output_dims, luci_interpreter::kernels::getTensorData<int16_t>(output_data));
  }

  if (buffer_size > 0)
    delete[] buffer;
}
} // namespace luci_interpreter_pal

#endif // LUCI_INTERPRETER_PAL_CMSIS_NN_AVERAGE_POOL_2D_H
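The CMSIS-NN average-pool kernels can require a scratch buffer whose size depends only on the output width and the channel count, which is why the PAL above queries arm_avgpool_s8_get_buffer_size / arm_avgpool_s16_get_buffer_size before the call and frees the buffer afterwards. Below is a small sketch of the same sizing step written with std::unique_ptr instead of raw new/delete; it is an illustration, not code from this commit, and the helper name makeAvgPoolScratch is made up.

#include <cstdint>
#include <memory>

#include <arm_nnfunctions.h>

// Illustrative: size the s8 scratch buffer once and keep ownership in a smart
// pointer; ctx.buf and ctx.size are what arm_avgpool_s8 actually consumes.
inline std::unique_ptr<int8_t[]> makeAvgPoolScratch(int output_width, int depth,
                                                    cmsis_nn_context &ctx)
{
  const int32_t size = arm_avgpool_s8_get_buffer_size(output_width, depth);
  std::unique_ptr<int8_t[]> buffer(size > 0 ? new int8_t[size] : nullptr);
  ctx.buf = buffer.get();
  ctx.size = size;
  return buffer; // must outlive the arm_avgpool_s8 call
}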
78 changes: 78 additions & 0 deletions onert-micro/luci-interpreter/pal/cmsisnn/PALMaxPool2D.h
@@ -0,0 +1,78 @@
/*
* Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef LUCI_INTERPRETER_PAL_CMSIS_NN_MAX_POOL_2D_H
#define LUCI_INTERPRETER_PAL_CMSIS_NN_MAX_POOL_2D_H

#include "PALMaxPool2DCommon.h"

#include <arm_nnfunctions.h>

namespace luci_interpreter_pal
{

inline void MaxPool(const PoolParams &params, const luci_interpreter::RuntimeShape &input_shape,
                    const uint8_t *input_data, const luci_interpreter::RuntimeShape &output_shape,
                    uint8_t *output_data, luci_interpreter::DataType data_type)
{
  cmsis_nn_dims input_dims;
  cmsis_nn_dims output_dims;
  cmsis_nn_pool_params pool_params;
  cmsis_nn_dims filter_dims;
  cmsis_nn_context ctx;

  const int depth = input_shape.dims(3);
  const int output_width = output_shape.dims(2);

  input_dims.n = 1;
  input_dims.h = input_shape.dims(1);
  input_dims.w = input_shape.dims(2);
  input_dims.c = depth;

  output_dims.n = 1;
  output_dims.h = output_shape.dims(1);
  output_dims.w = output_width;
  output_dims.c = depth;

  pool_params.stride.h = params.stride_height;
  pool_params.stride.w = params.stride_width;
  pool_params.padding.h = params.padding_values.height;
  pool_params.padding.w = params.padding_values.width;
  pool_params.activation.min = params.quantized_activation_min;
  pool_params.activation.max = params.quantized_activation_max;

  filter_dims.n = 1;
  filter_dims.h = params.filter_height;
  filter_dims.w = params.filter_width;
  filter_dims.c = 1;

  if (data_type == luci_interpreter::DataType::S8)
  {
    arm_max_pool_s8(&ctx, &pool_params, &input_dims,
                    luci_interpreter::kernels::getTensorData<int8_t>(input_data), &filter_dims,
                    &output_dims, luci_interpreter::kernels::getTensorData<int8_t>(output_data));
  }
  else
  {
    arm_max_pool_s16(&ctx, &pool_params, &input_dims,
                     luci_interpreter::kernels::getTensorData<int16_t>(input_data), &filter_dims,
                     &output_dims, luci_interpreter::kernels::getTensorData<int16_t>(output_data));
  }
}
} // namespace luci_interpreter_pal

#endif // LUCI_INTERPRETER_PAL_CMSIS_NN_MAX_POOL_2D_H
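Unlike the average-pool path, CMSIS-NN exposes no buffer-size query for max pooling, and the cmsis_nn_context is passed through here without a scratch buffer. The following standalone sketch shows how the same CMSIS-NN call is wired up outside the interpreter; the 1x4x4x2 shapes, stride and activation limits are invented for illustration, and maxPoolExample is not part of the commit.

#include <cstdint>

#include <arm_nnfunctions.h>

// Illustrative only: max-pool a 1x4x4x2 int8 tensor with a 2x2 window, stride 2,
// no padding, producing a 1x2x2x2 output.
void maxPoolExample(const int8_t *input, int8_t *output)
{
  cmsis_nn_context ctx{nullptr, 0};     // no scratch buffer needed for max pooling
  cmsis_nn_pool_params pool_params{};
  pool_params.stride = {2, 2};          // {w, h}
  pool_params.padding = {0, 0};         // {w, h}
  pool_params.activation = {-128, 127}; // {min, max}, i.e. no fused activation

  cmsis_nn_dims input_dims{1, 4, 4, 2};  // {n, h, w, c}
  cmsis_nn_dims filter_dims{1, 2, 2, 1}; // pooling window height/width in h/w
  cmsis_nn_dims output_dims{1, 2, 2, 2};

  arm_max_pool_s8(&ctx, &pool_params, &input_dims, input, &filter_dims, &output_dims, output);
}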
60 changes: 0 additions & 60 deletions onert-micro/luci-interpreter/pal/common/PALMaxPool2DCommon.h
@@ -81,66 +81,6 @@ inline void MaxPool(const PoolParams &params, const luci_interpreter::RuntimeSha
}
}

template <typename T>
inline void MaxPool(const PoolParams &params, const luci_interpreter::RuntimeShape &input_shape,
const T *input_data, const luci_interpreter::RuntimeShape &output_shape,
T *output_data)
{
const int batches = input_shape.dims(0);
const int depth = output_shape.dims(3);
const int input_height = input_shape.dims(1);
const int input_width = input_shape.dims(2);
const int output_height = output_shape.dims(1);
const int output_width = output_shape.dims(2);
const int stride_height = params.stride_height;
const int stride_width = params.stride_width;
for (int batch = 0; batch < batches; ++batch)
{
for (int out_y = 0; out_y < output_height; ++out_y)
{
for (int out_x = 0; out_x < output_width; ++out_x)
{
for (int channel = 0; channel < depth; ++channel)
{
const int in_x_origin = (out_x * stride_width) - params.padding_values.width;
const int in_y_origin = (out_y * stride_height) - params.padding_values.height;
// Compute the boundaries of the filter region clamped so as to
// ensure that the filter window fits in the input array.
const int filter_x_start = std::max(0, -in_x_origin);
const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin);
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
T max = std::numeric_limits<T>::lowest();
for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
{
for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
{
const int in_x = in_x_origin + filter_x;
const int in_y = in_y_origin + filter_y;

const int input_data_offset =
((batch * input_shape.dims(1) + in_y) * input_shape.dims(2) + in_x) *
input_shape.dims(3) +
channel;

max = std::max(max, input_data[input_data_offset]);
}
}
max = std::max<T>(max, params.quantized_activation_min);
max = std::min<T>(max, params.quantized_activation_max);

const int output_data_offset =
((batch * output_shape.dims(1) + out_y) * output_shape.dims(2) + out_x) *
output_shape.dims(3) +
channel;

output_data[output_data_offset] = static_cast<T>(max);
}
}
}
}
}

} // namespace luci_interpreter_pal

#endif // LUCI_INTERPRETER_PAL_MAX_POOL_2D_COMMON_H
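The scalar fallback removed above walks the NHWC layout directly, flattening (batch, y, x, channel) with the offset ((batch * H + y) * W + x) * C + channel. A tiny self-contained check of that index arithmetic, with values invented purely for illustration:

#include <cassert>

// For a 1x4x4x2 NHWC tensor, element (batch=0, y=2, x=1, c=1) lives at
// ((0*4 + 2)*4 + 1)*2 + 1 = 19.
int main()
{
  const int H = 4, W = 4, C = 2;
  const int batch = 0, y = 2, x = 1, c = 1;
  const int offset = ((batch * H + y) * W + x) * C + c;
  assert(offset == 19);
  return 0;
}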
9 changes: 8 additions & 1 deletion onert-micro/luci-interpreter/pal/mcu/PALAveragePool2D.h
@@ -22,7 +22,14 @@

namespace luci_interpreter_pal
{
// TODO: add S8 and S16 kernel

inline void AveragePool(const PoolParams &, const luci_interpreter::RuntimeShape &, const uint8_t *,
                        const luci_interpreter::RuntimeShape &, uint8_t *,
                        luci_interpreter::DataType)
{
  assert(false && "Not impl yet");
}

} // namespace luci_interpreter_pal

#endif // LUCI_INTERPRETER_PAL_AVERAGE_POOL_2D_H
6 changes: 5 additions & 1 deletion onert-micro/luci-interpreter/pal/mcu/PALMaxPool2D.h
@@ -22,7 +22,11 @@

namespace luci_interpreter_pal
{
// TODO: Add INT8, INT16 kernels
inline void MaxPool(const PoolParams &, const luci_interpreter::RuntimeShape &, const uint8_t *,
                    const luci_interpreter::RuntimeShape &, uint8_t *, luci_interpreter::DataType)
{
  assert(false && "Not impl yet");
}
} // namespace luci_interpreter_pal

#endif // LUCI_INTERPRETER_PAL_MAX_POOL_2D_H
42 changes: 38 additions & 4 deletions onert-micro/luci-interpreter/src/kernels/AveragePool2D.cpp
@@ -36,7 +36,7 @@ void configure_kernel_CircleAveragePool2D(const circle::Operator *cur_op,
const auto output = runtime_graph->getCircleTensorByIndex(output_index);

LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == Tensor::element_type(output));
assert(Tensor::num_dims(input) == 4);
LUCI_INTERPRETER_CHECK(Tensor::num_dims(input) == 4);
}

void execute_kernel_CircleAveragePool2D(const circle::Operator *cur_op,
@@ -69,10 +69,34 @@ void execute_kernel_CircleAveragePool2D(const circle::Operator *cur_op,
const auto *input_data = runtime_graph->getDataByTensor(input);
auto *output_data = runtime_graph->getDataByTensor(output);

const DataType input_type = Tensor::element_type(input);

float activation_min{};
float activation_max{};
kernels::calculateActivationRange(luci_actfunc(options->fused_activation_function()),
&activation_min, &activation_max);

int32_t quantized_activation_min{};
int32_t quantized_activation_max{};

if (input_type == DataType::S8 or input_type == DataType::S16)
{
#ifndef DIS_QUANT
kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
output, &quantized_activation_min,
&quantized_activation_max);
#endif // DIS_QUANT
}
else if (input_type == DataType::FLOAT32)
{
#ifndef DIS_FLOAT
kernels::calculateActivationRange(luci_actfunc(options->fused_activation_function()),
&activation_min, &activation_max);
#endif // DIS_FLOAT
}
else
{
assert(false && "Not supported type");
}

luci_interpreter_pal::PoolParams params{};
params.padding_values.height = padding_height;
params.padding_values.width = padding_width;
@@ -82,8 +106,10 @@ void execute_kernel_CircleAveragePool2D(const circle::Operator *cur_op,
params.filter_width = options->filter_width();
params.float_activation_min = activation_min;
params.float_activation_max = activation_max;
params.quantized_activation_max = quantized_activation_max;
params.quantized_activation_min = quantized_activation_min;

switch (Tensor::element_type(input))
switch (input_type)
{
#ifndef DIS_FLOAT
case DataType::FLOAT32:
Expand All @@ -92,6 +118,14 @@ void execute_kernel_CircleAveragePool2D(const circle::Operator *cur_op,
kernels::getTensorShape(output), kernels::getTensorData<float>(output_data));
break;
#endif // DIS_FLOAT
#ifndef DIS_QUANT
case DataType::S8:
case DataType::S16:
luci_interpreter_pal::AveragePool(
params, kernels::getTensorShape(input), kernels::getTensorData<uint8_t>(input_data),
kernels::getTensorShape(output), kernels::getTensorData<uint8_t>(output_data), input_type);
break;
#endif // DIS_QUANT
default:
assert(false && "Unsupported type.");
}
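In the S8/S16 branch above, params.quantized_activation_min/max are filled by kernels::calculateActivationRangeQuantized, which maps the fused activation's float bounds into the output tensor's quantized domain. The sketch below shows the usual shape of that computation for int8; it is an assumption modeled on the common TFLite-style helper, not a copy of the onert-micro implementation.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

// Illustrative: clamp a float activation range [act_min, act_max] into the
// int8 quantized domain defined by the output's scale and zero point.
inline void activationRangeQuantizedS8(float act_min, float act_max, float scale,
                                       int32_t zero_point, int32_t *q_min, int32_t *q_max)
{
  const int32_t type_min = std::numeric_limits<int8_t>::min(); // -128
  const int32_t type_max = std::numeric_limits<int8_t>::max(); // 127
  const auto quantize = [&](float v) {
    return zero_point + static_cast<int32_t>(std::round(v / scale));
  };
  *q_min = std::max(type_min, quantize(act_min));
  *q_max = std::min(type_max, quantize(act_max));
}

// Example: RELU6 (act_min = 0, act_max = 6) with scale 0.05 and zero point -10
// yields q_min = -10 and q_max = std::min(127, -10 + 120) = 110.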