[onert-micro] Add cmsis-nn Pooling kernels #11570

Merged: 1 commit, Sep 26, 2023
2 changes: 2 additions & 0 deletions onert-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
@@ -1,5 +1,6 @@
REGISTER_KERNEL(ABS, Abs)
REGISTER_KERNEL(ADD, Add)
REGISTER_KERNEL(AVERAGE_POOL_2D, AveragePool2D)
REGISTER_KERNEL(ARG_MAX, ArgMax)
REGISTER_KERNEL(ARG_MIN, ArgMin)
REGISTER_KERNEL(DIV, Div)
@@ -28,6 +29,7 @@ REGISTER_KERNEL(LOGICAL_AND, LogicalAnd)
REGISTER_KERNEL(LOGICAL_OR, LogicalOr)
REGISTER_KERNEL(LEAKY_RELU, LeakyRelu)
REGISTER_KERNEL(MUL, Mul)
REGISTER_KERNEL(MAX_POOL_2D, MaxPool2D)
REGISTER_KERNEL(CONCATENATION, Concatenation)
REGISTER_KERNEL(SHAPE, Shape)
REGISTER_KERNEL(NOT_EQUAL, NotEqual)
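Note: registering AVERAGE_POOL_2D and MAX_POOL_2D here is what pulls the new pooling PALs into the cmsisnn build; the list is presumably consumed at build time to decide which kernels get compiled for this target.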
93 changes: 93 additions & 0 deletions onert-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2D.h
@@ -0,0 +1,93 @@
/*
* Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef LUCI_INTERPRETER_PAL_CMSIS_NN_AVERAGE_POOL_2D_H
#define LUCI_INTERPRETER_PAL_CMSIS_NN_AVERAGE_POOL_2D_H

#include "PALAveragePool2DCommon.h"

#include <arm_nnfunctions.h>

namespace luci_interpreter_pal
{
inline void AveragePool(const PoolParams &params, const luci_interpreter::RuntimeShape &input_shape,
const uint8_t *input_data,
const luci_interpreter::RuntimeShape &output_shape, uint8_t *output_data,
luci_interpreter::DataType data_type)
{
cmsis_nn_dims input_dims;
cmsis_nn_dims output_dims;
cmsis_nn_pool_params pool_params;
cmsis_nn_dims filter_dims;
cmsis_nn_context ctx;

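// CMSIS-NN pooling operates on NHWC data; the batch dimension is fixed to 1 below.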
const int depth = input_shape.dims(3);
const int output_width = output_shape.dims(2);

input_dims.n = 1;
input_dims.h = input_shape.dims(1);
input_dims.w = input_shape.dims(2);
input_dims.c = depth;

output_dims.n = 1;
output_dims.h = output_shape.dims(1);
output_dims.w = output_width;
output_dims.c = depth;

pool_params.stride.h = params.stride_height;
pool_params.stride.w = params.stride_width;
pool_params.padding.h = params.padding_values.height;
pool_params.padding.w = params.padding_values.width;
pool_params.activation.min = params.quantized_activation_min;
pool_params.activation.max = params.quantized_activation_max;

filter_dims.n = 1;
filter_dims.h = params.filter_height;
filter_dims.w = params.filter_width;
filter_dims.c = 1;

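// CMSIS-NN average pooling may need a scratch buffer; query its size for the
// selected type and allocate it only when it is actually required.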
const int32_t buffer_size = data_type == luci_interpreter::DataType::S16
? arm_avgpool_s16_get_buffer_size(output_width, depth)
: arm_avgpool_s8_get_buffer_size(output_width, depth);
int8_t *buffer = nullptr;
if (buffer_size > 0)
{
buffer = new int8_t[buffer_size];
}

ctx.buf = buffer;
ctx.size = buffer_size;

if (data_type == luci_interpreter::DataType::S8)
{
arm_avgpool_s8(&ctx, &pool_params, &input_dims,
luci_interpreter::kernels::getTensorData<int8_t>(input_data), &filter_dims,
&output_dims, luci_interpreter::kernels::getTensorData<int8_t>(output_data));
}
else
{
arm_avgpool_s16(&ctx, &pool_params, &input_dims,
luci_interpreter::kernels::getTensorData<int16_t>(input_data), &filter_dims,
&output_dims, luci_interpreter::kernels::getTensorData<int16_t>(output_data));
}

if (buffer_size > 0)
delete[] buffer;
}
} // namespace luci_interpreter_pal

#endif // LUCI_INTERPRETER_PAL_CMSIS_NN_AVERAGE_POOL_2D_H
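For context, a minimal sketch of how a kernel could drive this new entry point is shown below. The helper name run_avg_pool_s8_sketch, the 2x2 window, and the stride of 2 are illustrative assumptions; the real dispatch added by this PR lives in src/kernels/AveragePool2D.cpp further down.

#include "PALAveragePool2D.h"

#include <limits>

// Hypothetical caller (not part of this PR): runs S8 average pooling over
// NHWC tensors whose shapes and raw buffers come from the runtime graph.
void run_avg_pool_s8_sketch(const luci_interpreter::RuntimeShape &input_shape,
                            const uint8_t *input_data,
                            const luci_interpreter::RuntimeShape &output_shape,
                            uint8_t *output_data)
{
  luci_interpreter_pal::PoolParams params{};
  params.stride_height = 2;
  params.stride_width = 2;
  params.filter_height = 2;
  params.filter_width = 2;
  params.padding_values.height = 0;
  params.padding_values.width = 0;
  params.quantized_activation_min = std::numeric_limits<int8_t>::min();
  params.quantized_activation_max = std::numeric_limits<int8_t>::max();

  // The PAL selects arm_avgpool_s8 vs arm_avgpool_s16 based on the DataType argument.
  luci_interpreter_pal::AveragePool(params, input_shape, input_data, output_shape, output_data,
                                    luci_interpreter::DataType::S8);
}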
78 changes: 78 additions & 0 deletions onert-micro/luci-interpreter/pal/cmsisnn/PALMaxPool2D.h
@@ -0,0 +1,78 @@
/*
* Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef LUCI_INTERPRETER_PAL_CMSIS_NN_MAX_POOL_2D_H
#define LUCI_INTERPRETER_PAL_CMSIS_NN_MAX_POOL_2D_H

#include "PALMaxPool2DCommon.h"

#include <arm_nnfunctions.h>

namespace luci_interpreter_pal
{

inline void MaxPool(const PoolParams &params, const luci_interpreter::RuntimeShape &input_shape,
const uint8_t *input_data, const luci_interpreter::RuntimeShape &output_shape,
uint8_t *output_data, luci_interpreter::DataType data_type)
{
cmsis_nn_dims input_dims;
cmsis_nn_dims output_dims;
cmsis_nn_pool_params pool_params;
cmsis_nn_dims filter_dims;
cmsis_nn_context ctx;

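// Unlike average pooling, no scratch buffer is allocated here; ctx is passed through as-is.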
const int depth = input_shape.dims(3);
const int output_width = output_shape.dims(2);

input_dims.n = 1;
input_dims.h = input_shape.dims(1);
input_dims.w = input_shape.dims(2);
input_dims.c = depth;

output_dims.n = 1;
output_dims.h = output_shape.dims(1);
output_dims.w = output_width;
output_dims.c = depth;

pool_params.stride.h = params.stride_height;
pool_params.stride.w = params.stride_width;
pool_params.padding.h = params.padding_values.height;
pool_params.padding.w = params.padding_values.width;
pool_params.activation.min = params.quantized_activation_min;
pool_params.activation.max = params.quantized_activation_max;

filter_dims.n = 1;
filter_dims.h = params.filter_height;
filter_dims.w = params.filter_width;
filter_dims.c = 1;

if (data_type == luci_interpreter::DataType::S8)
{
arm_max_pool_s8(&ctx, &pool_params, &input_dims,
luci_interpreter::kernels::getTensorData<int8_t>(input_data), &filter_dims,
&output_dims, luci_interpreter::kernels::getTensorData<int8_t>(output_data));
}
else
{
arm_max_pool_s16(&ctx, &pool_params, &input_dims,
luci_interpreter::kernels::getTensorData<int16_t>(input_data), &filter_dims,
&output_dims, luci_interpreter::kernels::getTensorData<int16_t>(output_data));
}
}
} // namespace luci_interpreter_pal

#endif // LUCI_INTERPRETER_PAL_CMSIS_NN_MAX_POOL_2D_H
60 changes: 0 additions & 60 deletions onert-micro/luci-interpreter/pal/common/PALMaxPool2DCommon.h
@@ -81,66 +81,6 @@ inline void MaxPool(const PoolParams &params, const luci_interpreter::RuntimeShape &input_shape,
}
}

template <typename T>
inline void MaxPool(const PoolParams &params, const luci_interpreter::RuntimeShape &input_shape,
const T *input_data, const luci_interpreter::RuntimeShape &output_shape,
T *output_data)
{
const int batches = input_shape.dims(0);
const int depth = output_shape.dims(3);
const int input_height = input_shape.dims(1);
const int input_width = input_shape.dims(2);
const int output_height = output_shape.dims(1);
const int output_width = output_shape.dims(2);
const int stride_height = params.stride_height;
const int stride_width = params.stride_width;
for (int batch = 0; batch < batches; ++batch)
{
for (int out_y = 0; out_y < output_height; ++out_y)
{
for (int out_x = 0; out_x < output_width; ++out_x)
{
for (int channel = 0; channel < depth; ++channel)
{
const int in_x_origin = (out_x * stride_width) - params.padding_values.width;
const int in_y_origin = (out_y * stride_height) - params.padding_values.height;
// Compute the boundaries of the filter region clamped so as to
// ensure that the filter window fits in the input array.
const int filter_x_start = std::max(0, -in_x_origin);
const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin);
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
T max = std::numeric_limits<T>::lowest();
for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
{
for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
{
const int in_x = in_x_origin + filter_x;
const int in_y = in_y_origin + filter_y;

const int input_data_offset =
((batch * input_shape.dims(1) + in_y) * input_shape.dims(2) + in_x) *
input_shape.dims(3) +
channel;

max = std::max(max, input_data[input_data_offset]);
}
}
max = std::max<T>(max, params.quantized_activation_min);
max = std::min<T>(max, params.quantized_activation_max);

const int output_data_offset =
((batch * output_shape.dims(1) + out_y) * output_shape.dims(2) + out_x) *
output_shape.dims(3) +
channel;

output_data[output_data_offset] = static_cast<T>(max);
}
}
}
}
}

} // namespace luci_interpreter_pal

#endif // LUCI_INTERPRETER_PAL_MAX_POOL_2D_COMMON_H
9 changes: 8 additions & 1 deletion onert-micro/luci-interpreter/pal/mcu/PALAveragePool2D.h
@@ -22,7 +22,14 @@

namespace luci_interpreter_pal
{
// TODO: add S8 and S16 kernel

inline void AveragePool(const PoolParams &, const luci_interpreter::RuntimeShape &, const uint8_t *,
const luci_interpreter::RuntimeShape &, uint8_t *,
luci_interpreter::DataType)
{
assert(false && "Not impl yet");
}

} // namespace luci_interpreter_pal

#endif // LUCI_INTERPRETER_PAL_AVERAGE_POOL_2D_H
6 changes: 5 additions & 1 deletion onert-micro/luci-interpreter/pal/mcu/PALMaxPool2D.h
@@ -22,7 +22,11 @@

namespace luci_interpreter_pal
{
// TODO: Add INT8, INT16 kernels
inline void MaxPool(const PoolParams &, const luci_interpreter::RuntimeShape &, const uint8_t *,
const luci_interpreter::RuntimeShape &, uint8_t *, luci_interpreter::DataType)
{
assert(false && "Not impl yet");
}
} // namespace luci_interpreter_pal

#endif // LUCI_INTERPRETER_PAL_MAX_POOL_2D_H
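With these stubs, the mcu PAL presumably keeps serving float pooling through the common implementations, while the S8/S16 path now fails fast on the assert until reference quantized kernels are implemented for that target.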
42 changes: 38 additions & 4 deletions onert-micro/luci-interpreter/src/kernels/AveragePool2D.cpp
@@ -36,7 +36,7 @@ void configure_kernel_CircleAveragePool2D(const circle::Operator *cur_op,
const auto output = runtime_graph->getCircleTensorByIndex(output_index);

LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == Tensor::element_type(output));
assert(Tensor::num_dims(input) == 4);
LUCI_INTERPRETER_CHECK(Tensor::num_dims(input) == 4);
}

void execute_kernel_CircleAveragePool2D(const circle::Operator *cur_op,
@@ -69,10 +69,34 @@ void execute_kernel_CircleAveragePool2D(const circle::Operator *cur_op,
const auto *input_data = runtime_graph->getDataByTensor(input);
auto *output_data = runtime_graph->getDataByTensor(output);

const DataType input_type = Tensor::element_type(input);

float activation_min{};
float activation_max{};
kernels::calculateActivationRange(luci_actfunc(options->fused_activation_function()),
&activation_min, &activation_max);

int32_t quantized_activation_min{};
int32_t quantized_activation_max{};

if (input_type == DataType::S8 or input_type == DataType::S16)
{
#ifndef DIS_QUANT
kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
output, &quantized_activation_min,
&quantized_activation_max);
#endif // DIS_QUANT
}
else if (input_type == DataType::FLOAT32)
{
#ifndef DIS_FLOAT
kernels::calculateActivationRange(luci_actfunc(options->fused_activation_function()),
&activation_min, &activation_max);
#endif // DIS_FLOAT
}
else
{
assert(false && "Not supported type");
}

luci_interpreter_pal::PoolParams params{};
params.padding_values.height = padding_height;
params.padding_values.width = padding_width;
@@ -82,8 +106,10 @@ void execute_kernel_CircleAveragePool2D(const circle::Operator *cur_op,
params.filter_width = options->filter_width();
params.float_activation_min = activation_min;
params.float_activation_max = activation_max;
params.quantized_activation_max = quantized_activation_max;
params.quantized_activation_min = quantized_activation_min;

switch (Tensor::element_type(input))
switch (input_type)
{
#ifndef DIS_FLOAT
case DataType::FLOAT32:
Expand All @@ -92,6 +118,14 @@ void execute_kernel_CircleAveragePool2D(const circle::Operator *cur_op,
kernels::getTensorShape(output), kernels::getTensorData<float>(output_data));
break;
#endif // DIS_FLOAT
#ifndef DIS_QUANT
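// S8 and S16 share this path: raw uint8_t buffers are forwarded and the CMSIS-NN PAL
// reinterprets them according to input_type.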
case DataType::S8:
case DataType::S16:
luci_interpreter_pal::AveragePool(
params, kernels::getTensorShape(input), kernels::getTensorData<uint8_t>(input_data),
kernels::getTensorShape(output), kernels::getTensorData<uint8_t>(output_data), input_type);
break;
#endif // DIS_QUANT
default:
assert(false && "Unsupported type.");
}