From 9d1783b632bd64c4b94b41c4d76649e89b5eddc5 Mon Sep 17 00:00:00 2001 From: Artem Balyshev Date: Wed, 4 Oct 2023 14:44:37 +0300 Subject: [PATCH] [onert-micro] Add float Mean kernels This commit adds float Mean kernels for onert-micro. ONE-DCO-1.0-Signed-off-by: Artem Balyshev --- .../test_models/mean/FloatMeanKernel.h | 132 +++++++ .../test_models/mean/NegMeanKernel.h | 92 +++++ .../test_models/mean/TestDataMeanBase.h | 60 +++ .../pal/cmsisnn/KernelsToBuild.lst | 1 + .../luci-interpreter/pal/common/PALMean.h | 205 ++++++++++ .../luci-interpreter/pal/common/Params.h | 6 + .../pal/mcu/KernelsToBuild.lst | 1 + .../luci-interpreter/src/kernels/Mean.cpp | 360 ++++-------------- .../luci-interpreter/src/kernels/Mean.h | 55 --- .../src/kernels/Mean.test.cpp | 233 ++---------- 10 files changed, 603 insertions(+), 542 deletions(-) create mode 100644 onert-micro/luci-interpreter/include/luci_interpreter/test_models/mean/FloatMeanKernel.h create mode 100644 onert-micro/luci-interpreter/include/luci_interpreter/test_models/mean/NegMeanKernel.h create mode 100644 onert-micro/luci-interpreter/include/luci_interpreter/test_models/mean/TestDataMeanBase.h create mode 100644 onert-micro/luci-interpreter/pal/common/PALMean.h delete mode 100644 onert-micro/luci-interpreter/src/kernels/Mean.h diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mean/FloatMeanKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mean/FloatMeanKernel.h new file mode 100644 index 00000000000..909b7410cb1 --- /dev/null +++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mean/FloatMeanKernel.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_TEST_MODELS_FLOAT_MEAN_KERNEL_H +#define LUCI_INTERPRETER_TEST_MODELS_FLOAT_MEAN_KERNEL_H + +#include "TestDataMeanBase.h" + +namespace luci_interpreter +{ +namespace test_kernel +{ +namespace mean_float +{ +/* + * Mean Kernel: + * + * Input(1, 8, 8, 4) + * | + * Mean + * | + * Output(1, 8, 8, 1) + */ +const unsigned char test_kernel_model_circle[] = { + 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x94, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x34, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, + 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, + 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, + 0x6c, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x1b, 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x94, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x1c, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x72, 0x65, 0x64, 0x75, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x6e, + 0x64, 0x69, 0x63, 0x65, 0x73, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, + 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00}; + +const std::vector input_data = { + -73.19745, -62.66789, -15.868883, -69.99245, -86.77558, -47.07158, -59.42521, 5.4639907, 
+ -15.482954, 58.430527, 30.962307, -8.479264, 64.87171, 67.23879, 54.92413, -75.001656, + 4.095402, -11.012883, 1.7135352, -13.673498, 87.62411, 88.27154, 86.84994, 61.68961, + -67.81691, -36.073383, 54.346165, -83.79197, 35.099308, -23.05919, 26.401726, 20.99549, + -68.63421, -93.027596, 20.0895, -16.020033, 57.642673, 8.66057, 39.191364, 29.198711, + -5.9334397, 11.010835, 82.77485, -34.213863, -38.869553, 16.539444, 51.105484, 25.632273, + -55.436813, -26.42026, 77.96095, -59.019154, -82.52756, -94.416176, -83.77591, 46.43875, + 0.7686069, 57.346397, -89.24597, -8.594538, -98.168755, -33.18969, -41.993664, 13.660449, + 50.10378, 9.801906, -4.2520585, 27.210102, 48.8715, -19.44194, 38.652195, 23.77053, + -82.0674, -93.96652, 99.148094, 22.794533, 0.5715625, 0.84766275, 87.92019, 37.35077, + -32.265865, 67.46462, -24.098558, 87.36311, 90.409134, 33.023712, -15.923093, 40.05901, + -12.006578, 31.039108, -63.882004, -73.78517, -24.940235, 30.9098, 31.745, -89.77378, + -46.777866, 58.79768, -24.669464, 96.29413, 61.62126, 45.743416, 38.30191, 71.805405, + -31.20969, 33.56755, -1.926614, 72.13441, -22.292011, -16.355177, 21.689945, 87.95895, + -98.04168, 93.35264, -12.684541, -18.105795, 30.574284, 42.890903, -94.390366, -47.013157, + -98.465126, 28.63009, -83.54015, 86.82799, 0.6768988, 6.070787, 43.308678, 1.8557712, + -73.0521, -90.86948, 43.77232, 68.301056, 66.867775, 97.34002, -59.342876, -51.359367, + 17.27793, 52.223003, -3.9915564, 29.598532, 34.474148, -80.920456, -30.45005, -17.469683, + -67.02992, -34.23075, -35.53944, 61.557327, -66.91338, -94.03176, -45.88021, 97.36409, + 96.45681, -32.885677, 72.40823, -62.28857, 20.948895, 1.259363, -84.97583, 60.83626, + -94.692535, -15.315798, -99.92936, 40.56625, -8.6356325, -7.3984733, 56.255993, -31.700819, + 62.08311, 52.800938, 32.27374, -99.46793, -40.924038, 24.67266, -58.954403, 42.263252, + -72.13501, -58.40316, 14.619292, -43.400642, -82.13468, -47.54976, -42.642033, -8.409653, + 74.90983, 97.76474, -71.152916, 83.61312, -37.22972, 21.405357, -56.848846, 90.63024, + -70.21143, -29.522697, 94.9647, 74.74478, 37.564766, -40.22343, -63.337795, -65.86191, + -48.546135, -58.20052, 36.73888, 67.78194, -43.096832, 94.7046, 9.798892, -79.97487, + -15.868657, -84.753975, 4.8745494, -18.346195, 54.9818, 75.854, 41.797707, -5.673281, + -36.31264, -73.4931, -41.090492, 6.3805137, -73.66098, 85.20992, 91.28027, -73.26658, + -92.18044, 41.29011, 5.5041995, -73.70062, -16.678818, 30.614132, 92.100555, 11.274231, + -37.915485, 34.91591, 36.32971, -37.70164, -23.708878, 19.026278, -41.71216, 67.325356, + 78.23511, -43.154037, 22.667723, 30.742237, -6.086414, 17.191307, 65.828896, -40.83338, + -18.61725, 23.976517, 80.2347, -92.53064, 71.6477, -38.28841, -60.853157, 24.402542}; + +const std::vector reference_output_data = { + -55.431667, -46.952095, 16.357655, 28.008245, -4.7193613, 81.108795, -33.334023, 14.859333, + -39.398083, 33.673332, 13.409595, 13.601912, -15.728818, -53.57022, -9.9313755, -39.922916, + 20.71593, 22.963072, -13.522823, 31.672546, 24.615828, 36.89219, -29.65866, -13.014804, + 20.91112, 54.368, 18.141413, 17.750427, -8.869844, -16.984585, -16.636799, 12.978033, + -12.962048, 13.376387, 23.776978, -23.59151, -18.810696, -27.365314, 18.422699, -0.4828272, + -42.342857, 2.1302667, 11.922464, -8.235632, -39.82988, -45.184032, 46.28369, 4.489258, + 17.493837, -32.964592, -0.55646133, -4.6420527, -28.523571, 41.74006, -36.128933, 7.3906593, + -29.771688, 29.327526, -1.0928774, 5.232649, 22.122757, 9.025103, -1.7341671, -0.7728319}; 
+ +} // namespace mean_float + +class TestDataFloatMean : public TestDataMeanBase +{ +public: + TestDataFloatMean() + { + _input_data = mean_float::input_data; + _reference_output_data = mean_float::reference_output_data; + _test_kernel_model_circle = mean_float::test_kernel_model_circle; + } + + ~TestDataFloatMean() override = default; +}; + +} // namespace test_kernel +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_TEST_MODELS_FLOAT_MEAN_KERNEL_H diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mean/NegMeanKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mean/NegMeanKernel.h new file mode 100644 index 00000000000..708f05e802d --- /dev/null +++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mean/NegMeanKernel.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_MEAN_KERNEL_H +#define LUCI_INTERPRETER_TEST_MODELS_NEG_MEAN_KERNEL_H + +#include "luci_interpreter/test_models/TestDataBase.h" + +namespace luci_interpreter +{ +namespace test_kernel +{ +namespace neg_input_output_type_mismatch_mean_kernel +{ +/* + * Mean Kernel with input output type mismatch: + * + * Input(1, 8, 8, 4) - Float32 + * | + * Mean + * | + * Output(1, 8, 8, 1) - Int32 + */ +const unsigned char test_kernel_model_circle[] = { + 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x98, 0x01, 0x00, 0x00, 0xb4, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x34, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, + 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, + 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, + 0x6c, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x1b, 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 
0x00, 0x03, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00,
+  0x04, 0x00, 0x00, 0x00, 0xd0, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x02, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+  0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+  0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+  0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+  0x1c, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x72, 0x65, 0x64, 0x75, 0x63, 0x74, 0x69, 0x6f,
+  0x6e, 0x5f, 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, 0x73, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+  0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+  0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+  0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+  0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+  0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+  0x0c, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0x11, 0x00, 0x00, 0x00,
+  0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+  0x65, 0x00, 0x00, 0x00};
+} // namespace neg_input_output_type_mismatch_mean_kernel
+
+class NegTestDataInputOutputTypeMismatchMeanKernel : public NegTestDataBase
+{
+public:
+  NegTestDataInputOutputTypeMismatchMeanKernel()
+  {
+    _test_kernel_model_circle =
+      neg_input_output_type_mismatch_mean_kernel::test_kernel_model_circle;
+  }
+
+  ~NegTestDataInputOutputTypeMismatchMeanKernel() override = default;
+
+  const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+  const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_MEAN_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mean/TestDataMeanBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mean/TestDataMeanBase.h
new file mode 100644
index 00000000000..88928992864
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mean/TestDataMeanBase.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef LUCI_INTERPRETER_TEST_MODELS_MEAN_KERNEL_BASE_H +#define LUCI_INTERPRETER_TEST_MODELS_MEAN_KERNEL_BASE_H + +#include "luci_interpreter/test_models/TestDataBase.h" + +namespace luci_interpreter +{ +namespace test_kernel +{ + +template class TestDataMeanBase : public TestDataBase +{ +public: + TestDataMeanBase() = default; + + const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; } + + const std::vector &get_input_data_by_index(int i) override final + { + switch (i) + { + case 0: + return _input_data; + default: + assert(false && "Wrong input index"); + } + } + + const std::vector &get_output_data_by_index(int i) override final + { + assert(i == 0); + return _reference_output_data; + } + +protected: + std::vector _input_data; + std::vector _reference_output_data; + const unsigned char *_test_kernel_model_circle; +}; + +} // namespace test_kernel +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_TEST_MODELS_MEAN_KERNEL_BASE_H diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/onert-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst index e92ce5e85f1..0d135429f22 100644 --- a/onert-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst +++ b/onert-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst @@ -40,6 +40,7 @@ REGISTER_KERNEL(LEAKY_RELU, LeakyRelu) REGISTER_KERNEL(LOG_SOFTMAX, LogSoftmax) REGISTER_KERNEL(MUL, Mul) REGISTER_KERNEL(MAX_POOL_2D, MaxPool2D) +REGISTER_KERNEL(MEAN, Mean) REGISTER_KERNEL(CONCATENATION, Concatenation) REGISTER_KERNEL(SHAPE, Shape) REGISTER_KERNEL(NOT_EQUAL, NotEqual) diff --git a/onert-micro/luci-interpreter/pal/common/PALMean.h b/onert-micro/luci-interpreter/pal/common/PALMean.h new file mode 100644 index 00000000000..f2926af5522 --- /dev/null +++ b/onert-micro/luci-interpreter/pal/common/PALMean.h @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_MEAN_COMMON_H +#define LUCI_INTERPRETER_PAL_MEAN_COMMON_H + +#include "Params.h" +#include "PALUtils.h" + +namespace luci_interpreter_pal +{ +namespace +{ +// This method parses the input 'axis' to remove duplicates and handle negative +// values, and returns a valid 'out_axis' +inline bool resolveAxis(const int num_dims, const int *axis, const int64_t num_axis, int *out_axis, + int *out_num_axis) +{ + *out_num_axis = 0; // Just in case. + // Short-circuit axis resolution for scalars; the axis will go unused. + if (num_dims == 0) + { + return true; + } + // o(n^2) is fine since out_num_axis should be really small, mostly <= 4 + for (int64_t idx = 0; idx < num_axis; ++idx) + { + // Handle negative index. A positive index 'p_idx' can be represented as a + // negative index 'n_idx' as: n_idx = p_idx-num_dims + // eg: For num_dims=3, [0, 1, 2] is the same as [-3, -2, -1] */ + int current = axis[idx] < 0 ? 
(axis[idx] + num_dims) : axis[idx]; + if (current < 0 || current >= num_dims) + { + return false; + } + bool is_dup = false; + for (int j = 0; j < *out_num_axis; ++j) + { + if (out_axis[j] == current) + { + is_dup = true; + break; + } + } + if (!is_dup) + { + out_axis[*out_num_axis] = current; + *out_num_axis += 1; + } + } + return true; +} + +// A generic reduce method that can be used for reduce_sum, reduce_mean, etc. +// This method iterates through input data and reduce elements along the +// dimensions given in axis. +template +inline bool reduce(const In *input_data, const int *input_dims, const int *, + const int input_num_dims, const int, const int *axis, const int num_axis, + int *input_iter, Out reducer(Out, const In), Out *output_data) +{ + // Reset input iterator. + for (int idx = 0; idx < input_num_dims; ++idx) + { + input_iter[idx] = 0; + } + // Iterate through input_data. + do + { + size_t input_offset = reducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr); + size_t output_offset = + reducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis); + output_data[output_offset] = reducer(output_data[output_offset], input_data[input_offset]); + } while (nextIndex(input_num_dims, input_dims, input_iter)); + return true; +} + +// This method expects that output_data has been initialized. +template +inline bool reduceSumImpl(const In *input_data, const int *input_dims, const int *output_dims, + const int input_num_dims, const int output_num_dims, const int *axis, + const int num_axis, int *input_iter, Out *output_data) +{ + auto reducer = [](const Out current, const In in) -> Out { + const Out actual_in = static_cast(in); + return current + actual_in; + }; + return reduce(input_data, input_dims, output_dims, input_num_dims, output_num_dims, axis, + num_axis, input_iter, reducer, output_data); +} +} // namespace + +template +inline bool Mean(const T *input_data, const int *input_dims, const int input_num_dims, + T *output_data, const int *output_dims, const int output_num_dims, const int *axis, + const int num_axis_dimensions, bool, int *temp_index, int *resolved_axis, + U *temp_sum) +{ + // Reset output data. + size_t num_outputs = 1; + for (int idx = 0; idx < output_num_dims; ++idx) + { + size_t current = static_cast(output_dims[idx]); + // Overflow prevention. + if (num_outputs > std::numeric_limits::max() / current) + { + return false; + } + num_outputs *= current; + } + for (size_t idx = 0; idx < num_outputs; ++idx) + { + output_data[idx] = T(); + temp_sum[idx] = U(); + } + + // Resolve axis. + int num_resolved_axis = 0; + if (!resolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis, &num_resolved_axis)) + { + return false; + } + + if (!reduceSumImpl(input_data, input_dims, output_dims, input_num_dims, output_num_dims, + resolved_axis, num_resolved_axis, temp_index, temp_sum)) + { + return false; + } + + // Calculate mean by dividing output_data by num of aggregated element. + size_t num_elements_in_axis = 1; + for (int idx = 0; idx < num_resolved_axis; ++idx) + { + size_t current = static_cast(input_dims[resolved_axis[idx]]); + // Overflow prevention. 
+ if (current > (std::numeric_limits::max() / num_elements_in_axis)) + { + return false; + } + num_elements_in_axis *= current; + } + + if (num_elements_in_axis > 0) + { + for (size_t idx = 0; idx < num_outputs; ++idx) + { + output_data[idx] = static_cast(temp_sum[idx] / static_cast(num_elements_in_axis)); + } + } + return true; +} + +inline void Mean(const MeanParams &op_params, + const luci_interpreter::RuntimeShape &unextended_input_shape, + const float *input_data, + const luci_interpreter::RuntimeShape &unextended_output_shape, float *output_data) +{ + // Current implementation only supports dimension equals 4 and simultaneous + // reduction over width and height. + const luci_interpreter::RuntimeShape input_shape = + luci_interpreter::RuntimeShape::extendedShape(4, unextended_input_shape); + const luci_interpreter::RuntimeShape output_shape = + luci_interpreter::RuntimeShape::extendedShape(4, unextended_output_shape); + + const int output_batch = output_shape.dims(0); + const int output_depth = output_shape.dims(3); + + const int input_height = input_shape.dims(1); + const int input_width = input_shape.dims(2); + + for (int out_b = 0; out_b < output_batch; ++out_b) + { + for (int out_d = 0; out_d < output_depth; ++out_d) + { + float value = 0; + for (int in_h = 0; in_h < input_height; ++in_h) + { + for (int in_w = 0; in_w < input_width; ++in_w) + { + value += input_data[offset(input_shape.dimsData(), out_b, in_h, in_w, out_d)]; + } + } + output_data[offset(output_shape.dimsData(), out_b, 0, 0, out_d)] = + value / (input_width * input_height); + } + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_MEAN_COMMON_H diff --git a/onert-micro/luci-interpreter/pal/common/Params.h b/onert-micro/luci-interpreter/pal/common/Params.h index d641ab87bc1..a6b8e46bb45 100644 --- a/onert-micro/luci-interpreter/pal/common/Params.h +++ b/onert-micro/luci-interpreter/pal/common/Params.h @@ -21,6 +21,12 @@ namespace luci_interpreter_pal { +struct MeanParams +{ + int8_t axis_count; + int16_t axis[4]; +}; + struct PadParams { int8_t left_padding_count; diff --git a/onert-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst b/onert-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst index 04e28bb89c4..f8fc3f75568 100644 --- a/onert-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst +++ b/onert-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst @@ -45,6 +45,7 @@ REGISTER_KERNEL(LEAKY_RELU, LeakyRelu) REGISTER_KERNEL(LOG_SOFTMAX, LogSoftmax) REGISTER_KERNEL(MUL, Mul) REGISTER_KERNEL(MAXIMUM, Maximum) +REGISTER_KERNEL(MEAN, Mean) REGISTER_KERNEL(MAX_POOL_2D, MaxPool2D) REGISTER_KERNEL(MINIMUM, Minimum) REGISTER_KERNEL(CONCATENATION, Concatenation) diff --git a/onert-micro/luci-interpreter/src/kernels/Mean.cpp b/onert-micro/luci-interpreter/src/kernels/Mean.cpp index 4128aa68d1e..1b87336f55c 100644 --- a/onert-micro/luci-interpreter/src/kernels/Mean.cpp +++ b/onert-micro/luci-interpreter/src/kernels/Mean.cpp @@ -1,6 +1,5 @@ /* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,330 +14,103 @@ * limitations under the License. 
*/ -#include "kernels/Mean.h" - +#include "Builders.h" #include "kernels/Utils.h" +#include "TISOKernel.h" + +#include "PALMean.h" -#include +#include namespace luci_interpreter { -namespace kernels +namespace { +const int kMaxNumberOfAxis = 5; +const int kMaxNumberOfReducedAxis = 2; -static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params) +void ResolveAxis(const int *axis_data, int axis_count, luci_interpreter_pal::MeanParams *op_params) { - params->axis_count = num_axes; - for (int i = 0; i < num_axes; ++i) + int i = 0; + for (; i < axis_count; ++i) { - params->axis[i] = static_cast(axes_data[i]); + op_params->axis[i] = static_cast(axis_data[i]); } - for (int i = num_axes; i < 4; ++i) + for (; i < 4; ++i) { - params->axis[i] = 1; + op_params->axis[i] = 1; } + op_params->axis_count = axis_count; } -// Returns the number of axes that will be reduced. Removes duplicates. -static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims) -{ - int reduction_count = num_axes; - for (int i = 0; i < num_axes; ++i) - { - int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims; - assert(current >= 0 && current < input_num_dims); - for (int j = 0; j < i; j++) - { - int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims; - // This checks for duplicate axis - if (current == previous) - { - --reduction_count; - break; - } - } - } - return reduction_count; -} +} // namespace -static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes, - bool keep_dims) +void configure_kernel_CircleMean(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph) { - int input_num_dims = input_shape.num_dims(); - if (input_num_dims == 0) - { - return Shape(0); - } + kernels::TISOKernel kernel(cur_op, runtime_graph); - if (keep_dims) - { - Shape output_shape(input_num_dims); - for (int idx = 0; idx < input_num_dims; ++idx) - { - bool is_axis = false; - for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx) - { - if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx) - { - is_axis = true; - break; - } - } - if (is_axis) - { - output_shape.dim(idx) = 1; - } - else - { - output_shape.dim(idx) = input_shape.dim(idx); - } - } - return output_shape; - } - else - { - int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims); - Shape output_shape(input_num_dims - num_reduce_axes); - int num_skip_axes = 0; - for (int idx = 0; idx < input_num_dims; ++idx) - { - bool is_axis = false; - for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx) - { - if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx) - { - ++num_skip_axes; - is_axis = true; - break; - } - } - if (!is_axis) - { - output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx); - } - } - return output_shape; - } -} + LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) == + Tensor::element_type(kernel.output())); + LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input2()) == DataType::S32); -Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, - Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams ¶ms) - : KernelWithParams({input, axes}, {output, temp_index, resolved_axes, temp_sum}, - params) -{ + const int32_t axis_value = + kernels::getTensorData(runtime_graph->getConstDataByTensor(kernel.input2()))[0]; + LUCI_INTERPRETER_CHECK(axis_value >= 0); } -void Mean::configure() +void 
execute_kernel_CircleMean(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph) { - LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); - LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32); - if (input()->element_type() == DataType::S16) - { - LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0); - } + kernels::TISOKernel kernel(cur_op, runtime_graph); + kernels::TISOData tiso_data = kernel.readData(); - const Shape &input_shape = input()->shape(); - int input_num_dims = input_shape.num_dims(); + const auto *input = kernel.input1(); + const auto *axis = kernel.input2(); + const auto *output = kernel.output(); - const auto *axes_data = getTensorData(axes()); - int num_axes = axes()->shape().num_elements(); - assert(num_axes <= 4); - // TODO: enable it only if kernel with dynamic shapes - Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims); - output()->resize(output_shape); + const auto *options = cur_op->builtin_options_as_ReducerOptions(); - tflite::MeanParams params{}; - resolveAxes(axes_data, num_axes, ¶ms); - _need_temporaries = !( - _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 && - ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1))); - if (_need_temporaries) - { - auto temp_index = getOutputTensors()[1]; - auto resolved_axes = getOutputTensors()[2]; - auto temp_sum = getOutputTensors()[3]; + int num_axis = static_cast(Tensor::num_elements(axis)); + int temp_index[kMaxNumberOfAxis]; + int resolved_axis[kMaxNumberOfReducedAxis]; - temp_index->resize(Shape(input_num_dims)); - resolved_axes->resize(Shape(num_axes)); - temp_sum->resize(output()->shape()); - } - else - { - auto temp_index = getOutputTensors()[1]; - auto resolved_axes = getOutputTensors()[2]; - auto temp_sum = getOutputTensors()[3]; - - temp_index->set_allocatable(false); - resolved_axes->set_allocatable(false); - temp_sum->set_allocatable(false); - } -} - -void Mean::execute() const -{ - switch (input()->element_type()) + switch (Tensor::element_type(kernel.input1())) { +#ifndef DIS_FLOAT case DataType::FLOAT32: - evalFloat(); - break; - case DataType::U8: - evalQuantized(); - break; - case DataType::S16: - evalQuantizedS16(); - break; - default: - assert(false && "Unsupported type."); - } -} - -void Mean::evalFloat() const -{ - const Shape &input_shape = input()->shape(); - int input_num_dims = input_shape.num_dims(); - const auto *axes_data = getTensorData(axes()); - int num_axes = axes()->shape().num_elements(); - - tflite::MeanParams params{}; - resolveAxes(axes_data, num_axes, ¶ms); - - auto temp_index = getOutputTensors()[1]; - auto resolved_axes = getOutputTensors()[2]; - auto temp_sum = getOutputTensors()[3]; - - // Defer to specialized implementation for 4D Mean across axes 1 & 2. 
- if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 && - ((params.axis[0] == 1 && params.axis[1] == 2) || - (params.axis[0] == 2 && params.axis[1] == 1))) - { - tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData(input()), - getTensorShape(output()), getTensorData(output())); - } - else - { - tflite::reference_ops::Mean(getTensorData(input()), getTensorShape(input()).DimsData(), - input()->shape().num_dims(), getTensorData(output()), - getTensorShape(output()).DimsData(), output()->shape().num_dims(), - axes_data, num_axes, _params.keep_dims, - getTensorData(temp_index), getTensorData(resolved_axes), - getTensorData(temp_sum)); - } -} - -void Mean::evalQuantized() const -{ - const Shape &input_shape = input()->shape(); - int input_num_dims = input_shape.num_dims(); - const auto *axes_data = getTensorData(axes()); - int num_axes = axes()->shape().num_elements(); - - tflite::MeanParams params{}; - resolveAxes(axes_data, num_axes, ¶ms); - - auto temp_index = getOutputTensors()[1]; - auto resolved_axes = getOutputTensors()[2]; - auto temp_sum = getOutputTensors()[3]; - - // Defer to specialized implementation for 4D Mean across axes 1 & 2. - if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 && - ((params.axis[0] == 1 && params.axis[1] == 2) || - (params.axis[0] == 2 && params.axis[1] == 1))) - { - tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData(input()), - input()->zero_point(), input()->scale(), getTensorShape(output()), - getTensorData(output()), output()->zero_point(), - output()->scale()); - } - else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale()) - { - tflite::reference_ops::Mean(getTensorData(input()), getTensorShape(input()).DimsData(), - input()->shape().num_dims(), getTensorData(output()), - getTensorShape(output()).DimsData(), output()->shape().num_dims(), - axes_data, num_axes, _params.keep_dims, - getTensorData(temp_index), getTensorData(resolved_axes), - getTensorData(temp_sum)); - } - else - { - tflite::reference_ops::QuantizedMeanOrSum<>( - getTensorData(input()), input()->zero_point(), input()->scale(), - getTensorShape(input()).DimsData(), input()->shape().num_dims(), - getTensorData(output()), output()->zero_point(), output()->scale(), - getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes, - _params.keep_dims, getTensorData(temp_index), getTensorData(resolved_axes), - getTensorData(temp_sum), - /*compute_sum=*/false); - } -} - -void Mean::evalQuantizedS16() const -{ - const auto *input_data = getTensorData(input()); - auto *output_data = getTensorData(output()); - - const Shape &input_shape = input()->shape(); - const Shape &output_shape = output()->shape(); - - const auto *axes_data = getTensorData(axes()); - const int num_axes = axes()->shape().num_elements(); - - constexpr int32_t output_min = -std::numeric_limits::max(); - constexpr int32_t output_max = std::numeric_limits::max(); - - // Defer to specialized implementation for 4D Mean across axes 1 & 2. 
- if (_params.keep_dims && input_shape.num_dims() == 4 && num_axes == 2 && - ((axes_data[0] == 1 && axes_data[1] == 2) || (axes_data[0] == 2 && axes_data[1] == 1))) - { - const int32_t batches = input_shape.dim(0); - const int32_t input_height = input_shape.dim(1); - const int32_t input_width = input_shape.dim(2); - const int32_t depth = input_shape.dim(3); - assert(output_shape.num_dims() == 4); - assert(output_shape.dim(0) == batches); - assert(output_shape.dim(1) == 1); - assert(output_shape.dim(2) == 1); - assert(output_shape.dim(3) == depth); - - const double real_multiplier = - static_cast(input()->scale()) / static_cast(output()->scale()); - - int32_t output_multiplier{}; - int output_shift{}; - quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); - - const int32_t num_elements_in_axes = input_height * input_width; - - for (int32_t batch = 0; batch < batches; ++batch) { - for (int32_t c = 0; c < depth; ++c) + luci_interpreter_pal::MeanParams op_params; + ResolveAxis(kernels::getTensorData(tiso_data.input2_data), num_axis, &op_params); + + // Special case mean implementation exists for 4D mean across axes 1 + // and 2. + bool special_case_4d_axes_1_and_2 = Tensor::num_dims(input) == 4 && + op_params.axis_count == 2 && + ((op_params.axis[0] == 1 && op_params.axis[1] == 2) || + (op_params.axis[0] == 2 && op_params.axis[1] == 1)); + + // Defer to specialized implementation for 4D Mean across axes 1 & 2. + if (options->keep_dims() && special_case_4d_axes_1_and_2) { - int32_t acc = 0; - for (int32_t in_y = 0; in_y < input_height; ++in_y) - { - for (int32_t in_x = 0; in_x < input_width; ++in_x) - { - acc += input_data[calcOffset(input_shape, batch, in_y, in_x, c)]; - } - } - int32_t scaled_acc = - tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); - // Divide by the number of elements rounding to the nearest integer. - scaled_acc = scaled_acc > 0 - ? (scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes - : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes; - - scaled_acc = std::max(scaled_acc, output_min); - scaled_acc = std::min(scaled_acc, output_max); - - output_data[calcOffset(output_shape, batch, 0, 0, c)] = scaled_acc; + luci_interpreter_pal::Mean(op_params, kernels::getTensorShape(input), + kernels::getTensorData(tiso_data.input1_data), + kernels::getTensorShape(output), + kernels::getTensorData(tiso_data.output_data)); + } + else + { + luci_interpreter_pal::Mean( + kernels::getTensorData(tiso_data.input1_data), wrap(input->shape()).data(), + Tensor::num_dims(input), kernels::getTensorData(tiso_data.output_data), + wrap(output->shape()).data(), Tensor::num_dims(output), + kernels::getTensorData(tiso_data.input2_data), num_axis, options->keep_dims(), + temp_index, resolved_axis, kernels::getTensorData(tiso_data.output_data)); } } - } - else - { - assert(false && "Unsupported configuration."); + break; +#endif // DIS_FLOAT + default: + assert(false && "Unsupported type"); } } -} // namespace kernels } // namespace luci_interpreter diff --git a/onert-micro/luci-interpreter/src/kernels/Mean.h b/onert-micro/luci-interpreter/src/kernels/Mean.h deleted file mode 100644 index ed07ae56177..00000000000 --- a/onert-micro/luci-interpreter/src/kernels/Mean.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LUCI_INTERPRETER_KERNELS_MEAN_H -#define LUCI_INTERPRETER_KERNELS_MEAN_H - -#include "core/Kernel.h" -#include "core/KernelParams.h" - -#include - -namespace luci_interpreter -{ -namespace kernels -{ - -class Mean : public KernelWithParams -{ -public: - Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, - Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams ¶ms); - - const Tensor *input() const { return _inputs[0]; } - const Tensor *axes() const { return _inputs[1]; } - Tensor *output() const { return _outputs[0]; } - - void configure() override; - void execute() const override; - -private: - void evalFloat() const; - void evalQuantized() const; - void evalQuantizedS16() const; - -private: - bool _need_temporaries = false; -}; - -} // namespace kernels -} // namespace luci_interpreter - -#endif // LUCI_INTERPRETER_KERNELS_MEAN_H diff --git a/onert-micro/luci-interpreter/src/kernels/Mean.test.cpp b/onert-micro/luci-interpreter/src/kernels/Mean.test.cpp index d2c00935ab0..bf03d4ac048 100644 --- a/onert-micro/luci-interpreter/src/kernels/Mean.test.cpp +++ b/onert-micro/luci-interpreter/src/kernels/Mean.test.cpp @@ -15,14 +15,14 @@ * limitations under the License. */ -#include "kernels/Mean.h" #include "kernels/TestUtils.h" -#include "luci_interpreter/TestMemoryManager.h" +#include "luci_interpreter/test_models/mean/FloatMeanKernel.h" +#include "luci_interpreter/test_models/mean/NegMeanKernel.h" + +#include "loader/ModuleLoader.h" namespace luci_interpreter { -namespace kernels -{ namespace { @@ -30,211 +30,58 @@ using namespace testing; class MeanTest : public ::testing::Test { -protected: - void SetUp() override { _memory_manager = std::make_unique(); } - - std::unique_ptr _memory_manager; + // Do nothing }; -TEST_F(MeanTest, FloatKeepDims) -{ - std::vector input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, - 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, - 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; - - std::vector axis_data{0, 2}; - Tensor input_tensor = - makeInputTensor({4, 3, 2}, input_data, _memory_manager.get()); - Tensor axis_tensor = makeInputTensor({2}, axis_data, _memory_manager.get()); - Tensor temp_index(DataType::S32, Shape({}), {}, ""); - Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); - Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); - Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); - - ReducerParams params{}; - params.keep_dims = true; - - Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, - params); - kernel.configure(); - _memory_manager->allocate_memory(temp_index); - _memory_manager->allocate_memory(resolved_axes); - _memory_manager->allocate_memory(temp_sum); - _memory_manager->allocate_memory(output_tensor); - kernel.execute(); - - std::vector ref_output_data{10.5, 12.5, 14.5}; - std::initializer_list ref_output_shape{1, 3, 1}; - EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); - EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); -} - 
-TEST_F(MeanTest, FloatKeepDims4DMean) -{ - std::vector input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, - 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, - 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; - - std::vector axis_data{1, 2}; - Tensor input_tensor = - makeInputTensor({2, 2, 3, 2}, input_data, _memory_manager.get()); - Tensor axis_tensor = makeInputTensor({2}, axis_data, _memory_manager.get()); - Tensor temp_index(DataType::S32, Shape({}), {}, ""); - Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); - Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); - Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); - - ReducerParams params{}; - params.keep_dims = true; - - Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, - params); - kernel.configure(); - _memory_manager->allocate_memory(temp_index); - _memory_manager->allocate_memory(resolved_axes); - _memory_manager->allocate_memory(temp_sum); - _memory_manager->allocate_memory(output_tensor); - kernel.execute(); - - std::vector ref_output_data{6, 7, 18, 19}; - std::initializer_list ref_output_shape{2, 1, 1, 2}; - EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); - EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); -} - -TEST_F(MeanTest, FloatNotKeepDims) +template std::vector checkMeanKernel(test_kernel::TestDataBase *test_data_base) { - std::vector input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, - 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, - 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; - - std::vector axis_data{1, 0, -3, -3}; - Tensor input_tensor = - makeInputTensor({4, 3, 2}, input_data, _memory_manager.get()); - Tensor axis_tensor = makeInputTensor({4}, axis_data, _memory_manager.get()); - Tensor temp_index(DataType::S32, Shape({}), {}, ""); - Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); - Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); - Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); - - ReducerParams params{}; - params.keep_dims = false; + MemoryManager memory_manager{}; + RuntimeModule runtime_module{}; + bool dealloc_input = true; - Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, - params); - kernel.configure(); - _memory_manager->allocate_memory(temp_index); - _memory_manager->allocate_memory(resolved_axes); - _memory_manager->allocate_memory(temp_sum); - _memory_manager->allocate_memory(output_tensor); - kernel.execute(); + // Load model with single op + auto *model_data_raw = reinterpret_cast(test_data_base->get_model_ptr()); + ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input); - std::vector ref_output_data{12, 13}; - std::initializer_list ref_output_shape{2}; - EXPECT_THAT(extractTensorData(output_tensor), FloatArrayNear(ref_output_data)); - EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); -} - -TEST_F(MeanTest, Uint8KeepDims) -{ - float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255); - std::vector input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; - std::pair quant_param = quantizationParams(-1.0f, 1.0f); + auto *main_runtime_graph = runtime_module.getMainGraph(); + assert(main_runtime_graph->getNumOfInputTensors() == 1); - std::vector axis_data{1}; - Tensor input_tensor = makeInputTensor({3, 2}, quant_param.first, quant_param.second, - input_data, _memory_manager.get()); - Tensor axis_tensor = 
makeInputTensor({1}, axis_data, _memory_manager.get()); - Tensor temp_index(DataType::S32, Shape({}), {}, ""); - Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); - Tensor temp_sum(DataType::U8, Shape({}), {}, ""); - Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + // Set input data + { + auto *input_tensor_data = reinterpret_cast(main_runtime_graph->configureGraphInput(0)); + std::copy(test_data_base->get_input_data_by_index(0).begin(), + test_data_base->get_input_data_by_index(0).end(), input_tensor_data); + } - ReducerParams params{}; - params.keep_dims = true; + runtime_module.execute(); - Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, - params); - kernel.configure(); - _memory_manager->allocate_memory(temp_index); - _memory_manager->allocate_memory(resolved_axes); - _memory_manager->allocate_memory(temp_sum); - _memory_manager->allocate_memory(output_tensor); - kernel.execute(); + assert(main_runtime_graph->getNumOfOutputTensors() == 1); - std::vector ref_output_data{0.3, 0.35, 0.55}; - std::initializer_list ref_output_shape{3, 1}; - EXPECT_THAT(dequantizeTensorData(output_tensor), - FloatArrayNear(ref_output_data, kQuantizedTolerance)); - EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + T *output_data = reinterpret_cast(main_runtime_graph->getOutputDataByIndex(0)); + const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T)); + std::vector output_data_vector(output_data, output_data + num_elements); + return output_data_vector; } -TEST_F(MeanTest, Uint8NotKeepDims) +TEST_F(MeanTest, Float_P) { - float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255); - std::vector input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; - std::pair quant_param = quantizationParams(-1.0f, 1.0f); - - std::vector axis_data{1}; - Tensor input_tensor = makeInputTensor( - {1, 3, 2}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); - Tensor axis_tensor = makeInputTensor({1}, axis_data, _memory_manager.get()); - Tensor temp_index(DataType::S32, Shape({}), {}, ""); - Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); - Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); - Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); - - ReducerParams params{}; - params.keep_dims = false; - - Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, - params); - kernel.configure(); - _memory_manager->allocate_memory(temp_index); - _memory_manager->allocate_memory(resolved_axes); - _memory_manager->allocate_memory(temp_sum); - _memory_manager->allocate_memory(output_tensor); - kernel.execute(); - - std::vector ref_output_data{0.4, 0.4}; - std::initializer_list ref_output_shape{1, 2}; - EXPECT_THAT(dequantizeTensorData(output_tensor), - FloatArrayNear(ref_output_data, kQuantizedTolerance)); - EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + test_kernel::TestDataFloatMean test_data_kernel; + std::vector output_data_vector = checkMeanKernel(&test_data_kernel); + EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear( + test_data_kernel.get_output_data_by_index(0), 0.0001f)); } -TEST_F(MeanTest, SInt16KeepDims4D) +TEST_F(MeanTest, Input_output_type_mismatch_NEG) { - std::vector input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, - 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, - 17.0, 
18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; - std::vector axes_data{1, 2}; - std::vector ref_output_data{6, 7, 18, 19}; - - Tensor input_tensor = - makeInputTensor({2, 2, 3, 2}, 0.25, 0, input_data, _memory_manager.get()); - Tensor axes_tensor = makeInputTensor({2}, axes_data, _memory_manager.get()); - Tensor temp_index(DataType::S32, Shape({}), {}, ""); - Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); - Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); - Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0); - - ReducerParams params{}; - params.keep_dims = true; - - Mean kernel(&input_tensor, &axes_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, - params); - kernel.configure(); - _memory_manager->allocate_memory(temp_index); - _memory_manager->allocate_memory(resolved_axes); - _memory_manager->allocate_memory(temp_sum); - _memory_manager->allocate_memory(output_tensor); - kernel.execute(); - - EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 1, 2})); - EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + test_kernel::NegTestDataInputOutputTypeMismatchMeanKernel test_data_kernel; + MemoryManager memory_manager{}; + RuntimeModule runtime_module{}; + bool dealloc_input = true; + // Load model with single op + auto *model_data_raw = reinterpret_cast(test_data_kernel.get_model_ptr()); + EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input), + ""); } } // namespace -} // namespace kernels } // namespace luci_interpreter