Skip to content

Commit

Permalink
[infra/onert] Bump up XNNPack version (Samsung#13022)
Browse files Browse the repository at this point in the history
This commit updates XNNPack and dependent packages version.

ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh <[email protected]>
  • Loading branch information
hseok-oh authored May 22, 2024
1 parent c3bd1e2 commit eeffa46
Show file tree
Hide file tree
Showing 17 changed files with 125 additions and 29 deletions.
4 changes: 2 additions & 2 deletions infra/cmake/packages/CpuInfoSourceConfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ function(_CpuInfoSource_import)
nnas_include(OptionTools)

envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
# CPUINFO commit from tflite v2.8
envoption(CPUINFO_URL ${EXTERNAL_DOWNLOAD_SERVER}/pytorch/cpuinfo/archive/5916273f79a21551890fd3d56fc5375a78d1598d.tar.gz)
# CPUINFO commit from tflite v2.16.1
envoption(CPUINFO_URL ${EXTERNAL_DOWNLOAD_SERVER}/pytorch/cpuinfo/archive/ef634603954d88d2643d5809011288b890ac126e.tar.gz)
ExternalSource_Download(CPUINFO
DIRNAME CPUINFO
URL ${CPUINFO_URL})
Expand Down
4 changes: 2 additions & 2 deletions infra/cmake/packages/Fp16SourceConfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ function(_Fp16Source_import)
nnas_include(OptionTools)

envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
# fp16 commit in xnnpack 8b283aa30a31
envoption(FP16_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/FP16/archive/4dfe081cf6bcd15db339cf2680b9281b8451eeb3.tar.gz)
# fp16 commit in xnnpack (tflite v2.16.1)
envoption(FP16_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/FP16/archive/0a92994d729ff76a58f692d3028ca1b64b145d91.tar.gz)
ExternalSource_Download(FP16
DIRNAME FP16
URL ${FP16_URL})
Expand Down
4 changes: 2 additions & 2 deletions infra/cmake/packages/FxdivSourceConfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ function(_FxdivSource_import)
nnas_include(OptionTools)

envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
# fxdiv commit in xnnpack 8b283aa30a31
envoption(FXDIV_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/FXdiv/archive/f8c5354679ec2597792bc70a9e06eff50c508b9a.tar.gz)
# fxdiv commit in tflite v2.16.1
envoption(FXDIV_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/FXdiv/archive/63058eff77e11aa15bf531df5dd34395ec3017c8.tar.gz)
ExternalSource_Download(FXDIV
DIRNAME FXDIV
URL ${FXDIV_URL})
Expand Down
4 changes: 2 additions & 2 deletions infra/cmake/packages/PthreadpoolSourceConfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ function(_PthreadpoolSource_import)
nnas_include(OptionTools)

envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
# pthreadpool commit in xnnpack 8b283aa30a31
envoption(PTHREADPOOL_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/pthreadpool/archive/029c88620802e1361ccf41d1970bd5b07fd6b7bb.tar.gz)
# pthreadpool commit in xnnpack (tflite v2.16.1)
envoption(PTHREADPOOL_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/pthreadpool/archive/4fe0e1e183925bf8cfa6aae24237e724a96479b8.tar.gz)
ExternalSource_Download(PTHREADPOOL
DIRNAME PTHREADPOOL
URL ${PTHREADPOOL_URL})
Expand Down
52 changes: 52 additions & 0 deletions infra/cmake/packages/XnnpackSource.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
--- a/src/configs/dwconv-config.c
+++ b/src/configs/dwconv-config.c
@@ -688,6 +688,7 @@
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
assert(hardware_config != NULL);
if (hardware_config->use_arm_neon) {
+#if defined(XNN_ENABLE_ASSEMBLY) && XNN_ENABLE_ASSEMBLY
if (hardware_config->use_arm_neon_v8) {
qs8_qc8w_dwconv_config[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_fn) xnn_qs8_qc8w_dwconv_minmax_fp32_ukernel_3p16c__asm_aarch32_neonv8_mla8_cortex_a35;
qs8_qc8w_dwconv_config[0].init.qs8_qc8w = xnn_init_qs8_qc8w_conv_minmax_fp32_neonv8_params;
@@ -708,6 +709,9 @@
qs8_qc8w_dwconv_config[2].channel_round = 1;
qs8_qc8w_dwconv_config[2].primary_tile = 25;
} else {
+#else
+ {
+#endif
qs8_qc8w_dwconv_config[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_fn) xnn_qs8_qc8w_dwconv_minmax_fp32_ukernel_3p16c__neon_mla8_ld128;
qs8_qc8w_dwconv_config[0].init.qs8_qc8w = xnn_init_qs8_qc8w_conv_minmax_fp32_neon_params;
qs8_qc8w_dwconv_config[0].channel_tile = 16;
--- a/src/configs/hardware-config.c
+++ b/src/configs/hardware-config.c
@@ -99,7 +99,11 @@
hardware_config.use_arm_neon = cpuinfo_has_arm_neon();
hardware_config.use_arm_neon_fp16 = cpuinfo_has_arm_neon_fp16();
hardware_config.use_arm_neon_fma = cpuinfo_has_arm_neon_fma();
+#if defined(XNN_ENABLE_ASSEMBLY) && XNN_ENABLE_ASSEMBLY
hardware_config.use_arm_neon_v8 = cpuinfo_has_arm_neon_v8();
+#else
+ hardware_config.use_arm_neon_v8 = false;
+#endif
#endif

#if XNN_ARCH_ARM64
--- a/src/configs/unary-elementwise-config.c
+++ b/src/configs/unary-elementwise-config.c
@@ -1806,11 +1806,15 @@
#if XNN_ARCH_ARM
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
assert(hardware_config != NULL);
+#if defined(XNN_ENABLE_ASSEMBLY) && XNN_ENABLE_ASSEMBLY
if (hardware_config->use_arm_neon) {
qs16_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs16_qs8_vcvt_ukernel__asm_aarch32_neon_u16;
qs16_to_qs8_cvt_config.init.qs16_qs8_cvt = xnn_init_qs16_qs8_cvt_neon_params;
qs16_to_qs8_cvt_config.element_tile = 16;
} else if (!XNN_PLATFORM_MOBILE) {
+#else
+ if (!XNN_PLATFORM_MOBILE) {
+#endif
qs16_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs16_qs8_vcvt_ukernel__scalar_u4;
qs16_to_qs8_cvt_config.init.qs16_qs8_cvt = xnn_init_qs16_qs8_cvt_scalar_params;
qs16_to_qs8_cvt_config.element_tile = 4;
9 changes: 6 additions & 3 deletions infra/cmake/packages/XnnpackSourceConfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,14 @@ function(_XnnpackSource_import)
nnas_include(OptionTools)

envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
# xnnpack commit in tflite v2.3
envoption(XNNPACK_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/XNNPACK/archive/8b283aa30a3186c6e640aed520543e9c067132d.tar.gz)
# xnnpack latest commit (2024.05.20)
# xnnpack in tflite v2.16.1 is not stable on armv7l gbs and linux cross build process (assembly microkernel build issue)
# Patch: workaround to resolve build fail by forcing disable using armv8 feature on gbs build and arm linux cross build under gcc 10
envoption(XNNPACK_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/XNNPACK/archive/fcb36699c67201ceff7358df42730809e8f2c9cc.tar.gz)
ExternalSource_Download(XNNPACK
DIRNAME XNNPACK
URL ${XNNPACK_URL})
URL ${XNNPACK_URL}
PATCH ${CMAKE_CURRENT_LIST_DIR}/XnnpackSource.patch)

set(XnnpackSource_DIR ${XNNPACK_SOURCE_DIR} PARENT_SCOPE)
set(XnnpackSource_FOUND TRUE PARENT_SCOPE)
Expand Down
3 changes: 3 additions & 0 deletions infra/nnfw/cmake/options/options_armv7l-linux.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,6 @@ option(BUILD_GPU_CL "Build gpu_cl backend" ON)
option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" ON)
option(DOWNLOAD_PYBIND11 "Download Pybind11 source" ON)
option(BUILD_PYTHON_BINDING "Build python binding" ON)

# Under linux gcc 10.0, required header for xnnpack arm build is not supported
cmake_dependent_option(BUILD_XNNPACK "Build xnnpack library from the downloaded source" OFF "CXX_COMPILER_VERSION VERSION_LESS 10.0" ON)
4 changes: 2 additions & 2 deletions infra/nnfw/cmake/packages/TensorFlowGpuConfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ function(_Build_TfliteGpuDelagate_)
nnas_find_package(Farmhash REQUIRED)
return_unless(Farmhash_FOUND)

nnas_find_package(Fp16Source REQUIRED)
return_unless(Fp16Source_FOUND)
nnfw_find_package(Fp16 REQUIRED)
return_unless(Fp16_FOUND)

nnas_find_package(VulkanSource QUIET)
return_unless(VulkanSource_FOUND)
Expand Down
15 changes: 13 additions & 2 deletions infra/nnfw/cmake/packages/XnnpackConfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ function(_Xnnpack_Build)
nnfw_find_package(Fxdiv QUIET)
nnfw_find_package(CpuInfo QUIET)
nnfw_find_package(Pthreadpool QUIET)
nnfw_find_package(Psimd QUIET)
nnfw_find_package(Fp16 QUIET)

# NOTE This line prevents multiple definitions of cpuinfo target
Expand All @@ -19,12 +18,24 @@ function(_Xnnpack_Build)
return()
endif(NOT XnnpackSource_FOUND)

set(XNNPACK_LIBRARY_TYPE "static")
set(XNNPACK_BUILD_TESTS OFF CACHE BOOL "Build XNNPACK unit tests")
set(XNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "Build XNNPACK benchmarks")
set(XNNPACK_USE_SYSTEM_LIBS ON CACHE BOOL "Use system-provided dependency libraries")

# microkernel build is not supported under gcc 9.x and clang
# TODO Enable this
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10.0 OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(XNNPACK_ENABLE_ASSEMBLY OFF CACHE BOOL "Build XNNPACK with assembly micro-kernels")
set(XNNPACK_ENABLE_ARM_I8MM OFF CACHE BOOL "Build XNNPACK with ARM I8MM (8-bit integer matrix multiply accumulate) micro-kernels")
# Set definition: used on patched code
add_compile_definitions("XNN_ENABLE_ASSEMBLY=$<BOOL:${XNNPACK_ENABLE_ASSEMBLY}>")
endif()

# Set -fPIC property to XNNPack and linked libraries
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

add_extdirectory("${XnnpackSource_DIR}" XNNPACK EXCLUDE_FROM_ALL)
set_target_properties(XNNPACK PROPERTIES POSITION_INDEPENDENT_CODE ON)
# Suppress warnings generated by xnnpack
set_target_properties(XNNPACK PROPERTIES COMPILE_FLAGS "-Wno-deprecated-declarations")
set(XnnpackSource_DIR ${XnnpackSource_DIR} PARENT_SCOPE)
Expand Down
Binary file modified packaging/CPUINFO.tar.gz
Binary file not shown.
Binary file modified packaging/FP16.tar.gz
Binary file not shown.
Binary file modified packaging/FXDIV.tar.gz
Binary file not shown.
Binary file modified packaging/PTHREADPOOL.tar.gz
Binary file not shown.
Binary file modified packaging/XNNPACK.tar.gz
Binary file not shown.
20 changes: 15 additions & 5 deletions runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ bool ConvolutionLayer::create()
input_channels /* input_channel_stride */, output_channels /* output_channel_stride */,
reinterpret_cast<const float *>(_kernel->buffer()),
reinterpret_cast<const float *>(_bias->buffer()), output_activation_min, output_activation_max,
0, &_kernel_op);
0, nullptr, nullptr, &_kernel_op);
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to create FP32 Convolution operator"};
Expand All @@ -131,10 +131,20 @@ bool ConvolutionLayer::setup()
uint32_t input_width = _input->getShape().dim(2);
uint32_t input_height = _input->getShape().dim(1);
uint32_t batch_size = _input->getShape().dim(0);
enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
_kernel_op, batch_size, input_height, input_width,
reinterpret_cast<const float *>(_input->buffer()), reinterpret_cast<float *>(_output->buffer()),
_external_context->getThreadPool());
size_t workspace_size = 0;
size_t workspace_alignment = 0;
enum xnn_status status = xnn_reshape_convolution2d_nhwc_f32(
_kernel_op, batch_size, input_height, input_width, &workspace_size, &workspace_alignment,
nullptr, nullptr, _external_context->getThreadPool());
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to reshape FP32 Convolution operator"};
}

std::vector<uint8_t> workspace(workspace_size);
status = xnn_setup_convolution2d_nhwc_f32(_kernel_op, workspace.data(),
reinterpret_cast<const float *>(_input->buffer()),
reinterpret_cast<float *>(_output->buffer()));
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to create FP32 Convolution operator"};
Expand Down
20 changes: 15 additions & 5 deletions runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ bool DepthwiseConvolutionLayer::create()
_multiplier /* group_output_channels */, input_channels /* input_channel_stride */,
output_channels /* output_channel_stride */, reinterpret_cast<const float *>(_kernel->buffer()),
reinterpret_cast<const float *>(_bias->buffer()), output_activation_min, output_activation_max,
XNN_FLAG_DEPTHWISE_CONVOLUTION, &_kernel_op);
XNN_FLAG_DEPTHWISE_CONVOLUTION, nullptr, nullptr, &_kernel_op);
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to create FP32 DepthwiseConvolution operator"};
Expand All @@ -132,10 +132,20 @@ bool DepthwiseConvolutionLayer::setup()
uint32_t input_width = _input->getShape().dim(2);
uint32_t input_height = _input->getShape().dim(1);
uint32_t batch_size = _input->getShape().dim(0);
enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
_kernel_op, batch_size, input_height, input_width,
reinterpret_cast<const float *>(_input->buffer()), reinterpret_cast<float *>(_output->buffer()),
_external_context->getThreadPool());
size_t workspace_size = 0;
size_t workspace_alignment = 0;
enum xnn_status status = xnn_reshape_convolution2d_nhwc_f32(
_kernel_op, batch_size, input_height, input_width, &workspace_size, &workspace_alignment,
nullptr, nullptr, _external_context->getThreadPool());
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to reshape FP32 DepthwiseConvolution operator"};
}

std::vector<uint8_t> workspace(workspace_size);
status = xnn_setup_convolution2d_nhwc_f32(_kernel_op, workspace.data(),
reinterpret_cast<const float *>(_input->buffer()),
reinterpret_cast<float *>(_output->buffer()));
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to create FP32 DepthwiseConvolution operator"};
Expand Down
15 changes: 11 additions & 4 deletions runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ bool FullyConnectedLayer::create()
enum xnn_status status = xnn_create_fully_connected_nc_f32(
input_channels, output_channels, input_channels /* input stride */,
output_channels /* output stride */, kernel_buffer, bias_buffer, output_activation_min,
output_activation_max, flag, &_kernel_op);
output_activation_max, flag, nullptr, nullptr, &_kernel_op);
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to create FP32 FullyConnected operator"};
Expand All @@ -122,9 +122,16 @@ bool FullyConnectedLayer::setup()
}

uint32_t batch_size = _input->getShape().num_elements() / _kernel->getShape().dim(1);
enum xnn_status status = xnn_setup_fully_connected_nc_f32(
_kernel_op, batch_size, reinterpret_cast<const float *>(_input->buffer()),
reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
enum xnn_status status =
xnn_reshape_fully_connected_nc_f32(_kernel_op, batch_size, _external_context->getThreadPool());
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to reshape FP32 FullyConnected operator"};
}

status =
xnn_setup_fully_connected_nc_f32(_kernel_op, reinterpret_cast<const float *>(_input->buffer()),
reinterpret_cast<float *>(_output->buffer()));
if (status != xnn_status_success)
{
throw std::runtime_error{"failed to create FP32 FullyConnected operator"};
Expand Down

0 comments on commit eeffa46

Please sign in to comment.