diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp
index 9b7d4a573ddd32..85f5c41ebf5701 100644
--- a/src/plugins/intel_cpu/src/graph_optimizer.cpp
+++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp
@@ -919,6 +919,10 @@ void GraphOptimizer::FuseFCAndTransposeOnWeights(Graph& graph) {
 
 void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph& graph) {
     const auto& graphNodes = graph.GetNodes();
+// zero point fusing is skipped on ARM platforms because oneDNN is not involved in int8 convolution inference
+#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
+    return;
+#endif
 
     auto isSuitableConvNode = [](const NodePtr& node) {
         bool retVal = false;
diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_conv.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_conv.cpp
index fd5f5ca4a238a3..28be0a66da1159 100644
--- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_conv.cpp
+++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_conv.cpp
@@ -94,6 +94,9 @@ bool ACLConvolutionExecutor::supports(const ConvConfig& config) {
                              config.descs.at(ARG_WEI)->getPrecision() == ov::element::i8;
     VERIFY(isQuantized, UNSUPPORTED_SRC_PRECISIONS);
 
+    if (config.attrs.withBias) {
+        VERIFY(config.descs.at(ARG_BIAS)->getPrecision() == ov::element::i32, UNSUPPORTED_BIAS_PRECISIONS);
+    }
     VERIFY(config.attrs.postOps.size() <= 1U, UNSUPPORTED_BY_EXECUTOR);
 
     return true;
diff --git a/src/plugins/intel_cpu/src/nodes/executors/convolution_implementations.cpp b/src/plugins/intel_cpu/src/nodes/executors/convolution_implementations.cpp
index 78a45bd10bb76f..60d993fac68133 100644
--- a/src/plugins/intel_cpu/src/nodes/executors/convolution_implementations.cpp
+++ b/src/plugins/intel_cpu/src/nodes/executors/convolution_implementations.cpp
@@ -59,11 +59,14 @@ static const TypeMapping dnnlConvTypeMapping {
     {{_f32, _half_float | _i8, _any, _any}, {bypass(), bypass(), use<0>(), use<0>()}},
     {{_bf16, _f16, _any, _any}, {bypass(), bypass(), use<0>(), use<0>()}},
     {{_f16, _bf16, _any, _any}, {bypass(), bypass(), use<0>(), use<0>()}},
-    // quantization configuration
+    // quantization configuration is not applicable for ARM
+    // because there is a dedicated low-precision implementation for ARM
+#if !defined(OPENVINO_ARCH_ARM64) && !defined(OPENVINO_ARCH_ARM)
     // int8 conv does not support f16 output and bias
     {{_u8 | _i8, _i8, _quant | _bf16 | _f32 | _i32 | _dynamic, _quant | _bf16 | _f32 | _i32 | _dynamic}, {bypass(), bypass(), bypass(), bypass()}},
     {{_u8 | _i8, _i8, _f16, _u8 | _i8 | _i32 | _bf16 | _f32}, {bypass(), bypass(), just(), bypass()}},
     {{_u8 | _i8, _i8, _any, _any}, {bypass(), bypass(), just(), just()}},
+#endif
     // @todo should we fallback to FPXX instead of _f32?
     {{_any, _any, _any, _any}, {just(), just(), just(), just()}},
     // @todo explicitly cover configuration limitations for oneDNN on ARM
@@ -71,8 +74,8 @@ static const TypeMapping dnnlConvTypeMapping {
 
 static const TypeMapping aclLowpConvTypeMapping {
     // {src, wei, bia, dst}                                   pt
-    {{_u8, _u8 | _i8, _any, _u8}, {bypass(), bypass(), just(), bypass()}},
-    {{_i8, _i8, _any, _i8}, {bypass(), bypass(), just(), bypass()}},
+    {{_u8, _u8 | _i8, _i32 | _dynamic, _u8}, {bypass(), bypass(), bypass(), bypass()}},
+    {{_i8, _i8, _i32 | _dynamic, _i8}, {bypass(), bypass(), bypass(), bypass()}},
 };
 // clang-format on
 struct CreateOptimalConfigDefault {
@@ -245,13 +248,23 @@ const std::vector<ExecutorImplementation<ConvAttrs>>& getImplementations() {
             AcceptsAnyShape,
             CreateDnnlDefault{}
         )
+        OV_CPU_INSTANCE_ACL(
+            "convolution_acl_lowp", ExecutorType::Acl, OperationType::Convolution,
+            // supports
+            [](const ConvConfig& config, [[maybe_unused]] const MemoryFormatFilter& memoryFormatFilter) -> bool {
+                VERIFY(ACLConvolutionExecutor::supports(config), UNSUPPORTED_BY_EXECUTOR);
+                return true;
+            },
+            CreateOptimalConfigAclLowp{{LayoutType::ncsp, LayoutType::ncsp, LayoutType::ncsp, LayoutType::ncsp}},
+            AcceptsAnyShape,
+            CreateDefault{}
+        )
         OV_CPU_INSTANCE_ACL(
             "convolution_dnnl_nspc_nspc_unconditional_acl", ExecutorType::Dnnl, OperationType::Convolution,
             // supports
             [](const ConvConfig& config, const MemoryFormatFilter& memoryFormatFilter) -> bool {
                 VERIFY(MatchesMemoryFormatFilter(config.descs, LayoutConfig{LayoutType::nspc, LayoutType::ncsp, LayoutType::nspc, LayoutType::nspc},
                                                  memoryFormatFilter, dnnlConvolutionMappingNotation), MEMORY_FORMAT_MISMATCH);
-                VERIFY(!isQuantized(config), UNSUPPORTED_SRC_PRECISIONS);
                 return true;
             },
             CreateOptimalConfigDefault{{LayoutType::nspc, LayoutType::ncsp, LayoutType::nspc, LayoutType::nspc}},
@@ -274,17 +287,6 @@ const std::vector<ExecutorImplementation<ConvAttrs>>& getImplementations() {
             AcceptsAnyShape,
             CreateDnnlDefault{}
         )
-        OV_CPU_INSTANCE_ACL(
-            "convolution_acl_lowp", ExecutorType::Acl, OperationType::Convolution,
-            // supports
-            [](const ConvConfig& config, [[maybe_unused]] const MemoryFormatFilter& memoryFormatFilter) -> bool {
-                VERIFY(ACLConvolutionExecutor::supports(config), UNSUPPORTED_BY_EXECUTOR);
-                return true;
-            },
-            CreateOptimalConfigAclLowp{{LayoutType::ncsp, LayoutType::ncsp, LayoutType::ncsp, LayoutType::ncsp}},
-            AcceptsAnyShape,
-            CreateDefault{}
-        )
     };
 
     return convolutionImplementations;
diff --git a/src/plugins/intel_cpu/src/nodes/executors/debug_messages.hpp b/src/plugins/intel_cpu/src/nodes/executors/debug_messages.hpp
index 578c176a60dd8a..c813dc318ba26f 100644
--- a/src/plugins/intel_cpu/src/nodes/executors/debug_messages.hpp
+++ b/src/plugins/intel_cpu/src/nodes/executors/debug_messages.hpp
@@ -11,6 +11,7 @@
 #define UNSUPPORTED_TYPE_OF_POSTOPS " the type of post ops is not supported"
 #define UNSUPPORTED_SRC_PRECISIONS " unsupported src precisions"
 #define UNSUPPORTED_WEI_PRECISIONS " unsupported wei precisions"
+#define UNSUPPORTED_BIAS_PRECISIONS " unsupported bias precisions"
 #define UNSUPPORTED_DST_PRECISIONS " unsupported dst precisions"
 #define UNSUPPORTED_ISA " unsupported isa"
 #define UNSUPPORTED_SRC_RANK " unsupported src rank"
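
For clarity, below is a minimal standalone sketch of the acceptance rule this patch adds to ACLConvolutionExecutor::supports: src/wei must be int8-quantized, and a bias, when present, must already be i32, mirroring the _i32 | _dynamic bias entry in aclLowpConvTypeMapping. The types here (SketchConvConfig, aclLowpSupports) are hypothetical stand-ins for illustration only, not the plugin's real ConvConfig/MemoryDesc API.

// Minimal sketch of the new acceptance rule, with hypothetical stand-in types
// (the real check works on ConvConfig and MemoryDesc precisions).
#include <iostream>
#include <optional>

enum class Precision { u8, i8, i32, f16, f32 };

struct SketchConvConfig {
    Precision src;
    Precision wei;
    std::optional<Precision> bias;  // std::nullopt means the convolution has no bias
};

bool aclLowpSupports(const SketchConvConfig& cfg) {
    // UNSUPPORTED_SRC_PRECISIONS: only u8/i8 activations with i8 weights are taken
    const bool isQuantized = (cfg.src == Precision::u8 || cfg.src == Precision::i8) &&
                             cfg.wei == Precision::i8;
    if (!isQuantized) {
        return false;
    }
    // UNSUPPORTED_BIAS_PRECISIONS: a bias, if present, must already be i32
    if (cfg.bias.has_value() && *cfg.bias != Precision::i32) {
        return false;
    }
    return true;
}

int main() {
    std::cout << aclLowpSupports({Precision::u8, Precision::i8, Precision::i32}) << '\n';  // 1: accepted
    std::cout << aclLowpSupports({Precision::u8, Precision::i8, Precision::f32}) << '\n';  // 0: non-i32 bias rejected
    std::cout << aclLowpSupports({Precision::i8, Precision::i8, std::nullopt}) << '\n';    // 1: no bias is fine
}

Since aclLowpConvTypeMapping now lists _i32 | _dynamic with bypass() instead of _any with just(), the bias is no longer force-converted by the type-mapping stage; a quantized convolution with a non-i32 bias is rejected by the ACL low-precision executor and falls back to the other registered convolution implementations.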