Skip to content

Commit d818b9c

Browse files
authored
[CPU][ARM] ACL int8 Convolution executor shouldn't support non-i32 bias (#32638)
### Details: - Before the fix, the ACL int8 convolution executor was chosen for the fp32 bias case. The previous type mapping forced an fp32-to-int32 conversion, which led to accuracy degradation - The type mapping has been fixed to accept an i32 bias only - If the bias is not i32, such a case is handled by the dnnl executor. To do that, the order of the ARM executors has been changed: the int8 executor first, the default dnnl executor next. ### Tickets: - CVS-175513
1 parent d8266a9 commit d818b9c

File tree

4 files changed

+25
-15
lines changed

4 files changed

+25
-15
lines changed

src/plugins/intel_cpu/src/graph_optimizer.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -919,6 +919,10 @@ void GraphOptimizer::FuseFCAndTransposeOnWeights(Graph& graph) {
919919

920920
void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph& graph) {
921921
const auto& graphNodes = graph.GetNodes();
922+
// zero points fusing is skipped on ARM platforms because oneDNN is not involved into int8 convolution inference
923+
#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
924+
return;
925+
#endif
922926

923927
auto isSuitableConvNode = [](const NodePtr& node) {
924928
bool retVal = false;

src/plugins/intel_cpu/src/nodes/executors/acl/acl_conv.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ bool ACLConvolutionExecutor::supports(const ConvConfig& config) {
9494
config.descs.at(ARG_WEI)->getPrecision() == ov::element::i8;
9595

9696
VERIFY(isQuantized, UNSUPPORTED_SRC_PRECISIONS);
97+
if (config.attrs.withBias) {
98+
VERIFY(config.descs.at(ARG_BIAS)->getPrecision() == ov::element::i32, UNSUPPORTED_BIAS_PRECISIONS);
99+
}
97100
VERIFY(config.attrs.postOps.size() <= 1U, UNSUPPORTED_BY_EXECUTOR);
98101

99102
return true;

src/plugins/intel_cpu/src/nodes/executors/convolution_implementations.cpp

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -59,20 +59,23 @@ static const TypeMapping dnnlConvTypeMapping {
5959
{{_f32, _half_float | _i8, _any, _any}, {bypass(), bypass(), use<0>(), use<0>()}},
6060
{{_bf16, _f16, _any, _any}, {bypass(), bypass(), use<0>(), use<0>()}},
6161
{{_f16, _bf16, _any, _any}, {bypass(), bypass(), use<0>(), use<0>()}},
62-
// quantization configuration
62+
// quantization configuration is not applicable for ARM
63+
// because there is the dedicated low-precision implementation for ARM
64+
#if !defined(OPENVINO_ARCH_ARM64) && !defined(OPENVINO_ARCH_ARM)
6365
// int8 conv does not support f16 output and bias
6466
{{_u8 | _i8, _i8, _quant |_bf16 | _f32 | _i32 | _dynamic, _quant | _bf16 | _f32 | _i32 | _dynamic}, {bypass(), bypass(), bypass(), bypass()}},
6567
{{_u8 | _i8, _i8, _f16, _u8 | _i8 | _i32 | _bf16 | _f32}, {bypass(), bypass(), just<f32>(), bypass()}},
6668
{{_u8 | _i8, _i8, _any, _any}, {bypass(), bypass(), just<f32>(), just<f32>()}},
69+
#endif
6770
// @todo should we fallback to FPXX instead of _f32?
6871
{{_any, _any, _any, _any}, {just<f32>(), just<f32>(), just<f32>(), just<f32>()}},
6972
// @todo explicitly cover configuration limitations for oneDNN on ARM
7073
};
7174

7275
static const TypeMapping aclLowpConvTypeMapping {
7376
// {src, wei, bia, dst} pt<src, wei, bias, dst>
74-
{{_u8, _u8 | _i8, _any, _u8}, {bypass(), bypass(), just<i32>(), bypass()}},
75-
{{_i8, _i8, _any, _i8}, {bypass(), bypass(), just<i32>(), bypass()}},
77+
{{_u8, _u8 | _i8, _i32 | _dynamic, _u8}, {bypass(), bypass(), bypass(), bypass()}},
78+
{{_i8, _i8, _i32 | _dynamic, _i8}, {bypass(), bypass(), bypass(), bypass()}},
7679
};
7780
// clang-format on
7881
struct CreateOptimalConfigDefault {
@@ -245,13 +248,23 @@ const std::vector<ExecutorImplementation<ConvAttrs>>& getImplementations() {
245248
AcceptsAnyShape<ConvAttrs>,
246249
CreateDnnlDefault<DnnlConvolutionPrimitive, ConvAttrs>{}
247250
)
251+
OV_CPU_INSTANCE_ACL(
252+
"convolution_acl_lowp", ExecutorType::Acl, OperationType::Convolution,
253+
// supports
254+
[](const ConvConfig& config, [[maybe_unused]] const MemoryFormatFilter& memoryFormatFilter) -> bool {
255+
VERIFY(ACLConvolutionExecutor::supports(config), UNSUPPORTED_BY_EXECUTOR);
256+
return true;
257+
},
258+
CreateOptimalConfigAclLowp{{LayoutType::ncsp, LayoutType::ncsp, LayoutType::ncsp, LayoutType::ncsp}},
259+
AcceptsAnyShape<ConvAttrs>,
260+
CreateDefault<ACLConvolutionExecutor, ConvAttrs>{}
261+
)
248262
OV_CPU_INSTANCE_ACL(
249263
"convolution_dnnl_nspc_nspc_unconditional_acl", ExecutorType::Dnnl, OperationType::Convolution,
250264
// supports
251265
[](const ConvConfig& config, const MemoryFormatFilter& memoryFormatFilter) -> bool {
252266
VERIFY(MatchesMemoryFormatFilter(config.descs, LayoutConfig{LayoutType::nspc, LayoutType::ncsp, LayoutType::nspc, LayoutType::nspc},
253267
memoryFormatFilter, dnnlConvolutionMappingNotation), MEMORY_FORMAT_MISMATCH);
254-
VERIFY(!isQuantized(config), UNSUPPORTED_SRC_PRECISIONS);
255268
return true;
256269
},
257270
CreateOptimalConfigDefault{{LayoutType::nspc, LayoutType::ncsp, LayoutType::nspc, LayoutType::nspc}},
@@ -274,17 +287,6 @@ const std::vector<ExecutorImplementation<ConvAttrs>>& getImplementations() {
274287
AcceptsAnyShape<ConvAttrs>,
275288
CreateDnnlDefault<DnnlConvolutionPrimitive, ConvAttrs>{}
276289
)
277-
OV_CPU_INSTANCE_ACL(
278-
"convolution_acl_lowp", ExecutorType::Acl, OperationType::Convolution,
279-
// supports
280-
[](const ConvConfig& config, [[maybe_unused]] const MemoryFormatFilter& memoryFormatFilter) -> bool {
281-
VERIFY(ACLConvolutionExecutor::supports(config), UNSUPPORTED_BY_EXECUTOR);
282-
return true;
283-
},
284-
CreateOptimalConfigAclLowp{{LayoutType::ncsp, LayoutType::ncsp, LayoutType::ncsp, LayoutType::ncsp}},
285-
AcceptsAnyShape<ConvAttrs>,
286-
CreateDefault<ACLConvolutionExecutor, ConvAttrs>{}
287-
)
288290
};
289291

290292
return convolutionImplementations;

src/plugins/intel_cpu/src/nodes/executors/debug_messages.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#define UNSUPPORTED_TYPE_OF_POSTOPS " the type of post ops is not supported"
1212
#define UNSUPPORTED_SRC_PRECISIONS " unsupported src precisions"
1313
#define UNSUPPORTED_WEI_PRECISIONS " unsupported wei precisions"
14+
#define UNSUPPORTED_BIAS_PRECISIONS " unsupported bias precisions"
1415
#define UNSUPPORTED_DST_PRECISIONS " unsupported dst precisions"
1516
#define UNSUPPORTED_ISA " unsupported isa"
1617
#define UNSUPPORTED_SRC_RANK " unsupported src rank"

0 commit comments

Comments
 (0)