@@ -59,20 +59,23 @@ static const TypeMapping dnnlConvTypeMapping {
5959 {{_f32, _half_float | _i8, _any, _any}, {bypass (), bypass (), use<0 >(), use<0 >()}},
6060 {{_bf16, _f16, _any, _any}, {bypass (), bypass (), use<0 >(), use<0 >()}},
6161 {{_f16, _bf16, _any, _any}, {bypass (), bypass (), use<0 >(), use<0 >()}},
62- // quantization configuration
62+ // quantization configuration is not applicable on ARM
63+ // because ARM has a dedicated low-precision implementation
64+ #if !defined(OPENVINO_ARCH_ARM64) && !defined(OPENVINO_ARCH_ARM)
6365 // int8 conv does not support f16 output and bias
6466 {{_u8 | _i8, _i8, _quant |_bf16 | _f32 | _i32 | _dynamic, _quant | _bf16 | _f32 | _i32 | _dynamic}, {bypass (), bypass (), bypass (), bypass ()}},
6567 {{_u8 | _i8, _i8, _f16, _u8 | _i8 | _i32 | _bf16 | _f32}, {bypass (), bypass (), just<f32 >(), bypass ()}},
6668 {{_u8 | _i8, _i8, _any, _any}, {bypass (), bypass (), just<f32 >(), just<f32 >()}},
69+ #endif
6770 // @todo should we fallback to FPXX instead of _f32?
6871 {{_any, _any, _any, _any}, {just<f32 >(), just<f32 >(), just<f32 >(), just<f32 >()}},
6972 // @todo explicitly cover configuration limitations for oneDNN on ARM
7073};
7174
// Precision mapping table used by the ACL low-precision convolution path
// (NOTE(review): "lowp" read as low-precision int8/uint8 from the name — confirm).
// Each row pairs a {src, wei, bia, dst} precision pattern with four per-tensor
// entries listed in the same src, wei, bias, dst order.
// NOTE(review): bypass() presumably keeps the incoming precision and just<T>()
// presumably forces precision T — confirm against the TypeMapping helpers.
7275static const TypeMapping aclLowpConvTypeMapping {
7376 // {src, wei, bia, dst} pt<src, wei, bias, dst>
74- {{_u8, _u8 | _i8, _any , _u8}, {bypass (), bypass (), just< i32 > (), bypass ()}},  // bias pattern is _any; bias entry is just<i32>
75- {{_i8, _i8, _any , _i8}, {bypass (), bypass (), just< i32 > (), bypass ()}},  // bias pattern is _any; bias entry is just<i32>
77+ {{_u8, _u8 | _i8, _i32 | _dynamic , _u8}, {bypass (), bypass (), bypass (), bypass ()}},  // bias pattern narrowed to _i32 | _dynamic; all four entries are bypass
78+ {{_i8, _i8, _i32 | _dynamic , _i8}, {bypass (), bypass (), bypass (), bypass ()}},  // bias pattern narrowed to _i32 | _dynamic; all four entries are bypass
7679};
7780// clang-format on
7881struct CreateOptimalConfigDefault {
@@ -245,13 +248,23 @@ const std::vector<ExecutorImplementation<ConvAttrs>>& getImplementations() {
245248 AcceptsAnyShape<ConvAttrs>,
246249 CreateDnnlDefault<DnnlConvolutionPrimitive, ConvAttrs>{}
247250 )
251+ OV_CPU_INSTANCE_ACL (
252+ " convolution_acl_lowp" , ExecutorType::Acl, OperationType::Convolution,
253+ // supports
254+ [](const ConvConfig& config, [[maybe_unused]] const MemoryFormatFilter& memoryFormatFilter) -> bool {
255+ VERIFY (ACLConvolutionExecutor::supports (config), UNSUPPORTED_BY_EXECUTOR);
256+ return true ;
257+ },
258+ CreateOptimalConfigAclLowp{{LayoutType::ncsp, LayoutType::ncsp, LayoutType::ncsp, LayoutType::ncsp}},
259+ AcceptsAnyShape<ConvAttrs>,
260+ CreateDefault<ACLConvolutionExecutor, ConvAttrs>{}
261+ )
248262 OV_CPU_INSTANCE_ACL (
249263 " convolution_dnnl_nspc_nspc_unconditional_acl" , ExecutorType::Dnnl, OperationType::Convolution,
250264 // supports
251265 [](const ConvConfig& config, const MemoryFormatFilter& memoryFormatFilter) -> bool {
252266 VERIFY (MatchesMemoryFormatFilter (config.descs , LayoutConfig{LayoutType::nspc, LayoutType::ncsp, LayoutType::nspc, LayoutType::nspc},
253267 memoryFormatFilter, dnnlConvolutionMappingNotation), MEMORY_FORMAT_MISMATCH);
254- VERIFY (!isQuantized (config), UNSUPPORTED_SRC_PRECISIONS);
255268 return true ;
256269 },
257270 CreateOptimalConfigDefault{{LayoutType::nspc, LayoutType::ncsp, LayoutType::nspc, LayoutType::nspc}},
@@ -274,17 +287,6 @@ const std::vector<ExecutorImplementation<ConvAttrs>>& getImplementations() {
274287 AcceptsAnyShape<ConvAttrs>,
275288 CreateDnnlDefault<DnnlConvolutionPrimitive, ConvAttrs>{}
276289 )
277- OV_CPU_INSTANCE_ACL (
278- " convolution_acl_lowp" , ExecutorType::Acl, OperationType::Convolution,
279- // supports
280- [](const ConvConfig& config, [[maybe_unused]] const MemoryFormatFilter& memoryFormatFilter) -> bool {
281- VERIFY (ACLConvolutionExecutor::supports (config), UNSUPPORTED_BY_EXECUTOR);
282- return true ;
283- },
284- CreateOptimalConfigAclLowp{{LayoutType::ncsp, LayoutType::ncsp, LayoutType::ncsp, LayoutType::ncsp}},
285- AcceptsAnyShape<ConvAttrs>,
286- CreateDefault<ACLConvolutionExecutor, ConvAttrs>{}
287- )
288290 };
289291
290292 return convolutionImplementations;
0 commit comments