Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/plugins/intel_cpu/src/graph_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,10 @@ void GraphOptimizer::FuseFCAndTransposeOnWeights(Graph& graph) {

void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph& graph) {
const auto& graphNodes = graph.GetNodes();
// Zero-point fusing is skipped on ARM platforms because oneDNN is not involved in int8 convolution inference there
#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
return;
#endif

auto isSuitableConvNode = [](const NodePtr& node) {
bool retVal = false;
Expand Down
3 changes: 3 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_conv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ bool ACLConvolutionExecutor::supports(const ConvConfig& config) {
config.descs.at(ARG_WEI)->getPrecision() == ov::element::i8;

VERIFY(isQuantized, UNSUPPORTED_SRC_PRECISIONS);
if (config.attrs.withBias) {
VERIFY(config.descs.at(ARG_BIAS)->getPrecision() == ov::element::i32, UNSUPPORTED_BIAS_PRECISIONS);
}
VERIFY(config.attrs.postOps.size() <= 1U, UNSUPPORTED_BY_EXECUTOR);

return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,20 +59,23 @@ static const TypeMapping dnnlConvTypeMapping {
{{_f32, _half_float | _i8, _any, _any}, {bypass(), bypass(), use<0>(), use<0>()}},
{{_bf16, _f16, _any, _any}, {bypass(), bypass(), use<0>(), use<0>()}},
{{_f16, _bf16, _any, _any}, {bypass(), bypass(), use<0>(), use<0>()}},
// quantization configuration
// quantization configuration is not applicable to ARM
// because ARM has a dedicated low-precision implementation
#if !defined(OPENVINO_ARCH_ARM64) && !defined(OPENVINO_ARCH_ARM)
// int8 conv does not support f16 output and bias
{{_u8 | _i8, _i8, _quant |_bf16 | _f32 | _i32 | _dynamic, _quant | _bf16 | _f32 | _i32 | _dynamic}, {bypass(), bypass(), bypass(), bypass()}},
{{_u8 | _i8, _i8, _f16, _u8 | _i8 | _i32 | _bf16 | _f32}, {bypass(), bypass(), just<f32>(), bypass()}},
{{_u8 | _i8, _i8, _any, _any}, {bypass(), bypass(), just<f32>(), just<f32>()}},
#endif
// @todo should we fallback to FPXX instead of _f32?
{{_any, _any, _any, _any}, {just<f32>(), just<f32>(), just<f32>(), just<f32>()}},
// @todo explicitly cover configuration limitations for oneDNN on ARM
};

// Precision mapping table for the ACL low-precision convolution path.
// Each row pairs a {src, wei, bia, dst} precision pattern with one
// per-argument policy (presumably bypass() keeps the incoming precision and
// just<i32>() forces i32 — confirm against the TypeMapping helpers).
// NOTE(review): this text appears to come from a diff rendering with the
// +/- markers stripped. The two rows with an `_any` bias pattern look like the
// removed lines and the two rows with `_i32 | _dynamic` look like their
// replacements; as written, the `_any` rows use the same src/wei/dst patterns
// and a wider bias pattern than the rows after them. Confirm against the
// merged file before relying on all four rows.
static const TypeMapping aclLowpConvTypeMapping {
// {src, wei, bia, dst} pt<src, wei, bias, dst>
{{_u8, _u8 | _i8, _any, _u8}, {bypass(), bypass(), just<i32>(), bypass()}},
{{_i8, _i8, _any, _i8}, {bypass(), bypass(), just<i32>(), bypass()}},
{{_u8, _u8 | _i8, _i32 | _dynamic, _u8}, {bypass(), bypass(), bypass(), bypass()}},
{{_i8, _i8, _i32 | _dynamic, _i8}, {bypass(), bypass(), bypass(), bypass()}},
};
// clang-format on
struct CreateOptimalConfigDefault {
Expand Down Expand Up @@ -245,13 +248,23 @@ const std::vector<ExecutorImplementation<ConvAttrs>>& getImplementations() {
AcceptsAnyShape<ConvAttrs>,
CreateDnnlDefault<DnnlConvolutionPrimitive, ConvAttrs>{}
)
OV_CPU_INSTANCE_ACL(
"convolution_acl_lowp", ExecutorType::Acl, OperationType::Convolution,
// supports
[](const ConvConfig& config, [[maybe_unused]] const MemoryFormatFilter& memoryFormatFilter) -> bool {
VERIFY(ACLConvolutionExecutor::supports(config), UNSUPPORTED_BY_EXECUTOR);
return true;
},
CreateOptimalConfigAclLowp{{LayoutType::ncsp, LayoutType::ncsp, LayoutType::ncsp, LayoutType::ncsp}},
AcceptsAnyShape<ConvAttrs>,
CreateDefault<ACLConvolutionExecutor, ConvAttrs>{}
)
OV_CPU_INSTANCE_ACL(
"convolution_dnnl_nspc_nspc_unconditional_acl", ExecutorType::Dnnl, OperationType::Convolution,
// supports
[](const ConvConfig& config, const MemoryFormatFilter& memoryFormatFilter) -> bool {
VERIFY(MatchesMemoryFormatFilter(config.descs, LayoutConfig{LayoutType::nspc, LayoutType::ncsp, LayoutType::nspc, LayoutType::nspc},
memoryFormatFilter, dnnlConvolutionMappingNotation), MEMORY_FORMAT_MISMATCH);
VERIFY(!isQuantized(config), UNSUPPORTED_SRC_PRECISIONS);
return true;
},
CreateOptimalConfigDefault{{LayoutType::nspc, LayoutType::ncsp, LayoutType::nspc, LayoutType::nspc}},
Expand All @@ -274,17 +287,6 @@ const std::vector<ExecutorImplementation<ConvAttrs>>& getImplementations() {
AcceptsAnyShape<ConvAttrs>,
CreateDnnlDefault<DnnlConvolutionPrimitive, ConvAttrs>{}
)
OV_CPU_INSTANCE_ACL(
"convolution_acl_lowp", ExecutorType::Acl, OperationType::Convolution,
// supports
[](const ConvConfig& config, [[maybe_unused]] const MemoryFormatFilter& memoryFormatFilter) -> bool {
VERIFY(ACLConvolutionExecutor::supports(config), UNSUPPORTED_BY_EXECUTOR);
return true;
},
CreateOptimalConfigAclLowp{{LayoutType::ncsp, LayoutType::ncsp, LayoutType::ncsp, LayoutType::ncsp}},
AcceptsAnyShape<ConvAttrs>,
CreateDefault<ACLConvolutionExecutor, ConvAttrs>{}
)
};

return convolutionImplementations;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#define UNSUPPORTED_TYPE_OF_POSTOPS " the type of post ops is not supported"
#define UNSUPPORTED_SRC_PRECISIONS " unsupported src precisions"
#define UNSUPPORTED_WEI_PRECISIONS " unsupported wei precisions"
#define UNSUPPORTED_BIAS_PRECISIONS " unsupported bias precisions"
#define UNSUPPORTED_DST_PRECISIONS " unsupported dst precisions"
#define UNSUPPORTED_ISA " unsupported isa"
#define UNSUPPORTED_SRC_RANK " unsupported src rank"
Expand Down
Loading