diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp
index 4cb2dc9058551f..82723d0fa543ab 100644
--- a/src/plugins/intel_cpu/src/nodes/conv.cpp
+++ b/src/plugins/intel_cpu/src/nodes/conv.cpp
@@ -980,6 +980,31 @@ void Convolution::createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
         memory::data_type bdt = outDnnlDesc.get_data_type();
 #else
         memory::data_type bdt = memory::data_type::f32;
+        /* brdgmm_dw_conv gives a larger performance gain for bf16/fp16 inference,
+           but it supports only a bias type (bia_type) equal to src_type or dst_type.
+           This dw convolution kernel is supported in oneDNN 3.5.
+           BF16:
+           kernel type | brdgmm_dw_conv              | jit_uni_dw_convolution_fwd_t
+           impl type   | native bf16 ISA without AMX | avx512_core_bf16 or avx512_core
+           bias type   | oneof(src, dst)             | oneof(src, dst, f32)
+           FP16:
+           kernel type | brdgmm_dw_conv              | brgemm_convolution_fwd_t
+           impl type   | native fp16 ISA without AMX | native fp16 ISA
+           bias type   | oneof(src, dst)             | oneof(src, dst, f32)
+           @todo: this bias type change may have a minor accuracy impact on some models; once upstream oneDNN
+           extends this kind of bias type support (ticket MFDNN-12936), we can return to bdt = memory::data_type::f32 here.
+        */
+        auto out_dt = outDnnlDesc.get_data_type();
+        if (!canBeExecutedInInt8() && isDepthWise()) {
+            bool isF16BiasSupported = (out_dt == memory::data_type::f16) && hasHardwareSupport(ov::element::f16);
+            bool isBF16BiasSupported = (out_dt == memory::data_type::bf16) &&
+                                       (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16) ||
+                                        dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2));
+
+            if (isF16BiasSupported || isBF16BiasSupported) {
+                bdt = out_dt;
+            }
+        }
 #endif
         biasDnnlDesc = dnnl::memory::desc(DnnlExtensionUtils::convertToDnnlDims(expectedBiasDims),
                                           bdt, memory::format_tag::any);
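
Not part of the diff, but possibly useful to reviewers: a small standalone sketch showing how the kernel selection this hunk relies on can be observed directly from oneDNN. It builds a bf16 depthwise convolution primitive descriptor twice, once with an f32 bias and once with a bf16 bias, and prints the implementation name oneDNN reports, so the effect of matching the bias type to src/dst can be checked outside the plugin. All shapes, strides, and the 3x3 depthwise layout below are arbitrary example values; only convolution_forward::primitive_desc and impl_info_str() are the library's actual API.

    // Minimal sketch, not part of the PR: compare which oneDNN implementation is
    // chosen for a bf16 depthwise convolution with an f32 bias vs. a bf16 bias.
    #include <iostream>
    #include <oneapi/dnnl/dnnl.hpp>

    int main() {
        using namespace dnnl;
        engine eng(engine::kind::cpu, 0);

        // Depthwise (grouped) 3x3 convolution on a 1x64x56x56 bf16 tensor (example shapes).
        const memory::dim N = 1, C = 64, H = 56, W = 56;
        memory::desc src_md({N, C, H, W}, memory::data_type::bf16, memory::format_tag::any);
        memory::desc wei_md({C, 1, 1, 3, 3}, memory::data_type::bf16, memory::format_tag::any);  // {G, OC/G, IC/G, KH, KW}
        memory::desc dst_md({N, C, H, W}, memory::data_type::bf16, memory::format_tag::any);

        for (auto bdt : {memory::data_type::f32, memory::data_type::bf16}) {
            memory::desc bia_md({C}, bdt, memory::format_tag::any);
            try {
                convolution_forward::primitive_desc pd(eng,
                                                       prop_kind::forward_inference,
                                                       algorithm::convolution_direct,
                                                       src_md, wei_md, bia_md, dst_md,
                                                       {1, 1},   // strides
                                                       {1, 1},   // padding_l
                                                       {1, 1});  // padding_r
                std::cout << (bdt == memory::data_type::f32 ? "f32 " : "bf16") << " bias -> "
                          << pd.impl_info_str() << std::endl;
            } catch (const dnnl::error& e) {
                // bf16 convolutions can only be created on CPUs with bf16 support.
                std::cout << "primitive creation failed: " << e.message << std::endl;
            }
        }
        return 0;
    }

On a CPU with a native bf16 ISA but no AMX, the two runs are expected to report different implementations, which is the gap the bias-type change in this hunk is targeting.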