From 45b719be022cc7478554e64fe41997e653a633eb Mon Sep 17 00:00:00 2001 From: Wanglei Shen Date: Wed, 8 Jan 2025 22:40:48 +0800 Subject: [PATCH] identify arm cpu isa and update model prefer threads for tput (#28207) ### Details: - *identify arm cpu isa and update model prefer threads for tput* ### Tickets: - *CVS-148164* --- .../intel_cpu/src/cpu_streams_calculation.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp index 1f20f6f2fa183b..d5eb594a519248 100644 --- a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp +++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp @@ -9,6 +9,9 @@ #include #include +#if (defined(OPENVINO_ARCH_ARM64) && defined(__linux__)) +# include "cpu/aarch64/cpu_isa_traits.hpp" +#endif #include "cpu_map_scheduling.hpp" #include "graph.h" #include "openvino/op/fake_quantize.hpp" @@ -552,6 +555,12 @@ int get_model_prefer_threads(const int num_streams, const int sockets = get_num_sockets(); auto model_prefer = 0; if (-1 == config.modelPreferThreads) { +#if (defined(OPENVINO_ARCH_ARM64) && defined(__linux__)) + config.modelPreferThreads = 8; + if (dnnl::impl::cpu::aarch64::mayiuse(dnnl::impl::cpu::aarch64::cpu_isa_t::sve_128)) { + config.modelPreferThreads = 16; + } +#else const auto isa = dnnl::get_effective_cpu_isa(); float isaSpecificThreshold = 1.0f; switch (isa) { @@ -579,7 +588,7 @@ int get_model_prefer_threads(const int num_streams, ov::MemBandwidthPressure networkToleranceForLowCache = ov::mem_bandwidth_pressure_tolerance(model, L2_cache_size, memThresholdAssumeLimitedForISA); -#if ((defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)) && defined(__linux__)) +# if (defined(OPENVINO_ARCH_ARM) && defined(__linux__)) config.modelPreferThreads = 4; if (networkToleranceForLowCache.max_mem_tolerance == ov::MemBandwidthPressure::UNKNOWN) { if (networkToleranceForLowCache.ratio_compute_convs == ov::MemBandwidthPressure::ALL) { @@ -590,7 +599,7 @@ int get_model_prefer_threads(const int num_streams, (networkToleranceForLowCache.ratio_mem_limited_gemms > ov::MemBandwidthPressure::LIMITED))) { config.modelPreferThreads = 8; } -#elif ((defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)) && defined(__APPLE__)) +# elif ((defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)) && defined(__APPLE__)) config.modelPreferThreads = 1; if (networkToleranceForLowCache.max_mem_tolerance == ov::MemBandwidthPressure::UNKNOWN) { if ((networkToleranceForLowCache.ratio_compute_convs == ov::MemBandwidthPressure::ALL) || @@ -612,7 +621,7 @@ int get_model_prefer_threads(const int num_streams, networkToleranceForLowCache.ratio_compute_convs > ov::MemBandwidthPressure::LIMITED) { config.modelPreferThreads = 2; } -#else +# else config.modelPreferThreads = 0; if (networkToleranceForLowCache.max_mem_tolerance == ov::MemBandwidthPressure::UNKNOWN) { if ((networkToleranceForLowCache.ratio_compute_convs == ov::MemBandwidthPressure::ALL) || @@ -631,6 +640,7 @@ int get_model_prefer_threads(const int num_streams, (proc_type_table[0][HYPER_THREADING_PROC] == proc_type_table[0][MAIN_CORE_PROC])) { config.modelPreferThreads = 2; } +# endif #endif }