diff --git a/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst b/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst index 8ab8a43031ca39..5a706061777594 100644 --- a/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst +++ b/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst @@ -349,7 +349,7 @@ following usage message: [-api {sync,async}] [-nireq NUMBER_INFER_REQUESTS] [-nstreams NUMBER_STREAMS] [-inference_only [INFERENCE_ONLY]] [-infer_precision INFER_PRECISION] [-ip {bool,f16,f32,f64,i8,i16,i32,i64,u8,u16,u32,u64}] [-op {bool,f16,f32,f64,i8,i16,i32,i64,u8,u16,u32,u64}] [-iop INPUT_OUTPUT_PRECISION] [--mean_values [R,G,B]] [--scale_values [R,G,B]] - [-nthreads NUMBER_THREADS] [-pin {YES,NO,NUMA,HYBRID_AWARE}] [-latency_percentile LATENCY_PERCENTILE] + [-nthreads NUMBER_THREADS] [-pin {YES,NO}] [-latency_percentile LATENCY_PERCENTILE] [-report_type {no_counters,average_counters,detailed_counters}] [-report_folder REPORT_FOLDER] [-pc [PERF_COUNTS]] [-pcsort {no_sort,sort,simple_sort}] [-pcseq [PCSEQ]] [-exec_graph_path EXEC_GRAPH_PATH] [-dump_config DUMP_CONFIG] [-load_config LOAD_CONFIG] @@ -462,10 +462,8 @@ following usage message: -nthreads NUMBER_THREADS, --number_threads NUMBER_THREADS Number of threads to use for inference on the CPU (including HETERO and MULTI cases). - -pin {YES,NO,NUMA,HYBRID_AWARE}, --infer_threads_pinning {YES,NO,NUMA,HYBRID_AWARE} - Optional. Enable threads->cores ('YES' which is OpenVINO runtime's default for conventional CPUs), threads->(NUMA)nodes ('NUMA'), - threads->appropriate core types ('HYBRID_AWARE', which is OpenVINO runtime's default for Hybrid CPUs) or completely disable ('NO') CPU threads - pinning for CPU-involved inference. + -pin {YES,NO}, --infer_threads_pinning {YES,NO} + Optional. Enable threads->cores pinning for CPU-involved inference. Statistics dumping options: @@ -577,11 +575,7 @@ following usage message: Device-specific performance options: -nthreads Optional. Number of threads to use for inference on the CPU (including HETERO and MULTI cases). - -pin ("YES"|"CORE") / "HYBRID_AWARE" / ("NO"|"NONE") / "NUMA" Optional. Explicit inference threads binding options (leave empty to let the OpenVINO make a choice): - enabling threads->cores pinning("YES", which is already default for any conventional CPU), - letting the runtime to decide on the threads->different core types("HYBRID_AWARE", which is default on the hybrid CPUs) - threads->(NUMA)nodes("NUMA") or - completely disable("NO") CPU inference threads pinning + -pin "YES" / "NO" Optional. Explicit threads->cores pinning for CPU inference tasks (leave empty to let the OpenVINO make a choice). Statistics dumping options: -latency_percentile Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median). diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device.rst index f1a914e6b9dac3..30d376e18a608a 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/cpu-device.rst @@ -357,7 +357,6 @@ All parameters must be set before calling ``ov::Core::compile_model()`` in order - ``ov::hint::enable_hyper_threading`` - ``ov::hint::enable_cpu_pinning`` - ``ov::num_streams`` -- ``ov::affinity`` - ``ov::inference_num_threads`` - ``ov::cache_dir`` - ``ov::intel_cpu::denormals_optimization`` @@ -373,8 +372,6 @@ Read-only properties - ``ov::device::full_name`` - ``ov::device::capabilities`` -.. note:: - ``ov::affinity`` is replaced by ``ov::hint::enable_cpu_pinning``. As such, it is deprecated in the 2024.0 release and will be removed in the 2025 release. External Dependencies ########################################################### diff --git a/samples/cpp/benchmark_app/benchmark_app.hpp b/samples/cpp/benchmark_app/benchmark_app.hpp index cf38ff6708ad29..ab7fddb4d7d9a9 100644 --- a/samples/cpp/benchmark_app/benchmark_app.hpp +++ b/samples/cpp/benchmark_app/benchmark_app.hpp @@ -179,13 +179,8 @@ static const char infer_num_threads_message[] = "Optional. Number of threads to "(including HETERO and MULTI cases)."; // @brief message for CPU threads pinning option -static const char infer_threads_pinning_message[] = - "Optional. Explicit inference threads binding options (leave empty to let the OpenVINO make a choice):\n" - "\t\t\t\tenabling threads->cores pinning(\"YES\", which is already default for any conventional CPU), \n" - "\t\t\t\tletting the runtime to decide on the threads->different core types(\"HYBRID_AWARE\", which is default on " - "the hybrid CPUs) \n" - "\t\t\t\tthreads->(NUMA)nodes(\"NUMA\") or \n" - "\t\t\t\tcompletely disable(\"NO\") CPU inference threads pinning"; +static const char infer_threads_pinning_message[] = "Optional. Explicit threads->cores pinning for CPU inference tasks " + "(leave empty to let the OpenVINO make a choice)."; // @brief message for switching memory allocation type option static const char use_device_mem_message[] = @@ -426,8 +421,7 @@ static void show_usage() { std::cout << std::endl; std::cout << "Device-specific performance options:" << std::endl; std::cout << " -nthreads " << infer_num_threads_message << std::endl; - std::cout << " -pin (\"YES\"|\"CORE\") / \"HYBRID_AWARE\" / (\"NO\"|\"NONE\") / \"NUMA\" " - << infer_threads_pinning_message << std::endl; + std::cout << " -pin \"YES\" / \"NO\" " << infer_threads_pinning_message << std::endl; std::cout << " -use_device_mem " << use_device_mem_message << std::endl; std::cout << std::endl; std::cout << "Statistics dumping options:" << std::endl; diff --git a/samples/cpp/benchmark_app/main.cpp b/samples/cpp/benchmark_app/main.cpp index 1f1b89c2427e67..0d025524f82b8e 100644 --- a/samples/cpp/benchmark_app/main.cpp +++ b/samples/cpp/benchmark_app/main.cpp @@ -490,21 +490,11 @@ int main(int argc, char* argv[]) { } }; - auto fix_pin_option = [](const std::string& str) -> std::string { - if (str == "NO") - return "NONE"; - else if (str == "YES") - return "CORE"; - else - return str; - }; - auto set_nthreads_pin = [&](const std::string& str) { - OPENVINO_SUPPRESS_DEPRECATED_START - auto property_name = str == "nthreads" ? ov::inference_num_threads.name() : ov::affinity.name(); + auto property_name = + str == "nthreads" ? ov::inference_num_threads.name() : ov::hint::enable_cpu_pinning.name(); auto property = str == "nthreads" ? ov::inference_num_threads(int(FLAGS_nthreads)) - : ov::affinity(fix_pin_option(FLAGS_pin)); - OPENVINO_SUPPRESS_DEPRECATED_END + : ov::hint::enable_cpu_pinning(FLAGS_pin); if (supported(property_name) || device_name == "AUTO") { // create nthreads/pin primary property for HW device or AUTO if -d is AUTO directly. device_config[property.first] = property.second; diff --git a/src/bindings/c/include/openvino/c/ov_property.h b/src/bindings/c/include/openvino/c/ov_property.h index 356a46ee74d1ef..502a391ee9abfb 100644 --- a/src/bindings/c/include/openvino/c/ov_property.h +++ b/src/bindings/c/include/openvino/c/ov_property.h @@ -123,13 +123,6 @@ ov_property_key_cache_encryption_callbacks; OPENVINO_C_VAR(const char*) ov_property_key_num_streams; -/** - * @brief Read-write property to set/get the name for setting CPU affinity per thread option. - * @ingroup ov_property_c_api - */ -OPENVINO_C_VAR(const char*) -ov_property_key_affinity; - /** * @brief Read-write property to set/get the maximum number of threads that can be used * for inference tasks. diff --git a/src/bindings/c/src/ov_property.cpp b/src/bindings/c/src/ov_property.cpp index 3786499ab79677..61be74ee265599 100644 --- a/src/bindings/c/src/ov_property.cpp +++ b/src/bindings/c/src/ov_property.cpp @@ -21,7 +21,6 @@ const char* ov_property_key_max_batch_size = "MAX_BATCH_SIZE"; const char* ov_property_key_cache_dir = "CACHE_DIR"; const char* ov_property_key_cache_mode = "CACHE_MODE"; const char* ov_property_key_num_streams = "NUM_STREAMS"; -const char* ov_property_key_affinity = "AFFINITY"; const char* ov_property_key_inference_num_threads = "INFERENCE_NUM_THREADS"; const char* ov_property_key_hint_performance_mode = "PERFORMANCE_HINT"; const char* ov_property_key_hint_enable_cpu_pinning = "ENABLE_CPU_PINNING"; diff --git a/src/bindings/js/node/src/helper.cpp b/src/bindings/js/node/src/helper.cpp index 09161deb2bc30e..d5ef4290c6bca5 100644 --- a/src/bindings/js/node/src/helper.cpp +++ b/src/bindings/js/node/src/helper.cpp @@ -414,10 +414,6 @@ Napi::Value any_to_js(const Napi::CallbackInfo& info, ov::Any value) { else if (value.is()) { return Napi::Number::New(info.Env(), value.as()); } - // Check for ov::Affinity - else if (value.is()) { - return Napi::String::New(info.Env(), value.as()); - } // Check for ov::element::Type else if (value.is()) { return Napi::String::New(info.Env(), value.as()); diff --git a/src/bindings/python/src/openvino/properties/__init__.py b/src/bindings/python/src/openvino/properties/__init__.py index 371660bcd9f214..8553ae56bfb3a6 100644 --- a/src/bindings/python/src/openvino/properties/__init__.py +++ b/src/bindings/python/src/openvino/properties/__init__.py @@ -3,7 +3,6 @@ # SPDX-License-Identifier: Apache-2.0 # Enums -from openvino._pyopenvino.properties import Affinity from openvino._pyopenvino.properties import CacheMode from openvino._pyopenvino.properties import WorkloadType diff --git a/src/bindings/python/src/openvino/runtime/properties/__init__.py b/src/bindings/python/src/openvino/runtime/properties/__init__.py index 3269ea42e32ac2..511c019be8d969 100644 --- a/src/bindings/python/src/openvino/runtime/properties/__init__.py +++ b/src/bindings/python/src/openvino/runtime/properties/__init__.py @@ -3,7 +3,6 @@ # SPDX-License-Identifier: Apache-2.0 # Enums -from openvino._pyopenvino.properties import Affinity from openvino._pyopenvino.properties import CacheMode from openvino._pyopenvino.properties import WorkloadType @@ -15,7 +14,6 @@ from openvino._pyopenvino.properties import num_streams from openvino._pyopenvino.properties import inference_num_threads from openvino._pyopenvino.properties import compilation_num_threads -from openvino._pyopenvino.properties import affinity from openvino._pyopenvino.properties import force_tbb_terminate from openvino._pyopenvino.properties import enable_mmap from openvino._pyopenvino.properties import supported_properties diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp index 564e5f69f5ee14..d0e3ddb21644e7 100644 --- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp +++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp @@ -14,13 +14,6 @@ void regmodule_properties(py::module m) { // Top submodule py::module m_properties = m.def_submodule("properties", "openvino.properties submodule"); - // Submodule properties - enums - py::enum_(m_properties, "Affinity", py::arithmetic()) - .value("NONE", ov::Affinity::NONE) - .value("CORE", ov::Affinity::CORE) - .value("NUMA", ov::Affinity::NUMA) - .value("HYBRID_AWARE", ov::Affinity::HYBRID_AWARE); - py::enum_(m_properties, "WorkloadType", py::arithmetic()) .value("DEFAULT", ov::WorkloadType::DEFAULT) .value("EFFICIENT", ov::WorkloadType::EFFICIENT); @@ -38,9 +31,6 @@ void regmodule_properties(py::module m) { wrap_property_RW(m_properties, ov::num_streams, "num_streams"); wrap_property_RW(m_properties, ov::inference_num_threads, "inference_num_threads"); wrap_property_RW(m_properties, ov::compilation_num_threads, "compilation_num_threads"); - OPENVINO_SUPPRESS_DEPRECATED_START - wrap_property_RW(m_properties, ov::affinity, "affinity"); - OPENVINO_SUPPRESS_DEPRECATED_END wrap_property_RW(m_properties, ov::force_tbb_terminate, "force_tbb_terminate"); wrap_property_RW(m_properties, ov::enable_mmap, "enable_mmap"); wrap_property_RW(m_properties, ov::weights_path, "weights_path"); diff --git a/src/bindings/python/src/pyopenvino/utils/utils.cpp b/src/bindings/python/src/pyopenvino/utils/utils.cpp index bd1520119bd8a9..7ddab55129d41a 100644 --- a/src/bindings/python/src/pyopenvino/utils/utils.cpp +++ b/src/bindings/python/src/pyopenvino/utils/utils.cpp @@ -217,8 +217,6 @@ py::object from_ov_any(const ov::Any& any) { return py::cast(any.as()); } else if (any.is()) { return py::cast(any.as()); - } else if (any.is()) { - return py::cast(any.as()); } else if (any.is()) { return py::cast(any.as()); } else if (any.is()) { @@ -372,9 +370,7 @@ ov::AnyMap py_object_to_any_map(const py::object& py_obj) { for (auto& item : py::cast(py_obj)) { std::string key = py::cast(item.first); py::object value = py::cast(item.second); - if (py::isinstance(value)) { - return_value[key] = py::cast(value); - } else if (py_object_is_any_map(value)) { + if (py_object_is_any_map(value)) { return_value[key] = Common::utils::py_object_to_any_map(value); } else { return_value[key] = Common::utils::py_object_to_any(value); @@ -449,8 +445,6 @@ ov::Any py_object_to_any(const py::object& py_obj) { return py::cast(py_obj); } else if (py::isinstance(py_obj)) { return py::cast(py_obj); - } else if (py::isinstance(py_obj)) { - return py::cast(py_obj); } else if (py::isinstance(py_obj)) { return py::cast(py_obj); } else if (py::isinstance(py_obj)) { diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index 6065d72196b44b..61fb7442987418 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -45,15 +45,6 @@ def test_properties_rw_base(): @pytest.mark.parametrize( ("ov_enum", "expected_values"), [ - ( - props.Affinity, - ( - (props.Affinity.NONE, "Affinity.NONE", -1), - (props.Affinity.CORE, "Affinity.CORE", 0), - (props.Affinity.NUMA, "Affinity.NUMA", 1), - (props.Affinity.HYBRID_AWARE, "Affinity.HYBRID_AWARE", 2), - ), - ), ( props.CacheMode, ( @@ -259,11 +250,6 @@ def test_properties_ro(ov_property_ro, expected_value): "COMPILATION_NUM_THREADS", ((44, 44),), ), - ( - props.affinity, - "AFFINITY", - ((props.Affinity.NONE, props.Affinity.NONE),), - ), (props.force_tbb_terminate, "FORCE_TBB_TERMINATE", ((True, True), (False, False))), (props.enable_mmap, "ENABLE_MMAP", ((True, True), (False, False))), ( @@ -539,7 +525,6 @@ def test_single_property_setting(device): props.enable_profiling(True), props.cache_dir("./"), props.inference_num_threads(9), - props.affinity(props.Affinity.NONE), hints.inference_precision(Type.f32), hints.performance_mode(hints.PerformanceMode.LATENCY), hints.enable_cpu_pinning(True), @@ -554,7 +539,6 @@ def test_single_property_setting(device): props.enable_profiling: True, props.cache_dir: "./", props.inference_num_threads: 9, - props.affinity: props.Affinity.NONE, hints.inference_precision: Type.f32, hints.performance_mode: hints.PerformanceMode.LATENCY, hints.enable_cpu_pinning: True, @@ -568,7 +552,6 @@ def test_single_property_setting(device): props.enable_profiling: True, "CACHE_DIR": "./", props.inference_num_threads: 9, - props.affinity: "NONE", "INFERENCE_PRECISION_HINT": Type.f32, hints.performance_mode: hints.PerformanceMode.LATENCY, hints.scheduling_core_type: hints.SchedulingCoreType.PCORE_ONLY, @@ -589,7 +572,6 @@ def test_core_cpu_properties(properties_to_set): assert core.get_property("CPU", props.enable_profiling) is True assert core.get_property("CPU", props.cache_dir) == "./" assert core.get_property("CPU", props.inference_num_threads) == 9 - assert core.get_property("CPU", props.affinity) == props.Affinity.NONE assert core.get_property("CPU", streams.num) == 5 # RO properties diff --git a/src/common/transformations/include/transformations/utils/gen_pattern.hpp b/src/common/transformations/include/transformations/utils/gen_pattern.hpp index 976561b4844a17..215825d2cd13eb 100644 --- a/src/common/transformations/include/transformations/utils/gen_pattern.hpp +++ b/src/common/transformations/include/transformations/utils/gen_pattern.hpp @@ -40,6 +40,14 @@ namespace gen_pattern { #ifdef CPU_DEBUG_CAPS +# ifdef __GNUC__ +# define CURRENT_LINE_NO __builtin_LINE() +# define CURRENT_FILE __builtin_FILE() +# else +# define CURRENT_LINE_NO -1 +# define CURRENT_FILE "" +# endif + template static inline void _verbose_log(Args&&... args) { std::stringstream ss; @@ -58,6 +66,10 @@ static bool matcher_verbose_enabled() { if (matcher_verbose_enabled()) \ _verbose_log(__VA_ARGS__) #else + +# define CURRENT_LINE_NO -1 +# define CURRENT_FILE "" + static bool matcher_verbose_enabled() { return false; } @@ -181,6 +193,8 @@ class Symbol { double literal_const_value; std::shared_ptr lhs; std::shared_ptr rhs; + const char* filename = ""; + int line_no = -1; // _,+,-,*,/ // l : literal const // n : named symbol @@ -220,10 +234,12 @@ class Symbol { entity->op = 'n'; entity->name = name; } - Symbol(const int value) { + Symbol(const int value, int line_no = CURRENT_LINE_NO, const char* file = CURRENT_FILE) { entity = std::make_shared(); entity->op = 'l'; entity->literal_const_value = value; + entity->line_no = line_no; + entity->filename = file; } Symbol(char op, const Symbol& lhs, const Symbol& rhs) { entity = std::make_shared(); @@ -246,8 +262,12 @@ class Symbol { void* get_id() const { return entity.get(); } - const char* get_name() const { - return entity->name; + std::string get_name() const { + if (entity->line_no == -1 || is_independent_var()) + return entity->name; + auto filename = strrchr(entity->filename, '/') ? strrchr(entity->filename, '/') + 1 : entity->filename; + std::string name(filename); // use filename:lineno instead + return name + ":" + std::to_string(entity->line_no); } bool operator<(const Symbol& rhs) const { return get_id() < rhs.get_id(); @@ -739,7 +759,9 @@ class GenericPattern : public ov::pass::pattern::op::Pattern { explicit GenericPattern(const DiscreteTypeInfo& type_info, const OutputVector& args, const detail::AttrMap& attrs, - const char* vt) + const char* vt, + const int line_no = -1, + const char* file = "") : ov::pass::pattern::op::Pattern(args), m_type_info(type_info), m_attrs(attrs), @@ -758,6 +780,12 @@ class GenericPattern : public ov::pass::pattern::op::Pattern { sep = ","; } ss << ")"; + if (line_no != -1) { + // add the code line no to the log: + // O P752(P736,P745)@fuse_rotary_positional_embeddings.cpp:551 vs ... + auto filename = strrchr(file, '/') ? strrchr(file, '/') + 1 : file; + ss << "@" << filename << ":" << line_no; + } m_signature = ss.str(); set_friendly_name(std::string("P") + std::to_string(id)); } @@ -776,7 +804,13 @@ class GenericPattern : public ov::pass::pattern::op::Pattern { // strictly requires pattern & graph value to come from output port with same index, // this is absolute necessary when pattern contains split node connections. if (pattern_value.get_index() != graph_value.get_index()) { - _VERBOSE_LOG(level, "X output index mismatch: ", pattern_value.get_index(), "!=", graph_value.get_index()); + _VERBOSE_LOG(level, + "X output index mismatch:(", + m_signature, + "): ", + pattern_value.get_index(), + "!=", + graph_value.get_index()); return false; } @@ -1018,7 +1052,9 @@ template std::shared_ptr makePattern(const std::vector& inputs, detail::AttrMap attrmap = {}, const char* vt = nullptr, - const char* friendly_name = nullptr) { + const char* friendly_name = nullptr, + int line_no = CURRENT_LINE_NO, + const char* file = CURRENT_FILE) { OutputVector args; for (auto& in : inputs) args.push_back(in.get_output()); @@ -1026,7 +1062,8 @@ std::shared_ptr makePattern(const std::vector& inputs // pattern nodes are better for pattern matching because // - it can be generic/incomplete, so normal OP node is not working properly // - it has predicate to correctly decide which branch to take (in Or pattern) - auto pattern_node = std::make_shared(T::get_type_info_static(), args, attrmap, vt); + auto pattern_node = + std::make_shared(T::get_type_info_static(), args, attrmap, vt, line_no, file); if (friendly_name) pattern_node->set_friendly_name(friendly_name); @@ -1120,7 +1157,9 @@ inline std::shared_ptr GenStridedSlice(detail::PatternNode data, detail::PatternNode start, detail::PatternNode stop, detail::PatternNode step, - size_t axis) { + size_t axis, + int line_no = CURRENT_LINE_NO, + const char* file = CURRENT_FILE) { std::vector begin_mask(axis + 1, 1); std::vector end_mask(axis + 1, 1); std::vector new_axis_mask; @@ -1135,12 +1174,27 @@ inline std::shared_ptr GenStridedSlice(detail::PatternNode data, {"end_mask", end_mask}, {"new_axis_mask", new_axis_mask}, {"shrink_axis_mask", shrink_axis_mask}, - {"ellipsis_mask", ellipsis_mask}}); + {"ellipsis_mask", ellipsis_mask}}, + nullptr, + nullptr, + line_no, + file); return opt2; } -inline std::shared_ptr GenSlice(detail::PatternNode data, Symbol start, Symbol stop, Symbol step, size_t axis) { - auto opt1 = makePattern({data, {start}, {stop}, {step}, {static_cast(axis)}}); +inline std::shared_ptr GenSlice(detail::PatternNode data, + Symbol start, + Symbol stop, + Symbol step, + size_t axis, + int line_no = CURRENT_LINE_NO, + const char* file = CURRENT_FILE) { + auto opt1 = makePattern({data, {start}, {stop}, {step}, {static_cast(axis)}}, + {}, + nullptr, + nullptr, + line_no, + file); std::vector vbegin(axis + 1, Symbol(0)); std::vector vend(axis + 1, Symbol(0)); @@ -1168,7 +1222,11 @@ inline std::shared_ptr GenSlice(detail::PatternNode data, Symbol start, Sy {"end_mask", end_mask}, {"new_axis_mask", new_axis_mask}, {"shrink_axis_mask", shrink_axis_mask}, - {"ellipsis_mask", ellipsis_mask}}); + {"ellipsis_mask", ellipsis_mask}}, + nullptr, + nullptr, + line_no, + file); return opt1 | opt2; } @@ -1329,7 +1387,9 @@ class PatternValidator { auto id = sym.get_id(); if (symbol_value_map.count(id)) { if (symbol_value_map[id] != value) { - _VERBOSE_LOG(" in-consistency between multiple references of same symbol : ", + _VERBOSE_LOG(" in-consistency between multiple references of same symbol(", + sym.get_name(), + "): ", symbol_value_map[id], " != ", value); @@ -1345,7 +1405,12 @@ class PatternValidator { if (sym.is_literal_const()) { auto literal = sym.eval(symbol_value_map); if (literal != value) { - _VERBOSE_LOG(" mismatch between literal symbol & value : ", literal, " != ", value); + _VERBOSE_LOG(" mismatch between literal symbol & value(", + sym.get_name(), + "): ", + literal, + " != ", + value); return false; } // no need to put literal into value map to eval them. @@ -1373,7 +1438,9 @@ class PatternValidator { } } if (!is_match) { - _VERBOSE_LOG(" mismatch between derived & value : ", + _VERBOSE_LOG(" mismatch between derived & value(", + sym.get_name(), + "): ", std::setprecision(std::numeric_limits::max_digits10), derived, " != ", diff --git a/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp b/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp index 3630c2bb5a6ed7..29b96f402a4822 100644 --- a/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp +++ b/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp @@ -51,19 +51,6 @@ class OPENVINO_RUNTIME_API IStreamsExecutor : virtual public ITaskExecutor { Task task; }; - /** - * @brief Defines inference thread binding type - */ - enum ThreadBindingType : std::uint8_t { - NONE, //!< Don't bind the inference threads - CORES, //!< Bind inference threads to the CPU cores (round-robin) - // the following modes are implemented only for the TBB code-path: - NUMA, //!< Bind to the NUMA nodes (default mode for the non-hybrid CPUs on the Win/MacOS, where the 'CORES' is - //!< not implemeneted) - HYBRID_AWARE //!< Let the runtime bind the inference threads depending on the cores type (default mode for the - //!< hybrid CPUs) - }; - /** * @brief Defines IStreamsExecutor configuration */ diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index 8baea3ed408656..28538f0f60e22e 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -1289,64 +1289,6 @@ static constexpr Property inference_num_threads */ static constexpr Property compilation_num_threads{"COMPILATION_NUM_THREADS"}; -/** - * @brief Enum to define possible affinity patterns - * @ingroup ov_runtime_cpp_prop_api - */ -enum class Affinity { - NONE = -1, //!< Disable threads affinity pinning - CORE = 0, //!< Pin threads to cores, best for static benchmarks - NUMA = 1, //!< Pin threads to NUMA nodes, best for real-life, contented cases. On the Windows and MacOS* this - //!< option behaves as CORE - HYBRID_AWARE = 2, //!< Let the runtime to do pinning to the cores types, e.g. prefer the "big" cores for latency - //!< tasks. On the hybrid CPUs this option is default -}; - -/** @cond INTERNAL */ -inline std::ostream& operator<<(std::ostream& os, const Affinity& affinity) { - switch (affinity) { - case Affinity::NONE: - return os << "NONE"; - case Affinity::CORE: - return os << "CORE"; - case Affinity::NUMA: - return os << "NUMA"; - case Affinity::HYBRID_AWARE: - return os << "HYBRID_AWARE"; - default: - OPENVINO_THROW("Unsupported affinity pattern"); - } -} - -inline std::istream& operator>>(std::istream& is, Affinity& affinity) { - std::string str; - is >> str; - if (str == "NONE") { - affinity = Affinity::NONE; - } else if (str == "CORE") { - affinity = Affinity::CORE; - } else if (str == "NUMA") { - affinity = Affinity::NUMA; - } else if (str == "HYBRID_AWARE") { - affinity = Affinity::HYBRID_AWARE; - } else { - OPENVINO_THROW("Unsupported affinity pattern: ", str); - } - return is; -} -/** @endcond */ - -/** - * @deprecated Use ov::hint::enable_cpu_pinning - * @brief The name for setting CPU affinity per thread option. - * @ingroup ov_runtime_cpp_prop_api - * @note The setting is ignored, if the OpenVINO compiled with OpenMP and any affinity-related OpenMP's - * environment variable is set (as affinity is configured explicitly) - */ -OPENVINO_DEPRECATED( - "This property is deprecated and will be removed soon. Use ov::hint::enable_cpu_pinning instead of it.") -static constexpr Property affinity{"AFFINITY"}; - /** * @brief The devices that the inference task been executed. * @ingroup ov_runtime_cpp_prop_api diff --git a/src/plugins/auto/src/cumulative_schedule.cpp b/src/plugins/auto/src/cumulative_schedule.cpp index a607205e17d1e5..2cd26032114cdc 100644 --- a/src/plugins/auto/src/cumulative_schedule.cpp +++ b/src/plugins/auto/src/cumulative_schedule.cpp @@ -73,10 +73,7 @@ void CumuSchedule::init() { idx++; } else { cpu_device_information = device; - OPENVINO_SUPPRESS_DEPRECATED_START - cpu_device_information.config.insert( - {ov::affinity.name(), ov::Any(ov::Affinity::CORE).as()}); - OPENVINO_SUPPRESS_DEPRECATED_END + cpu_device_information.config.insert({ov::hint::enable_cpu_pinning.name(), "YES"}); } } if (!cpu_device_information.device_name.empty()) diff --git a/src/plugins/auto/src/plugin.cpp b/src/plugins/auto/src/plugin.cpp index 2371107281d630..1ee6c5bfe14493 100644 --- a/src/plugins/auto/src/plugin.cpp +++ b/src/plugins/auto/src/plugin.cpp @@ -130,17 +130,13 @@ std::vector Plugin::parse_meta_devices(const std::string& pri if (get_device_name() == "MULTI") { auto is_set_numstreams = properties.find(ov::num_streams.name()) != properties.end(); - OPENVINO_SUPPRESS_DEPRECATED_START - auto is_set_affinity = properties.find(ov::affinity.name()) != properties.end(); - OPENVINO_SUPPRESS_DEPRECATED_END auto is_set_numthreads = properties.find(ov::inference_num_threads.name()) != properties.end(); - if (!is_set_perfhint && !is_set_affinity && !is_set_numthreads && !is_set_device_properties&& !is_set_numstreams) { + if (!is_set_perfhint && !is_set_numthreads && !is_set_device_properties&& !is_set_numstreams) { // setting tput as the default performance mode if // 1. no hints setting for MULTI plugin - // 2. no affinity setting for MULTI plugin - // 3. no inference_num_threads setting for MULTI plugin - // 4. no ov::device::properties(secondary properties) setting for target device - // 5. no ov::num_streams setting for target device + // 2. no inference_num_threads setting for MULTI plugin + // 3. no ov::device::properties(secondary properties) setting for target device + // 4. no ov::num_streams setting for target device device_config[ov::hint::performance_mode.name()] = ov::hint::PerformanceMode::THROUGHPUT; } } diff --git a/src/plugins/auto/tests/functional/shared_tests_instances/behavior/ov_executable_network/properties.cpp b/src/plugins/auto/tests/functional/shared_tests_instances/behavior/ov_executable_network/properties.cpp index 73a56d1506b79b..7b4978892cba26 100644 --- a/src/plugins/auto/tests/functional/shared_tests_instances/behavior/ov_executable_network/properties.cpp +++ b/src/plugins/auto/tests/functional/shared_tests_instances/behavior/ov_executable_network/properties.cpp @@ -23,29 +23,6 @@ INSTANTIATE_TEST_SUITE_P( ::testing::ValuesIn(inproperties)), OVClassCompiledModelPropertiesIncorrectTests::getTestCaseName); -#if (defined(__APPLE__) || defined(_WIN32)) -auto default_affinity = [] { - auto numaNodes = ov::get_available_numa_nodes(); - auto coreTypes = ov::get_available_cores_types(); - if (coreTypes.size() > 1) { - return ov::Affinity::HYBRID_AWARE; - } else if (numaNodes.size() > 1) { - return ov::Affinity::NUMA; - } else { - return ov::Affinity::NONE; - } -}(); -#else -auto default_affinity = [] { - auto coreTypes = ov::get_available_cores_types(); - if (coreTypes.size() > 1) { - return ov::Affinity::HYBRID_AWARE; - } else { - return ov::Affinity::CORE; - } -}(); -#endif - const std::vector multi_properties = { {ov::device::priorities(ov::test::utils::DEVICE_TEMPLATE), ov::num_streams(ov::streams::AUTO)}, }; diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp index f81c7dbbced99d..fe347f3b4e252a 100644 --- a/src/plugins/intel_cpu/src/compiled_model.cpp +++ b/src/plugins/intel_cpu/src/compiled_model.cpp @@ -258,10 +258,6 @@ ov::Any CompiledModel::get_property(const std::string& name) const { RO_property(ov::hint::kv_cache_precision.name()), }; - OPENVINO_SUPPRESS_DEPRECATED_START - ro_properties.insert(ro_properties.end(), RO_property(ov::affinity.name())); - OPENVINO_SUPPRESS_DEPRECATED_END - return ro_properties; } @@ -277,21 +273,6 @@ ov::Any CompiledModel::get_property(const std::string& name) const { const auto streams = config.streamExecutorConfig.get_streams(); return decltype(ov::num_streams)::value_type( streams); // ov::num_streams has special negative values (AUTO = -1, NUMA = -2) - OPENVINO_SUPPRESS_DEPRECATED_START - } else if (name == ov::affinity) { - const auto affinity = config.threadBindingType; - switch (affinity) { - case IStreamsExecutor::ThreadBindingType::NONE: - return ov::Affinity::NONE; - case IStreamsExecutor::ThreadBindingType::CORES: - return ov::Affinity::CORE; - case IStreamsExecutor::ThreadBindingType::NUMA: - return ov::Affinity::NUMA; - case IStreamsExecutor::ThreadBindingType::HYBRID_AWARE: - return ov::Affinity::HYBRID_AWARE; - } - return ov::Affinity::NONE; - OPENVINO_SUPPRESS_DEPRECATED_END } else if (name == ov::inference_num_threads) { const auto num_threads = config.streamExecutorConfig.get_threads(); return decltype(ov::inference_num_threads)::value_type(num_threads); diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 7d1ee05897e81d..594523e697390b 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -25,29 +25,6 @@ using namespace ov::threading; using namespace dnnl::impl::cpu::x64; Config::Config() { - // this is default mode -#if defined(__APPLE__) || defined(_WIN32) - threadBindingType = IStreamsExecutor::NONE; -#else - threadBindingType = IStreamsExecutor::CORES; -#endif - -// for the TBB code-path, additional configuration depending on the OS and CPU types -#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO) -# if defined(__APPLE__) || defined(_WIN32) - // 'CORES' is not implemented for Win/MacOS; so the 'NONE' or 'NUMA' is default - auto numaNodes = get_available_numa_nodes(); - if (numaNodes.size() > 1) { - threadBindingType = IStreamsExecutor::NUMA; - } else { - threadBindingType = IStreamsExecutor::NONE; - } -# endif - - if (get_available_cores_types().size() > 1 /*Hybrid CPU*/) { - threadBindingType = IStreamsExecutor::HYBRID_AWARE; - } -#endif CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties()); updateProperties(); @@ -93,47 +70,6 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { streamsChanged = true; } } - OPENVINO_SUPPRESS_DEPRECATED_START - } else if (key == ov::affinity.name()) { - try { - changedCpuPinning = true; - ov::Affinity affinity = val.as(); -#if defined(__APPLE__) - enableCpuPinning = false; - threadBindingType = affinity == ov::Affinity::NONE ? IStreamsExecutor::ThreadBindingType::NONE - : IStreamsExecutor::ThreadBindingType::NUMA; -#else - enableCpuPinning = - (affinity == ov::Affinity::CORE || affinity == ov::Affinity::HYBRID_AWARE) ? true : false; - switch (affinity) { - case ov::Affinity::NONE: - threadBindingType = IStreamsExecutor::ThreadBindingType::NONE; - break; - case ov::Affinity::CORE: { - threadBindingType = IStreamsExecutor::ThreadBindingType::CORES; - } break; - case ov::Affinity::NUMA: - threadBindingType = IStreamsExecutor::ThreadBindingType::NUMA; - break; - case ov::Affinity::HYBRID_AWARE: - threadBindingType = IStreamsExecutor::ThreadBindingType::HYBRID_AWARE; - break; - default: - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - key, - ". Expected only ov::Affinity::CORE/NUMA/HYBRID_AWARE."); - } -#endif - } catch (const ov::Exception&) { - OPENVINO_THROW("Wrong value ", - val.as(), - "for property key ", - key, - ". Expected only ov::Affinity::CORE/NUMA/HYBRID_AWARE."); - } - OPENVINO_SUPPRESS_DEPRECATED_END } else if (key == ov::hint::performance_mode.name()) { try { hintPerfMode = !changedHintPerfMode ? val.as() : hintPerfMode; diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index 1aa08f4412f0b3..5a347b1fa30c94 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -64,8 +64,6 @@ struct Config { bool streamsChanged = false; int threads = 0; int threadsPerStream = 0; - ov::threading::IStreamsExecutor::ThreadBindingType threadBindingType = - ov::threading::IStreamsExecutor::ThreadBindingType::NONE; ov::hint::PerformanceMode hintPerfMode = ov::hint::PerformanceMode::LATENCY; std::vector> streamsRankTable; bool changedHintPerfMode = false; diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp index 534470c746f2fe..5813fa3892765d 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp @@ -8,6 +8,7 @@ #include "common/utils.hpp" #include "emitters/utils.hpp" +#include "openvino/core/type/element_type.hpp" #include "transformations/cpu_opset/common/op/swish_cpu.hpp" namespace ov { @@ -2128,7 +2129,7 @@ size_t jit_prelu_emitter::get_aux_vecs_count() const { std::set> jit_prelu_emitter::get_supported_precisions( const std::shared_ptr& node) { - return {{element::f32}}; + return {{element::f32, element::f32}}; } void jit_prelu_emitter::emit_impl(const std::vector& in_vec_idxs, diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp index 6405f3e36eb45c..cf89181e2a7979 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp @@ -13,7 +13,13 @@ #include "emitters/snippets/cpu_runtime_configurator.hpp" #include "emitters/utils.hpp" #include "jit_snippets_emitters.hpp" +#include "openvino/core/type.hpp" +#include "openvino/op/prelu.hpp" +#include "openvino/op/round.hpp" +#include "openvino/op/sqrt.hpp" #include "openvino/opsets/opset13.hpp" +#include "snippets/emitter.hpp" +#include "snippets/lowered/expression.hpp" #include "snippets/snippets_isa.hpp" #include "transformations/cpu_opset/common/op/swish_cpu.hpp" #include "transformations/snippets/common/op/fused_mul_add.hpp" @@ -44,7 +50,7 @@ namespace ov { { \ [this](const snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr { \ const auto& n = expr->get_node(); \ - const auto& gelu = std::dynamic_pointer_cast(n); \ + const auto& gelu = ov::as_type_ptr(n); \ if (gelu == nullptr) { \ OPENVINO_THROW("Can't cast to ov::op::v7::Gelu"); \ } \ @@ -73,6 +79,37 @@ namespace ov { } \ } +#define CREATE_ROUND_V5_EMITTER(e_type_from_zero, e_type_even) \ + { \ + [this](const snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr { \ + const auto& n = expr->get_node(); \ + const auto& round = ov::as_type_ptr(n); \ + if (round == nullptr) { \ + OPENVINO_THROW("Can't cast to ov::op::v5::Round"); \ + } \ + const auto roundingMode = round->get_mode(); \ + if (roundingMode == ov::op::v5::Round::RoundMode::HALF_AWAY_FROM_ZERO) { \ + return std::make_shared(h.get(), isa, n); \ + } else if (roundingMode == ov::op::v5::Round::RoundMode::HALF_TO_EVEN) { \ + return std::make_shared(h.get(), isa, n); \ + } else { \ + OPENVINO_THROW("Unsupported Round mode"); \ + } \ + }, \ + [](const std::shared_ptr& n) -> std::set> { \ + const auto& round = std::dynamic_pointer_cast(n); \ + if (round == nullptr) { \ + OPENVINO_THROW("Can't cast to ov::op::v5::Round"); \ + } \ + if (round->get_mode() == ov::op::v5::Round::RoundMode::HALF_AWAY_FROM_ZERO) { \ + return e_type_from_zero::get_supported_precisions(n); \ + } else if (round->get_mode() == ov::op::v5::Round::RoundMode::HALF_TO_EVEN) { \ + return e_type_even::get_supported_precisions(n); \ + } \ + OPENVINO_THROW("Unsupported Round mode"); \ + } \ + } + class jit_snippet : public dnnl::impl::cpu::aarch64::jit_generator { public: DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_snippet) @@ -155,8 +192,12 @@ CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::aarch64::cpu_isa_t host_isa) CREATE_GELU_V7_EMITTER(jit_gelu_erf_emitter, jit_gelu_tanh_emitter); jitters[ov::op::v4::HSwish::get_type_info_static()] = CREATE_CPU_EMITTER(jit_hswish_emitter); jitters[ov::op::v4::Mish::get_type_info_static()] = CREATE_CPU_EMITTER(jit_mish_emitter); + jitters[ov::op::v0::PRelu::get_type_info_static()] = CREATE_CPU_EMITTER(jit_prelu_emitter); jitters[ov::op::v0::Relu::get_type_info_static()] = CREATE_CPU_EMITTER(jit_relu_emitter); + jitters[ov::op::v5::Round::get_type_info_static()] = + CREATE_ROUND_V5_EMITTER(jit_round_half_away_from_zero_emitter, jit_round_half_to_even_emitter); jitters[ov::op::v0::Sigmoid::get_type_info_static()] = CREATE_CPU_EMITTER(jit_sigmoid_emitter); + jitters[ov::op::v0::Sqrt::get_type_info_static()] = CREATE_CPU_EMITTER(jit_sqrt_emitter); jitters[ov::intel_cpu::SwishNode::get_type_info_static()] = CREATE_CPU_EMITTER(jit_swish_emitter); jitters[ov::op::v0::Tanh::get_type_info_static()] = CREATE_CPU_EMITTER(jit_tanh_emitter); diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index b3c2aa0b298a5a..dff002f275d68e 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -320,21 +320,6 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) const auto streams = engConfig.streamExecutorConfig.get_streams(); return decltype(ov::num_streams)::value_type( streams); // ov::num_streams has special negative values (AUTO = -1, NUMA = -2) - OPENVINO_SUPPRESS_DEPRECATED_START - } else if (name == ov::affinity) { - const auto affinity = engConfig.threadBindingType; - switch (affinity) { - case IStreamsExecutor::ThreadBindingType::NONE: - return ov::Affinity::NONE; - case IStreamsExecutor::ThreadBindingType::CORES: - return ov::Affinity::CORE; - case IStreamsExecutor::ThreadBindingType::NUMA: - return ov::Affinity::NUMA; - case IStreamsExecutor::ThreadBindingType::HYBRID_AWARE: - return ov::Affinity::HYBRID_AWARE; - } - return ov::Affinity::NONE; - OPENVINO_SUPPRESS_DEPRECATED_END } else if (name == ov::device::id.name()) { return decltype(ov::device::id)::value_type{engConfig.device_id}; } else if (name == ov::inference_num_threads) { @@ -437,10 +422,6 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio RW_property(ov::hint::kv_cache_precision.name()), }; - OPENVINO_SUPPRESS_DEPRECATED_START - rwProperties.insert(rwProperties.end(), RW_property(ov::affinity.name())); - OPENVINO_SUPPRESS_DEPRECATED_END - std::vector supportedProperties; supportedProperties.reserve(roProperties.size() + rwProperties.size()); supportedProperties.insert(supportedProperties.end(), roProperties.begin(), roProperties.end()); diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.cpp index 447adb0b2fe23f..9b9aa4f4b34e48 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -20,7 +21,12 @@ #include "itt.hpp" #include "openvino/opsets/opset1.hpp" #include "ov_ops/type_relaxed.hpp" +#include "transformations/common_optimizations/simplify_shape_of_sub_graph.hpp" #include "transformations/cpu_opset/common/op/sdpa.hpp" +#include "transformations/cpu_opset/x64/pass/sdpa_fuse_transpose_reshape.hpp" +#include "transformations/defs.hpp" +#include "transformations/op_conversions/convert_broadcast3.hpp" +#include "transformations/transpose_sinking/ts_shape_of.hpp" using namespace ov::gen_pattern; namespace ov { @@ -56,8 +62,9 @@ StatefulSDPAFusion::StatefulSDPAFusion() { std::shared_ptr reshape_k, reshape_v, unsqueeze_k, unsqueeze_v; std::shared_ptr computed_bcst_k, computed_bcst_v, multiply_k, multiply_v; std::shared_ptr mq_reshape_k, mq_reshape_v; + std::shared_ptr computed_bcst3_k, computed_bcst3_v; auto multi_query_bcst = [](const std::shared_ptr& kv) { - auto reshape_kv = wrap_type({kv, any_input()}); + auto reshape_kv = makePattern({kv, any_input()}); auto unsqueeze_kv = makePattern({kv, any_input()}); auto check_one = [](Output output) -> bool { @@ -73,13 +80,17 @@ StatefulSDPAFusion::StatefulSDPAFusion() { makePattern({wrap_type(check_one), any_input(), any_input()}, {{"mode", "numpy"}}); - auto multiply_kv = wrap_type({reshape_kv | unsqueeze_kv, constant_bcst | computed_bcst}); - auto result = wrap_type({multiply_kv, any_input()}); - return std::make_tuple(result, reshape_kv, unsqueeze_kv, computed_bcst, multiply_kv); + auto multiply_kv = makePattern({reshape_kv | unsqueeze_kv, constant_bcst | computed_bcst}); + auto computed_bcst3 = makePattern({unsqueeze_kv, any_input()}, {{"mode", "bidirectional"}}); + + auto result = makePattern({multiply_kv | computed_bcst3, any_input()}); + return std::make_tuple(result, reshape_kv, unsqueeze_kv, computed_bcst, multiply_kv, computed_bcst3); }; - std::tie(mq_reshape_k, reshape_k, unsqueeze_k, computed_bcst_k, multiply_k) = multi_query_bcst(concat_k); - std::tie(mq_reshape_v, reshape_v, unsqueeze_v, computed_bcst_v, multiply_v) = multi_query_bcst(concat_v); + std::tie(mq_reshape_k, reshape_k, unsqueeze_k, computed_bcst_k, multiply_k, computed_bcst3_k) = + multi_query_bcst(concat_k); + std::tie(mq_reshape_v, reshape_v, unsqueeze_v, computed_bcst_v, multiply_v, computed_bcst3_v) = + multi_query_bcst(concat_v); auto present_k = concat_k | mq_reshape_k; auto present_v = concat_v | mq_reshape_v; @@ -178,15 +189,19 @@ StatefulSDPAFusion::StatefulSDPAFusion() { opset6::Assign *assign_k_node = nullptr, *assign_v_node = nullptr; opset1::Convert *assign_cvt_k_node = nullptr, *assign_cvt_v_node = nullptr; - if (!find_assign(concat_k_node, assign_k_node, assign_cvt_k_node)) + if (!find_assign(concat_k_node, assign_k_node, assign_cvt_k_node)) { return false; - if (past_k_node->get_variable_id() != assign_k_node->get_variable_id()) + } + if (past_k_node->get_variable_id() != assign_k_node->get_variable_id()) { return false; + } - if (!find_assign(concat_v_node, assign_v_node, assign_cvt_v_node)) + if (!find_assign(concat_v_node, assign_v_node, assign_cvt_v_node)) { return false; - if (past_v_node->get_variable_id() != assign_v_node->get_variable_id()) + } + if (past_v_node->get_variable_id() != assign_v_node->get_variable_id()) { return false; + } auto is_optional_one_child = [&pattern_map](const std::vector>& nodes) { for (auto&& node : nodes) { @@ -212,7 +227,9 @@ StatefulSDPAFusion::StatefulSDPAFusion() { computed_bcst_v, multiply_v, mq_reshape_k, - mq_reshape_v})) { + mq_reshape_v, + computed_bcst3_k, + computed_bcst3_v})) { return false; } @@ -284,5 +301,19 @@ StatefulSDPAFusion::StatefulSDPAFusion() { this->register_matcher(m, callback); } +bool SDPASubgraphFusion::run_on_model(const std::shared_ptr& f) { + RUN_ON_FUNCTION_SCOPE(SDPASubgraphFusion); + ov::pass::Manager manager("SDPASubgraphFusion"); + + CPU_REGISTER_PASS_COMMON(manager, ov::pass::SimplifyGatherShapeOf); + CPU_REGISTER_PASS_COMMON(manager, ov::pass::transpose_sinking::TSShapeOfForward); + CPU_REGISTER_PASS_COMMON(manager, StatefulSDPAFusion); + // TODO: remove the following after snippets support patterns with dynamic shapes + CPU_REGISTER_PASS_X64(manager, ov::intel_cpu::SDPAFuseTransposeReshape); + + manager.run_passes(f); + return false; +} + } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.hpp index 96028402aa9f92..59494736bb2c2e 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.hpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.hpp @@ -14,5 +14,12 @@ class StatefulSDPAFusion : public ov::pass::MatcherPass { StatefulSDPAFusion(); }; +class SDPASubgraphFusion : public ov::pass::ModelPass { +public: + OPENVINO_RTTI("SDPASubgraphFusion", "0"); + + bool run_on_model(const std::shared_ptr& f) override; +}; + } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/sdpa_fuse_transpose_reshape.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/sdpa_fuse_transpose_reshape.cpp index 9b48708bc8ed5a..e33b468917c51a 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/sdpa_fuse_transpose_reshape.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/sdpa_fuse_transpose_reshape.cpp @@ -18,13 +18,13 @@ * Description: SDPA fuse transpose and reshape. * Original pattern Fused pattern * - * input1 input2 input3 + * input1 readvalue readvalue * | | | * q_reshape k_reshape v_reshap * | | | (qkv transpose and reshape's orders) - * q_transpose k_transpose v_transpose | - * \ | / input1 input2 input3 | - * \ | / \ | / / + * q_transpose k_transpose v_transpose | + * \ | / input1 ReadValue ReadValue | + * \ | / \ | / / * ScaledDotProductAttention ---------> SDPAWithTransposeReshape * | | * out_transpose | @@ -41,8 +41,8 @@ intel_cpu::SDPAFuseTransposeReshape::SDPAFuseTransposeReshape() { MATCHER_SCOPE(SDPAFuseTransposeReshape); auto q_reshape_node = wrap_type({any_input(), any_input()}); - auto k_reshape_node = wrap_type({any_input(), any_input()}); - auto v_reshape_node = wrap_type({any_input(), any_input()}); + auto k_reshape_node = wrap_type({wrap_type(), any_input()}); + auto v_reshape_node = wrap_type({wrap_type(), any_input()}); auto q_transpose_order_node = wrap_type(); auto k_transpose_order_node = wrap_type(); diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 47f400b7e38c53..9283d67f52d12d 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -12,6 +12,9 @@ #include #include "openvino/op/paged_attention.hpp" +#include "openvino/op/prelu.hpp" +#include "openvino/op/round.hpp" +#include "openvino/op/sqrt.hpp" #include "openvino/opsets/opset1.hpp" #include "openvino/opsets/opset10.hpp" #include "openvino/opsets/opset2.hpp" @@ -431,6 +434,7 @@ void Transformations::PreLpt(const std::vector& defaultPrecis ov::pass::KeepConstAndDecompression); CPU_REGISTER_PASS_COMMON(manager, ov::pass::AUGRUCellFusion); + CPU_REGISTER_PASS_COMMON(manager, SDPASubgraphFusion); CPU_REGISTER_PASS_COMMON(manager, ov::pass::CommonOptimizations); CPU_REGISTER_PASS_X64(manager, ov::pass::KeepConstsPrecision, decompression_precisions, false, true); CPU_SET_CALLBACK_X64( @@ -654,16 +658,6 @@ void Transformations::PreLpt(const std::vector& defaultPrecis CPU_SET_CALLBACK_COMMON(manager, nmsCallback, ov::pass::ConvertNMS9ToNMSIEInternal); CPU_SET_CALLBACK_COMMON(manager, nmsCallback, ov::pass::ConvertMulticlassNmsToMulticlassNmsIE); CPU_SET_CALLBACK_COMMON(manager, nmsCallback, ov::pass::ConvertMatrixNmsToMatrixNmsIE); - CPU_SET_CALLBACK_COMMON( - manager, - [this](const_node_ptr& node) -> bool { - std::string errorMsg; - // Current SDPA impl is optimized only for LLM models, so we decompose it for others to avoid perf - // regression. Matching the pattern is a little complicated, so we just check if there is any state nodes. - return node::ScaledDotProductAttention::isSupportedOperation(node, errorMsg) && - model->get_variables().size() > 0; - }, - ov::pass::ScaledDotProductAttentionDecomposition); // List of enabled/disabled transformations @@ -945,9 +939,6 @@ void Transformations::PostLpt() { } #endif // OPENVINO_ARCH_X86_64 - CPU_REGISTER_PASS_COMMON(postLPTPassManager, ov::pass::transpose_sinking::TSShapeOfForward); - CPU_REGISTER_PASS_COMMON(postLPTPassManager, StatefulSDPAFusion); - CPU_REGISTER_PASS_X64(postLPTPassManager, ov::intel_cpu::SDPAFuseTransposeReshape); CPU_REGISTER_PASS_X64(postLPTPassManager, ov::pass::RMSFusion, false); CPU_REGISTER_PASS_X64(postLPTPassManager, ov::intel_cpu::DecomposeRMSNorm); CPU_SET_CALLBACK_X64( @@ -1123,16 +1114,17 @@ void Transformations::MainSnippets(void) { ov::is_type(n) || ov::is_type(n) || ov::is_type(n) || ov::is_type(n) || ov::is_type(n) || ov::is_type(n) || - ov::is_type(n) || ov::is_type(n) || - ov::is_type(n) || ov::is_type(n) || - ov::is_type(n) || ov::is_type(n) || + ov::is_type(n) || ov::is_type(n) || + ov::is_type(n) || ov::is_type(n) || + ov::is_type(n) || ov::is_type(n) || + ov::is_type(n) || ov::is_type(n) || + ov::is_type(n) || ov::is_type(n) || ov::is_type(n) || ov::is_type(n) || ov::is_type(n) || ov::is_type(n) || - ov::is_type(n) || ov::is_type(n) || - ov::is_type(n) || ov::is_type(n) || - ov::is_type(n) || ov::is_type(n) || - ov::is_type(n) || ov::is_type(n) || - ov::is_type(n)); + ov::is_type(n) || ov::is_type(n) || + ov::is_type(n) || ov::is_type(n) || + ov::is_type(n) || ov::is_type(n) || + ov::is_type(n) || ov::is_type(n)); #else // CPU Plugin support Swish in Subgraph via conversion to SwichCPU which assumes second input to be constant, // and CPU Plugin does not support Mish for x64 diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp index 73086b78a0de95..073faba7f8d96f 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/properties.cpp @@ -24,7 +24,6 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkSupportedPropertiesAreAvailable RO_property(ov::model_name.name()), RO_property(ov::optimal_number_of_infer_requests.name()), RO_property(ov::num_streams.name()), - RO_property(ov::affinity.name()), RO_property(ov::inference_num_threads.name()), RO_property(ov::enable_profiling.name()), RO_property(ov::hint::inference_precision.name()), diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp index 904d2b81dc05b6..f72df3f58b69e5 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp @@ -39,7 +39,6 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginAllSupportedPropertiesAreAvailable) { RO_property(ov::device::architecture.name()), // read write RW_property(ov::num_streams.name()), - RW_property(ov::affinity.name()), RW_property(ov::inference_num_threads.name()), RW_property(ov::enable_profiling.name()), RW_property(ov::hint::inference_precision.name()), @@ -149,67 +148,6 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigStreamsNum) { ASSERT_GT(value, 0); // value has been configured automatically } -TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigAffinity) { - ov::Core ie; - -#if defined(__APPLE__) - ov::Affinity value = ov::Affinity::CORE; - auto defaultBindThreadParameter = ov::Affinity::NONE; -#else - ov::Affinity value = ov::Affinity::NUMA; -# if defined(_WIN32) - auto defaultBindThreadParameter = ov::Affinity::NONE; -# else - auto defaultBindThreadParameter = ov::Affinity::CORE; -# endif -#endif - auto coreTypes = ov::get_available_cores_types(); - if (coreTypes.size() > 1) { - defaultBindThreadParameter = ov::Affinity::HYBRID_AWARE; - } - - OV_ASSERT_NO_THROW(value = ie.get_property("CPU", ov::affinity)); - ASSERT_EQ(defaultBindThreadParameter, value); - - const ov::Affinity affinity = - defaultBindThreadParameter == ov::Affinity::HYBRID_AWARE ? ov::Affinity::NUMA : ov::Affinity::HYBRID_AWARE; - OV_ASSERT_NO_THROW(ie.set_property("CPU", ov::affinity(affinity))); - OV_ASSERT_NO_THROW(value = ie.get_property("CPU", ov::affinity)); -#if defined(__APPLE__) - ASSERT_EQ(ov::Affinity::NUMA, value); -#else - ASSERT_EQ(affinity, value); -#endif -} - -TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigAffinityCore) { - ov::Core ie; - ov::Affinity affinity = ov::Affinity::CORE; - bool value = false; - - OV_ASSERT_NO_THROW(ie.set_property("CPU", ov::affinity(affinity))); - OV_ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::enable_cpu_pinning)); -#if defined(__APPLE__) - ASSERT_EQ(false, value); -#else - ASSERT_EQ(true, value); -#endif - - affinity = ov::Affinity::HYBRID_AWARE; - OV_ASSERT_NO_THROW(ie.set_property("CPU", ov::affinity(affinity))); - OV_ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::enable_cpu_pinning)); -#if defined(__APPLE__) - ASSERT_EQ(false, value); -#else - ASSERT_EQ(true, value); -#endif - - affinity = ov::Affinity::NUMA; - OV_ASSERT_NO_THROW(ie.set_property("CPU", ov::affinity(affinity))); - OV_ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::enable_cpu_pinning)); - ASSERT_EQ(false, value); -} - #if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) const auto expected_precision_for_performance_mode = ov::intel_cpu::hasHardwareSupport(ov::element::f16) ? ov::element::f16 : ov::element::f32; #else diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp index 0f63a7517b5745..16ce7ff630c20e 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp @@ -197,7 +197,9 @@ std::string ActivationLayerCPUTest::getPrimitiveType(const utils::ActivationType (activation_type == utils::ActivationTypes::Sqrt) || (activation_type == utils::ActivationTypes::Swish) || (activation_type == utils::ActivationTypes::LogicalNot) || - (activation_type == utils::ActivationTypes::Tanh))) { + (activation_type == utils::ActivationTypes::Tanh) || + (activation_type == utils::ActivationTypes::RoundHalfAwayFromZero) || + (activation_type == utils::ActivationTypes::RoundHalfToEven))) { return "jit"; } @@ -209,7 +211,9 @@ std::string ActivationLayerCPUTest::getPrimitiveType(const utils::ActivationType if ((activation_type == utils::ActivationTypes::Floor) || (activation_type == utils::ActivationTypes::Ceiling) || (activation_type == utils::ActivationTypes::IsNaN) || - (activation_type == utils::ActivationTypes::IsFinite)) { + (activation_type == utils::ActivationTypes::IsFinite) || + (activation_type == utils::ActivationTypes::RoundHalfAwayFromZero) || + (activation_type == utils::ActivationTypes::RoundHalfToEven)) { return "ref"; } return "acl"; @@ -265,22 +269,26 @@ const std::map>>& activat const std::map>>& activationTypesSnippets() { static const std::map>> activationTypes { - {Abs, {{}}}, - {Exp, {{}}}, - {Ceiling, {{}}}, - {Clamp, {{-2.0f, 2.0f}}}, - {Elu, {{0.1f}}}, - {Floor, {{}}}, - {GeluErf, {{}}}, - {GeluTanh, {{}}}, - {Relu, {{}}}, - {HSwish, {{}}}, + {Abs, {{}}}, + {Exp, {{}}}, + {Ceiling, {{}}}, + {Clamp, {{-2.0f, 2.0f}}}, + {Elu, {{0.1f}}}, + {Floor, {{}}}, + {GeluErf, {{}}}, + {GeluTanh, {{}}}, + {Relu, {{}}}, + {HSwish, {{}}}, + {PReLu, {{-0.01f}}}, + {Sqrt, {{}}}, + {RoundHalfToEven, {{}}}, + {RoundHalfAwayFromZero, {{}}}, #if defined(OPENVINO_ARCH_ARM64) - {Mish, {{}}}, + {Mish, {{}}}, #endif - {Sigmoid, {{}}}, - {Swish, {{0.1f}}}, - {Tanh, {{}}}, + {Sigmoid, {{}}}, + {Swish, {{0.1f}}}, + {Tanh, {{}}}, }; return activationTypes; diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_multiple_query_sdp.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_multiple_query_sdp.cpp index d74ab99fb3d5ab..fe5ba2b7eac5e7 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_multiple_query_sdp.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_multiple_query_sdp.cpp @@ -152,9 +152,9 @@ class ConcatMultiQuerySDPTest : public testing::WithParamInterface(concatK, unsquezeAxis); auto unsqueezeV = std::make_shared(concatV, unsquezeAxis); - auto targetShape = ov::op::v0::Constant::create(qkvType, {1, 1, 1, 4, 1}, {1}); - auto broadcastK = std::make_shared(unsqueezeK, targetShape); - auto broadcastV = std::make_shared(unsqueezeV, targetShape); + auto targetShape = ov::op::v0::Constant::create(element::i32, {5}, {1, 1, 1, 4, 1}); + auto broadcastK = std::make_shared(unsqueezeK, targetShape, op::BroadcastType::BIDIRECTIONAL); + auto broadcastV = std::make_shared(unsqueezeV, targetShape, op::BroadcastType::BIDIRECTIONAL); auto target4D = ov::op::v0::Constant::create(ov::element::i32, {4}, {0, 0, 8, 64}); diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_transpose_sdp_transpose.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_transpose_sdp_transpose.cpp index f4166544af2bf2..8ba978e32c4b9c 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_transpose_sdp_transpose.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_transpose_sdp_transpose.cpp @@ -71,7 +71,7 @@ class ConcatSDPTransposeTestBase : public testing::WithParamInterfaceGetParam(); std::vector& inputShapes = inputShapeAndOrders.first; transposeOrder = inputShapeAndOrders.second; @@ -124,6 +123,10 @@ class ConcatSDPTransposeTestBase : public testing::WithParamInterface(inputParams[0], preOrder); + std::shared_ptr transposeQ_shapeof; + if (hasShapeOf) { + transposeQ_shapeof = std::make_shared(transposeQ); + } auto concat_axis = transposeOrder[2]; auto beam_idx = std::make_shared(ElementType::i32, ov::PartialShape{-1}); @@ -166,6 +169,7 @@ class ConcatSDPTransposeTestBase : public testing::WithParamInterface(results, sinks, inputParams, "ConcatTranposeSDP"); @@ -237,6 +241,7 @@ class ConcatSDPTransposeTestBase : public testing::WithParamInterface transposeOrder; + bool hasShapeOf; }; class ConcatSDPTransposeTest : public ConcatSDPTransposeTestBase { @@ -287,7 +292,10 @@ TEST_P(ConcatSDPTransposeTest, CompareWithRefs) { CheckNumberOfNodesWithType(compiledModel, "Concatenation", 0); CheckNumberOfNodesWithType(compiledModel, "Reorder", 0); CheckNumberOfNodesWithType(compiledModel, "Transpose", 1); - CheckNumberOfNodesWithType(compiledModel, "Gather", 0); + // Transformation TSShapeOfForward will change: + // ?->transpose->shapeof ==> ?-->shapeof->gather + // |->transpose + CheckNumberOfNodesWithType(compiledModel, "Gather", hasShapeOf ? 1 : 0); auto expectedOutputs = run_test(functionRefs); CheckNumberOfNodesWithType(compiledModel, "ScaledDotProductAttention", 0); for (size_t i = 0; i < actualOutputs.size(); i++) { diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/compiled_model/properties.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/compiled_model/properties.cpp index f15127c1dab1e2..f473bb96306b28 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/compiled_model/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/compiled_model/properties.cpp @@ -21,39 +21,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, ::testing::ValuesIn(inproperties)), OVClassCompiledModelPropertiesIncorrectTests::getTestCaseName); -#if (defined(__APPLE__) || defined(_WIN32)) -auto default_affinity = [] { - auto numaNodes = ov::get_available_numa_nodes(); - auto coreTypes = ov::get_available_cores_types(); - if (coreTypes.size() > 1) { - return ov::Affinity::HYBRID_AWARE; - } else if (numaNodes.size() > 1) { - return ov::Affinity::NUMA; - } else { - return ov::Affinity::NONE; - } -}(); -#else -auto default_affinity = [] { - auto coreTypes = ov::get_available_cores_types(); - if (coreTypes.size() > 1) { - return ov::Affinity::HYBRID_AWARE; - } else { - return ov::Affinity::CORE; - } -}(); -#endif - -const std::vector default_properties = { - {ov::affinity(default_affinity)}, -}; - -INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, - OVClassCompiledModelPropertiesDefaultTests, - ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(default_properties)), - OVClassCompiledModelPropertiesDefaultTests::getTestCaseName); - INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVCompiledModelPropertiesDefaultSupportedTests, ::testing::Values(ov::test::utils::DEVICE_CPU), diff --git a/src/plugins/intel_cpu/tools/dump_check/dump_check.py b/src/plugins/intel_cpu/tools/dump_check/dump_check.py index 163552a67b1a23..3317abc3459589 100644 --- a/src/plugins/intel_cpu/tools/dump_check/dump_check.py +++ b/src/plugins/intel_cpu/tools/dump_check/dump_check.py @@ -91,12 +91,12 @@ def dump(cls, ieb_file, nparray): fmt = "@4sHBB7IB3BLLLL" magic, ver = b'IEB0', 256 - + precision = -1 for k,v in IEB.precision_table.items(): if (v[0] == nparray.dtype): precision = k - + assert(precision >= 0) ndims = len(nparray.shape) @@ -113,7 +113,7 @@ def dump(cls, ieb_file, nparray): dims[0], dims[1], dims[2], dims[3], dims[4], dims[5], dims[6], scaling_axis, reserved[0], reserved[1], reserved[2], data_offset, data_size, scaling_data_offset, scaling_data_size) - + with open(ieb_file,"wb") as f: f.write(header) f.write(nparray.tobytes()) @@ -132,7 +132,7 @@ def __init__(self, ieb_file) -> None: (dtype, type_size, ) = IEB.precision_table[self.precision] count = self.data_size//type_size - + # recover the data as numpy array self.dims = np.array([self.dims0, self.dims1, self.dims2, self.dims3, self.dims4, self.dims5, self.dims6]) self.dims = self.dims[0:self.ndims] @@ -166,7 +166,6 @@ def dump_tensors(core, model, dump_dir = "./cpu_dump", dump_ports="OUT", device_ mkdirp(dump_dir) device_config = {"PERF_COUNT": "NO", - "AFFINITY": "CORE", "PERFORMANCE_HINT_NUM_REQUESTS":0, "PERFORMANCE_HINT":"LATENCY", "INFERENCE_PRECISION_HINT": "f32", @@ -185,7 +184,7 @@ def dump_tensors(core, model, dump_dir = "./cpu_dump", dump_ports="OUT", device_ print(f" {i}") print("infer with dump..") - + result = req.infer(inputs) # dump result as ieb, so even no dump_ports, you can still know @@ -208,7 +207,7 @@ def dump_tensors(core, model, dump_dir = "./cpu_dump", dump_ports="OUT", device_ pass_manager = Manager() pass_manager.register_pass(Serialize(path_to_xml=xml_path, path_to_bin=bin_path)) pass_manager.run_passes(runtime_func) - + print(f"{device_target} Runtime model (exec_graph) is serialized to {xml_path}.") @@ -217,7 +216,7 @@ def visualize_diff_abs(diff_abs): cur_shape = diff_abs.shape if len(vis_abs.shape) > 3: vis_abs = vis_abs.reshape(-1,cur_shape[-2],cur_shape[-1]) - + fig, ax = plt.subplots() # first channel with diff @@ -315,10 +314,10 @@ def get_match_ieb_file2(f1): if not f2: print("{}[ SKIPPED ]: not found {} in {} {}".format(Colors.YELLOW, f1[-1], dump_dir2, Colors.END)) continue - + ieb_file1 = f1[-1] ieb_file2 = f2[-1] - # compare + # compare ieb1 = IEB(os.path.join(dump_dir1, ieb_file1)) ieb2 = IEB(os.path.join(dump_dir2, ieb_file2)) @@ -345,7 +344,7 @@ def get_match_ieb_file2(f1): info = "" if (np.prod(diff_abs.shape) < 8): info = "{} vs {}".format(ieb1.value.reshape(-1), ieb2.value.reshape(-1)) - + max_abs = np.amax(diff_abs[idx]) max_idx = np.where(diff_abs[idx] >= max_abs) max_org = np.abs(ieb2.value)[idx][max_idx] diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp index 2c70ec46079994..656b35747af0c9 100644 --- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp @@ -333,13 +333,6 @@ OVPropertiesTestsWithCompileModelProps::getRWOptionalPropertiesValues( res.push_back({ov::compilation_num_threads(1)}); } - if (props.empty() || std::find(props.begin(), props.end(), ov::affinity.name()) != props.end()) { - ov::Affinity affinities[] = {ov::Affinity::NONE , ov::Affinity::CORE, ov::Affinity::NUMA, ov::Affinity::HYBRID_AWARE}; - for (auto &affinity : affinities) { - res.push_back({ov::affinity(affinity)}); - } - } - if (props.empty() || std::find(props.begin(), props.end(), ov::hint::enable_hyper_threading.name()) != props.end()) { res.push_back({ov::hint::enable_hyper_threading(true)}); res.push_back({ov::hint::enable_hyper_threading(false)}); @@ -391,10 +384,6 @@ OVPropertiesTestsWithCompileModelProps::getWrongRWOptionalPropertiesValues( res.push_back({{ov::compilation_num_threads.name(), -1}}); } - if (props.empty() || std::find(props.begin(), props.end(), ov::affinity.name()) != props.end()) { - res.push_back({{ov::affinity.name(), -5}}); - } - if (props.empty() || std::find(props.begin(), props.end(), ov::hint::enable_hyper_threading.name()) != props.end()) { res.push_back({{ov::hint::enable_hyper_threading.name(), -1}}); } diff --git a/tests/samples_tests/smoke_tests/test_benchmark_app.py b/tests/samples_tests/smoke_tests/test_benchmark_app.py index 3be4f4b88eaab8..d19efbcebae86f 100755 --- a/tests/samples_tests/smoke_tests/test_benchmark_app.py +++ b/tests/samples_tests/smoke_tests/test_benchmark_app.py @@ -69,9 +69,8 @@ def verify(sample_language, device, api=None, nireq=None, shape=None, data_shape assert 'CPU' in config_json assert not nstreams or config_json['CPU']['NUM_STREAMS'] == nstreams assert (not pin - or pin == 'YES' and config_json['CPU']['AFFINITY'] == 'CORE' - or pin == 'NO' and config_json['CPU']['AFFINITY'] == 'NONE' - or pin == config_json['CPU']['AFFINITY']) + or pin == 'YES' and config_json['CPU']['ENABLE_CPU_PINNING'] == 'YES' + or pin == 'NO' and config_json['CPU']['ENABLE_CPU_PINNING'] == 'NO') @pytest.mark.parametrize('sample_language', ['C++', 'Python']) @@ -96,7 +95,7 @@ def test_max_irate(sample_language, device, max_irate, cache, tmp_path): @pytest.mark.skipif('CPU' not in get_devices(), reason='affinity is a CPU property') @pytest.mark.parametrize('sample_language', ['C++', 'Python']) -@pytest.mark.parametrize('pin', ['YES', 'NO', 'NUMA', 'HYBRID_AWARE']) +@pytest.mark.parametrize('pin', ['YES', 'NO']) def test_pin(sample_language, pin, cache, tmp_path): verify(sample_language, 'CPU', pin=pin, nstreams='2', cache=cache, tmp_path=tmp_path) diff --git a/tools/benchmark_tool/openvino/tools/benchmark/main.py b/tools/benchmark_tool/openvino/tools/benchmark/main.py index acec4d17bdc377..f1e11d764f2b9a 100755 --- a/tools/benchmark_tool/openvino/tools/benchmark/main.py +++ b/tools/benchmark_tool/openvino/tools/benchmark/main.py @@ -288,11 +288,6 @@ def set_throughput_streams(): return def set_nthreads_pin(property_name, property_value): - if property_name == properties.affinity(): - if property_value == "YES": - property_value = properties.Affinity.CORE - elif property_value == "NO": - property_value = properties.Affinity.NONE if property_name in supported_properties or device_name == AUTO_DEVICE_NAME: # create nthreads/pin primary property for HW device or AUTO if -d is AUTO directly. config[device][property_name] = property_value @@ -309,7 +304,7 @@ def set_nthreads_pin(property_name, property_value): if is_flag_set_in_command_line('pin'): ## set for CPU to user defined value - set_nthreads_pin(properties.affinity(), args.infer_threads_pinning) + set_nthreads_pin(properties.hint.enable_cpu_pinning(), args.infer_threads_pinning) set_throughput_streams() set_infer_precision() diff --git a/tools/benchmark_tool/openvino/tools/benchmark/parameters.py b/tools/benchmark_tool/openvino/tools/benchmark/parameters.py index dac2b1490bf534..2c505ccd6bda65 100644 --- a/tools/benchmark_tool/openvino/tools/benchmark/parameters.py +++ b/tools/benchmark_tool/openvino/tools/benchmark/parameters.py @@ -149,12 +149,8 @@ def parse_args(): devp.add_argument('-nthreads', '--number_threads', type=int, required=False, default=None, help='Number of threads to use for inference on the CPU ' '(including HETERO and MULTI cases).') - devp.add_argument('-pin', '--infer_threads_pinning', type=str, required=False, choices=['YES', 'NO', 'NUMA', 'HYBRID_AWARE'], - help='Optional. Enable threads->cores (\'YES\' which is OpenVINO runtime\'s default for conventional CPUs), ' - 'threads->(NUMA)nodes (\'NUMA\'), ' - 'threads->appropriate core types (\'HYBRID_AWARE\', which is OpenVINO runtime\'s default for Hybrid CPUs) ' - 'or completely disable (\'NO\') ' - 'CPU threads pinning for CPU-involved inference.') + devp.add_argument('-pin', '--infer_threads_pinning', type=str, required=False, choices=['YES', 'NO'], + help='Optional. Enable threads->cores pinning for CPU-involved inference.') stat = parser.add_argument_group('Statistics dumping options') stat.add_argument('-latency_percentile', '--latency_percentile', type=int, required=False, default=50, diff --git a/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py b/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py index c705a3d832599d..45569c35db660e 100644 --- a/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py +++ b/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py @@ -766,8 +766,6 @@ def device_properties_to_string(config): for sk, sv in v.items(): if isinstance(sv, bool): sv = "YES" if sv else "NO" - if isinstance(sv, properties.Affinity): - sv = sv.name sub_str += "{0}:{1},".format(sk, sv) sub_str = sub_str[:-1] sub_str += "}" @@ -808,7 +806,7 @@ def dump_config(filename, config): for key, value in device_config.items(): if isinstance(value, OVAny) and (isinstance(value.value, dict)): value_string = device_properties_to_string(value.get()) - elif isinstance(value, (properties.hint.PerformanceMode, properties.Affinity)): + elif isinstance(value, properties.hint.PerformanceMode): value_string = value.name elif isinstance(value, OVAny): value_string = str(value.value)