From da4c16e92ee55f1befd68e9363d4f23ac3219b13 Mon Sep 17 00:00:00 2001 From: Ekaterina Shiryaeva Date: Tue, 14 Jan 2025 21:15:45 +0100 Subject: [PATCH] NPUW: Add new option to dump representative subset (#28308) ### Details: Added a new possible value for the dump options that lets NPUW decide which subgraphs need to be dumped. With the "MIN" option it will dump all non-repeating blocks and just one instance of each repeating block. ### Tickets: - *E-139858* --- .../intel_npu/npuw_private_properties.hpp | 20 ++++++++++--------- .../plugin/npuw/base_sync_infer_request.cpp | 10 ++++++---- .../src/plugin/npuw/compiled_model.cpp | 5 +++-- .../intel_npu/src/plugin/npuw/util.cpp | 9 ++++++++- .../intel_npu/src/plugin/npuw/util.hpp | 5 ++++- 5 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp index 5cf489b6df34b4..bbf7073a04656b 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp @@ -338,9 +338,10 @@ static constexpr ov::Property full{"NPUW_DUMP_FULL"}; * @brief * Type: std::string. * Dump the specified subgraph(s) in OpenVINO IR form in the current directory. - * Possible values: Comma-separated list of subgraph indices or "YES" for all - * subgraphs, "NO" or just empty value to turn option off. Keyword "last" can - * be used for dumping last subgraph without specifying it by specific index. + * Possible values: Comma-separated list of subgraph indices ("last" can be used + * for dumping last subgraph without specifying it by specific index), "YES" for + * all subgraphs, "MIN" for representative subgraph subset (all non-repeated and + * one instance of repeated block), "NO" or just empty value to turn option off. * E.g. "0,1" or "0,1,last" or "YES". * Default value: empty. 
*/ @@ -350,9 +351,10 @@ static constexpr ov::Property subgraphs{"NPUW_DUMP_SUBS"}; * @brief * Type: std::string. * Dump subgraph on disk if a compilation failure happens. - * Possible values: Comma-separated list of subgraph indices or "YES" for all - * subgraphs, "NO" or just empty value to turn option off. Keyword "last" can - * be used for dumping last subgraph. E.g. "0,1" or "0,1,last" or "YES". + * Possible values: Comma-separated list of subgraph indices ("last" can be used + * for dumping last subgraph) or "YES" for all subgraphs, "MIN" for representative + * subgraph subset, "NO" or just empty value to turn option off. E.g. "0,1" or + * "0,1,last" or "YES". * Default value: empty. */ static constexpr ov::Property subgraphs_on_fail{"NPUW_DUMP_SUBS_ON_FAIL"}; @@ -361,9 +363,9 @@ static constexpr ov::Property subgraphs_on_fail{"NPUW_DUMP_SUBS_ON_ * @brief * Type: std::string. * Dump input & output tensors for subgraph(s). - * Possible values: Comma-separated list of subgraph indices or "YES" for all - * subgraphs, "NO" or just empty value to turn option off. Keyword "last" can - * be used for last subgraph. E.g. "0,1" or "0,1,last" or "YES". + * Possible values: Comma-separated list of subgraph indices ("last" can be used for + * last subgraph) or "YES" for all subgraphs, "MIN" for representative subgraph subset, + * "NO" or just empty value to turn option off. E.g. "0,1" or "0,1,last" or "YES". * Default value: empty. 
*/ static constexpr ov::Property inputs_outputs{"NPUW_DUMP_IO"}; diff --git a/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp b/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp index 77d000cb415de7..0a6ecfa7d556bf 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp @@ -491,11 +491,12 @@ void ov::npuw::IBaseInferRequest::bind_global_results(std::size_t idx, RqPtr req void ov::npuw::IBaseInferRequest::dump_input_tensors(std::size_t idx) { const std::string dump_ios_opt = m_npuw_model->m_cfg.get<::intel_npu::NPUW_DUMP_IO>(); const std::size_t end_idx = m_npuw_model->m_compiled_submodels.size(); - if (!ov::npuw::util::is_set(idx, dump_ios_opt, end_idx)) { + auto real_idx = m_npuw_model->m_compiled_submodels[idx].replaced_by.value_or(idx); + + if (!ov::npuw::util::is_set(idx, dump_ios_opt, real_idx, end_idx)) { return; } - auto real_idx = m_npuw_model->m_compiled_submodels[idx].replaced_by.value_or(idx); const auto& comp_submodel_desc = m_npuw_model->m_compiled_submodels[real_idx]; const auto& comp_submodel = comp_submodel_desc.compiled_model; @@ -569,11 +570,12 @@ void ov::npuw::IBaseInferRequest::dump_input_tensors(std::size_t idx) { void ov::npuw::IBaseInferRequest::dump_output_tensors(std::size_t idx) { const std::string dump_ios_opt = m_npuw_model->m_cfg.get<::intel_npu::NPUW_DUMP_IO>(); const std::size_t end_idx = m_npuw_model->m_compiled_submodels.size(); - if (!ov::npuw::util::is_set(idx, dump_ios_opt, end_idx)) { + auto real_idx = m_npuw_model->m_compiled_submodels[idx].replaced_by.value_or(idx); + + if (!ov::npuw::util::is_set(idx, dump_ios_opt, real_idx, end_idx)) { return; } - auto real_idx = m_npuw_model->m_compiled_submodels[idx].replaced_by.value_or(idx); const auto& comp_submodel_desc = m_npuw_model->m_compiled_submodels[real_idx]; const auto& comp_submodel = comp_submodel_desc.compiled_model; diff --git 
a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp index be93e1f1b575f5..6ec6e047dddb8d 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp @@ -364,7 +364,7 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr& model, fill_empty_tensor_names(m_compiled_submodels[real_id].model); } - if (ov::npuw::util::is_set(id, dump_sub_opt, end_sub_idx)) { + if (ov::npuw::util::is_set(id, dump_sub_opt, real_id, end_sub_idx)) { LOG_INFO("Dumping Subgraph[" << id << "]"); LOG_BLOCK(); if (real_id != id) { @@ -996,8 +996,9 @@ ov::SoPtr ov::npuw::CompiledModel::compile_submodel(const st void ov::npuw::CompiledModel::dump_on_fail(std::size_t id, const std::string& device_to_try, const char* extra) { const std::string dof_opt = m_cfg.get<::intel_npu::NPUW_DUMP_SUBS_ON_FAIL>(); const std::size_t end_idx = m_compiled_submodels.size(); + const std::size_t real_idx = m_compiled_submodels[id].replaced_by.value_or(id); - if (ov::npuw::util::is_set(id, dof_opt, end_idx)) { + if (ov::npuw::util::is_set(id, dof_opt, real_idx, end_idx)) { ov::npuw::dump_failure(m_compiled_submodels[id].model, device_to_try, extra); } } diff --git a/src/plugins/intel_npu/src/plugin/npuw/util.cpp b/src/plugins/intel_npu/src/plugin/npuw/util.cpp index f6bb6f439cff25..517dc57e0a1468 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/util.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/util.cpp @@ -18,7 +18,10 @@ #include "openvino/runtime/make_tensor.hpp" // get_tensor_impl #include "util_xarch.hpp" -bool ov::npuw::util::is_set(const std::size_t sub_idx, const std::string& opt, const std::size_t end_idx) { +bool ov::npuw::util::is_set(const std::size_t sub_idx, + const std::string& opt, + const std::size_t real_idx, + const std::size_t end_idx) { if (opt.empty() || opt == "NO") { return false; } @@ -26,6 +29,10 @@ bool ov::npuw::util::is_set(const std::size_t 
sub_idx, const std::string& opt, c return true; } + if (opt == "MIN") { + return sub_idx == real_idx; + } + std::string str(opt); std::size_t last_pos = str.find("last"); if (last_pos != std::string::npos) { diff --git a/src/plugins/intel_npu/src/plugin/npuw/util.hpp b/src/plugins/intel_npu/src/plugin/npuw/util.hpp index 616aff53128292..501c97cdff4b0e 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/util.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/util.hpp @@ -15,7 +15,10 @@ namespace ov { namespace npuw { namespace util { -bool is_set(const std::size_t sub_idx, const std::string& opt, const std::size_t end_idx = SIZE_MAX); +bool is_set(const std::size_t sub_idx, + const std::string& opt, + const std::size_t real_idx = SIZE_MAX, + const std::size_t end_idx = SIZE_MAX); // Every great project has its own string class... // NB: Newer C++ standards would allow to use string views or smt