From 2ab64646ad078b99ca05f13749a92d9a1f3653da Mon Sep 17 00:00:00 2001 From: Alexandru Enache Date: Mon, 16 Dec 2024 09:37:12 +0000 Subject: [PATCH 01/34] Squash commits from rebase Signed-off-by: Alexandru Enache --- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index cfcec542e6219e..61d96d35073e84 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -846,6 +846,11 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c } _logger.debug("Successfully read %zu bytes into blob.", graphSize); + auto storedMeta = read_metadata_from(blob); + if (!storedMeta->is_compatible()) { + OPENVINO_THROW("Incompatible blob version!"); + } + auto graph = compiler->parse(std::move(blob), localConfig); graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); From 714c889f15547668242dd8e285b60a293f276497 Mon Sep 17 00:00:00 2001 From: Alexandru Enache Date: Wed, 18 Dec 2024 13:45:09 +0000 Subject: [PATCH 02/34] Few important changes: * read_metadata_from method reads from a stream now * the metadata compatibility check is done before allocating the vector with its data * updated tests * updated tests * the blob is stripped from the metadata after compatibility check * moved all stream writes from export_model to meta.write * moved getFileSize from plugin.cpp to an unnamed namespace inside metadata.cpp * added blobDataSize as a field inside struct Metadata * added get_blob_size method in preparation for upcoming PR Signed-off-by: Alexandru Enache --- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 61d96d35073e84..cfcec542e6219e 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -846,11 +846,6 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c } _logger.debug("Successfully read %zu bytes into blob.", graphSize); - auto storedMeta = read_metadata_from(blob); - if (!storedMeta->is_compatible()) { - OPENVINO_THROW("Incompatible blob version!"); - } - auto graph = compiler->parse(std::move(blob), localConfig); graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); From 6ba434d7a1cf5054976c439ef10067fa773dc2c3 Mon Sep 17 00:00:00 2001 From: Alexandru Enache Date: Fri, 20 Dec 2024 09:32:29 +0000 Subject: [PATCH 03/34] Added test for reading any invalid metadata version Signed-off-by: Alexandru Enache --- .../tests/unit/npu/metadata_version.cpp | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/plugins/intel_npu/tests/unit/npu/metadata_version.cpp b/src/plugins/intel_npu/tests/unit/npu/metadata_version.cpp index 0c94a1e5334b36..4622a373753f2f 100644 --- a/src/plugins/intel_npu/tests/unit/npu/metadata_version.cpp +++ b/src/plugins/intel_npu/tests/unit/npu/metadata_version.cpp @@ -199,3 +199,42 @@ TEST_F(MetadataUnitTests, writeAndReadMetadataWithRemovedField) { std::unique_ptr storedMeta; EXPECT_ANY_THROW(storedMeta = read_metadata_from(stream)); } + +struct MetadataVersionTestFixture : Metadata, ::testing::TestWithParam { +public: + std::stringstream blob; + + void set_version(uint32_t newVersion) { + _version = newVersion; + } + + MetadataVersionTestFixture() : Metadata(0, std::nullopt) {} + + MetadataVersionTestFixture(uint64_t blobSize, std::optional ovVersion) + : Metadata(blobSize, ovVersion) {} + + void TestBody() override {} +}; + +TEST_P(MetadataVersionTestFixture, readInvalidMetadataVersion) { + auto dummyMeta = MetadataVersionTestFixture(0, ov::get_openvino_version().buildNumber); + auto metaVersion = GetParam(); + dummyMeta.set_version(metaVersion); + dummyMeta.write(blob); + + ASSERT_ANY_THROW(read_metadata_from(blob)); +} + +constexpr uint16_t currentMajor = get_major(CURRENT_METADATA_VERSION), + currentMinor = get_minor(CURRENT_METADATA_VERSION); + +INSTANTIATE_TEST_CASE_P(MetadataUnitTests, + MetadataVersionTestFixture, + ::testing::Values(make_version(currentMajor, currentMinor + 1), + make_version(currentMajor, currentMinor - 1), + make_version(currentMajor + 1, currentMinor), + make_version(currentMajor + 1, currentMinor + 1), + make_version(currentMajor + 1, currentMinor - 1), + make_version(currentMajor - 1, currentMinor), + make_version(currentMajor - 1, currentMinor + 1), + make_version(currentMajor - 1, currentMinor - 1))); From bbda03eea82985862dc811e11f348ff977da8edb Mon Sep 17 00:00:00 2001 From: Alexandru Enache Date: Wed, 8 Jan 2025 12:06:01 +0000 Subject: [PATCH 04/34] Updated writeAndReadInvalidMetadataVersion test body Signed-off-by: Alexandru Enache --- src/plugins/intel_npu/tests/unit/npu/metadata_version.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_npu/tests/unit/npu/metadata_version.cpp b/src/plugins/intel_npu/tests/unit/npu/metadata_version.cpp index 4622a373753f2f..9bd2193db3f6ef 100644 --- a/src/plugins/intel_npu/tests/unit/npu/metadata_version.cpp +++ b/src/plugins/intel_npu/tests/unit/npu/metadata_version.cpp @@ -219,8 +219,10 @@ struct MetadataVersionTestFixture : Metadata, ::testin TEST_P(MetadataVersionTestFixture, readInvalidMetadataVersion) { auto dummyMeta = MetadataVersionTestFixture(0, ov::get_openvino_version().buildNumber); auto metaVersion = GetParam(); + dummyMeta.set_version(metaVersion); - dummyMeta.write(blob); + + OV_ASSERT_NO_THROW(dummyMeta.write(blob)); ASSERT_ANY_THROW(read_metadata_from(blob)); } From 938fde96c96ff58c50c3e7df43f1cd31bf46adc1 Mon Sep 17 00:00:00 2001 From: Alexandru Enache Date: Thu, 9 Jan 2025 13:25:31 +0000 Subject: [PATCH 05/34] Move version functions as static methods inside MetadataBase Signed-off-by: Alexandru Enache --- .../intel_npu/src/plugin/include/metadata.hpp | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/plugins/intel_npu/src/plugin/include/metadata.hpp b/src/plugins/intel_npu/src/plugin/include/metadata.hpp index f4ae25e84c9136..96ed4a5d8f5612 100644 --- a/src/plugins/intel_npu/src/plugin/include/metadata.hpp +++ b/src/plugins/intel_npu/src/plugin/include/metadata.hpp @@ -11,6 +11,53 @@ #include namespace intel_npu { +struct MetadataBase { + /** + * @brief Reads metadata from a stream. + */ + virtual void read(std::istream& stream) = 0; + + /** + * @brief Writes metadata to a stream. + */ + virtual void write(std::ostream& stream) = 0; + + virtual bool is_compatible() = 0; + + virtual uint64_t get_blob_size() const = 0; + + virtual ~MetadataBase() = default; + + /** + * @brief Returns a uint32_t value which represents two uint16_t values concatenated. + * @details Convention for bumping the metadata version: + * - Increment Major in case of: removing a current field OR adding a new field in between fields. + * - Increment Minor in case of: adding a new field at the end. + * + * @return Major and minor versions concatenated into a single uint32_t value. + */ + static constexpr uint32_t make_version(uint16_t major, uint16_t minor) { + return major << 16 | (minor & 0x0000ffff); + } + + /** + * @brief Gets the major version. + * + * @return Major version. + */ + static constexpr uint16_t get_major(uint32_t version) { + return static_cast(version >> 16); + } + + /** + * @brief Gets the minor version. + * + * @return Minor version. + */ + static constexpr uint16_t get_minor(uint32_t version) { + return static_cast(version); + } +}; struct MetadataBase { protected: From 3bb13d9d04e77a645f93e1de6aeb8a8b01675c5e Mon Sep 17 00:00:00 2001 From: Alexandru Enache Date: Thu, 9 Jan 2025 16:14:04 +0000 Subject: [PATCH 06/34] Nitpicks Signed-off-by: Alexandru Enache --- src/plugins/intel_npu/src/plugin/include/metadata.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/include/metadata.hpp b/src/plugins/intel_npu/src/plugin/include/metadata.hpp index 96ed4a5d8f5612..25662e7558fc74 100644 --- a/src/plugins/intel_npu/src/plugin/include/metadata.hpp +++ b/src/plugins/intel_npu/src/plugin/include/metadata.hpp @@ -11,6 +11,7 @@ #include namespace intel_npu { + struct MetadataBase { /** * @brief Reads metadata from a stream. @@ -42,7 +43,6 @@ struct MetadataBase { /** * @brief Gets the major version. - * * @return Major version. */ static constexpr uint16_t get_major(uint32_t version) { @@ -51,7 +51,6 @@ struct MetadataBase { /** * @brief Gets the minor version. - * * @return Minor version. */ static constexpr uint16_t get_minor(uint32_t version) { From 9613e419c6549e1b38a0d2dcea40e39d47d7e255 Mon Sep 17 00:00:00 2001 From: Alexandru Enache Date: Mon, 13 Jan 2025 11:01:50 +0000 Subject: [PATCH 07/34] Move version field to MetadataBase Signed-off-by: Alexandru Enache --- src/plugins/intel_npu/src/plugin/include/metadata.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/plugins/intel_npu/src/plugin/include/metadata.hpp b/src/plugins/intel_npu/src/plugin/include/metadata.hpp index 25662e7558fc74..d2be257c02bb8f 100644 --- a/src/plugins/intel_npu/src/plugin/include/metadata.hpp +++ b/src/plugins/intel_npu/src/plugin/include/metadata.hpp @@ -13,6 +13,12 @@ namespace intel_npu { struct MetadataBase { +protected: + uint32_t _version; + +public: + MetadataBase(uint32_t version) : _version(version) {} + /** * @brief Reads metadata from a stream. */ From f673af8d9caad51f5f6795b0fb43f63010abdf2d Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Thu, 14 Nov 2024 14:05:13 +0200 Subject: [PATCH 08/34] Add `ov::internal::caching_with_mmap` property logic --- .../include/driver_compiler_adapter.hpp | 4 ++- .../include/plugin_compiler_adapter.hpp | 6 +++- .../include/ze_graph_ext_wrappers.hpp | 4 ++- .../src/driver_compiler_adapter.cpp | 23 +++++++++++++-- .../src/plugin_compiler_adapter.cpp | 5 ++-- .../src/ze_graph_ext_wrappers.cpp | 24 ++++++++++++++++ .../intel_npu/src/plugin/include/metrics.hpp | 2 +- .../intel_npu/src/plugin/src/plugin.cpp | 28 +++++++++++-------- 8 files changed, 77 insertions(+), 19 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp index 6801e26c2fed73..23c226da9b8324 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp @@ -19,12 +19,14 @@ namespace intel_npu { class DriverCompilerAdapter final : public ICompilerAdapter { public: - DriverCompilerAdapter(const std::shared_ptr& zeroInitStruct); + DriverCompilerAdapter(const std::shared_ptr& zeroInitStruct, ov::intel_npu::CompilerType compilerType); std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; std::shared_ptr parse(std::vector network, const Config& config) const override; + std::shared_ptr parse(const std::shared_ptr& mmapNetwork, const Config& config) const; + ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; uint32_t get_version() const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index 61870e718a088e..c5afc21f72708a 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -17,12 +17,16 @@ namespace intel_npu { class PluginCompilerAdapter final : public ICompilerAdapter { public: - PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct); + PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct, ov::intel_npu::CompilerType compilerType); std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; std::shared_ptr parse(std::vector network, const Config& config) const override; + std::shared_ptr parse(const std::shared_ptr& mmapNetwork, const Config& config) const override { + OPENVINO_THROW("CIP needs a blob vector!"); + } + ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; uint32_t get_version() const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp index a80beb8c57305d..9906f591478189 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp @@ -37,7 +37,9 @@ class ZeGraphExtWrappers { ze_graph_handle_t getGraphHandle(const std::vector& network) const; - NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const; + ze_graph_handle_t getGraphHandle(const std::shared_ptr& mmapNetwork) const override; + + NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const override; _ze_result_t destroyGraph(ze_graph_handle_t graphHandle); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index d7c4def10c8c93..136887655bda7f 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -138,9 +138,10 @@ std::string rankToLegacyLayoutString(const size_t rank) { namespace intel_npu { -DriverCompilerAdapter::DriverCompilerAdapter(const std::shared_ptr& zeroInitStruct) +DriverCompilerAdapter::DriverCompilerAdapter(const std::shared_ptr& zeroInitStruct, ov::intel_npu::CompilerType compilerType) : _zeroInitStruct(zeroInitStruct), - _logger("DriverCompilerAdapter", Logger::global().level()) { + _logger("DriverCompilerAdapter", Logger::global().level()), + ICompilerAdapter(compilerType) { _logger.debug("initialize DriverCompilerAdapter start"); uint32_t graphExtVersion = _zeroInitStruct->getGraphDdiTable().version(); @@ -221,6 +222,24 @@ std::shared_ptr DriverCompilerAdapter::parse(std::vector networ std::optional>(std::move(network))); } +std::shared_ptr DriverCompilerAdapter::parse(const std::shared_ptr& mmapNetwork, const Config& config) const { + OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse"); + + _logger.debug("parse start"); + ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(mmapNetwork); + _logger.debug("parse end"); + + OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta"); + auto networkMeta = _zeGraphExt->getNetworkMeta(graphHandle); + + return std::make_shared(_zeGraphExt, + _zeroInitStruct, + graphHandle, + std::move(networkMeta), + config, + std::nullopt); +} + ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr& model, const Config& config) const { OV_ITT_TASK_CHAIN(query_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "query"); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 72fab52d6cf895..fe36e078da470d 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -49,9 +49,10 @@ ov::SoPtr loadCompiler(const std::string& libpath) { namespace intel_npu { -PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct) +PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct, ov::intel_npu::CompilerType compilerType) : _zeroInitStruct(zeroInitStruct), - _logger("PluginCompilerAdapter", Logger::global().level()) { + _logger("PluginCompilerAdapter", Logger::global().level()), + ICompilerAdapter(compilerType) { _logger.debug("initialize PluginCompilerAdapter start"); _logger.info("MLIR compiler will be used."); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp index a3626a79475dcd..09c84ffda33a52 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp @@ -389,6 +389,30 @@ ze_graph_handle_t ZeGraphExtWrappers::getGraphHandle(const std::vector& return graphHandle; } +template +ze_graph_handle_t ZeGraphExtWrappers::getGraphHandle(const std::shared_ptr& mmapNetwork) const { + ze_graph_handle_t graphHandle; + + if (mmapNetwork->size() == 0) { + OPENVINO_THROW("Empty blob"); + } + + ze_graph_desc_t desc = {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, + nullptr, + ZE_GRAPH_FORMAT_NATIVE, + mmapNetwork->size(), + reinterpret_cast(mmapNetwork->get_ptr()), + nullptr}; + + auto result = _zeroInitStruct->getGraphDdiTable().pfnCreate(_zeroInitStruct->getContext(), + _zeroInitStruct->getDevice(), + &desc, + &graphHandle); + THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnCreate", result, _zeroInitStruct->getGraphDdiTable()); + + return graphHandle; +} + /** * @brief Extracts the I/O metadata from Level Zero specific structures and converts them into OpenVINO specific * ones. diff --git a/src/plugins/intel_npu/src/plugin/include/metrics.hpp b/src/plugins/intel_npu/src/plugin/include/metrics.hpp index 91f78a9cd773f6..d0c35ef43ec15d 100644 --- a/src/plugins/intel_npu/src/plugin/include/metrics.hpp +++ b/src/plugins/intel_npu/src/plugin/include/metrics.hpp @@ -68,7 +68,7 @@ class Metrics final { ov::intel_npu::batch_mode.name(), ov::hint::execution_mode.name()}; - const std::vector _internalSupportedProperties = {ov::internal::caching_properties.name()}; + const std::vector _internalSupportedProperties = {ov::internal::caching_properties.name(), ov::internal::caching_with_mmap.name()}; // Metric to provide a hint for a range for number of async infer requests. (bottom bound, upper bound, step) const std::tuple _rangeForAsyncInferRequests{1u, 10u, 1u}; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index cfcec542e6219e..a8938627d56ce7 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -832,21 +832,27 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c CompilerAdapterFactory compilerAdapterFactory; auto compiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), localConfig); - auto storedMeta = read_metadata_from(stream); - if (!storedMeta->is_compatible()) { - OPENVINO_THROW("Incompatible blob version!"); + std::shared_ptr graph; + if (compiler->getCompilerType() == ov::intel_npu::CompilerType::DRIVER) { + if (auto mmap_buffer = dynamic_cast(stream.rdbuf())) { + graph = compiler->parse(mmap_buffer->get_buffer(), localConfig); + goto GRAPH_PARSED; + } } - auto graphSize = storedMeta->get_blob_size(); + { + auto graphSize = getFileSize(stream); - std::vector blob(graphSize); - stream.read(reinterpret_cast(blob.data()), graphSize); - if (!stream) { - OPENVINO_THROW("Failed to read data from stream!"); - } - _logger.debug("Successfully read %zu bytes into blob.", graphSize); + std::vector blob(graphSize); + stream.read(reinterpret_cast(blob.data()), graphSize); + if (!stream) { + OPENVINO_THROW("Failed to read data from stream!"); + } + _logger.debug("Successfully read %zu bytes into blob.", graphSize); - auto graph = compiler->parse(std::move(blob), localConfig); + graph = compiler->parse(std::move(blob), localConfig); + } +GRAPH_PARSED: graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); const std::shared_ptr modelDummy = From a55f767ceb0e0d9ca9a62a464ca9c1b98877c139 Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Thu, 14 Nov 2024 17:23:37 +0200 Subject: [PATCH 09/34] Refactor compiler type selection --- .../src/compiler_adapter/include/driver_compiler_adapter.hpp | 4 ++-- .../src/compiler_adapter/include/plugin_compiler_adapter.hpp | 2 +- .../src/compiler_adapter/src/driver_compiler_adapter.cpp | 5 ++--- .../src/compiler_adapter/src/plugin_compiler_adapter.cpp | 5 ++--- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp index 23c226da9b8324..a28f06f220bcc8 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp @@ -19,13 +19,13 @@ namespace intel_npu { class DriverCompilerAdapter final : public ICompilerAdapter { public: - DriverCompilerAdapter(const std::shared_ptr& zeroInitStruct, ov::intel_npu::CompilerType compilerType); + DriverCompilerAdapter(const std::shared_ptr& zeroInitStruct); std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; std::shared_ptr parse(std::vector network, const Config& config) const override; - std::shared_ptr parse(const std::shared_ptr& mmapNetwork, const Config& config) const; + std::shared_ptr parse(const std::shared_ptr& mmapNetwork, const Config& config) const override; ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index c5afc21f72708a..b9705d12a649cf 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -17,7 +17,7 @@ namespace intel_npu { class PluginCompilerAdapter final : public ICompilerAdapter { public: - PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct, ov::intel_npu::CompilerType compilerType); + PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct); std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index 136887655bda7f..a29f6fed078574 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -138,10 +138,9 @@ std::string rankToLegacyLayoutString(const size_t rank) { namespace intel_npu { -DriverCompilerAdapter::DriverCompilerAdapter(const std::shared_ptr& zeroInitStruct, ov::intel_npu::CompilerType compilerType) +DriverCompilerAdapter::DriverCompilerAdapter(const std::shared_ptr& zeroInitStruct) : _zeroInitStruct(zeroInitStruct), - _logger("DriverCompilerAdapter", Logger::global().level()), - ICompilerAdapter(compilerType) { + _logger("DriverCompilerAdapter", Logger::global().level()) { _logger.debug("initialize DriverCompilerAdapter start"); uint32_t graphExtVersion = _zeroInitStruct->getGraphDdiTable().version(); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index fe36e078da470d..72fab52d6cf895 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -49,10 +49,9 @@ ov::SoPtr loadCompiler(const std::string& libpath) { namespace intel_npu { -PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct, ov::intel_npu::CompilerType compilerType) +PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct) : _zeroInitStruct(zeroInitStruct), - _logger("PluginCompilerAdapter", Logger::global().level()), - ICompilerAdapter(compilerType) { + _logger("PluginCompilerAdapter", Logger::global().level()) { _logger.debug("initialize PluginCompilerAdapter start"); _logger.info("MLIR compiler will be used."); From 440a9287c2ab7b5140d98c899614e01ac2a03677 Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Thu, 14 Nov 2024 22:58:09 +0200 Subject: [PATCH 10/34] Fix OV cache header not being removed from blob for memory mapped cache file --- .../openvino/runtime/aligned_buffer.hpp | 8 +++++-- .../openvino/runtime/shared_buffer.hpp | 21 +++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/core/dev_api/openvino/runtime/aligned_buffer.hpp b/src/core/dev_api/openvino/runtime/aligned_buffer.hpp index 992d258895f5f4..c9344e1cd84911 100644 --- a/src/core/dev_api/openvino/runtime/aligned_buffer.hpp +++ b/src/core/dev_api/openvino/runtime/aligned_buffer.hpp @@ -30,14 +30,17 @@ class OPENVINO_API AlignedBuffer { size_t size() const { return m_byte_size; } + void updateOffset(size_t offset) { + m_offset = offset; + } void* get_ptr(size_t offset) const { return m_aligned_buffer + offset; } void* get_ptr() { - return m_aligned_buffer; + return m_aligned_buffer + m_offset; } const void* get_ptr() const { - return m_aligned_buffer; + return m_aligned_buffer + m_offset; } template T* get_ptr() { @@ -61,6 +64,7 @@ class OPENVINO_API AlignedBuffer { char* m_allocated_buffer; char* m_aligned_buffer; size_t m_byte_size; + size_t m_offset = 0; }; template <> diff --git a/src/core/dev_api/openvino/runtime/shared_buffer.hpp b/src/core/dev_api/openvino/runtime/shared_buffer.hpp index 3ea97db6a1989f..fb0fd01403e763 100644 --- a/src/core/dev_api/openvino/runtime/shared_buffer.hpp +++ b/src/core/dev_api/openvino/runtime/shared_buffer.hpp @@ -16,6 +16,7 @@ class SharedBuffer : public ov::AlignedBuffer { m_allocated_buffer = data; m_aligned_buffer = data; m_byte_size = size; + m_offset = 0; } virtual ~SharedBuffer() { @@ -81,6 +82,26 @@ class OwningSharedStreamBuffer : public SharedStreamBuffer { return m_shared_obj; } + std::streamsize xsgetn(char* s, std::streamsize count) override { + auto streamSize = SharedStreamBuffer::xsgetn(s, count); + m_shared_obj->updateOffset(m_offset); + return streamSize; + } + + int_type uflow() override { + auto val = SharedStreamBuffer::uflow(); + m_shared_obj->updateOffset(m_offset); + return val; + } + + pos_type seekoff(off_type off, + std::ios_base::seekdir dir, + std::ios_base::openmode which = std::ios_base::in) override { + auto pos = SharedStreamBuffer::seekoff(off, dir, which); + m_shared_obj->updateOffset(m_offset); + return pos; + } + protected: std::shared_ptr m_shared_obj; }; From b20d63a8f960bcf1a5d8fddf443a034b75f3166d Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Wed, 20 Nov 2024 12:56:44 +0200 Subject: [PATCH 11/34] Keep `shared_ptr` of blob in IGraph to fix `export_model` for import scenario --- .../include/intel_npu/common/igraph.hpp | 5 +- .../intel_npu/src/common/src/igraph.cpp | 2 +- .../include/driver_compiler_adapter.hpp | 4 +- .../compiler_adapter/include/driver_graph.hpp | 9 +--- .../include/ze_graph_ext_wrappers.hpp | 6 ++- .../src/driver_compiler_adapter.cpp | 20 +------- .../src/compiler_adapter/src/driver_graph.cpp | 50 ++++++------------- .../src/compiler_adapter/src/plugin_graph.cpp | 21 +++++--- .../src/ze_graph_ext_wrappers.cpp | 3 +- .../intel_npu/src/plugin/src/plugin.cpp | 6 ++- 10 files changed, 49 insertions(+), 77 deletions(-) diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index fc5aec9158151c..de3298170262df 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -12,6 +12,7 @@ #include "intel_npu/utils/zero/zero_init.hpp" #include "intel_npu/utils/zero/zero_utils.hpp" #include "intel_npu/utils/zero/zero_wrappers.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "openvino/runtime/profiling_info.hpp" namespace intel_npu { @@ -21,7 +22,7 @@ class IGraph : public std::enable_shared_from_this { IGraph(ze_graph_handle_t handle, NetworkMetadata metadata, const Config& config, - std::optional> blob); + std::optional> blob); virtual size_t export_blob(std::ostream& stream) const = 0; @@ -89,7 +90,7 @@ class IGraph : public std::enable_shared_from_this { // first inference starts running std::mutex _mutex; - std::vector _blob; + std::shared_ptr _blob; uint32_t _unique_id = 0; uint32_t _last_submitted_id; diff --git a/src/plugins/intel_npu/src/common/src/igraph.cpp b/src/plugins/intel_npu/src/common/src/igraph.cpp index 9a53928c9a3d9e..c7b46f2d9b44cb 100644 --- a/src/plugins/intel_npu/src/common/src/igraph.cpp +++ b/src/plugins/intel_npu/src/common/src/igraph.cpp @@ -17,7 +17,7 @@ namespace intel_npu { IGraph::IGraph(ze_graph_handle_t handle, NetworkMetadata metadata, const Config& config, - std::optional> blob) + std::optional> blob) : _handle(handle), _metadata(std::move(metadata)), _logger("IGraph", config.get()) { diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp index a28f06f220bcc8..7421bf2be1ff9e 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp @@ -23,7 +23,9 @@ class DriverCompilerAdapter final : public ICompilerAdapter { std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; - std::shared_ptr parse(std::vector network, const Config& config) const override; + std::shared_ptr parse(std::vector network, const Config& config) const override { + OPENVINO_THROW("CID should not parse from std::vector anymore!"); + } std::shared_ptr parse(const std::shared_ptr& mmapNetwork, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp index cf3d54c6b363e5..bca96067dac6b1 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp @@ -10,6 +10,7 @@ #include "intel_npu/common/igraph.hpp" #include "intel_npu/utils/zero/zero_init.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "ze_graph_ext_wrappers.hpp" namespace intel_npu { @@ -21,7 +22,7 @@ class DriverGraph final : public IGraph { ze_graph_handle_t graphHandle, NetworkMetadata metadata, const Config& config, - std::optional> blob); + std::optional> blob); size_t export_blob(std::ostream& stream) const override; @@ -35,16 +36,10 @@ class DriverGraph final : public IGraph { ~DriverGraph() override; private: - bool release_blob(const Config& config); - std::shared_ptr _zeGraphExt; std::shared_ptr _zeroInitStruct; Logger _logger; - - // In the case of the import path, the blob is released after graph initialization so it can not be any longer - // exported - bool _blobIsReleased = false; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp index 9906f591478189..31c157c96321a5 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp @@ -15,6 +15,8 @@ #include "intel_npu/utils/zero/zero_init.hpp" #include "intel_npu/utils/zero/zero_types.hpp" +#include "openvino/runtime/aligned_buffer.hpp" + namespace intel_npu { using SerializedIR = std::pair>; @@ -37,9 +39,9 @@ class ZeGraphExtWrappers { ze_graph_handle_t getGraphHandle(const std::vector& network) const; - ze_graph_handle_t getGraphHandle(const std::shared_ptr& mmapNetwork) const override; + ze_graph_handle_t getGraphHandle(const std::shared_ptr& mmapNetwork) const; - NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const override; + NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const; _ze_result_t destroyGraph(ze_graph_handle_t graphHandle); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index a29f6fed078574..09e7cc3162d88e 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -203,24 +203,6 @@ std::shared_ptr DriverCompilerAdapter::compile(const std::shared_ptr DriverCompilerAdapter::parse(std::vector network, const Config& config) const { - OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse"); - - _logger.debug("parse start"); - ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(network); - _logger.debug("parse end"); - - OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta"); - auto networkMeta = _zeGraphExt->getNetworkMeta(graphHandle); - - return std::make_shared(_zeGraphExt, - _zeroInitStruct, - graphHandle, - std::move(networkMeta), - config, - std::optional>(std::move(network))); -} - std::shared_ptr DriverCompilerAdapter::parse(const std::shared_ptr& mmapNetwork, const Config& config) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse"); @@ -236,7 +218,7 @@ std::shared_ptr DriverCompilerAdapter::parse(const std::shared_ptr>(mmapNetwork)); } ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr& model, diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index a29412075c7e39..a92d63b00b87e2 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -15,7 +15,7 @@ DriverGraph::DriverGraph(const std::shared_ptr& zeGraphExt, ze_graph_handle_t graphHandle, NetworkMetadata metadata, const Config& config, - std::optional> blob) + std::optional> blob) : IGraph(graphHandle, std::move(metadata), config, std::move(blob)), _zeGraphExt(zeGraphExt), _zeroInitStruct(zeroInitStruct), @@ -32,18 +32,17 @@ DriverGraph::DriverGraph(const std::shared_ptr& zeGraphExt, initialize(config); } -size_t DriverGraph::export_blob(std::ostream& stream) const { - const uint8_t* blobPtr = nullptr; - size_t blobSize; - std::vector blob; +size_t DriverGraph::export_blob(std::ostream& stream) { + if (_blob.get() == nullptr) { + const uint8_t* blobPtr = nullptr; + size_t blobSize = -1; + std::shared_ptr> blob; - if (_blobIsReleased) { - OPENVINO_THROW("Model was imported (not compiled) by the plugin. Model export is forbidden in this case!"); + _zeGraphExt->getGraphBinary(_handle, *blob, blobPtr, blobSize); + _blob = std::make_shared>>>(reinterpret_cast(const_cast(blobPtr)), blobSize, blob); } - _zeGraphExt->getGraphBinary(_handle, blob, blobPtr, blobSize); - - stream.write(reinterpret_cast(blobPtr), blobSize); + stream.write(reinterpret_cast(_blob->get_ptr()), _blob->size()); if (!stream) { _logger.error("Write blob to stream failed. Blob is broken!"); @@ -52,16 +51,16 @@ size_t DriverGraph::export_blob(std::ostream& stream) const { if (_logger.level() >= ov::log::Level::INFO) { std::uint32_t result = 1171117u; - for (const uint8_t* it = blobPtr; it != blobPtr + blobSize; ++it) { + for (const uint8_t* it = reinterpret_cast(_blob->get_ptr()); it != reinterpret_cast(_blob->get_ptr()) + _blob->size(); ++it) { result = ((result << 7) + result) + static_cast(*it); } std::stringstream str; - str << "Blob size: " << blobSize << ", hash: " << std::hex << result; + str << "Blob size: " << _blob->size() << ", hash: " << std::hex << result; _logger.info(str.str().c_str()); } _logger.info("Write blob to stream successfully."); - return blobSize; + return _blob->size(); } std::vector DriverGraph::process_profiling_output(const std::vector& profData, @@ -122,6 +121,7 @@ void DriverGraph::initialize(const Config& config) { _zeGraphExt->initializeGraph(_handle, config); _logger.debug("Graph initialize finish"); +<<<<<<< HEAD // We are allowed to release the original blob because weights were loaded in NPU memory during // _zeGraphExt->initializeGraph(). The driver will not access the original blob from this moment on, so we are @@ -137,30 +137,10 @@ void DriverGraph::initialize(const Config& config) { _last_submitted_event.resize(number_of_command_lists); } +======= +>>>>>>> 25b5c05976 (Keep `shared_ptr` of blob in IGraph to fix `export_model` for import scenario) } -bool DriverGraph::release_blob(const Config& config) { - if (_blob.empty() || _zeroInitStruct->getGraphDdiTable().version() < ZE_GRAPH_EXT_VERSION_1_8 || - config.get()) { - return false; - } - - ze_graph_properties_2_t properties = {}; - properties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES; - _zeroInitStruct->getGraphDdiTable().pfnGetProperties2(_handle, &properties); - - if (~properties.initStageRequired & ZE_GRAPH_STAGE_INITIALIZE) { - return false; - } - - _blob.clear(); - _blob.shrink_to_fit(); - - _logger.debug("Blob is released"); - - return true; -}; - DriverGraph::~DriverGraph() { if (_handle != nullptr) { auto result = _zeGraphExt->destroyGraph(_handle); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index d0c24a82e03937..e08c0fac64719b 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -7,6 +7,7 @@ #include "intel_npu/config/common.hpp" #include "intel_npu/config/runtime.hpp" #include "intel_npu/utils/zero/zero_api.hpp" +#include "openvino/runtime/shared_buffer.hpp" namespace intel_npu { @@ -17,7 +18,7 @@ PluginGraph::PluginGraph(const std::shared_ptr& zeGraphExt, NetworkMetadata metadata, std::vector blob, const Config& config) - : IGraph(graphHandle, std::move(metadata), config, std::optional>(std::move(blob))), + : IGraph(graphHandle, std::move(metadata), config, std::optional>(std::move(blob))), _zeGraphExt(zeGraphExt), _zeroInitStruct(zeroInitStruct), _compiler(compiler), @@ -30,8 +31,8 @@ PluginGraph::PluginGraph(const std::shared_ptr& zeGraphExt, initialize(config); } -size_t PluginGraph::export_blob(std::ostream& stream) const { - stream.write(reinterpret_cast(_blob.data()), _blob.size()); +size_t PluginGraph::export_blob(std::ostream& stream) { + stream.write(reinterpret_cast(_blob->get_ptr()), _blob->size()); if (!stream) { _logger.error("Write blob to stream failed. Blob is broken!"); @@ -40,21 +41,27 @@ size_t PluginGraph::export_blob(std::ostream& stream) const { if (_logger.level() >= ov::log::Level::INFO) { std::uint32_t result = 1171117u; - for (const uint8_t* it = _blob.data(); it != _blob.data() + _blob.size(); ++it) { + for (const uint8_t* it = reinterpret_cast(_blob->get_ptr()); it != reinterpret_cast(_blob->get_ptr()) + _blob->size(); ++it) { result = ((result << 7) + result) + static_cast(*it); } std::stringstream str; - str << "Blob size: " << _blob.size() << ", hash: " << std::hex << result; + str << "Blob size: " << _blob->size() << ", hash: " << std::hex << result; _logger.info(str.str().c_str()); } _logger.info("Write blob to stream successfully."); - return _blob.size(); + return _blob->size(); } std::vector PluginGraph::process_profiling_output(const std::vector& profData, const Config& config) const { - return _compiler->process_profiling_output(profData, _blob, config); + + // Need to fix increased memory usage below, ov::SharedBuffer won't permit us to get underlying shared buffer as it is private + // Only if we work with std::vector blobs, but then IGraph needs to have 2 declarations for the same blob + // Maybe if we templatize blob in IGraph to be either std::vector or std::shared_ptr? + std::vector blob(_blob->size()); + blob.assign(reinterpret_cast(_blob->get_ptr()), reinterpret_cast(_blob->get_ptr()) + _blob->size()); + return _compiler->process_profiling_output(profData, blob, config); } void PluginGraph::set_argument_value(uint32_t argi, const void* argv) const { diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp index 09c84ffda33a52..4044a141b3c343 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp @@ -389,8 +389,7 @@ ze_graph_handle_t ZeGraphExtWrappers::getGraphHandle(const std::vector& return graphHandle; } -template -ze_graph_handle_t ZeGraphExtWrappers::getGraphHandle(const std::shared_ptr& mmapNetwork) const { +ze_graph_handle_t ZeGraphExtWrappers::getGraphHandle(const std::shared_ptr& mmapNetwork) const { ze_graph_handle_t graphHandle; if (mmapNetwork->size() == 0) { diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index a8938627d56ce7..4b25473ba04e11 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -836,8 +836,12 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c if (compiler->getCompilerType() == ov::intel_npu::CompilerType::DRIVER) { if (auto mmap_buffer = dynamic_cast(stream.rdbuf())) { graph = compiler->parse(mmap_buffer->get_buffer(), localConfig); - goto GRAPH_PARSED; + } else { + auto graphSize = getFileSize(stream); + std::vector blob(graphSize); + graph = compiler->parse(std::make_shared>>>(reinterpret_cast(blob.data()), blob.size(), std::make_shared>(std::move(blob))), localConfig); } + goto GRAPH_PARSED; } { From 9d08a56b5c92757edb3789d4d65377bfb731cb6a Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Wed, 20 Nov 2024 16:49:10 +0200 Subject: [PATCH 12/34] Refactor changes for CIP & Drop `parse` function from `ICompilerAdapter`, so only `std::shared_ptr` parameters will be used --- .../include/intel_npu/common/igraph.hpp | 2 +- .../include/driver_compiler_adapter.hpp | 6 +----- .../include/plugin_compiler_adapter.hpp | 7 ++----- .../compiler_adapter/include/plugin_graph.hpp | 2 +- .../src/driver_compiler_adapter.cpp | 6 +++--- .../src/compiler_adapter/src/driver_graph.cpp | 4 ++-- .../src/plugin_compiler_adapter.cpp | 13 +++++++----- .../src/compiler_adapter/src/plugin_graph.cpp | 4 ++-- .../intel_npu/src/plugin/src/plugin.cpp | 20 ++++--------------- 9 files changed, 24 insertions(+), 40 deletions(-) diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index de3298170262df..c7887a504886aa 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -22,7 +22,7 @@ class IGraph : public std::enable_shared_from_this { IGraph(ze_graph_handle_t handle, NetworkMetadata metadata, const Config& config, - std::optional> blob); + std::optional> blobPtr); virtual size_t export_blob(std::ostream& stream) const = 0; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp index 7421bf2be1ff9e..b935c9b80f1bce 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp @@ -23,11 +23,7 @@ class DriverCompilerAdapter final : public ICompilerAdapter { std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; - std::shared_ptr parse(std::vector network, const Config& config) const override { - OPENVINO_THROW("CID should not parse from std::vector anymore!"); - } - - std::shared_ptr parse(const std::shared_ptr& mmapNetwork, const Config& config) const override; + std::shared_ptr parse(const std::shared_ptr& networkSO, const Config& config) const override; ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index b9705d12a649cf..3c663a71bbd746 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -10,6 +10,7 @@ #include "intel_npu/icompiler.hpp" #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_init.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "openvino/runtime/so_ptr.hpp" #include "ze_graph_ext_wrappers.hpp" @@ -21,11 +22,7 @@ class PluginCompilerAdapter final : public ICompilerAdapter { std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; - std::shared_ptr parse(std::vector network, const Config& config) const override; - - std::shared_ptr parse(const std::shared_ptr& mmapNetwork, const Config& config) const override { - OPENVINO_THROW("CIP needs a blob vector!"); - } + std::shared_ptr parse(const std::shared_ptr& networkSO, const Config& config) const override; ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp index 9c88ace1c29d23..0a6da92ab269bd 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp @@ -23,7 +23,7 @@ class PluginGraph final : public IGraph { const std::shared_ptr& zeroInitStruct, ze_graph_handle_t graphHandle, NetworkMetadata metadata, - std::vector blob, + const std::shared_ptr& blobSO, const Config& config); size_t export_blob(std::ostream& stream) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index 09e7cc3162d88e..4291e8174982e4 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -203,11 +203,11 @@ std::shared_ptr DriverCompilerAdapter::compile(const std::shared_ptr DriverCompilerAdapter::parse(const std::shared_ptr& mmapNetwork, const Config& config) const { +std::shared_ptr DriverCompilerAdapter::parse(const std::shared_ptr& networkSO, const Config& config) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse"); _logger.debug("parse start"); - ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(mmapNetwork); + ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(networkSO); _logger.debug("parse end"); OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta"); @@ -218,7 +218,7 @@ std::shared_ptr DriverCompilerAdapter::parse(const std::shared_ptr>(mmapNetwork)); + std::optional>(networkSO)); } ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr& model, diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index a92d63b00b87e2..73727b807ca1f7 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -15,8 +15,8 @@ DriverGraph::DriverGraph(const std::shared_ptr& zeGraphExt, ze_graph_handle_t graphHandle, NetworkMetadata metadata, const Config& config, - std::optional> blob) - : IGraph(graphHandle, std::move(metadata), config, std::move(blob)), + std::optional> blobPtr) + : IGraph(graphHandle, std::move(metadata), config, std::move(blobPtr)), _zeGraphExt(zeGraphExt), _zeroInitStruct(zeroInitStruct), _logger("DriverGraph", config.get()) { diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 72fab52d6cf895..883e3a2c16e04b 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -80,6 +80,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrcompile(model, config); + auto networkSO = std::make_shared>>>(reinterpret_cast(networkDesc.compiledNetwork.data()), networkDesc.compiledNetwork.size(), std::make_shared>(std::move(networkDesc.compiledNetwork))); _logger.debug("compile end"); ze_graph_handle_t graphHandle = nullptr; @@ -87,7 +88,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrgetGraphHandle(networkDesc.compiledNetwork); + graphHandle = _zeGraphExt->getGraphHandle(networkSO); } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. Only exports are available"); @@ -99,21 +100,23 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr PluginCompilerAdapter::parse(std::vector network, const Config& config) const { +std::shared_ptr PluginCompilerAdapter::parse(const std::shared_ptr& networkSO, const Config& config) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse"); _logger.debug("parse start"); + std::vector network(networkSO->size()); + network.assign(reinterpret_cast(networkSO->get_ptr()), reinterpret_cast(networkSO->get_ptr()) + networkSO->size()); auto networkMeta = _compiler->parse(network, config); _logger.debug("parse end"); ze_graph_handle_t graphHandle = nullptr; if (_zeGraphExt) { - graphHandle = _zeGraphExt->getGraphHandle(network); + graphHandle = _zeGraphExt->getGraphHandle(networkSO); } return std::make_shared(_zeGraphExt, @@ -121,7 +124,7 @@ std::shared_ptr PluginCompilerAdapter::parse(std::vector networ _zeroInitStruct, graphHandle, std::move(networkMeta), - std::move(network), + networkSO, config); } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index e08c0fac64719b..dcb5959c0000a0 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -16,9 +16,9 @@ PluginGraph::PluginGraph(const std::shared_ptr& zeGraphExt, const std::shared_ptr& zeroInitStruct, ze_graph_handle_t graphHandle, NetworkMetadata metadata, - std::vector blob, + std::unique_ptr blobPtr, const Config& config) - : IGraph(graphHandle, std::move(metadata), config, std::optional>(std::move(blob))), + : IGraph(graphHandle, std::move(metadata), config, std::optional>(std::move(blobPtr))), _zeGraphExt(zeGraphExt), _zeroInitStruct(zeroInitStruct), _compiler(compiler), diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 4b25473ba04e11..1d39925ca0fabd 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -833,30 +833,18 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c auto compiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), localConfig); std::shared_ptr graph; - if (compiler->getCompilerType() == ov::intel_npu::CompilerType::DRIVER) { - if (auto mmap_buffer = dynamic_cast(stream.rdbuf())) { - graph = compiler->parse(mmap_buffer->get_buffer(), localConfig); - } else { - auto graphSize = getFileSize(stream); - std::vector blob(graphSize); - graph = compiler->parse(std::make_shared>>>(reinterpret_cast(blob.data()), blob.size(), std::make_shared>(std::move(blob))), localConfig); - } - goto GRAPH_PARSED; - } - - { + if (auto mmap_buffer = dynamic_cast(stream.rdbuf())) { + graph = compiler->parse(mmap_buffer->get_buffer(), localConfig); + } else { auto graphSize = getFileSize(stream); - std::vector blob(graphSize); stream.read(reinterpret_cast(blob.data()), graphSize); if (!stream) { OPENVINO_THROW("Failed to read data from stream!"); } _logger.debug("Successfully read %zu bytes into blob.", graphSize); - - graph = compiler->parse(std::move(blob), localConfig); + graph = compiler->parse(std::make_shared>>>(reinterpret_cast(blob.data()), blob.size(), std::make_shared>(std::move(blob))), localConfig); } -GRAPH_PARSED: graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); const std::shared_ptr modelDummy = From 615090afe6169b369e849871365b9853109b828f Mon Sep 17 00:00:00 2001 From: Oleg Pipikin Date: Tue, 19 Nov 2024 16:29:34 +0000 Subject: [PATCH 13/34] Update plugin API to import model with mmap buffer --- .../dev_api/openvino/runtime/iplugin.hpp | 27 +++++++++++++++++++ src/inference/src/cache_manager.hpp | 1 + src/inference/src/dev/iplugin.cpp | 13 +++++++++ src/inference/src/dev/plugin.cpp | 11 ++++++++ src/inference/src/dev/plugin.hpp | 7 +++++ src/plugins/intel_cpu/src/plugin.h | 11 ++++++++ 6 files changed, 70 insertions(+) diff --git a/src/inference/dev_api/openvino/runtime/iplugin.hpp b/src/inference/dev_api/openvino/runtime/iplugin.hpp index a513af9ffa4502..58df9f1d92d640 100644 --- a/src/inference/dev_api/openvino/runtime/iplugin.hpp +++ b/src/inference/dev_api/openvino/runtime/iplugin.hpp @@ -185,6 +185,33 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this& context, const ov::AnyMap& properties) const = 0; + /** + * @brief Creates an compiled model from an previously exported model using plugin implementation + * and removes OpenVINO Runtime magic and plugin name + * @param model Reference to model output stream + * @param weights_buffer AlignedBuffer with cached model + * @param properties A ov::AnyMap of properties + * @return An Compiled model + */ + virtual std::shared_ptr import_model(std::istream& model, + std::shared_ptr model_buffer, + const ov::AnyMap& properties) const; + + /** + * @brief Creates an compiled model from an previously exported model using plugin implementation + * and removes OpenVINO Runtime magic and plugin name + * @param model Reference to model output stream + * @param weights_buffer AlignedBuffer with cached model + * @param context A pointer to plugin context derived from RemoteContext class used to + * execute the network + * @param properties A ov::AnyMap of properties + * @return An Compiled model + */ + virtual std::shared_ptr import_model(std::istream& model, + std::shared_ptr model_buffer, + const ov::SoPtr& context, + const ov::AnyMap& properties) const; + /** * @brief Queries a plugin about supported layers in model * @param model Model object to query. diff --git a/src/inference/src/cache_manager.hpp b/src/inference/src/cache_manager.hpp index eb5d92bed1ea39..1f6effa02b0d92 100644 --- a/src/inference/src/cache_manager.hpp +++ b/src/inference/src/cache_manager.hpp @@ -141,6 +141,7 @@ class FileStorageCacheManager final : public ICacheManager { auto mmap = ov::load_mmap_object(blob_file_name); auto shared_buffer = std::make_shared>>(mmap->data(), mmap->size(), mmap); +#if 0 OwningSharedStreamBuffer buf(shared_buffer); std::istream stream(&buf); reader(stream, shared_buffer); diff --git a/src/inference/src/dev/iplugin.cpp b/src/inference/src/dev/iplugin.cpp index d7dc6ae617eb05..e5f346b6353cfb 100644 --- a/src/inference/src/dev/iplugin.cpp +++ b/src/inference/src/dev/iplugin.cpp @@ -58,6 +58,19 @@ const std::string& ov::IPlugin::get_device_name() const { return m_plugin_name; } +std::shared_ptr ov::IPlugin::import_model(std::istream& model, + std::shared_ptr model_buffer, + const ov::AnyMap& properties) const{ + OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented"); +} + +std::shared_ptr ov::IPlugin::import_model(std::istream& model, + std::shared_ptr model_buffer, + const ov::SoPtr& context, + const ov::AnyMap& properties) const{ + OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented"); +} + void ov::IPlugin::set_core(const std::weak_ptr& core) { OPENVINO_ASSERT(!core.expired()); m_core = core; diff --git a/src/inference/src/dev/plugin.cpp b/src/inference/src/dev/plugin.cpp index 9c22b0ae7e77dc..dd7d0267744c9c 100644 --- a/src/inference/src/dev/plugin.cpp +++ b/src/inference/src/dev/plugin.cpp @@ -79,6 +79,17 @@ ov::SoPtr ov::Plugin::import_model(std::istream& model, OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, context, config), m_so}); } +ov::SoPtr ov::Plugin::import_model(std::istream& model, std::shared_ptr model_buffer, const ov::AnyMap& properties) const { + OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, properties), m_so}); +} + +ov::SoPtr ov::Plugin::import_model(std::istream& model, + std::shared_ptr model_buffer, + const ov::SoPtr& context, + const ov::AnyMap& config) const { + OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, context, config), m_so}); +} + ov::SoPtr ov::Plugin::create_context(const AnyMap& params) const { OV_PLUGIN_CALL_STATEMENT({ auto remote = m_ptr->create_context(params); diff --git a/src/inference/src/dev/plugin.hpp b/src/inference/src/dev/plugin.hpp index b6968adda5c695..1d3ed5c4234ff0 100644 --- a/src/inference/src/dev/plugin.hpp +++ b/src/inference/src/dev/plugin.hpp @@ -59,6 +59,13 @@ class Plugin { const ov::SoPtr& context, const ov::AnyMap& config) const; + SoPtr import_model(std::istream& model, std::shared_ptr model_buffer, const ov::AnyMap& properties) const; + + SoPtr import_model(std::istream& model, + std::shared_ptr model_buffer, + const ov::SoPtr& context, + const ov::AnyMap& config) const; + ov::SoPtr create_context(const AnyMap& params) const; ov::SoPtr get_default_context(const AnyMap& params) const; diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index b063b70dba1983..5371da052b3077 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -32,6 +32,17 @@ class Plugin : public ov::IPlugin { OPENVINO_THROW_NOT_IMPLEMENTED("import_model with RemoteContext is not supported by CPU plugin!"); }; + std::shared_ptr import_model(std::istream& model, + std::shared_ptr model_buffer, + const ov::AnyMap& properties) const override; + std::shared_ptr import_model(std::istream& model, + std::shared_ptr model_buffer, + const ov::SoPtr& context, + const ov::AnyMap& properties) const override { + OPENVINO_THROW_NOT_IMPLEMENTED( + "import_model with RemoteContext is not supported by CPU plugin!"); + }; + ov::SupportedOpsMap query_model(const std::shared_ptr& model, const ov::AnyMap& properties) const override; ov::SoPtr create_context(const ov::AnyMap& remote_properties) const override { From cb350a8bd568a0ab2a54ca02ed655cd7a4d37b40 Mon Sep 17 00:00:00 2001 From: Mircea-Aurelian Dan Date: Thu, 21 Nov 2024 22:43:08 +0200 Subject: [PATCH 14/34] Use new `import_model` with `model_buffer` API --- .../intel_npu/src/plugin/include/plugin.hpp | 9 +++++++++ .../intel_npu/src/plugin/src/plugin.cpp | 18 +++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/include/plugin.hpp b/src/plugins/intel_npu/src/plugin/include/plugin.hpp index b13be5000513ec..73db1dce1d8c5f 100644 --- a/src/plugins/intel_npu/src/plugin/include/plugin.hpp +++ b/src/plugins/intel_npu/src/plugin/include/plugin.hpp @@ -45,6 +45,15 @@ class Plugin : public ov::IPlugin { std::shared_ptr import_model(std::istream& stream, const ov::AnyMap& properties) const override; std::shared_ptr import_model(std::istream& stream, + std::shared_ptr model_buffer, + const ov::AnyMap& properties) const override; + + std::shared_ptr import_model(std::istream& stream, + const ov::SoPtr& context, + const ov::AnyMap& properties) const override; + + std::shared_ptr import_model(std::istream& stream, + std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const override; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 1d39925ca0fabd..2650ee45f501d9 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -788,6 +788,10 @@ ov::SoPtr Plugin::get_default_context(const ov::AnyMap&) con } std::shared_ptr Plugin::import_model(std::istream& stream, const ov::AnyMap& properties) const { + return import_model(stream, nullptr, properties); +} + +std::shared_ptr Plugin::import_model(std::istream& stream, std::shared_ptr model_buffer, const ov::AnyMap& properties) const { OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model"); OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs"); @@ -833,8 +837,8 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c auto compiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), localConfig); std::shared_ptr graph; - if (auto mmap_buffer = dynamic_cast(stream.rdbuf())) { - graph = compiler->parse(mmap_buffer->get_buffer(), localConfig); + if (model_buffer != nullptr) { + graph = compiler->parse(model_buffer, localConfig); } else { auto graphSize = getFileSize(stream); std::vector blob(graphSize); @@ -865,12 +869,20 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c std::shared_ptr Plugin::import_model(std::istream& stream, const ov::SoPtr& context, const ov::AnyMap& properties) const { + + return import_model(stream, nullptr, context, properties); +} + +std::shared_ptr Plugin::import_model(std::istream& stream, + std::shared_ptr model_buffer, + const ov::SoPtr& context, + const ov::AnyMap& properties) const { auto casted = std::dynamic_pointer_cast(context._ptr); if (casted == nullptr) { OPENVINO_THROW("Invalid remote context type. Can't cast to ov::intel_npu::RemoteContext type"); } - return import_model(stream, properties); + return import_model(stream, model_buffer, properties); } ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& model, From bd736a5c2a0e629129d56563ee80b371fb482f8b Mon Sep 17 00:00:00 2001 From: Mircea-Aurelian Dan Date: Fri, 22 Nov 2024 13:25:39 +0200 Subject: [PATCH 15/34] New fix for adding offset to `model_buffer` relative to end position of OV cache header --- .../dev_api/openvino/runtime/compilation_context.hpp | 7 ++++++- src/inference/src/dev/compilation_context.cpp | 6 +++++- src/inference/src/dev/core_impl.cpp | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/compilation_context.hpp b/src/inference/dev_api/openvino/runtime/compilation_context.hpp index 21bda52882ffec..03a11feaf5facc 100644 --- a/src/inference/dev_api/openvino/runtime/compilation_context.hpp +++ b/src/inference/dev_api/openvino/runtime/compilation_context.hpp @@ -32,9 +32,10 @@ class CompiledBlobHeader final { std::string m_ieVersion; std::string m_fileInfo; std::string m_runtimeInfo; + std::shared_ptr m_model_buffer; public: - CompiledBlobHeader(); + CompiledBlobHeader(std::shared_ptr model_buffer); CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo, const std::string& runtimeInfo); const std::string& get_openvino_version() const { @@ -49,6 +50,10 @@ class CompiledBlobHeader final { return m_runtimeInfo; } + const std::shared_ptr get_model_buffer() const { + return m_model_buffer; + } + friend std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header); friend std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header); diff --git a/src/inference/src/dev/compilation_context.cpp b/src/inference/src/dev/compilation_context.cpp index bf1a7197826f49..2729249fa85495 100644 --- a/src/inference/src/dev/compilation_context.cpp +++ b/src/inference/src/dev/compilation_context.cpp @@ -156,7 +156,7 @@ std::string ModelCache::compute_hash(const std::string& modelStr, ////////////////////////////////////////////////// -CompiledBlobHeader::CompiledBlobHeader() {} +CompiledBlobHeader::CompiledBlobHeader(std::shared_ptr model_buffer) : m_model_buffer(model_buffer) {} CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo, @@ -168,6 +168,10 @@ CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) { std::string xmlStr; std::getline(stream, xmlStr); + auto model_buffer = header.get_model_buffer(); + if (model_buffer != nullptr) { + model_buffer->updateOffset(stream.tellg()); + } pugi::xml_document document; pugi::xml_parse_result res = document.load_string(xmlStr.c_str()); diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index 0cad1840e5d1a8..845afe9719293c 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -1452,7 +1452,7 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( ov::itt::domains::LoadTime, "Core::load_model_from_cache::ReadStreamAndImport"); try { - ov::CompiledBlobHeader header; + ov::CompiledBlobHeader header(model_buffer); networkStream >> header; if (header.get_file_info() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) { // Original file is changed, don't use cache From a1ef94655a2080b2d340979e76964e14b86b516b Mon Sep 17 00:00:00 2001 From: Mircea-Aurelian Dan Date: Fri, 22 Nov 2024 18:03:39 +0200 Subject: [PATCH 16/34] Fix `std::vector` being moved after accesing its `.data()` and `.size()` functions --- .../src/compiler_adapter/src/plugin_compiler_adapter.cpp | 7 ++++--- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 883e3a2c16e04b..32c38598a9f0d8 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -80,7 +80,8 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrcompile(model, config); - auto networkSO = std::make_shared>>>(reinterpret_cast(networkDesc.compiledNetwork.data()), networkDesc.compiledNetwork.size(), std::make_shared>(std::move(networkDesc.compiledNetwork))); + auto networkSO = std::make_shared>(std::move(networkDesc.compiledNetwork)); + auto networkSOPtr = std::make_shared>>>(reinterpret_cast(networkSO->data()), networkSO->size(), networkSO); _logger.debug("compile end"); ze_graph_handle_t graphHandle = nullptr; @@ -88,7 +89,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrgetGraphHandle(networkSO); + graphHandle = _zeGraphExt->getGraphHandle(networkSOPtr); } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. Only exports are available"); @@ -100,7 +101,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr Plugin::import_model(std::istream& stream, s graph = compiler->parse(model_buffer, localConfig); } else { auto graphSize = getFileSize(stream); - std::vector blob(graphSize); - stream.read(reinterpret_cast(blob.data()), graphSize); + auto blobSO = std::make_shared>(graphSize); + stream.read(reinterpret_cast(blobSO->data()), graphSize); if (!stream) { OPENVINO_THROW("Failed to read data from stream!"); } _logger.debug("Successfully read %zu bytes into blob.", graphSize); - graph = compiler->parse(std::make_shared>>>(reinterpret_cast(blob.data()), blob.size(), std::make_shared>(std::move(blob))), localConfig); + graph = compiler->parse(std::make_shared>>>(reinterpret_cast(blobSO->data()), graphSize, blobSO), localConfig); } graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); From c2221ab5de493654bdbebf01061158da4db18d37 Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Mon, 25 Nov 2024 18:21:36 +0200 Subject: [PATCH 17/34] Refactor `getGraphHandle` to drop dependency to `ov::AlignedBuffer` --- .../include/ze_graph_ext_wrappers.hpp | 4 +-- .../src/driver_compiler_adapter.cpp | 2 +- .../src/plugin_compiler_adapter.cpp | 9 +++--- .../src/ze_graph_ext_wrappers.cpp | 31 +++---------------- 4 files changed, 11 insertions(+), 35 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp index 31c157c96321a5..f41a0071b5c5d9 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp @@ -37,9 +37,7 @@ class ZeGraphExtWrappers { const std::string& buildFlags, const uint32_t& flags) const; - ze_graph_handle_t getGraphHandle(const std::vector& network) const; - - ze_graph_handle_t getGraphHandle(const std::shared_ptr& mmapNetwork) const; + ze_graph_handle_t getGraphHandle(const uint8_t* data, size_t size) const; NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index 4291e8174982e4..4b3d1addef49e6 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -207,7 +207,7 @@ std::shared_ptr DriverCompilerAdapter::parse(const std::shared_ptrgetGraphHandle(networkSO); + ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(networkSO->get_ptr()), networkSO->size()); _logger.debug("parse end"); OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta"); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 32c38598a9f0d8..51bf744ee0c167 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -81,7 +81,6 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrcompile(model, config); auto networkSO = std::make_shared>(std::move(networkDesc.compiledNetwork)); - auto networkSOPtr = std::make_shared>>>(reinterpret_cast(networkSO->data()), networkSO->size(), networkSO); _logger.debug("compile end"); ze_graph_handle_t graphHandle = nullptr; @@ -89,13 +88,13 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrgetGraphHandle(networkSOPtr); + graphHandle = _zeGraphExt->getGraphHandle(networkSO->data(), networkSO->size()); } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. Only exports are available"); } } - + auto networkSOPtr = std::make_shared>>>(reinterpret_cast(networkSO->data()), networkSO->size(), networkSO); return std::make_shared(_zeGraphExt, _compiler, _zeroInitStruct, @@ -112,12 +111,14 @@ std::shared_ptr PluginCompilerAdapter::parse(const std::shared_ptr network(networkSO->size()); network.assign(reinterpret_cast(networkSO->get_ptr()), reinterpret_cast(networkSO->get_ptr()) + networkSO->size()); auto networkMeta = _compiler->parse(network, config); + network.clear(); + network.shrink_to_fit(); _logger.debug("parse end"); ze_graph_handle_t graphHandle = nullptr; if (_zeGraphExt) { - graphHandle = _zeGraphExt->getGraphHandle(networkSO); + graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(networkSO->get_ptr()), networkSO->size()); } return std::make_shared(_zeGraphExt, diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp index 4044a141b3c343..6c3011802ff925 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp @@ -365,18 +365,18 @@ ze_graph_handle_t ZeGraphExtWrappers::getGraphHandle(std::pair& network) const { +ze_graph_handle_t ZeGraphExtWrappers::getGraphHandle(const uint8_t* blobData, size_t blobSize) const { ze_graph_handle_t graphHandle; - if (network.empty()) { + if (blobData == nullptr || blobSize == 0) { OPENVINO_THROW("Empty blob"); } ze_graph_desc_t desc = {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, nullptr, ZE_GRAPH_FORMAT_NATIVE, - network.size(), - network.data(), + blobSize, + blobData, nullptr}; _logger.debug("getGraphHandle - perform pfnCreate"); @@ -389,29 +389,6 @@ ze_graph_handle_t ZeGraphExtWrappers::getGraphHandle(const std::vector& return graphHandle; } -ze_graph_handle_t ZeGraphExtWrappers::getGraphHandle(const std::shared_ptr& mmapNetwork) const { - ze_graph_handle_t graphHandle; - - if (mmapNetwork->size() == 0) { - OPENVINO_THROW("Empty blob"); - } - - ze_graph_desc_t desc = {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, - nullptr, - ZE_GRAPH_FORMAT_NATIVE, - mmapNetwork->size(), - reinterpret_cast(mmapNetwork->get_ptr()), - nullptr}; - - auto result = _zeroInitStruct->getGraphDdiTable().pfnCreate(_zeroInitStruct->getContext(), - _zeroInitStruct->getDevice(), - &desc, - &graphHandle); - THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnCreate", result, _zeroInitStruct->getGraphDdiTable()); - - return graphHandle; -} - /** * @brief Extracts the I/O metadata from Level Zero specific structures and converts them into OpenVINO specific * ones. From 8de684c1bef576b5c8979288d3efddc03659ba2a Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Tue, 26 Nov 2024 10:18:46 +0200 Subject: [PATCH 18/34] Refactor `import_model` new API to accept only either `std::istream` or `ov::AlignedBuffer` --- .../dev_api/openvino/runtime/iplugin.hpp | 6 +- src/inference/src/dev/iplugin.cpp | 6 +- src/inference/src/dev/plugin.cpp | 9 +-- src/inference/src/dev/plugin.hpp | 5 +- src/plugins/intel_cpu/src/plugin.cpp | 45 +++++++++++ src/plugins/intel_cpu/src/plugin.h | 6 +- src/plugins/intel_cpu/src/utils/serialize.cpp | 5 +- src/plugins/intel_cpu/src/utils/serialize.hpp | 4 +- .../intel_npu/src/plugin/include/plugin.hpp | 6 +- .../intel_npu/src/plugin/src/plugin.cpp | 79 ++++++++++++++----- 10 files changed, 122 insertions(+), 49 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/iplugin.hpp b/src/inference/dev_api/openvino/runtime/iplugin.hpp index 58df9f1d92d640..8796074297476a 100644 --- a/src/inference/dev_api/openvino/runtime/iplugin.hpp +++ b/src/inference/dev_api/openvino/runtime/iplugin.hpp @@ -193,8 +193,7 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this import_model(std::istream& model, - std::shared_ptr model_buffer, + virtual std::shared_ptr import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) const; /** @@ -207,8 +206,7 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this import_model(std::istream& model, - std::shared_ptr model_buffer, + virtual std::shared_ptr import_model(std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const; diff --git a/src/inference/src/dev/iplugin.cpp b/src/inference/src/dev/iplugin.cpp index e5f346b6353cfb..c4c7a3fb5608a4 100644 --- a/src/inference/src/dev/iplugin.cpp +++ b/src/inference/src/dev/iplugin.cpp @@ -58,14 +58,12 @@ const std::string& ov::IPlugin::get_device_name() const { return m_plugin_name; } -std::shared_ptr ov::IPlugin::import_model(std::istream& model, - std::shared_ptr model_buffer, +std::shared_ptr ov::IPlugin::import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) const{ OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented"); } -std::shared_ptr ov::IPlugin::import_model(std::istream& model, - std::shared_ptr model_buffer, +std::shared_ptr ov::IPlugin::import_model(std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const{ OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented"); diff --git a/src/inference/src/dev/plugin.cpp b/src/inference/src/dev/plugin.cpp index dd7d0267744c9c..7980f1dfd26d64 100644 --- a/src/inference/src/dev/plugin.cpp +++ b/src/inference/src/dev/plugin.cpp @@ -79,15 +79,14 @@ ov::SoPtr ov::Plugin::import_model(std::istream& model, OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, context, config), m_so}); } -ov::SoPtr ov::Plugin::import_model(std::istream& model, std::shared_ptr model_buffer, const ov::AnyMap& properties) const { - OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, properties), m_so}); +ov::SoPtr ov::Plugin::import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) const { + OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model_buffer, properties), m_so}); } -ov::SoPtr ov::Plugin::import_model(std::istream& model, - std::shared_ptr model_buffer, +ov::SoPtr ov::Plugin::import_model(std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& config) const { - OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, context, config), m_so}); + OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model_buffer, context, config), m_so}); } ov::SoPtr ov::Plugin::create_context(const AnyMap& params) const { diff --git a/src/inference/src/dev/plugin.hpp b/src/inference/src/dev/plugin.hpp index 1d3ed5c4234ff0..4ace7ea3e8016f 100644 --- a/src/inference/src/dev/plugin.hpp +++ b/src/inference/src/dev/plugin.hpp @@ -59,10 +59,9 @@ class Plugin { const ov::SoPtr& context, const ov::AnyMap& config) const; - SoPtr import_model(std::istream& model, std::shared_ptr model_buffer, const ov::AnyMap& properties) const; + SoPtr import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) const; - SoPtr import_model(std::istream& model, - std::shared_ptr model_buffer, + SoPtr import_model(std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& config) const; diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 6194438c928068..91b00168d8533f 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -574,6 +574,51 @@ std::shared_ptr Plugin::import_model(std::istream& model_str ModelDeserializer deserializer( model_stream, + nullptr, + [this](const std::shared_ptr& model, const std::shared_ptr& weights) { + return get_core()->read_model(model, weights); + }, + decrypt, decript_from_string); + + std::shared_ptr model; + deserializer >> model; + + Config conf = engConfig; + Config::ModelType modelType = getModelType(model); + + // check ov::loaded_from_cache property and erase it to avoid exception in readProperties. + auto _config = config; + const auto& it = _config.find(ov::loaded_from_cache.name()); + bool loaded_from_cache = false; + if (it != _config.end()) { + loaded_from_cache = it->second.as(); + _config.erase(it); + } + conf.readProperties(_config, modelType); + + // import config props from caching model + calculate_streams(conf, model, true); + auto compiled_model = std::make_shared(model, shared_from_this(), conf, loaded_from_cache); + return compiled_model; +} + + +std::shared_ptr Plugin::import_model(std::shared_ptr model_buffer, + const ov::AnyMap& config) const { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model"); + + CacheDecrypt decrypt{ codec_xor }; + bool decript_from_string = false; + if (config.count(ov::cache_encryption_callbacks.name())) { + auto encryption_callbacks = config.at(ov::cache_encryption_callbacks.name()).as(); + decrypt.m_decrypt_str = encryption_callbacks.decrypt; + decript_from_string = true; + } + + std::stringstream empty_model_stream(""); + + ModelDeserializer deserializer( + empty_model_stream, model_buffer, [this](const std::shared_ptr& model, const std::shared_ptr& weights) { return get_core()->read_model(model, weights); diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index 5371da052b3077..aecafd89995b5d 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -32,11 +32,9 @@ class Plugin : public ov::IPlugin { OPENVINO_THROW_NOT_IMPLEMENTED("import_model with RemoteContext is not supported by CPU plugin!"); }; - std::shared_ptr import_model(std::istream& model, - std::shared_ptr model_buffer, + std::shared_ptr import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::istream& model, - std::shared_ptr model_buffer, + std::shared_ptr import_model(std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const override { OPENVINO_THROW_NOT_IMPLEMENTED( diff --git a/src/plugins/intel_cpu/src/utils/serialize.cpp b/src/plugins/intel_cpu/src/utils/serialize.cpp index 177cc817b8b3ab..1950c0043285ea 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.cpp +++ b/src/plugins/intel_cpu/src/utils/serialize.cpp @@ -59,8 +59,7 @@ void ModelDeserializer::operator>>(std::shared_ptr& model) { } } -void ModelDeserializer::process_mmap(std::shared_ptr& model, - const std::shared_ptr& mmemory) { +void ModelDeserializer::process_mmap(const std::shared_ptr& mmemory) { // Note: Don't use seekg with mmaped stream. This may affect the performance of some models. // Get file size before seek content. // Blob from cache may have other header, so need to skip this. @@ -117,7 +116,7 @@ void ModelDeserializer::process_mmap(std::shared_ptr& model, std::shared_ptr model_buf = std::make_shared>>(&((*xml_buff)[0]), hdr.model_size, xml_buff); - model = m_model_builder(model_buf, weights_buf); + auto model = m_model_builder(model_buf, weights_buf); // Set Info pugi::xml_node root = xml_in_out_doc.child("cnndata"); diff --git a/src/plugins/intel_cpu/src/utils/serialize.hpp b/src/plugins/intel_cpu/src/utils/serialize.hpp index f88acf57921c50..3d4ac705d8556e 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.hpp +++ b/src/plugins/intel_cpu/src/utils/serialize.hpp @@ -33,7 +33,7 @@ class ModelDeserializer { const std::shared_ptr&)> ModelBuilder; - ModelDeserializer(std::istream& model, + ModelDeserializer(std::istream& model_stream, std::shared_ptr model_buffer, ModelBuilder fn, const CacheDecrypt& encrypt_fn, @@ -46,7 +46,7 @@ class ModelDeserializer { protected: static void set_info(pugi::xml_node& root, std::shared_ptr& model); - void process_mmap(std::shared_ptr& model, const std::shared_ptr& memory); + void process_mmap(const std::shared_ptr& memory); void process_stream(std::shared_ptr& model); diff --git a/src/plugins/intel_npu/src/plugin/include/plugin.hpp b/src/plugins/intel_npu/src/plugin/include/plugin.hpp index 73db1dce1d8c5f..85c5df3afc2cf1 100644 --- a/src/plugins/intel_npu/src/plugin/include/plugin.hpp +++ b/src/plugins/intel_npu/src/plugin/include/plugin.hpp @@ -44,16 +44,14 @@ class Plugin : public ov::IPlugin { std::shared_ptr import_model(std::istream& stream, const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::istream& stream, - std::shared_ptr model_buffer, + std::shared_ptr import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) const override; std::shared_ptr import_model(std::istream& stream, const ov::SoPtr& context, const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::istream& stream, - std::shared_ptr model_buffer, + std::shared_ptr import_model(std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const override; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 11203be27f04b5..ee064b76ecdfd3 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -788,10 +788,6 @@ ov::SoPtr Plugin::get_default_context(const ov::AnyMap&) con } std::shared_ptr Plugin::import_model(std::istream& stream, const ov::AnyMap& properties) const { - return import_model(stream, nullptr, properties); -} - -std::shared_ptr Plugin::import_model(std::istream& stream, std::shared_ptr model_buffer, const ov::AnyMap& properties) const { OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model"); OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs"); @@ -836,19 +832,59 @@ std::shared_ptr Plugin::import_model(std::istream& stream, s CompilerAdapterFactory compilerAdapterFactory; auto compiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), localConfig); - std::shared_ptr graph; - if (model_buffer != nullptr) { - graph = compiler->parse(model_buffer, localConfig); - } else { - auto graphSize = getFileSize(stream); - auto blobSO = std::make_shared>(graphSize); - stream.read(reinterpret_cast(blobSO->data()), graphSize); - if (!stream) { - OPENVINO_THROW("Failed to read data from stream!"); - } - _logger.debug("Successfully read %zu bytes into blob.", graphSize); - graph = compiler->parse(std::make_shared>>>(reinterpret_cast(blobSO->data()), graphSize, blobSO), localConfig); + auto graphSize = getFileSize(stream); + auto blobSO = std::make_shared>(graphSize); + stream.read(reinterpret_cast(blobSO->data()), graphSize); + if (!stream) { + OPENVINO_THROW("Failed to read data from stream!"); } + _logger.debug("Successfully read %zu bytes into blob.", graphSize); + + auto graph = compiler->parse(std::make_shared>>>(reinterpret_cast(blobSO->data()), graphSize, blobSO), localConfig); + graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); + + const std::shared_ptr modelDummy = + create_dummy_model(graph->get_metadata().inputs, graph->get_metadata().outputs); + + compiledModel = std::make_shared(modelDummy, shared_from_this(), device, graph, localConfig); + } catch (const std::exception& ex) { + OPENVINO_THROW("Can't import network: ", ex.what()); + } catch (...) { + OPENVINO_THROW("NPU import_model got unexpected exception from CompiledModel"); + } + + OV_ITT_TASK_SKIP(PLUGIN_IMPORT_MODEL); + + return compiledModel; +} + +std::shared_ptr Plugin::import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) const { + OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model"); + OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs"); + + const std::map propertiesMap = any_copy(properties); + auto localConfig = merge_configs(_globalConfig, propertiesMap, OptionMode::RunTime); + _logger.setLevel(localConfig.get()); + const auto platform = _backends->getCompilationPlatform(localConfig.get(), localConfig.get()); + localConfig.update({{ov::intel_npu::platform.name(), platform}}); + auto device = _backends->getDevice(localConfig.get()); + + set_batch_config(_backends->isBatchingSupported(), localConfig); + + const auto loadedFromCache = localConfig.get(); + if (!loadedFromCache) { + _logger.warning( + "The usage of a compiled model can lead to undefined behavior. Please use OpenVINO IR instead!"); + } + + OV_ITT_TASK_NEXT(PLUGIN_IMPORT_MODEL, "parse"); + + std::shared_ptr compiledModel; + + try { + auto compiler = getCompiler(localConfig); + + auto graph = compiler->parse(model_buffer, localConfig); graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); const std::shared_ptr modelDummy = @@ -869,12 +905,15 @@ std::shared_ptr Plugin::import_model(std::istream& stream, s std::shared_ptr Plugin::import_model(std::istream& stream, const ov::SoPtr& context, const ov::AnyMap& properties) const { + auto casted = std::dynamic_pointer_cast(context._ptr); + if (casted == nullptr) { + OPENVINO_THROW("Invalid remote context type. Can't cast to ov::intel_npu::RemoteContext type"); + } - return import_model(stream, nullptr, context, properties); + return import_model(stream, context, properties); } -std::shared_ptr Plugin::import_model(std::istream& stream, - std::shared_ptr model_buffer, +std::shared_ptr Plugin::import_model(std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const { auto casted = std::dynamic_pointer_cast(context._ptr); @@ -882,7 +921,7 @@ std::shared_ptr Plugin::import_model(std::istream& stream, OPENVINO_THROW("Invalid remote context type. Can't cast to ov::intel_npu::RemoteContext type"); } - return import_model(stream, model_buffer, properties); + return import_model(model_buffer, properties); } ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& model, From 28f0d0705b95eaa53f2ca4f2f1e3308155eaba14 Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Tue, 26 Nov 2024 15:54:48 +0200 Subject: [PATCH 19/34] Re-add `DriverGraph::release_blob` method and adapt to `ov::AlignedBuffer` (no release for mmap shared object) --- .../include/driver_compiler_adapter.hpp | 2 +- .../compiler_adapter/include/driver_graph.hpp | 6 +++++ .../include/plugin_compiler_adapter.hpp | 2 +- .../compiler_adapter/include/plugin_graph.hpp | 2 +- .../src/driver_compiler_adapter.cpp | 6 ++--- .../src/compiler_adapter/src/driver_graph.cpp | 23 ++++++++----------- .../src/plugin_compiler_adapter.cpp | 10 ++++---- .../intel_npu/src/plugin/src/plugin.cpp | 3 ++- 8 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp index b935c9b80f1bce..bb916ae0e8a4e8 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp @@ -23,7 +23,7 @@ class DriverCompilerAdapter final : public ICompilerAdapter { std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; - std::shared_ptr parse(const std::shared_ptr& networkSO, const Config& config) const override; + std::shared_ptr parse(std::shared_ptr networkSOPtr, const Config& config) const override; ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp index bca96067dac6b1..6bfc9ac04c8b0e 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp @@ -36,10 +36,16 @@ class DriverGraph final : public IGraph { ~DriverGraph() override; private: + bool release_blob(const Config& config); + std::shared_ptr _zeGraphExt; std::shared_ptr _zeroInitStruct; Logger _logger; + + // In the case of the import path, the blob is released after graph initialization so it can not be any longer + // exported + bool _blobIsReleased = false; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index 3c663a71bbd746..f27c02fac54296 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -22,7 +22,7 @@ class PluginCompilerAdapter final : public ICompilerAdapter { std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; - std::shared_ptr parse(const std::shared_ptr& networkSO, const Config& config) const override; + std::shared_ptr parse(std::shared_ptr networkSOPtr, const Config& config) const override; ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp index 0a6da92ab269bd..1ddc4a81ec7267 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp @@ -23,7 +23,7 @@ class PluginGraph final : public IGraph { const std::shared_ptr& zeroInitStruct, ze_graph_handle_t graphHandle, NetworkMetadata metadata, - const std::shared_ptr& blobSO, + std::shared_ptr blobSOPtr, const Config& config); size_t export_blob(std::ostream& stream) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index 4b3d1addef49e6..2679a87927c3bd 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -203,11 +203,11 @@ std::shared_ptr DriverCompilerAdapter::compile(const std::shared_ptr DriverCompilerAdapter::parse(const std::shared_ptr& networkSO, const Config& config) const { +std::shared_ptr DriverCompilerAdapter::parse(std::shared_ptr networkSOPtr, const Config& config) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse"); _logger.debug("parse start"); - ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(networkSO->get_ptr()), networkSO->size()); + ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(networkSOPtr->get_ptr()), networkSOPtr->size()); _logger.debug("parse end"); OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta"); @@ -218,7 +218,7 @@ std::shared_ptr DriverCompilerAdapter::parse(const std::shared_ptr>(networkSO)); + std::optional>(std::move(networkSOPtr))); } ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr& model, diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index 73727b807ca1f7..02a63316254d3e 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -15,8 +15,8 @@ DriverGraph::DriverGraph(const std::shared_ptr& zeGraphExt, ze_graph_handle_t graphHandle, NetworkMetadata metadata, const Config& config, - std::optional> blobPtr) - : IGraph(graphHandle, std::move(metadata), config, std::move(blobPtr)), + std::optional> blob) + : IGraph(graphHandle, std::move(metadata), config, std::move(blob)), _zeGraphExt(zeGraphExt), _zeroInitStruct(zeroInitStruct), _logger("DriverGraph", config.get()) { @@ -32,15 +32,13 @@ DriverGraph::DriverGraph(const std::shared_ptr& zeGraphExt, initialize(config); } -size_t DriverGraph::export_blob(std::ostream& stream) { - if (_blob.get() == nullptr) { - const uint8_t* blobPtr = nullptr; - size_t blobSize = -1; - std::shared_ptr> blob; +size_t DriverGraph::export_blob(std::ostream& stream) const { + const uint8_t* blobPtr = nullptr; + size_t blobSize; + std::vector blob; - _zeGraphExt->getGraphBinary(_handle, *blob, blobPtr, blobSize); - _blob = std::make_shared>>>(reinterpret_cast(const_cast(blobPtr)), blobSize, blob); - } + _zeGraphExt->getGraphBinary(_handle, *blob, blobPtr, blobSize); + _blob = std::make_shared>>>(reinterpret_cast(const_cast(blobPtr)), blobSize, blob); stream.write(reinterpret_cast(_blob->get_ptr()), _blob->size()); @@ -60,7 +58,7 @@ size_t DriverGraph::export_blob(std::ostream& stream) { _logger.info(str.str().c_str()); } _logger.info("Write blob to stream successfully."); - return _blob->size(); + return blobSize; } std::vector DriverGraph::process_profiling_output(const std::vector& profData, @@ -121,7 +119,6 @@ void DriverGraph::initialize(const Config& config) { _zeGraphExt->initializeGraph(_handle, config); _logger.debug("Graph initialize finish"); -<<<<<<< HEAD // We are allowed to release the original blob because weights were loaded in NPU memory during // _zeGraphExt->initializeGraph(). The driver will not access the original blob from this moment on, so we are @@ -137,8 +134,6 @@ void DriverGraph::initialize(const Config& config) { _last_submitted_event.resize(number_of_command_lists); } -======= ->>>>>>> 25b5c05976 (Keep `shared_ptr` of blob in IGraph to fix `export_model` for import scenario) } DriverGraph::~DriverGraph() { diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 51bf744ee0c167..95e7edab03cb84 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -104,12 +104,12 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr PluginCompilerAdapter::parse(const std::shared_ptr& networkSO, const Config& config) const { +std::shared_ptr PluginCompilerAdapter::parse(std::shared_ptr networkSOPtr, const Config& config) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse"); _logger.debug("parse start"); - std::vector network(networkSO->size()); - network.assign(reinterpret_cast(networkSO->get_ptr()), reinterpret_cast(networkSO->get_ptr()) + networkSO->size()); + std::vector network(networkSOPtr->size()); + network.assign(reinterpret_cast(networkSOPtr->get_ptr()), reinterpret_cast(networkSOPtr->get_ptr()) + networkSOPtr->size()); auto networkMeta = _compiler->parse(network, config); network.clear(); network.shrink_to_fit(); @@ -118,7 +118,7 @@ std::shared_ptr PluginCompilerAdapter::parse(const std::shared_ptrgetGraphHandle(reinterpret_cast(networkSO->get_ptr()), networkSO->size()); + graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(networkSOPtr->get_ptr()), networkSOPtr->size()); } return std::make_shared(_zeGraphExt, @@ -126,7 +126,7 @@ std::shared_ptr PluginCompilerAdapter::parse(const std::shared_ptr Plugin::import_model(std::istream& stream, c } _logger.debug("Successfully read %zu bytes into blob.", graphSize); - auto graph = compiler->parse(std::make_shared>>>(reinterpret_cast(blobSO->data()), graphSize, blobSO), localConfig); + auto blobSOPtr = std::make_shared>>>(reinterpret_cast(blobSO->data()), graphSize, blobSO); + auto graph = compiler->parse(std::move(blobSOPtr), localConfig); graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); const std::shared_ptr modelDummy = From f537364195c46acaf46d773d952fbc17177ef6bf Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Wed, 27 Nov 2024 11:56:07 +0200 Subject: [PATCH 20/34] Code clean-up --- .../intel_npu/src/common/include/intel_npu/common/igraph.hpp | 2 +- .../intel_npu/src/compiler_adapter/include/driver_graph.hpp | 1 - .../src/compiler_adapter/include/plugin_compiler_adapter.hpp | 1 - .../src/compiler_adapter/include/ze_graph_ext_wrappers.hpp | 2 -- src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp | 1 - 5 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index c7887a504886aa..3f2373ed7f616b 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -12,8 +12,8 @@ #include "intel_npu/utils/zero/zero_init.hpp" #include "intel_npu/utils/zero/zero_utils.hpp" #include "intel_npu/utils/zero/zero_wrappers.hpp" -#include "openvino/runtime/aligned_buffer.hpp" #include "openvino/runtime/profiling_info.hpp" +#include "openvino/runtime/shared_buffer.hpp" namespace intel_npu { diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp index 6bfc9ac04c8b0e..9344e59d45a70e 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp @@ -10,7 +10,6 @@ #include "intel_npu/common/igraph.hpp" #include "intel_npu/utils/zero/zero_init.hpp" -#include "openvino/runtime/shared_buffer.hpp" #include "ze_graph_ext_wrappers.hpp" namespace intel_npu { diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index f27c02fac54296..3271c189bb8e72 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -10,7 +10,6 @@ #include "intel_npu/icompiler.hpp" #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_init.hpp" -#include "openvino/runtime/shared_buffer.hpp" #include "openvino/runtime/so_ptr.hpp" #include "ze_graph_ext_wrappers.hpp" diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp index f41a0071b5c5d9..23ee3f789941eb 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp @@ -15,8 +15,6 @@ #include "intel_npu/utils/zero/zero_init.hpp" #include "intel_npu/utils/zero/zero_types.hpp" -#include "openvino/runtime/aligned_buffer.hpp" - namespace intel_npu { using SerializedIR = std::pair>; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index dcb5959c0000a0..07e79fce3b9cbd 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -7,7 +7,6 @@ #include "intel_npu/config/common.hpp" #include "intel_npu/config/runtime.hpp" #include "intel_npu/utils/zero/zero_api.hpp" -#include "openvino/runtime/shared_buffer.hpp" namespace intel_npu { From f3e29dee79af59fbf459b949cc669aa92a872e07 Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Wed, 27 Nov 2024 18:24:46 +0200 Subject: [PATCH 21/34] Revert changes in new `import_model` API, so `NPU` plugin will have `unusedStream` when `model_buffer` is given --- .../dev_api/openvino/runtime/iplugin.hpp | 6 ++- src/inference/src/dev/iplugin.cpp | 6 ++- src/inference/src/dev/plugin.cpp | 9 ++-- src/inference/src/dev/plugin.hpp | 5 ++- src/plugins/intel_cpu/src/plugin.cpp | 45 ------------------- src/plugins/intel_cpu/src/plugin.h | 6 ++- src/plugins/intel_cpu/src/utils/serialize.cpp | 5 ++- src/plugins/intel_cpu/src/utils/serialize.hpp | 4 +- 8 files changed, 25 insertions(+), 61 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/iplugin.hpp b/src/inference/dev_api/openvino/runtime/iplugin.hpp index 8796074297476a..58df9f1d92d640 100644 --- a/src/inference/dev_api/openvino/runtime/iplugin.hpp +++ b/src/inference/dev_api/openvino/runtime/iplugin.hpp @@ -193,7 +193,8 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this import_model(std::shared_ptr model_buffer, + virtual std::shared_ptr import_model(std::istream& model, + std::shared_ptr model_buffer, const ov::AnyMap& properties) const; /** @@ -206,7 +207,8 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this import_model(std::shared_ptr model_buffer, + virtual std::shared_ptr import_model(std::istream& model, + std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const; diff --git a/src/inference/src/dev/iplugin.cpp b/src/inference/src/dev/iplugin.cpp index c4c7a3fb5608a4..e5f346b6353cfb 100644 --- a/src/inference/src/dev/iplugin.cpp +++ b/src/inference/src/dev/iplugin.cpp @@ -58,12 +58,14 @@ const std::string& ov::IPlugin::get_device_name() const { return m_plugin_name; } -std::shared_ptr ov::IPlugin::import_model(std::shared_ptr model_buffer, +std::shared_ptr ov::IPlugin::import_model(std::istream& model, + std::shared_ptr model_buffer, const ov::AnyMap& properties) const{ OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented"); } -std::shared_ptr ov::IPlugin::import_model(std::shared_ptr model_buffer, +std::shared_ptr ov::IPlugin::import_model(std::istream& model, + std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const{ OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented"); diff --git a/src/inference/src/dev/plugin.cpp b/src/inference/src/dev/plugin.cpp index 7980f1dfd26d64..dd7d0267744c9c 100644 --- a/src/inference/src/dev/plugin.cpp +++ b/src/inference/src/dev/plugin.cpp @@ -79,14 +79,15 @@ ov::SoPtr ov::Plugin::import_model(std::istream& model, OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, context, config), m_so}); } -ov::SoPtr ov::Plugin::import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) const { - OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model_buffer, properties), m_so}); +ov::SoPtr ov::Plugin::import_model(std::istream& model, std::shared_ptr model_buffer, const ov::AnyMap& properties) const { + OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, properties), m_so}); } -ov::SoPtr ov::Plugin::import_model(std::shared_ptr model_buffer, +ov::SoPtr ov::Plugin::import_model(std::istream& model, + std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& config) const { - OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model_buffer, context, config), m_so}); + OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, context, config), m_so}); } ov::SoPtr ov::Plugin::create_context(const AnyMap& params) const { diff --git a/src/inference/src/dev/plugin.hpp b/src/inference/src/dev/plugin.hpp index 4ace7ea3e8016f..1d3ed5c4234ff0 100644 --- a/src/inference/src/dev/plugin.hpp +++ b/src/inference/src/dev/plugin.hpp @@ -59,9 +59,10 @@ class Plugin { const ov::SoPtr& context, const ov::AnyMap& config) const; - SoPtr import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) const; + SoPtr import_model(std::istream& model, std::shared_ptr model_buffer, const ov::AnyMap& properties) const; - SoPtr import_model(std::shared_ptr model_buffer, + SoPtr import_model(std::istream& model, + std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& config) const; diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 91b00168d8533f..6194438c928068 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -574,51 +574,6 @@ std::shared_ptr Plugin::import_model(std::istream& model_str ModelDeserializer deserializer( model_stream, - nullptr, - [this](const std::shared_ptr& model, const std::shared_ptr& weights) { - return get_core()->read_model(model, weights); - }, - decrypt, decript_from_string); - - std::shared_ptr model; - deserializer >> model; - - Config conf = engConfig; - Config::ModelType modelType = getModelType(model); - - // check ov::loaded_from_cache property and erase it to avoid exception in readProperties. - auto _config = config; - const auto& it = _config.find(ov::loaded_from_cache.name()); - bool loaded_from_cache = false; - if (it != _config.end()) { - loaded_from_cache = it->second.as(); - _config.erase(it); - } - conf.readProperties(_config, modelType); - - // import config props from caching model - calculate_streams(conf, model, true); - auto compiled_model = std::make_shared(model, shared_from_this(), conf, loaded_from_cache); - return compiled_model; -} - - -std::shared_ptr Plugin::import_model(std::shared_ptr model_buffer, - const ov::AnyMap& config) const { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model"); - - CacheDecrypt decrypt{ codec_xor }; - bool decript_from_string = false; - if (config.count(ov::cache_encryption_callbacks.name())) { - auto encryption_callbacks = config.at(ov::cache_encryption_callbacks.name()).as(); - decrypt.m_decrypt_str = encryption_callbacks.decrypt; - decript_from_string = true; - } - - std::stringstream empty_model_stream(""); - - ModelDeserializer deserializer( - empty_model_stream, model_buffer, [this](const std::shared_ptr& model, const std::shared_ptr& weights) { return get_core()->read_model(model, weights); diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index aecafd89995b5d..5371da052b3077 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -32,9 +32,11 @@ class Plugin : public ov::IPlugin { OPENVINO_THROW_NOT_IMPLEMENTED("import_model with RemoteContext is not supported by CPU plugin!"); }; - std::shared_ptr import_model(std::shared_ptr model_buffer, + std::shared_ptr import_model(std::istream& model, + std::shared_ptr model_buffer, const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::shared_ptr model_buffer, + std::shared_ptr import_model(std::istream& model, + std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const override { OPENVINO_THROW_NOT_IMPLEMENTED( diff --git a/src/plugins/intel_cpu/src/utils/serialize.cpp b/src/plugins/intel_cpu/src/utils/serialize.cpp index 1950c0043285ea..177cc817b8b3ab 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.cpp +++ b/src/plugins/intel_cpu/src/utils/serialize.cpp @@ -59,7 +59,8 @@ void ModelDeserializer::operator>>(std::shared_ptr& model) { } } -void ModelDeserializer::process_mmap(const std::shared_ptr& mmemory) { +void ModelDeserializer::process_mmap(std::shared_ptr& model, + const std::shared_ptr& mmemory) { // Note: Don't use seekg with mmaped stream. This may affect the performance of some models. // Get file size before seek content. // Blob from cache may have other header, so need to skip this. @@ -116,7 +117,7 @@ void ModelDeserializer::process_mmap(const std::shared_ptr& m std::shared_ptr model_buf = std::make_shared>>(&((*xml_buff)[0]), hdr.model_size, xml_buff); - auto model = m_model_builder(model_buf, weights_buf); + model = m_model_builder(model_buf, weights_buf); // Set Info pugi::xml_node root = xml_in_out_doc.child("cnndata"); diff --git a/src/plugins/intel_cpu/src/utils/serialize.hpp b/src/plugins/intel_cpu/src/utils/serialize.hpp index 3d4ac705d8556e..f88acf57921c50 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.hpp +++ b/src/plugins/intel_cpu/src/utils/serialize.hpp @@ -33,7 +33,7 @@ class ModelDeserializer { const std::shared_ptr&)> ModelBuilder; - ModelDeserializer(std::istream& model_stream, + ModelDeserializer(std::istream& model, std::shared_ptr model_buffer, ModelBuilder fn, const CacheDecrypt& encrypt_fn, @@ -46,7 +46,7 @@ class ModelDeserializer { protected: static void set_info(pugi::xml_node& root, std::shared_ptr& model); - void process_mmap(const std::shared_ptr& memory); + void process_mmap(std::shared_ptr& model, const std::shared_ptr& memory); void process_stream(std::shared_ptr& model); From 88e80a3a23c4b31a02d7b86eedb0362552ef1ad0 Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Wed, 27 Nov 2024 18:29:41 +0200 Subject: [PATCH 22/34] Add `BlobContainer` class and derivates for each `std::vector` and `std::shared_ptr` blob types --- .../include/intel_npu/common/igraph.hpp | 57 +++++++++++++++++++ .../include/driver_compiler_adapter.hpp | 2 +- .../compiler_adapter/include/driver_graph.hpp | 2 +- .../include/plugin_compiler_adapter.hpp | 2 +- .../compiler_adapter/include/plugin_graph.hpp | 2 +- .../src/driver_compiler_adapter.cpp | 6 +- .../src/compiler_adapter/src/driver_graph.cpp | 23 ++++++++ .../src/plugin_compiler_adapter.cpp | 17 +++--- .../src/compiler_adapter/src/plugin_graph.cpp | 6 +- .../intel_npu/src/plugin/include/plugin.hpp | 8 ++- .../intel_npu/src/plugin/src/plugin.cpp | 22 ++++--- 11 files changed, 114 insertions(+), 33 deletions(-) diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index 3f2373ed7f616b..677750c1b714dd 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -17,6 +17,63 @@ namespace intel_npu { +class BlobContainer { +public: + virtual void* get_ptr() { + OPENVINO_THROW("const BlobContainer::get_ptr() method is not implemented!"); + } + + virtual size_t size() const { + OPENVINO_THROW("BlobContainer::size() method is not implemented!"); + } + + virtual bool release_from_memory() { + OPENVINO_THROW("BlobContainer::release_from_memory() method is not implemented!"); + } +}; + +class BlobContainerVector : public BlobContainer { +public: + BlobContainerVector(std::vector blob) : _ownershipBlob(std::move(blob)) {} + + void* get_ptr() override { + return reinterpret_cast(_ownershipBlob.data()); + } + + size_t size() const override { + return _ownershipBlob.size(); + } + + bool release_from_memory() override { + _ownershipBlob.clear(); + _ownershipBlob.shrink_to_fit(); + return true; + } + +private: + std::vector _ownershipBlob; +}; + +class BlobContainerAlignedBuffer : public BlobContainer { +public: + BlobContainerAlignedBuffer(const std::shared_ptr& blobSO) : _ownershipBlob(blobSO) {} + + void* get_ptr() override { + return _ownershipBlob->get_ptr(); + } + + size_t size() const override { + return _ownershipBlob->size(); + } + + bool release_from_memory() override { + return false; + } + +private: + std::shared_ptr _ownershipBlob; +}; + class IGraph : public std::enable_shared_from_this { public: IGraph(ze_graph_handle_t handle, diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp index bb916ae0e8a4e8..3a2af03df8cead 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp @@ -23,7 +23,7 @@ class DriverCompilerAdapter final : public ICompilerAdapter { std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; - std::shared_ptr parse(std::shared_ptr networkSOPtr, const Config& config) const override; + std::shared_ptr parse(std::unique_ptr blobPtr, const Config& config) const override; ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp index 9344e59d45a70e..b81b8b8679aca5 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp @@ -21,7 +21,7 @@ class DriverGraph final : public IGraph { ze_graph_handle_t graphHandle, NetworkMetadata metadata, const Config& config, - std::optional> blob); + std::optional> blob); size_t export_blob(std::ostream& stream) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index 3271c189bb8e72..c60b80bcfaa314 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -21,7 +21,7 @@ class PluginCompilerAdapter final : public ICompilerAdapter { std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; - std::shared_ptr parse(std::shared_ptr networkSOPtr, const Config& config) const override; + std::shared_ptr parse(std::unique_ptr blobPtr, const Config& config) const override; ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp index 1ddc4a81ec7267..61d4a6ed866529 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp @@ -23,7 +23,7 @@ class PluginGraph final : public IGraph { const std::shared_ptr& zeroInitStruct, ze_graph_handle_t graphHandle, NetworkMetadata metadata, - std::shared_ptr blobSOPtr, + std::unique_ptr blobPtr, const Config& config); size_t export_blob(std::ostream& stream) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index 2679a87927c3bd..696c3e8b59a32d 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -203,11 +203,11 @@ std::shared_ptr DriverCompilerAdapter::compile(const std::shared_ptr DriverCompilerAdapter::parse(std::shared_ptr networkSOPtr, const Config& config) const { +std::shared_ptr DriverCompilerAdapter::parse(std::unique_ptr blobPtr, const Config& config) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse"); _logger.debug("parse start"); - ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(networkSOPtr->get_ptr()), networkSOPtr->size()); + ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); _logger.debug("parse end"); OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta"); @@ -218,7 +218,7 @@ std::shared_ptr DriverCompilerAdapter::parse(std::shared_ptr>(std::move(networkSOPtr))); + std::optional>(std::move(blobPtr))); } ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr& model, diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index 02a63316254d3e..73d26eb0ad851f 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -136,6 +136,29 @@ void DriverGraph::initialize(const Config& config) { } } +bool DriverGraph::release_blob(const Config& config) { + if (_blob == nullptr || _zeroInitStruct->getGraphDdiTable().version() < ZE_GRAPH_EXT_VERSION_1_8 || + config.get()) { + return false; + } + + ze_graph_properties_2_t properties = {}; + properties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES; + _zeroInitStruct->getGraphDdiTable().pfnGetProperties2(_handle, &properties); + + if (~properties.initStageRequired & ZE_GRAPH_STAGE_INITIALIZE) { + return false; + } + + if(!_blob->release_from_memory()) { + return false; + } + + _logger.debug("Blob is released"); + + return true; +}; + DriverGraph::~DriverGraph() { if (_handle != nullptr) { auto result = _zeGraphExt->destroyGraph(_handle); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 95e7edab03cb84..d30fc613ecb4c6 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -80,7 +80,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrcompile(model, config); - auto networkSO = std::make_shared>(std::move(networkDesc.compiledNetwork)); + auto blobPtr = std::make_unique(std::move(networkDesc.compiledNetwork)); _logger.debug("compile end"); ze_graph_handle_t graphHandle = nullptr; @@ -88,28 +88,27 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrgetGraphHandle(networkSO->data(), networkSO->size()); + graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. Only exports are available"); } } - auto networkSOPtr = std::make_shared>>>(reinterpret_cast(networkSO->data()), networkSO->size(), networkSO); return std::make_shared(_zeGraphExt, _compiler, _zeroInitStruct, graphHandle, std::move(networkDesc.metadata), - networkSOPtr, + std::move(blobPtr), config); } -std::shared_ptr PluginCompilerAdapter::parse(std::shared_ptr networkSOPtr, const Config& config) const { +std::shared_ptr PluginCompilerAdapter::parse(std::unique_ptr blobPtr, const Config& config) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse"); _logger.debug("parse start"); - std::vector network(networkSOPtr->size()); - network.assign(reinterpret_cast(networkSOPtr->get_ptr()), reinterpret_cast(networkSOPtr->get_ptr()) + networkSOPtr->size()); + std::vector network(blobPtr->size()); + network.assign(reinterpret_cast(blobPtr->get_ptr()), reinterpret_cast(blobPtr->get_ptr()) + blobPtr->size()); auto networkMeta = _compiler->parse(network, config); network.clear(); network.shrink_to_fit(); @@ -118,7 +117,7 @@ std::shared_ptr PluginCompilerAdapter::parse(std::shared_ptrgetGraphHandle(reinterpret_cast(networkSOPtr->get_ptr()), networkSOPtr->size()); + graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); } return std::make_shared(_zeGraphExt, @@ -126,7 +125,7 @@ std::shared_ptr PluginCompilerAdapter::parse(std::shared_ptr PluginGraph::process_profiling_output(const std::vector& profData, const Config& config) const { - - // Need to fix increased memory usage below, ov::SharedBuffer won't permit us to get underlying shared buffer as it is private - // Only if we work with std::vector blobs, but then IGraph needs to have 2 declarations for the same blob - // Maybe if we templatize blob in IGraph to be either std::vector or std::shared_ptr? std::vector blob(_blob->size()); - blob.assign(reinterpret_cast(_blob->get_ptr()), reinterpret_cast(_blob->get_ptr()) + _blob->size()); + blob.assign(reinterpret_cast(_blob->get_ptr()), reinterpret_cast(_blob->get_ptr()) + _blob->size()); return _compiler->process_profiling_output(profData, blob, config); } diff --git a/src/plugins/intel_npu/src/plugin/include/plugin.hpp b/src/plugins/intel_npu/src/plugin/include/plugin.hpp index 85c5df3afc2cf1..b9da04e070e9c5 100644 --- a/src/plugins/intel_npu/src/plugin/include/plugin.hpp +++ b/src/plugins/intel_npu/src/plugin/include/plugin.hpp @@ -44,14 +44,16 @@ class Plugin : public ov::IPlugin { std::shared_ptr import_model(std::istream& stream, const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::shared_ptr model_buffer, - const ov::AnyMap& properties) const override; + std::shared_ptr import_model(std::istream& /* unusedStream */, + std::shared_ptr model_buffer, + const ov::AnyMap& properties) const override; std::shared_ptr import_model(std::istream& stream, const ov::SoPtr& context, const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::shared_ptr model_buffer, + std::shared_ptr import_model(std::istream& stream, + std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const override; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index c2def3a9555005..5d2e14b207e736 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -833,15 +833,16 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c auto compiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), localConfig); auto graphSize = getFileSize(stream); - auto blobSO = std::make_shared>(graphSize); - stream.read(reinterpret_cast(blobSO->data()), graphSize); + + std::vector blob(graphSize); + stream.read(reinterpret_cast(blob.data()), graphSize); if (!stream) { OPENVINO_THROW("Failed to read data from stream!"); } _logger.debug("Successfully read %zu bytes into blob.", graphSize); - auto blobSOPtr = std::make_shared>>>(reinterpret_cast(blobSO->data()), graphSize, blobSO); - auto graph = compiler->parse(std::move(blobSOPtr), localConfig); + auto blobContainerPtr = std::make_unique(std::move(blob)); + auto graph = compiler->parse(std::move(blobContainerPtr), localConfig); graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); const std::shared_ptr modelDummy = @@ -859,7 +860,9 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c return compiledModel; } -std::shared_ptr Plugin::import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) const { +std::shared_ptr Plugin::import_model(std::istream& /* unusedStream */, + std::shared_ptr model_buffer, + const ov::AnyMap& properties) const { OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model"); OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs"); @@ -884,8 +887,8 @@ std::shared_ptr Plugin::import_model(std::shared_ptrparse(model_buffer, localConfig); + auto blobContainerPtr = std::make_unique(model_buffer); + auto graph = compiler->parse(std::move(blobContainerPtr), localConfig); graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); const std::shared_ptr modelDummy = @@ -914,7 +917,8 @@ std::shared_ptr Plugin::import_model(std::istream& stream, return import_model(stream, context, properties); } -std::shared_ptr Plugin::import_model(std::shared_ptr model_buffer, +std::shared_ptr Plugin::import_model(std::istream& stream, + std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const { auto casted = std::dynamic_pointer_cast(context._ptr); @@ -922,7 +926,7 @@ std::shared_ptr Plugin::import_model(std::shared_ptr& model, From 379c3107920ee4f56de5203b2891cc580b456ad7 Mon Sep 17 00:00:00 2001 From: Mircea-Aurelian Dan Date: Mon, 9 Dec 2024 09:00:20 +0000 Subject: [PATCH 23/34] Fix clang formats --- src/core/dev_api/openvino/runtime/shared_buffer.hpp | 4 ++-- src/inference/src/dev/compilation_context.cpp | 3 ++- src/inference/src/dev/iplugin.cpp | 4 ++-- src/inference/src/dev/plugin.cpp | 4 +++- src/inference/src/dev/plugin.hpp | 5 +++-- src/plugins/intel_cpu/src/plugin.h | 3 +-- .../src/common/include/intel_npu/common/igraph.hpp | 4 +++- .../src/common/include/intel_npu/common/npu.hpp | 10 ++++++++++ .../compiler_adapter/src/driver_compiler_adapter.cpp | 6 ++++-- .../src/compiler_adapter/src/driver_graph.cpp | 2 +- .../compiler_adapter/src/plugin_compiler_adapter.cpp | 12 ++++++++---- .../src/compiler_adapter/src/plugin_graph.cpp | 12 +++++++++--- .../compiler_adapter/src/ze_graph_ext_wrappers.cpp | 8 ++------ src/plugins/intel_npu/src/plugin/include/metrics.hpp | 3 ++- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 4 ++-- 15 files changed, 54 insertions(+), 30 deletions(-) diff --git a/src/core/dev_api/openvino/runtime/shared_buffer.hpp b/src/core/dev_api/openvino/runtime/shared_buffer.hpp index fb0fd01403e763..34ac5447ff2a57 100644 --- a/src/core/dev_api/openvino/runtime/shared_buffer.hpp +++ b/src/core/dev_api/openvino/runtime/shared_buffer.hpp @@ -95,8 +95,8 @@ class OwningSharedStreamBuffer : public SharedStreamBuffer { } pos_type seekoff(off_type off, - std::ios_base::seekdir dir, - std::ios_base::openmode which = std::ios_base::in) override { + std::ios_base::seekdir dir, + std::ios_base::openmode which = std::ios_base::in) override { auto pos = SharedStreamBuffer::seekoff(off, dir, which); m_shared_obj->updateOffset(m_offset); return pos; diff --git a/src/inference/src/dev/compilation_context.cpp b/src/inference/src/dev/compilation_context.cpp index 2729249fa85495..ff5ec0f1ff1331 100644 --- a/src/inference/src/dev/compilation_context.cpp +++ b/src/inference/src/dev/compilation_context.cpp @@ -156,7 +156,8 @@ std::string ModelCache::compute_hash(const std::string& modelStr, ////////////////////////////////////////////////// -CompiledBlobHeader::CompiledBlobHeader(std::shared_ptr model_buffer) : m_model_buffer(model_buffer) {} +CompiledBlobHeader::CompiledBlobHeader(std::shared_ptr model_buffer) + : m_model_buffer(model_buffer) {} CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo, diff --git a/src/inference/src/dev/iplugin.cpp b/src/inference/src/dev/iplugin.cpp index e5f346b6353cfb..bae79e1d704992 100644 --- a/src/inference/src/dev/iplugin.cpp +++ b/src/inference/src/dev/iplugin.cpp @@ -60,14 +60,14 @@ const std::string& ov::IPlugin::get_device_name() const { std::shared_ptr ov::IPlugin::import_model(std::istream& model, std::shared_ptr model_buffer, - const ov::AnyMap& properties) const{ + const ov::AnyMap& properties) const { OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented"); } std::shared_ptr ov::IPlugin::import_model(std::istream& model, std::shared_ptr model_buffer, const ov::SoPtr& context, - const ov::AnyMap& properties) const{ + const ov::AnyMap& properties) const { OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented"); } diff --git a/src/inference/src/dev/plugin.cpp b/src/inference/src/dev/plugin.cpp index dd7d0267744c9c..2e30a94e226f0c 100644 --- a/src/inference/src/dev/plugin.cpp +++ b/src/inference/src/dev/plugin.cpp @@ -79,7 +79,9 @@ ov::SoPtr ov::Plugin::import_model(std::istream& model, OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, context, config), m_so}); } -ov::SoPtr ov::Plugin::import_model(std::istream& model, std::shared_ptr model_buffer, const ov::AnyMap& properties) const { +ov::SoPtr ov::Plugin::import_model(std::istream& model, + std::shared_ptr model_buffer, + const ov::AnyMap& properties) const { OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, properties), m_so}); } diff --git a/src/inference/src/dev/plugin.hpp b/src/inference/src/dev/plugin.hpp index 1d3ed5c4234ff0..8bace286818875 100644 --- a/src/inference/src/dev/plugin.hpp +++ b/src/inference/src/dev/plugin.hpp @@ -59,7 +59,9 @@ class Plugin { const ov::SoPtr& context, const ov::AnyMap& config) const; - SoPtr import_model(std::istream& model, std::shared_ptr model_buffer, const ov::AnyMap& properties) const; + SoPtr import_model(std::istream& model, + std::shared_ptr model_buffer, + const ov::AnyMap& properties) const; SoPtr import_model(std::istream& model, std::shared_ptr model_buffer, @@ -85,4 +87,3 @@ class Plugin { }; } // namespace ov - diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index 5371da052b3077..6baec7160448c6 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -39,8 +39,7 @@ class Plugin : public ov::IPlugin { std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const override { - OPENVINO_THROW_NOT_IMPLEMENTED( - "import_model with RemoteContext is not supported by CPU plugin!"); + OPENVINO_THROW_NOT_IMPLEMENTED("import_model with RemoteContext is not supported by CPU plugin!"); }; ov::SupportedOpsMap query_model(const std::shared_ptr& model, diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index 677750c1b714dd..9e0a67cb5e2be1 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -30,6 +30,8 @@ class BlobContainer { virtual bool release_from_memory() { OPENVINO_THROW("BlobContainer::release_from_memory() method is not implemented!"); } + + virtual ~BlobContainer() = default; }; class BlobContainerVector : public BlobContainer { @@ -147,7 +149,7 @@ class IGraph : public std::enable_shared_from_this { // first inference starts running std::mutex _mutex; - std::shared_ptr _blob; + std::unique_ptr _blob; uint32_t _unique_id = 0; uint32_t _last_submitted_id; diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp index 0b6daf4e71b56e..00d14ead447a36 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp @@ -57,6 +57,16 @@ class IEngineBackend : public std::enable_shared_from_this { //------------------------------------------------------------------------------ +class ICompilerAdapter { +public: + virtual std::shared_ptr compile(const std::shared_ptr& model, + const Config& config) const = 0; + virtual std::shared_ptr parse(std::unique_ptr blobPtr, const Config& config) const = 0; + virtual ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const = 0; + + virtual ~ICompilerAdapter() = default; +}; + //------------------------------------------------------------------------------ class IDevice : public std::enable_shared_from_this { diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index 696c3e8b59a32d..bdf728f005cb99 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -203,11 +203,13 @@ std::shared_ptr DriverCompilerAdapter::compile(const std::shared_ptr DriverCompilerAdapter::parse(std::unique_ptr blobPtr, const Config& config) const { +std::shared_ptr DriverCompilerAdapter::parse(std::unique_ptr blobPtr, + const Config& config) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse"); _logger.debug("parse start"); - ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); + ze_graph_handle_t graphHandle = + _zeGraphExt->getGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); _logger.debug("parse end"); OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta"); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index 73d26eb0ad851f..115e26f982e72c 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -150,7 +150,7 @@ bool DriverGraph::release_blob(const Config& config) { return false; } - if(!_blob->release_from_memory()) { + if (!_blob->release_from_memory()) { return false; } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index d30fc613ecb4c6..3223501ba0fd53 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -88,7 +88,8 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrgetGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); + graphHandle = + _zeGraphExt->getGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. Only exports are available"); @@ -103,12 +104,14 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr PluginCompilerAdapter::parse(std::unique_ptr blobPtr, const Config& config) const { +std::shared_ptr PluginCompilerAdapter::parse(std::unique_ptr blobPtr, + const Config& config) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse"); _logger.debug("parse start"); std::vector network(blobPtr->size()); - network.assign(reinterpret_cast(blobPtr->get_ptr()), reinterpret_cast(blobPtr->get_ptr()) + blobPtr->size()); + network.assign(reinterpret_cast(blobPtr->get_ptr()), + reinterpret_cast(blobPtr->get_ptr()) + blobPtr->size()); auto networkMeta = _compiler->parse(network, config); network.clear(); network.shrink_to_fit(); @@ -117,7 +120,8 @@ std::shared_ptr PluginCompilerAdapter::parse(std::unique_ptrgetGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); + graphHandle = + _zeGraphExt->getGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); } return std::make_shared(_zeGraphExt, diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index eb91de86ca5a1c..8f87c893067c7e 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -17,7 +17,10 @@ PluginGraph::PluginGraph(const std::shared_ptr& zeGraphExt, NetworkMetadata metadata, std::unique_ptr blobPtr, const Config& config) - : IGraph(graphHandle, std::move(metadata), config, std::optional>(std::move(blobPtr))), + : IGraph(graphHandle, + std::move(metadata), + config, + std::optional>(std::move(blobPtr))), _zeGraphExt(zeGraphExt), _zeroInitStruct(zeroInitStruct), _compiler(compiler), @@ -40,7 +43,9 @@ size_t PluginGraph::export_blob(std::ostream& stream) { if (_logger.level() >= ov::log::Level::INFO) { std::uint32_t result = 1171117u; - for (const uint8_t* it = reinterpret_cast(_blob->get_ptr()); it != reinterpret_cast(_blob->get_ptr()) + _blob->size(); ++it) { + for (const uint8_t* it = reinterpret_cast(_blob->get_ptr()); + it != reinterpret_cast(_blob->get_ptr()) + _blob->size(); + ++it) { result = ((result << 7) + result) + static_cast(*it); } @@ -55,7 +60,8 @@ size_t PluginGraph::export_blob(std::ostream& stream) { std::vector PluginGraph::process_profiling_output(const std::vector& profData, const Config& config) const { std::vector blob(_blob->size()); - blob.assign(reinterpret_cast(_blob->get_ptr()), reinterpret_cast(_blob->get_ptr()) + _blob->size()); + blob.assign(reinterpret_cast(_blob->get_ptr()), + reinterpret_cast(_blob->get_ptr()) + _blob->size()); return _compiler->process_profiling_output(profData, blob, config); } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp index 6c3011802ff925..b6a40573c5f45d 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp @@ -372,12 +372,8 @@ ze_graph_handle_t ZeGraphExtWrappers::getGraphHandle(const uint8_t* blobData, si OPENVINO_THROW("Empty blob"); } - ze_graph_desc_t desc = {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, - nullptr, - ZE_GRAPH_FORMAT_NATIVE, - blobSize, - blobData, - nullptr}; + ze_graph_desc_t desc = + {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, nullptr, ZE_GRAPH_FORMAT_NATIVE, blobSize, blobData, nullptr}; _logger.debug("getGraphHandle - perform pfnCreate"); auto result = _zeroInitStruct->getGraphDdiTable().pfnCreate(_zeroInitStruct->getContext(), diff --git a/src/plugins/intel_npu/src/plugin/include/metrics.hpp b/src/plugins/intel_npu/src/plugin/include/metrics.hpp index d0c35ef43ec15d..357d8b51da475a 100644 --- a/src/plugins/intel_npu/src/plugin/include/metrics.hpp +++ b/src/plugins/intel_npu/src/plugin/include/metrics.hpp @@ -68,7 +68,8 @@ class Metrics final { ov::intel_npu::batch_mode.name(), ov::hint::execution_mode.name()}; - const std::vector _internalSupportedProperties = {ov::internal::caching_properties.name(), ov::internal::caching_with_mmap.name()}; + const std::vector _internalSupportedProperties = {ov::internal::caching_properties.name(), + ov::internal::caching_with_mmap.name()}; // Metric to provide a hint for a range for number of async infer requests. (bottom bound, upper bound, step) const std::tuple _rangeForAsyncInferRequests{1u, 10u, 1u}; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 5d2e14b207e736..053db4d9dc1d7c 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -861,8 +861,8 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c } std::shared_ptr Plugin::import_model(std::istream& /* unusedStream */, - std::shared_ptr model_buffer, - const ov::AnyMap& properties) const { + std::shared_ptr model_buffer, + const ov::AnyMap& properties) const { OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model"); OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs"); From 8060dbbed7e3b9ccc7bcbf0a80a46c424f46ce62 Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Wed, 11 Dec 2024 10:37:13 +0200 Subject: [PATCH 24/34] Use alternative from `PR #27981` instead for memory mapped buffers --- .../openvino/runtime/aligned_buffer.hpp | 8 +- .../openvino/runtime/shared_buffer.hpp | 21 ----- .../openvino/runtime/compilation_context.hpp | 7 +- .../dev_api/openvino/runtime/iplugin.hpp | 27 ------ src/inference/src/cache_manager.hpp | 1 - src/inference/src/dev/compilation_context.cpp | 7 +- src/inference/src/dev/core_impl.cpp | 2 +- src/inference/src/dev/iplugin.cpp | 13 --- src/inference/src/dev/plugin.cpp | 13 --- src/inference/src/dev/plugin.hpp | 10 +-- src/plugins/intel_cpu/src/plugin.cpp | 23 +++-- src/plugins/intel_cpu/src/plugin.h | 16 +--- src/plugins/intel_cpu/src/utils/serialize.cpp | 56 +++++------- src/plugins/intel_cpu/src/utils/serialize.hpp | 8 +- .../intel_npu/common/icompiler_adapter.hpp | 2 +- .../include/intel_npu/common/igraph.hpp | 9 +- .../common/include/intel_npu/common/npu.hpp | 10 --- .../intel_npu/src/common/src/igraph.cpp | 6 +- .../src/compiler_adapter/src/driver_graph.cpp | 19 ++-- .../src/plugin_compiler_adapter.cpp | 1 + .../src/compiler_adapter/src/plugin_graph.cpp | 18 ++-- .../intel_npu/src/plugin/include/plugin.hpp | 9 -- .../intel_npu/src/plugin/src/plugin.cpp | 88 +++++-------------- 23 files changed, 99 insertions(+), 275 deletions(-) diff --git a/src/core/dev_api/openvino/runtime/aligned_buffer.hpp b/src/core/dev_api/openvino/runtime/aligned_buffer.hpp index c9344e1cd84911..992d258895f5f4 100644 --- a/src/core/dev_api/openvino/runtime/aligned_buffer.hpp +++ b/src/core/dev_api/openvino/runtime/aligned_buffer.hpp @@ -30,17 +30,14 @@ class OPENVINO_API AlignedBuffer { size_t size() const { return m_byte_size; } - void updateOffset(size_t offset) { - m_offset = offset; - } void* get_ptr(size_t offset) const { return m_aligned_buffer + offset; } void* get_ptr() { - return m_aligned_buffer + m_offset; + return m_aligned_buffer; } const void* get_ptr() const { - return m_aligned_buffer + m_offset; + return m_aligned_buffer; } template T* get_ptr() { @@ -64,7 +61,6 @@ class OPENVINO_API AlignedBuffer { char* m_allocated_buffer; char* m_aligned_buffer; size_t m_byte_size; - size_t m_offset = 0; }; template <> diff --git a/src/core/dev_api/openvino/runtime/shared_buffer.hpp b/src/core/dev_api/openvino/runtime/shared_buffer.hpp index 34ac5447ff2a57..3ea97db6a1989f 100644 --- a/src/core/dev_api/openvino/runtime/shared_buffer.hpp +++ b/src/core/dev_api/openvino/runtime/shared_buffer.hpp @@ -16,7 +16,6 @@ class SharedBuffer : public ov::AlignedBuffer { m_allocated_buffer = data; m_aligned_buffer = data; m_byte_size = size; - m_offset = 0; } virtual ~SharedBuffer() { @@ -82,26 +81,6 @@ class OwningSharedStreamBuffer : public SharedStreamBuffer { return m_shared_obj; } - std::streamsize xsgetn(char* s, std::streamsize count) override { - auto streamSize = SharedStreamBuffer::xsgetn(s, count); - m_shared_obj->updateOffset(m_offset); - return streamSize; - } - - int_type uflow() override { - auto val = SharedStreamBuffer::uflow(); - m_shared_obj->updateOffset(m_offset); - return val; - } - - pos_type seekoff(off_type off, - std::ios_base::seekdir dir, - std::ios_base::openmode which = std::ios_base::in) override { - auto pos = SharedStreamBuffer::seekoff(off, dir, which); - m_shared_obj->updateOffset(m_offset); - return pos; - } - protected: std::shared_ptr m_shared_obj; }; diff --git a/src/inference/dev_api/openvino/runtime/compilation_context.hpp b/src/inference/dev_api/openvino/runtime/compilation_context.hpp index 03a11feaf5facc..21bda52882ffec 100644 --- a/src/inference/dev_api/openvino/runtime/compilation_context.hpp +++ b/src/inference/dev_api/openvino/runtime/compilation_context.hpp @@ -32,10 +32,9 @@ class CompiledBlobHeader final { std::string m_ieVersion; std::string m_fileInfo; std::string m_runtimeInfo; - std::shared_ptr m_model_buffer; public: - CompiledBlobHeader(std::shared_ptr model_buffer); + CompiledBlobHeader(); CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo, const std::string& runtimeInfo); const std::string& get_openvino_version() const { @@ -50,10 +49,6 @@ class CompiledBlobHeader final { return m_runtimeInfo; } - const std::shared_ptr get_model_buffer() const { - return m_model_buffer; - } - friend std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header); friend std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header); diff --git a/src/inference/dev_api/openvino/runtime/iplugin.hpp b/src/inference/dev_api/openvino/runtime/iplugin.hpp index 58df9f1d92d640..a513af9ffa4502 100644 --- a/src/inference/dev_api/openvino/runtime/iplugin.hpp +++ b/src/inference/dev_api/openvino/runtime/iplugin.hpp @@ -185,33 +185,6 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this& context, const ov::AnyMap& properties) const = 0; - /** - * @brief Creates an compiled model from an previously exported model using plugin implementation - * and removes OpenVINO Runtime magic and plugin name - * @param model Reference to model output stream - * @param weights_buffer AlignedBuffer with cached model - * @param properties A ov::AnyMap of properties - * @return An Compiled model - */ - virtual std::shared_ptr import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::AnyMap& properties) const; - - /** - * @brief Creates an compiled model from an previously exported model using plugin implementation - * and removes OpenVINO Runtime magic and plugin name - * @param model Reference to model output stream - * @param weights_buffer AlignedBuffer with cached model - * @param context A pointer to plugin context derived from RemoteContext class used to - * execute the network - * @param properties A ov::AnyMap of properties - * @return An Compiled model - */ - virtual std::shared_ptr import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::SoPtr& context, - const ov::AnyMap& properties) const; - /** * @brief Queries a plugin about supported layers in model * @param model Model object to query. diff --git a/src/inference/src/cache_manager.hpp b/src/inference/src/cache_manager.hpp index 1f6effa02b0d92..eb5d92bed1ea39 100644 --- a/src/inference/src/cache_manager.hpp +++ b/src/inference/src/cache_manager.hpp @@ -141,7 +141,6 @@ class FileStorageCacheManager final : public ICacheManager { auto mmap = ov::load_mmap_object(blob_file_name); auto shared_buffer = std::make_shared>>(mmap->data(), mmap->size(), mmap); -#if 0 OwningSharedStreamBuffer buf(shared_buffer); std::istream stream(&buf); reader(stream, shared_buffer); diff --git a/src/inference/src/dev/compilation_context.cpp b/src/inference/src/dev/compilation_context.cpp index ff5ec0f1ff1331..bf1a7197826f49 100644 --- a/src/inference/src/dev/compilation_context.cpp +++ b/src/inference/src/dev/compilation_context.cpp @@ -156,8 +156,7 @@ std::string ModelCache::compute_hash(const std::string& modelStr, ////////////////////////////////////////////////// -CompiledBlobHeader::CompiledBlobHeader(std::shared_ptr model_buffer) - : m_model_buffer(model_buffer) {} +CompiledBlobHeader::CompiledBlobHeader() {} CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo, @@ -169,10 +168,6 @@ CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) { std::string xmlStr; std::getline(stream, xmlStr); - auto model_buffer = header.get_model_buffer(); - if (model_buffer != nullptr) { - model_buffer->updateOffset(stream.tellg()); - } pugi::xml_document document; pugi::xml_parse_result res = document.load_string(xmlStr.c_str()); diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index 845afe9719293c..0cad1840e5d1a8 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -1452,7 +1452,7 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( ov::itt::domains::LoadTime, "Core::load_model_from_cache::ReadStreamAndImport"); try { - ov::CompiledBlobHeader header(model_buffer); + ov::CompiledBlobHeader header; networkStream >> header; if (header.get_file_info() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) { // Original file is changed, don't use cache diff --git a/src/inference/src/dev/iplugin.cpp b/src/inference/src/dev/iplugin.cpp index bae79e1d704992..d7dc6ae617eb05 100644 --- a/src/inference/src/dev/iplugin.cpp +++ b/src/inference/src/dev/iplugin.cpp @@ -58,19 +58,6 @@ const std::string& ov::IPlugin::get_device_name() const { return m_plugin_name; } -std::shared_ptr ov::IPlugin::import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::AnyMap& properties) const { - OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented"); -} - -std::shared_ptr ov::IPlugin::import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::SoPtr& context, - const ov::AnyMap& properties) const { - OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented"); -} - void ov::IPlugin::set_core(const std::weak_ptr& core) { OPENVINO_ASSERT(!core.expired()); m_core = core; diff --git a/src/inference/src/dev/plugin.cpp b/src/inference/src/dev/plugin.cpp index 2e30a94e226f0c..9c22b0ae7e77dc 100644 --- a/src/inference/src/dev/plugin.cpp +++ b/src/inference/src/dev/plugin.cpp @@ -79,19 +79,6 @@ ov::SoPtr ov::Plugin::import_model(std::istream& model, OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, context, config), m_so}); } -ov::SoPtr ov::Plugin::import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::AnyMap& properties) const { - OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, properties), m_so}); -} - -ov::SoPtr ov::Plugin::import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::SoPtr& context, - const ov::AnyMap& config) const { - OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, context, config), m_so}); -} - ov::SoPtr ov::Plugin::create_context(const AnyMap& params) const { OV_PLUGIN_CALL_STATEMENT({ auto remote = m_ptr->create_context(params); diff --git a/src/inference/src/dev/plugin.hpp b/src/inference/src/dev/plugin.hpp index 8bace286818875..b6968adda5c695 100644 --- a/src/inference/src/dev/plugin.hpp +++ b/src/inference/src/dev/plugin.hpp @@ -59,15 +59,6 @@ class Plugin { const ov::SoPtr& context, const ov::AnyMap& config) const; - SoPtr import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::AnyMap& properties) const; - - SoPtr import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::SoPtr& context, - const ov::AnyMap& config) const; - ov::SoPtr create_context(const AnyMap& params) const; ov::SoPtr get_default_context(const AnyMap& params) const; @@ -87,3 +78,4 @@ class Plugin { }; } // namespace ov + diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 6194438c928068..8d4e72d665f121 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -7,7 +7,6 @@ #include "cpu_streams_calculation.hpp" #include "internal_properties.hpp" #include "itt.h" -#include "openvino/op/paged_attention.hpp" #include "openvino/runtime/intel_cpu/properties.hpp" #include "openvino/runtime/internal_properties.hpp" #include "openvino/runtime/properties.hpp" @@ -20,6 +19,7 @@ #include "utils/precision_support.h" #include "utils/serialize.hpp" #include "weights_cache.hpp" +#include "openvino/op/paged_attention.hpp" #if defined(__linux__) # include @@ -200,7 +200,7 @@ static Config::ModelType getModelType(const std::shared_ptr& model) return Config::ModelType::CNN; if ((op::util::has_op_with_type(model) && model->get_variables().size() > 0) || - op::util::has_op_with_type(model)) + op::util::has_op_with_type(model)) return Config::ModelType::LLM; return Config::ModelType::Unknown; @@ -445,17 +445,15 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio return decltype(ov::supported_properties)::value_type(std::move(supportedProperties)); } else if (ov::internal::supported_properties == name) { - return decltype(ov::internal::supported_properties)::value_type { + return decltype(ov::internal::supported_properties)::value_type{ ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO}, #if !defined(OPENVINO_ARCH_ARM) && !(defined(__APPLE__) || defined(__MACOSX)) - ov::PropertyName{ov::internal::caching_with_mmap.name(), ov::PropertyMutability::RO}, + ov::PropertyName{ov::internal::caching_with_mmap.name(), ov::PropertyMutability::RO}, #endif - ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, - ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO}, - ov::PropertyName { - ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO - } - }; + ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, + ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO}, + ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), + ov::PropertyMutability::RO}}; } else if (name == ov::device::full_name) { return decltype(ov::device::full_name)::value_type(deviceFullName); } else if (name == ov::available_devices) { @@ -557,7 +555,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& std::shared_ptr Plugin::import_model(std::istream& model_stream, const ov::AnyMap& config) const { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model"); - CacheDecrypt decrypt{codec_xor}; + CacheDecrypt decrypt{ codec_xor }; bool decript_from_string = false; if (config.count(ov::cache_encryption_callbacks.name())) { const auto& encryption_callbacks = config.at(ov::cache_encryption_callbacks.name()).as(); @@ -578,8 +576,7 @@ std::shared_ptr Plugin::import_model(std::istream& model_str [this](const std::shared_ptr& model, const std::shared_ptr& weights) { return get_core()->read_model(model, weights); }, - decrypt, - decript_from_string); + decrypt, decript_from_string); std::shared_ptr model; deserializer >> model; diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index 6baec7160448c6..916e02a868c974 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -20,7 +20,8 @@ class Plugin : public ov::IPlugin { std::shared_ptr compile_model(const std::shared_ptr& model, const ov::AnyMap& properties, const ov::SoPtr& context) const override { - OPENVINO_THROW_NOT_IMPLEMENTED("compile_model with RemoteContext is not supported by CPU plugin!"); + OPENVINO_THROW_NOT_IMPLEMENTED( + "compile_model with RemoteContext is not supported by CPU plugin!"); }; void set_property(const ov::AnyMap& properties) override; @@ -29,17 +30,8 @@ class Plugin : public ov::IPlugin { std::shared_ptr import_model(std::istream& model, const ov::SoPtr& context, const ov::AnyMap& properties) const override { - OPENVINO_THROW_NOT_IMPLEMENTED("import_model with RemoteContext is not supported by CPU plugin!"); - }; - - std::shared_ptr import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::SoPtr& context, - const ov::AnyMap& properties) const override { - OPENVINO_THROW_NOT_IMPLEMENTED("import_model with RemoteContext is not supported by CPU plugin!"); + OPENVINO_THROW_NOT_IMPLEMENTED( + "import_model with RemoteContext is not supported by CPU plugin!"); }; ov::SupportedOpsMap query_model(const std::shared_ptr& model, diff --git a/src/plugins/intel_cpu/src/utils/serialize.cpp b/src/plugins/intel_cpu/src/utils/serialize.cpp index 177cc817b8b3ab..145067b8e935e4 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.cpp +++ b/src/plugins/intel_cpu/src/utils/serialize.cpp @@ -16,8 +16,7 @@ namespace intel_cpu { ////////// ModelSerializer ////////// ModelSerializer::ModelSerializer(std::ostream& ostream, CacheEncrypt encrypt_fn) - : m_ostream(ostream), - m_cache_encrypt(std::move(encrypt_fn)) {} + : m_ostream(ostream), m_cache_encrypt(std::move(encrypt_fn)) {} void ModelSerializer::operator<<(const std::shared_ptr& model) { auto serialize_info = [&](std::ostream& stream) { @@ -47,16 +46,15 @@ ModelDeserializer::ModelDeserializer(std::istream& model_stream, } else { m_cache_decrypt.m_decrypt_char = decrypt_fn.m_decrypt_char; } -} -void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr& model) {} + void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr& model) {} -void ModelDeserializer::operator>>(std::shared_ptr& model) { - if (m_model_buffer) { - process_mmap(model, m_model_buffer); - } else { - process_stream(model); - } + void ModelDeserializer::operator>>(std::shared_ptr& model) { + if (m_model_buffer) { + process_mmap(model, m_model_buffer); + } else { + process_stream(model); + } } void ModelDeserializer::process_mmap(std::shared_ptr& model, @@ -83,10 +81,7 @@ void ModelDeserializer::process_mmap(std::shared_ptr& model, // Read model input/output precisions. pugi::xml_document xml_in_out_doc; if (hdr.custom_data_size > 0lu) { - auto res = xml_in_out_doc.load_buffer(buffer_base + hdr.custom_data_offset, - hdr.custom_data_size, - pugi::parse_default, - pugi::encoding_utf8); + auto res = xml_in_out_doc.load_buffer(buffer_base + hdr.custom_data_offset, hdr.custom_data_size, pugi::parse_default, pugi::encoding_utf8); if (res.status != pugi::status_ok) { OPENVINO_THROW("[CPU] Could to deserialize custom data."); } @@ -95,10 +90,7 @@ void ModelDeserializer::process_mmap(std::shared_ptr& model, // Map blob content std::shared_ptr weights_buf; if (hdr.consts_size) { - weights_buf = - std::make_shared>>(buffer_base + hdr.consts_offset, - hdr.consts_size, - mmemory); + weights_buf = std::make_shared>>(buffer_base + hdr.consts_offset, hdr.consts_size, mmemory); } // XML content @@ -115,7 +107,9 @@ void ModelDeserializer::process_mmap(std::shared_ptr& model, xml_buff->assign(buffer_base + hdr.model_offset, hdr.model_size); } std::shared_ptr model_buf = - std::make_shared>>(&((*xml_buff)[0]), hdr.model_size, xml_buff); + std::make_shared>>(&((*xml_buff)[0]), + hdr.model_size, + xml_buff); model = m_model_builder(model_buf, weights_buf); @@ -160,7 +154,7 @@ void ModelDeserializer::process_stream(std::shared_ptr& model) { auto data_blob = std::make_shared(ov::element::u8, ov::Shape({hdr.consts_size})); m_istream.seekg(hdr.consts_offset); if (hdr.consts_size) { - m_istream.read(static_cast(data_blob->data(ov::element::u8)), hdr.consts_size); + m_istream.read(static_cast(data_blob->data(ov::element::u8)), hdr.consts_size); } // read XML content @@ -172,20 +166,16 @@ void ModelDeserializer::process_stream(std::shared_ptr& model) { if (m_decript_from_string) { *xml_string = m_cache_decrypt.m_decrypt_str(*xml_string); } else { - m_cache_decrypt.m_decrypt_char(const_cast(xml_string->data()), - xml_string->data(), - xml_string->size()); + m_cache_decrypt.m_decrypt_char(const_cast(xml_string->data()), xml_string->data(), xml_string->size()); } } - auto model_buf = - std::make_shared>>(const_cast(xml_string->data()), - xml_string->size(), - xml_string); - auto weights_buf = std::make_shared>>( - reinterpret_cast(data_blob->data(ov::element::u8)), - hdr.consts_size, - data_blob); + auto model_buf = std::make_shared>>(const_cast(xml_string->data()), + xml_string->size(), + xml_string); + auto weights_buf = std::make_shared>>(reinterpret_cast(data_blob->data(ov::element::u8)), + hdr.consts_size, + data_blob); model = m_model_builder(model_buf, weights_buf); @@ -194,5 +184,5 @@ void ModelDeserializer::process_stream(std::shared_ptr& model) { set_info(root, model); } -} // namespace intel_cpu -} // namespace ov +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/serialize.hpp b/src/plugins/intel_cpu/src/utils/serialize.hpp index f88acf57921c50..0d17020fb1cf37 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.hpp +++ b/src/plugins/intel_cpu/src/utils/serialize.hpp @@ -29,9 +29,7 @@ class ModelSerializer { class ModelDeserializer { public: - typedef std::function(const std::shared_ptr&, - const std::shared_ptr&)> - ModelBuilder; + typedef std::function(const std::shared_ptr&, const std::shared_ptr&)> ModelBuilder; ModelDeserializer(std::istream& model, std::shared_ptr model_buffer, @@ -57,5 +55,5 @@ class ModelDeserializer { std::shared_ptr m_model_buffer; }; -} // namespace intel_cpu -} // namespace ov +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp index bedf0aaeeca966..a86d942627c6b5 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp @@ -12,7 +12,7 @@ class ICompilerAdapter { public: virtual std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const = 0; - virtual std::shared_ptr parse(std::vector network, const Config& config) const = 0; + virtual std::shared_ptr parse(std::unique_ptr blobPtr, const Config& config) const = 0; virtual ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const = 0; virtual uint32_t get_version() const = 0; diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index 9e0a67cb5e2be1..85ccdcf8d8dfc7 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -58,10 +58,12 @@ class BlobContainerVector : public BlobContainer { class BlobContainerAlignedBuffer : public BlobContainer { public: - BlobContainerAlignedBuffer(const std::shared_ptr& blobSO) : _ownershipBlob(blobSO) {} + BlobContainerAlignedBuffer(const std::shared_ptr& blobSO, size_t offset) + : _ownershipBlob(blobSO), + _offset(offset) {} void* get_ptr() override { - return _ownershipBlob->get_ptr(); + return _ownershipBlob->get_ptr(_offset); } size_t size() const override { @@ -74,6 +76,7 @@ class BlobContainerAlignedBuffer : public BlobContainer { private: std::shared_ptr _ownershipBlob; + size_t _offset; }; class IGraph : public std::enable_shared_from_this { @@ -149,7 +152,7 @@ class IGraph : public std::enable_shared_from_this { // first inference starts running std::mutex _mutex; - std::unique_ptr _blob; + std::unique_ptr _blobPtr; uint32_t _unique_id = 0; uint32_t _last_submitted_id; diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp index 00d14ead447a36..0b6daf4e71b56e 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp @@ -57,16 +57,6 @@ class IEngineBackend : public std::enable_shared_from_this { //------------------------------------------------------------------------------ -class ICompilerAdapter { -public: - virtual std::shared_ptr compile(const std::shared_ptr& model, - const Config& config) const = 0; - virtual std::shared_ptr parse(std::unique_ptr blobPtr, const Config& config) const = 0; - virtual ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const = 0; - - virtual ~ICompilerAdapter() = default; -}; - //------------------------------------------------------------------------------ class IDevice : public std::enable_shared_from_this { diff --git a/src/plugins/intel_npu/src/common/src/igraph.cpp b/src/plugins/intel_npu/src/common/src/igraph.cpp index c7b46f2d9b44cb..5dfcdc7ee7466e 100644 --- a/src/plugins/intel_npu/src/common/src/igraph.cpp +++ b/src/plugins/intel_npu/src/common/src/igraph.cpp @@ -17,12 +17,12 @@ namespace intel_npu { IGraph::IGraph(ze_graph_handle_t handle, NetworkMetadata metadata, const Config& config, - std::optional> blob) + std::optional> blobPtr) : _handle(handle), _metadata(std::move(metadata)), _logger("IGraph", config.get()) { - if (blob.has_value()) { - _blob = std::move(*blob); + if (blobPtr.has_value()) { + _blobPtr = std::move(*blobPtr); } } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index 115e26f982e72c..2949343ba4c387 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -34,13 +34,16 @@ DriverGraph::DriverGraph(const std::shared_ptr& zeGraphExt, size_t DriverGraph::export_blob(std::ostream& stream) const { const uint8_t* blobPtr = nullptr; - size_t blobSize; + size_t blobSize = -1; std::vector blob; - _zeGraphExt->getGraphBinary(_handle, *blob, blobPtr, blobSize); - _blob = std::make_shared>>>(reinterpret_cast(const_cast(blobPtr)), blobSize, blob); + if (_blobIsReleased) { + OPENVINO_THROW("Model was imported (not compiled) by the plugin. Model export is forbidden in this case!"); + } + + _zeGraphExt->getGraphBinary(_handle, blob, blobPtr, blobSize); - stream.write(reinterpret_cast(_blob->get_ptr()), _blob->size()); + stream.write(reinterpret_cast(blobPtr), blobSize); if (!stream) { _logger.error("Write blob to stream failed. Blob is broken!"); @@ -49,12 +52,12 @@ size_t DriverGraph::export_blob(std::ostream& stream) const { if (_logger.level() >= ov::log::Level::INFO) { std::uint32_t result = 1171117u; - for (const uint8_t* it = reinterpret_cast(_blob->get_ptr()); it != reinterpret_cast(_blob->get_ptr()) + _blob->size(); ++it) { + for (const uint8_t* it = blobPtr; it != blobPtr + blobSize; ++it) { result = ((result << 7) + result) + static_cast(*it); } std::stringstream str; - str << "Blob size: " << _blob->size() << ", hash: " << std::hex << result; + str << "Blob size: " << blobSize << ", hash: " << std::hex << result; _logger.info(str.str().c_str()); } _logger.info("Write blob to stream successfully."); @@ -137,7 +140,7 @@ void DriverGraph::initialize(const Config& config) { } bool DriverGraph::release_blob(const Config& config) { - if (_blob == nullptr || _zeroInitStruct->getGraphDdiTable().version() < ZE_GRAPH_EXT_VERSION_1_8 || + if (_blobPtr == nullptr || _zeroInitStruct->getGraphDdiTable().version() < ZE_GRAPH_EXT_VERSION_1_8 || config.get()) { return false; } @@ -150,7 +153,7 @@ bool DriverGraph::release_blob(const Config& config) { return false; } - if (!_blob->release_from_memory()) { + if (!_blobPtr->release_from_memory()) { return false; } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 3223501ba0fd53..7b2582a6a30e91 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -95,6 +95,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr(_zeGraphExt, _compiler, _zeroInitStruct, diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index 8f87c893067c7e..ec30999232c9d9 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -33,8 +33,8 @@ PluginGraph::PluginGraph(const std::shared_ptr& zeGraphExt, initialize(config); } -size_t PluginGraph::export_blob(std::ostream& stream) { - stream.write(reinterpret_cast(_blob->get_ptr()), _blob->size()); +void PluginGraph::export_blob(std::ostream& stream) const { + stream.write(reinterpret_cast(_blobPtr->get_ptr()), _blobPtr->size()); if (!stream) { _logger.error("Write blob to stream failed. Blob is broken!"); @@ -43,25 +43,25 @@ size_t PluginGraph::export_blob(std::ostream& stream) { if (_logger.level() >= ov::log::Level::INFO) { std::uint32_t result = 1171117u; - for (const uint8_t* it = reinterpret_cast(_blob->get_ptr()); - it != reinterpret_cast(_blob->get_ptr()) + _blob->size(); + for (const uint8_t* it = reinterpret_cast(_blobPtr->get_ptr()); + it != reinterpret_cast(_blobPtr->get_ptr()) + _blobPtr->size(); ++it) { result = ((result << 7) + result) + static_cast(*it); } std::stringstream str; - str << "Blob size: " << _blob->size() << ", hash: " << std::hex << result; + str << "Blob size: " << _blobPtr->size() << ", hash: " << std::hex << result; _logger.info(str.str().c_str()); } _logger.info("Write blob to stream successfully."); - return _blob->size(); + return _blobPtr->size(); } std::vector PluginGraph::process_profiling_output(const std::vector& profData, const Config& config) const { - std::vector blob(_blob->size()); - blob.assign(reinterpret_cast(_blob->get_ptr()), - reinterpret_cast(_blob->get_ptr()) + _blob->size()); + std::vector blob(_blobPtr->size()); + blob.assign(reinterpret_cast(_blobPtr->get_ptr()), + reinterpret_cast(_blobPtr->get_ptr()) + _blobPtr->size()); return _compiler->process_profiling_output(profData, blob, config); } diff --git a/src/plugins/intel_npu/src/plugin/include/plugin.hpp b/src/plugins/intel_npu/src/plugin/include/plugin.hpp index b9da04e070e9c5..b13be5000513ec 100644 --- a/src/plugins/intel_npu/src/plugin/include/plugin.hpp +++ b/src/plugins/intel_npu/src/plugin/include/plugin.hpp @@ -44,16 +44,7 @@ class Plugin : public ov::IPlugin { std::shared_ptr import_model(std::istream& stream, const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::istream& /* unusedStream */, - std::shared_ptr model_buffer, - const ov::AnyMap& properties) const override; - - std::shared_ptr import_model(std::istream& stream, - const ov::SoPtr& context, - const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::istream& stream, - std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const override; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 053db4d9dc1d7c..3e035c34958269 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -808,6 +808,13 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c npu_plugin_properties.insert(*it); } } + + std::shared_ptr modelBuffer; + if (npu_plugin_properties.count(ov::internal::cached_model_buffer.name())) { + modelBuffer = npu_plugin_properties.at(ov::internal::cached_model_buffer.name()).as>(); + npu_plugin_properties.erase(ov::internal::cached_model_buffer.name()); + } + const std::map propertiesMap = any_copy(npu_plugin_properties); auto localConfig = merge_configs(_globalConfig, propertiesMap, OptionMode::RunTime); @@ -832,63 +839,24 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c CompilerAdapterFactory compilerAdapterFactory; auto compiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), localConfig); - auto graphSize = getFileSize(stream); - - std::vector blob(graphSize); - stream.read(reinterpret_cast(blob.data()), graphSize); - if (!stream) { - OPENVINO_THROW("Failed to read data from stream!"); - } - _logger.debug("Successfully read %zu bytes into blob.", graphSize); - - auto blobContainerPtr = std::make_unique(std::move(blob)); - auto graph = compiler->parse(std::move(blobContainerPtr), localConfig); - graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); - - const std::shared_ptr modelDummy = - create_dummy_model(graph->get_metadata().inputs, graph->get_metadata().outputs); - - compiledModel = std::make_shared(modelDummy, shared_from_this(), device, graph, localConfig); - } catch (const std::exception& ex) { - OPENVINO_THROW("Can't import network: ", ex.what()); - } catch (...) { - OPENVINO_THROW("NPU import_model got unexpected exception from CompiledModel"); - } - - OV_ITT_TASK_SKIP(PLUGIN_IMPORT_MODEL); - - return compiledModel; -} - -std::shared_ptr Plugin::import_model(std::istream& /* unusedStream */, - std::shared_ptr model_buffer, - const ov::AnyMap& properties) const { - OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model"); - OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs"); - - const std::map propertiesMap = any_copy(properties); - auto localConfig = merge_configs(_globalConfig, propertiesMap, OptionMode::RunTime); - _logger.setLevel(localConfig.get()); - const auto platform = _backends->getCompilationPlatform(localConfig.get(), localConfig.get()); - localConfig.update({{ov::intel_npu::platform.name(), platform}}); - auto device = _backends->getDevice(localConfig.get()); - - set_batch_config(_backends->isBatchingSupported(), localConfig); + std::unique_ptr blobPtr; - const auto loadedFromCache = localConfig.get(); - if (!loadedFromCache) { - _logger.warning( - "The usage of a compiled model can lead to undefined behavior. Please use OpenVINO IR instead!"); - } + if (modelBuffer == nullptr) { + auto graphSize = getFileSize(stream); - OV_ITT_TASK_NEXT(PLUGIN_IMPORT_MODEL, "parse"); + std::vector blob(graphSize); + stream.read(reinterpret_cast(blob.data()), graphSize); + if (!stream) { + OPENVINO_THROW("Failed to read data from stream!"); + } + _logger.debug("Successfully read %zu bytes into blob.", graphSize); - std::shared_ptr compiledModel; + blobPtr = std::make_unique(std::move(blob)); + } else { + blobPtr = std::make_unique(modelBuffer, stream.tellg()); + } - try { - auto compiler = getCompiler(localConfig); - auto blobContainerPtr = std::make_unique(model_buffer); - auto graph = compiler->parse(std::move(blobContainerPtr), localConfig); + auto graph = compiler->parse(std::move(blobPtr), localConfig); graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); const std::shared_ptr modelDummy = @@ -914,19 +882,7 @@ std::shared_ptr Plugin::import_model(std::istream& stream, OPENVINO_THROW("Invalid remote context type. Can't cast to ov::intel_npu::RemoteContext type"); } - return import_model(stream, context, properties); -} - -std::shared_ptr Plugin::import_model(std::istream& stream, - std::shared_ptr model_buffer, - const ov::SoPtr& context, - const ov::AnyMap& properties) const { - auto casted = std::dynamic_pointer_cast(context._ptr); - if (casted == nullptr) { - OPENVINO_THROW("Invalid remote context type. Can't cast to ov::intel_npu::RemoteContext type"); - } - - return import_model(stream, model_buffer, properties); + return import_model(stream, properties); } ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& model, From 09204af8045b97ba88d6fcfaf0bf6552f1530432 Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Thu, 12 Dec 2024 11:40:40 +0200 Subject: [PATCH 25/34] Add suggested changes --- .../intel_npu/common/blob_container.hpp | 70 +++++++++++++++++++ .../include/intel_npu/common/igraph.hpp | 66 +---------------- .../intel_npu/src/common/src/igraph.cpp | 9 +-- .../compiler_adapter/include/driver_graph.hpp | 2 +- .../include/ze_graph_ext_wrappers.hpp | 2 +- .../src/driver_compiler_adapter.cpp | 6 +- .../src/compiler_adapter/src/driver_graph.cpp | 4 +- .../src/plugin_compiler_adapter.cpp | 4 +- .../src/compiler_adapter/src/plugin_graph.cpp | 5 +- .../src/ze_graph_ext_wrappers.cpp | 6 +- 10 files changed, 88 insertions(+), 86 deletions(-) create mode 100644 src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp new file mode 100644 index 00000000000000..fbceccb26824e0 --- /dev/null +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp @@ -0,0 +1,70 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "openvino/runtime/shared_buffer.hpp" + +namespace intel_npu { + +class BlobContainer { +public: + virtual void* get_ptr() = 0; + + virtual size_t size() const = 0; + + virtual bool release_from_memory() = 0; + + virtual ~BlobContainer() = default; +}; + +class BlobContainerVector : public BlobContainer { +public: + BlobContainerVector(std::vector blob) : _ownershipBlob(std::move(blob)) {} + + void* get_ptr() override { + return reinterpret_cast(_ownershipBlob.data()); + } + + size_t size() const override { + return _ownershipBlob.size(); + } + + bool release_from_memory() override { + _ownershipBlob.clear(); + _ownershipBlob.shrink_to_fit(); + return true; + } + +private: + std::vector _ownershipBlob; +}; + +class BlobContainerAlignedBuffer : public BlobContainer { +public: + BlobContainerAlignedBuffer(const std::shared_ptr& blobSO, size_t offset) + : _ownershipBlob(blobSO), + _offset(offset) {} + + void* get_ptr() override { + return _ownershipBlob->get_ptr(_offset); + } + + size_t size() const override { + return _ownershipBlob->size(); + } + + bool release_from_memory() override { + return false; + } + +private: + std::shared_ptr _ownershipBlob; + size_t _offset; +}; + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index 85ccdcf8d8dfc7..ec4d7091ac6345 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -8,83 +8,21 @@ #include #include +#include "intel_npu/common/blob_container.hpp" #include "intel_npu/network_metadata.hpp" #include "intel_npu/utils/zero/zero_init.hpp" #include "intel_npu/utils/zero/zero_utils.hpp" #include "intel_npu/utils/zero/zero_wrappers.hpp" #include "openvino/runtime/profiling_info.hpp" -#include "openvino/runtime/shared_buffer.hpp" namespace intel_npu { -class BlobContainer { -public: - virtual void* get_ptr() { - OPENVINO_THROW("const BlobContainer::get_ptr() method is not implemented!"); - } - - virtual size_t size() const { - OPENVINO_THROW("BlobContainer::size() method is not implemented!"); - } - - virtual bool release_from_memory() { - OPENVINO_THROW("BlobContainer::release_from_memory() method is not implemented!"); - } - - virtual ~BlobContainer() = default; -}; - -class BlobContainerVector : public BlobContainer { -public: - BlobContainerVector(std::vector blob) : _ownershipBlob(std::move(blob)) {} - - void* get_ptr() override { - return reinterpret_cast(_ownershipBlob.data()); - } - - size_t size() const override { - return _ownershipBlob.size(); - } - - bool release_from_memory() override { - _ownershipBlob.clear(); - _ownershipBlob.shrink_to_fit(); - return true; - } - -private: - std::vector _ownershipBlob; -}; - -class BlobContainerAlignedBuffer : public BlobContainer { -public: - BlobContainerAlignedBuffer(const std::shared_ptr& blobSO, size_t offset) - : _ownershipBlob(blobSO), - _offset(offset) {} - - void* get_ptr() override { - return _ownershipBlob->get_ptr(_offset); - } - - size_t size() const override { - return _ownershipBlob->size(); - } - - bool release_from_memory() override { - return false; - } - -private: - std::shared_ptr _ownershipBlob; - size_t _offset; -}; - class IGraph : public std::enable_shared_from_this { public: IGraph(ze_graph_handle_t handle, NetworkMetadata metadata, const Config& config, - std::optional> blobPtr); + std::unique_ptr blobPtr); virtual size_t export_blob(std::ostream& stream) const = 0; diff --git a/src/plugins/intel_npu/src/common/src/igraph.cpp b/src/plugins/intel_npu/src/common/src/igraph.cpp index 5dfcdc7ee7466e..f641813e44c0e7 100644 --- a/src/plugins/intel_npu/src/common/src/igraph.cpp +++ b/src/plugins/intel_npu/src/common/src/igraph.cpp @@ -17,14 +17,11 @@ namespace intel_npu { IGraph::IGraph(ze_graph_handle_t handle, NetworkMetadata metadata, const Config& config, - std::optional> blobPtr) + std::unique_ptr blobPtr) : _handle(handle), _metadata(std::move(metadata)), - _logger("IGraph", config.get()) { - if (blobPtr.has_value()) { - _blobPtr = std::move(*blobPtr); - } -} + _blobPtr(std::move(blobPtr)), + _logger("IGraph", config.get()) {} const NetworkMetadata& IGraph::get_metadata() const { return _metadata; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp index b81b8b8679aca5..f88358cc5021e4 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp @@ -21,7 +21,7 @@ class DriverGraph final : public IGraph { ze_graph_handle_t graphHandle, NetworkMetadata metadata, const Config& config, - std::optional> blob); + std::unique_ptr blob); size_t export_blob(std::ostream& stream) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp index 23ee3f789941eb..df538521d856f1 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp @@ -35,7 +35,7 @@ class ZeGraphExtWrappers { const std::string& buildFlags, const uint32_t& flags) const; - ze_graph_handle_t getGraphHandle(const uint8_t* data, size_t size) const; + ze_graph_handle_t getGraphHandle(const uint8_t& data, size_t size) const; NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index bdf728f005cb99..1d19854618a237 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -200,7 +200,7 @@ std::shared_ptr DriverCompilerAdapter::compile(const std::shared_ptr DriverCompilerAdapter::parse(std::unique_ptr blobPtr, @@ -209,7 +209,7 @@ std::shared_ptr DriverCompilerAdapter::parse(std::unique_ptrgetGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); + _zeGraphExt->getGraphHandle(*reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); _logger.debug("parse end"); OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta"); @@ -220,7 +220,7 @@ std::shared_ptr DriverCompilerAdapter::parse(std::unique_ptr>(std::move(blobPtr))); + std::move(blobPtr)); } ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr& model, diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index 2949343ba4c387..1543cc1fdd9f51 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -15,8 +15,8 @@ DriverGraph::DriverGraph(const std::shared_ptr& zeGraphExt, ze_graph_handle_t graphHandle, NetworkMetadata metadata, const Config& config, - std::optional> blob) - : IGraph(graphHandle, std::move(metadata), config, std::move(blob)), + std::unique_ptr blobPtr) + : IGraph(graphHandle, std::move(metadata), config, std::move(blobPtr)), _zeGraphExt(zeGraphExt), _zeroInitStruct(zeroInitStruct), _logger("DriverGraph", config.get()) { diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 7b2582a6a30e91..809e1c88e05a71 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -89,7 +89,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrgetGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); + _zeGraphExt->getGraphHandle(*reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. Only exports are available"); @@ -122,7 +122,7 @@ std::shared_ptr PluginCompilerAdapter::parse(std::unique_ptrgetGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); + _zeGraphExt->getGraphHandle(*reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); } return std::make_shared(_zeGraphExt, diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index ec30999232c9d9..e4e989009088ec 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -17,10 +17,7 @@ PluginGraph::PluginGraph(const std::shared_ptr& zeGraphExt, NetworkMetadata metadata, std::unique_ptr blobPtr, const Config& config) - : IGraph(graphHandle, - std::move(metadata), - config, - std::optional>(std::move(blobPtr))), + : IGraph(graphHandle, std::move(metadata), config, std::move(blobPtr)), _zeGraphExt(zeGraphExt), _zeroInitStruct(zeroInitStruct), _compiler(compiler), diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp index b6a40573c5f45d..d5e793d4fff9fe 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp @@ -365,15 +365,15 @@ ze_graph_handle_t ZeGraphExtWrappers::getGraphHandle(std::pairgetGraphDdiTable().pfnCreate(_zeroInitStruct->getContext(), From 9b0bc5b75b41b79e86d2d4b1e2ee935a7488b279 Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Thu, 12 Dec 2024 16:37:04 +0200 Subject: [PATCH 26/34] Prepare `BlobContainerAlignedBuffer` for `OV versioning metadata` --- src/plugins/intel_cpu/src/plugin.cpp | 23 ++++---- src/plugins/intel_cpu/src/plugin.h | 6 +- src/plugins/intel_cpu/src/utils/serialize.cpp | 56 +++++++++++-------- src/plugins/intel_cpu/src/utils/serialize.hpp | 8 ++- .../intel_npu/common/blob_container.hpp | 16 ++++-- .../compiler_adapter/include/driver_graph.hpp | 2 +- .../src/compiler_adapter/src/driver_graph.cpp | 2 +- .../src/compiler_adapter/src/plugin_graph.cpp | 2 +- .../intel_npu/src/plugin/src/plugin.cpp | 13 +++-- 9 files changed, 75 insertions(+), 53 deletions(-) diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 8d4e72d665f121..6194438c928068 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -7,6 +7,7 @@ #include "cpu_streams_calculation.hpp" #include "internal_properties.hpp" #include "itt.h" +#include "openvino/op/paged_attention.hpp" #include "openvino/runtime/intel_cpu/properties.hpp" #include "openvino/runtime/internal_properties.hpp" #include "openvino/runtime/properties.hpp" @@ -19,7 +20,6 @@ #include "utils/precision_support.h" #include "utils/serialize.hpp" #include "weights_cache.hpp" -#include "openvino/op/paged_attention.hpp" #if defined(__linux__) # include @@ -200,7 +200,7 @@ static Config::ModelType getModelType(const std::shared_ptr& model) return Config::ModelType::CNN; if ((op::util::has_op_with_type(model) && model->get_variables().size() > 0) || - op::util::has_op_with_type(model)) + op::util::has_op_with_type(model)) return Config::ModelType::LLM; return Config::ModelType::Unknown; @@ -445,15 +445,17 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio return decltype(ov::supported_properties)::value_type(std::move(supportedProperties)); } else if (ov::internal::supported_properties == name) { - return decltype(ov::internal::supported_properties)::value_type{ + return decltype(ov::internal::supported_properties)::value_type { ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO}, #if !defined(OPENVINO_ARCH_ARM) && !(defined(__APPLE__) || defined(__MACOSX)) - ov::PropertyName{ov::internal::caching_with_mmap.name(), ov::PropertyMutability::RO}, + ov::PropertyName{ov::internal::caching_with_mmap.name(), ov::PropertyMutability::RO}, #endif - ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, - ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO}, - ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), - ov::PropertyMutability::RO}}; + ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, + ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO}, + ov::PropertyName { + ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO + } + }; } else if (name == ov::device::full_name) { return decltype(ov::device::full_name)::value_type(deviceFullName); } else if (name == ov::available_devices) { @@ -555,7 +557,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& std::shared_ptr Plugin::import_model(std::istream& model_stream, const ov::AnyMap& config) const { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model"); - CacheDecrypt decrypt{ codec_xor }; + CacheDecrypt decrypt{codec_xor}; bool decript_from_string = false; if (config.count(ov::cache_encryption_callbacks.name())) { const auto& encryption_callbacks = config.at(ov::cache_encryption_callbacks.name()).as(); @@ -576,7 +578,8 @@ std::shared_ptr Plugin::import_model(std::istream& model_str [this](const std::shared_ptr& model, const std::shared_ptr& weights) { return get_core()->read_model(model, weights); }, - decrypt, decript_from_string); + decrypt, + decript_from_string); std::shared_ptr model; deserializer >> model; diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index 916e02a868c974..b063b70dba1983 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -20,8 +20,7 @@ class Plugin : public ov::IPlugin { std::shared_ptr compile_model(const std::shared_ptr& model, const ov::AnyMap& properties, const ov::SoPtr& context) const override { - OPENVINO_THROW_NOT_IMPLEMENTED( - "compile_model with RemoteContext is not supported by CPU plugin!"); + OPENVINO_THROW_NOT_IMPLEMENTED("compile_model with RemoteContext is not supported by CPU plugin!"); }; void set_property(const ov::AnyMap& properties) override; @@ -30,8 +29,7 @@ class Plugin : public ov::IPlugin { std::shared_ptr import_model(std::istream& model, const ov::SoPtr& context, const ov::AnyMap& properties) const override { - OPENVINO_THROW_NOT_IMPLEMENTED( - "import_model with RemoteContext is not supported by CPU plugin!"); + OPENVINO_THROW_NOT_IMPLEMENTED("import_model with RemoteContext is not supported by CPU plugin!"); }; ov::SupportedOpsMap query_model(const std::shared_ptr& model, diff --git a/src/plugins/intel_cpu/src/utils/serialize.cpp b/src/plugins/intel_cpu/src/utils/serialize.cpp index 145067b8e935e4..177cc817b8b3ab 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.cpp +++ b/src/plugins/intel_cpu/src/utils/serialize.cpp @@ -16,7 +16,8 @@ namespace intel_cpu { ////////// ModelSerializer ////////// ModelSerializer::ModelSerializer(std::ostream& ostream, CacheEncrypt encrypt_fn) - : m_ostream(ostream), m_cache_encrypt(std::move(encrypt_fn)) {} + : m_ostream(ostream), + m_cache_encrypt(std::move(encrypt_fn)) {} void ModelSerializer::operator<<(const std::shared_ptr& model) { auto serialize_info = [&](std::ostream& stream) { @@ -46,15 +47,16 @@ ModelDeserializer::ModelDeserializer(std::istream& model_stream, } else { m_cache_decrypt.m_decrypt_char = decrypt_fn.m_decrypt_char; } +} - void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr& model) {} +void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr& model) {} - void ModelDeserializer::operator>>(std::shared_ptr& model) { - if (m_model_buffer) { - process_mmap(model, m_model_buffer); - } else { - process_stream(model); - } +void ModelDeserializer::operator>>(std::shared_ptr& model) { + if (m_model_buffer) { + process_mmap(model, m_model_buffer); + } else { + process_stream(model); + } } void ModelDeserializer::process_mmap(std::shared_ptr& model, @@ -81,7 +83,10 @@ void ModelDeserializer::process_mmap(std::shared_ptr& model, // Read model input/output precisions. pugi::xml_document xml_in_out_doc; if (hdr.custom_data_size > 0lu) { - auto res = xml_in_out_doc.load_buffer(buffer_base + hdr.custom_data_offset, hdr.custom_data_size, pugi::parse_default, pugi::encoding_utf8); + auto res = xml_in_out_doc.load_buffer(buffer_base + hdr.custom_data_offset, + hdr.custom_data_size, + pugi::parse_default, + pugi::encoding_utf8); if (res.status != pugi::status_ok) { OPENVINO_THROW("[CPU] Could to deserialize custom data."); } @@ -90,7 +95,10 @@ void ModelDeserializer::process_mmap(std::shared_ptr& model, // Map blob content std::shared_ptr weights_buf; if (hdr.consts_size) { - weights_buf = std::make_shared>>(buffer_base + hdr.consts_offset, hdr.consts_size, mmemory); + weights_buf = + std::make_shared>>(buffer_base + hdr.consts_offset, + hdr.consts_size, + mmemory); } // XML content @@ -107,9 +115,7 @@ void ModelDeserializer::process_mmap(std::shared_ptr& model, xml_buff->assign(buffer_base + hdr.model_offset, hdr.model_size); } std::shared_ptr model_buf = - std::make_shared>>(&((*xml_buff)[0]), - hdr.model_size, - xml_buff); + std::make_shared>>(&((*xml_buff)[0]), hdr.model_size, xml_buff); model = m_model_builder(model_buf, weights_buf); @@ -154,7 +160,7 @@ void ModelDeserializer::process_stream(std::shared_ptr& model) { auto data_blob = std::make_shared(ov::element::u8, ov::Shape({hdr.consts_size})); m_istream.seekg(hdr.consts_offset); if (hdr.consts_size) { - m_istream.read(static_cast(data_blob->data(ov::element::u8)), hdr.consts_size); + m_istream.read(static_cast(data_blob->data(ov::element::u8)), hdr.consts_size); } // read XML content @@ -166,16 +172,20 @@ void ModelDeserializer::process_stream(std::shared_ptr& model) { if (m_decript_from_string) { *xml_string = m_cache_decrypt.m_decrypt_str(*xml_string); } else { - m_cache_decrypt.m_decrypt_char(const_cast(xml_string->data()), xml_string->data(), xml_string->size()); + m_cache_decrypt.m_decrypt_char(const_cast(xml_string->data()), + xml_string->data(), + xml_string->size()); } } - auto model_buf = std::make_shared>>(const_cast(xml_string->data()), - xml_string->size(), - xml_string); - auto weights_buf = std::make_shared>>(reinterpret_cast(data_blob->data(ov::element::u8)), - hdr.consts_size, - data_blob); + auto model_buf = + std::make_shared>>(const_cast(xml_string->data()), + xml_string->size(), + xml_string); + auto weights_buf = std::make_shared>>( + reinterpret_cast(data_blob->data(ov::element::u8)), + hdr.consts_size, + data_blob); model = m_model_builder(model_buf, weights_buf); @@ -184,5 +194,5 @@ void ModelDeserializer::process_stream(std::shared_ptr& model) { set_info(root, model); } -} // namespace intel_cpu -} // namespace ov +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/serialize.hpp b/src/plugins/intel_cpu/src/utils/serialize.hpp index 0d17020fb1cf37..f88acf57921c50 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.hpp +++ b/src/plugins/intel_cpu/src/utils/serialize.hpp @@ -29,7 +29,9 @@ class ModelSerializer { class ModelDeserializer { public: - typedef std::function(const std::shared_ptr&, const std::shared_ptr&)> ModelBuilder; + typedef std::function(const std::shared_ptr&, + const std::shared_ptr&)> + ModelBuilder; ModelDeserializer(std::istream& model, std::shared_ptr model_buffer, @@ -55,5 +57,5 @@ class ModelDeserializer { std::shared_ptr m_model_buffer; }; -} // namespace intel_cpu -} // namespace ov +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp index fbceccb26824e0..6dcc30c487e46a 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp @@ -46,16 +46,19 @@ class BlobContainerVector : public BlobContainer { class BlobContainerAlignedBuffer : public BlobContainer { public: - BlobContainerAlignedBuffer(const std::shared_ptr& blobSO, size_t offset) - : _ownershipBlob(blobSO), - _offset(offset) {} + BlobContainerAlignedBuffer(const std::shared_ptr& blobSO, + size_t ovHeaderOffset, + uint64_t blobSize) + : _blobSize(blobSize), + _ovHeaderOffset(ovHeaderOffset), + _ownershipBlob(blobSO) {} void* get_ptr() override { - return _ownershipBlob->get_ptr(_offset); + return _ownershipBlob->get_ptr(_ovHeaderOffset); } size_t size() const override { - return _ownershipBlob->size(); + return _blobSize; } bool release_from_memory() override { @@ -63,8 +66,9 @@ class BlobContainerAlignedBuffer : public BlobContainer { } private: + uint64_t _blobSize; + size_t _ovHeaderOffset; std::shared_ptr _ownershipBlob; - size_t _offset; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp index f88358cc5021e4..ac89a790291d2e 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp @@ -21,7 +21,7 @@ class DriverGraph final : public IGraph { ze_graph_handle_t graphHandle, NetworkMetadata metadata, const Config& config, - std::unique_ptr blob); + std::unique_ptr blobPtr); size_t export_blob(std::ostream& stream) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index 1543cc1fdd9f51..fb6f86eaf7d44f 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -52,7 +52,7 @@ size_t DriverGraph::export_blob(std::ostream& stream) const { if (_logger.level() >= ov::log::Level::INFO) { std::uint32_t result = 1171117u; - for (const uint8_t* it = blobPtr; it != blobPtr + blobSize; ++it) { + for (const uint8_t* it = blobPtr; it != blobPtr + blobSize; ++it) { result = ((result << 7) + result) + static_cast(*it); } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index e4e989009088ec..726a1196b7c88b 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -30,7 +30,7 @@ PluginGraph::PluginGraph(const std::shared_ptr& zeGraphExt, initialize(config); } -void PluginGraph::export_blob(std::ostream& stream) const { +size_t PluginGraph::export_blob(std::ostream& stream) const { stream.write(reinterpret_cast(_blobPtr->get_ptr()), _blobPtr->size()); if (!stream) { diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 3e035c34958269..9e2a4e2a56950f 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -811,7 +811,8 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c std::shared_ptr modelBuffer; if (npu_plugin_properties.count(ov::internal::cached_model_buffer.name())) { - modelBuffer = npu_plugin_properties.at(ov::internal::cached_model_buffer.name()).as>(); + modelBuffer = + npu_plugin_properties.at(ov::internal::cached_model_buffer.name()).as>(); npu_plugin_properties.erase(ov::internal::cached_model_buffer.name()); } @@ -839,11 +840,15 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c CompilerAdapterFactory compilerAdapterFactory; auto compiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), localConfig); + auto storedMeta = read_metadata_from(stream); + if (!storedMeta->is_compatible()) { + OPENVINO_THROW("Incompatible blob version!"); + } + std::unique_ptr blobPtr; + auto graphSize = storedMeta->get_blob_size(); if (modelBuffer == nullptr) { - auto graphSize = getFileSize(stream); - std::vector blob(graphSize); stream.read(reinterpret_cast(blob.data()), graphSize); if (!stream) { @@ -853,7 +858,7 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c blobPtr = std::make_unique(std::move(blob)); } else { - blobPtr = std::make_unique(modelBuffer, stream.tellg()); + blobPtr = std::make_unique(modelBuffer, stream.tellg(), graphSize); } auto graph = compiler->parse(std::move(blobPtr), localConfig); From fe97c1265eb42845210d5777aa2aeb1c9b94eba0 Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Tue, 14 Jan 2025 12:52:27 +0200 Subject: [PATCH 27/34] Fix broken stream processed by NPUW --- .../src/common/include/intel_npu/common/blob_container.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp index 6dcc30c487e46a..fa6f0cead53e50 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2024 Intel Corporation +// Copyright (C) 2018-2025 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // From 1e9a6713cf8926e6b70723c3741f3d3d4ad968ac Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Tue, 14 Jan 2025 18:25:04 +0200 Subject: [PATCH 28/34] Fix offsets mismatch for HETERO plugin blob headers --- .../src/compiler_adapter/src/driver_graph.cpp | 2 +- .../intel_npu/src/plugin/include/metadata.hpp | 52 ------------------- .../intel_npu/src/plugin/src/plugin.cpp | 2 +- .../tests/unit/npu/metadata_version.cpp | 41 --------------- 4 files changed, 2 insertions(+), 95 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index fb6f86eaf7d44f..48ae84a6c841ea 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -34,7 +34,7 @@ DriverGraph::DriverGraph(const std::shared_ptr& zeGraphExt, size_t DriverGraph::export_blob(std::ostream& stream) const { const uint8_t* blobPtr = nullptr; - size_t blobSize = -1; + size_t blobSize; std::vector blob; if (_blobIsReleased) { diff --git a/src/plugins/intel_npu/src/plugin/include/metadata.hpp b/src/plugins/intel_npu/src/plugin/include/metadata.hpp index d2be257c02bb8f..f4ae25e84c9136 100644 --- a/src/plugins/intel_npu/src/plugin/include/metadata.hpp +++ b/src/plugins/intel_npu/src/plugin/include/metadata.hpp @@ -64,58 +64,6 @@ struct MetadataBase { } }; -struct MetadataBase { -protected: - uint32_t _version; - -public: - MetadataBase(uint32_t version) : _version(version) {} - - /** - * @brief Reads metadata from a stream. - */ - virtual void read(std::istream& stream) = 0; - - /** - * @brief Writes metadata to a stream. - */ - virtual void write(std::ostream& stream) = 0; - - virtual bool is_compatible() = 0; - - virtual uint64_t get_blob_size() const = 0; - - virtual ~MetadataBase() = default; - - /** - * @brief Returns a uint32_t value which represents two uint16_t values concatenated. - * @details Convention for bumping the metadata version: - * - Increment Major in case of: removing a current field OR adding a new field in between fields. - * - Increment Minor in case of: adding a new field at the end. - * - * @return Major and minor versions concatenated into a single uint32_t value. - */ - static constexpr uint32_t make_version(uint16_t major, uint16_t minor) { - return major << 16 | (minor & 0x0000ffff); - } - - /** - * @brief Gets the major version. - * @return Major version. - */ - static constexpr uint16_t get_major(uint32_t version) { - return static_cast(version >> 16); - } - - /** - * @brief Gets the minor version. - * @return Minor version. - */ - static constexpr uint16_t get_minor(uint32_t version) { - return static_cast(version); - } -}; - /** * @brief Magic bytes used for identifying NPU blobs. */ diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 9e2a4e2a56950f..fcef9b6a12a563 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -816,7 +816,7 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c npu_plugin_properties.erase(ov::internal::cached_model_buffer.name()); } - const std::map propertiesMap = any_copy(npu_plugin_properties); + const auto propertiesMap = any_copy(npu_plugin_properties); auto localConfig = merge_configs(_globalConfig, propertiesMap, OptionMode::RunTime); _logger.setLevel(localConfig.get()); diff --git a/src/plugins/intel_npu/tests/unit/npu/metadata_version.cpp b/src/plugins/intel_npu/tests/unit/npu/metadata_version.cpp index 9bd2193db3f6ef..0c94a1e5334b36 100644 --- a/src/plugins/intel_npu/tests/unit/npu/metadata_version.cpp +++ b/src/plugins/intel_npu/tests/unit/npu/metadata_version.cpp @@ -199,44 +199,3 @@ TEST_F(MetadataUnitTests, writeAndReadMetadataWithRemovedField) { std::unique_ptr storedMeta; EXPECT_ANY_THROW(storedMeta = read_metadata_from(stream)); } - -struct MetadataVersionTestFixture : Metadata, ::testing::TestWithParam { -public: - std::stringstream blob; - - void set_version(uint32_t newVersion) { - _version = newVersion; - } - - MetadataVersionTestFixture() : Metadata(0, std::nullopt) {} - - MetadataVersionTestFixture(uint64_t blobSize, std::optional ovVersion) - : Metadata(blobSize, ovVersion) {} - - void TestBody() override {} -}; - -TEST_P(MetadataVersionTestFixture, readInvalidMetadataVersion) { - auto dummyMeta = MetadataVersionTestFixture(0, ov::get_openvino_version().buildNumber); - auto metaVersion = GetParam(); - - dummyMeta.set_version(metaVersion); - - OV_ASSERT_NO_THROW(dummyMeta.write(blob)); - - ASSERT_ANY_THROW(read_metadata_from(blob)); -} - -constexpr uint16_t currentMajor = get_major(CURRENT_METADATA_VERSION), - currentMinor = get_minor(CURRENT_METADATA_VERSION); - -INSTANTIATE_TEST_CASE_P(MetadataUnitTests, - MetadataVersionTestFixture, - ::testing::Values(make_version(currentMajor, currentMinor + 1), - make_version(currentMajor, currentMinor - 1), - make_version(currentMajor + 1, currentMinor), - make_version(currentMajor + 1, currentMinor + 1), - make_version(currentMajor + 1, currentMinor - 1), - make_version(currentMajor - 1, currentMinor), - make_version(currentMajor - 1, currentMinor + 1), - make_version(currentMajor - 1, currentMinor - 1))); From 1d3880531730a4894bef7da9fb6b3de4a326ab71 Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Fri, 17 Jan 2025 15:32:28 +0200 Subject: [PATCH 29/34] Optimize CIP path --- .../include/intel_npu/common/blob_container.hpp | 12 ++++++++++++ .../compiler_adapter/src/plugin_compiler_adapter.cpp | 8 ++------ .../src/compiler_adapter/src/plugin_graph.cpp | 4 +--- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp index fa6f0cead53e50..8cb6fe8b2c44db 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp @@ -19,6 +19,8 @@ class BlobContainer { virtual bool release_from_memory() = 0; + virtual std::vector get_ownership_blob() = 0; + virtual ~BlobContainer() = default; }; @@ -40,6 +42,10 @@ class BlobContainerVector : public BlobContainer { return true; } + std::vector get_ownership_blob() override { + return _ownershipBlob; + } + private: std::vector _ownershipBlob; }; @@ -65,6 +71,12 @@ class BlobContainerAlignedBuffer : public BlobContainer { return false; } + std::vector get_ownership_blob() override { + std::vector blob(_blobSize); + blob.assign(reinterpret_cast(this->get_ptr()), reinterpret_cast(this->get_ptr()) + this->size()); + return blob; + } + private: uint64_t _blobSize; size_t _ovHeaderOffset; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 809e1c88e05a71..d10d2e194449aa 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -110,12 +110,8 @@ std::shared_ptr PluginCompilerAdapter::parse(std::unique_ptr network(blobPtr->size()); - network.assign(reinterpret_cast(blobPtr->get_ptr()), - reinterpret_cast(blobPtr->get_ptr()) + blobPtr->size()); - auto networkMeta = _compiler->parse(network, config); - network.clear(); - network.shrink_to_fit(); + const auto& blob = blobPtr->get_ownership_blob(); + auto networkMeta = _compiler->parse(blob, config); _logger.debug("parse end"); ze_graph_handle_t graphHandle = nullptr; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index 726a1196b7c88b..8546f04a84ae95 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -56,9 +56,7 @@ size_t PluginGraph::export_blob(std::ostream& stream) const { std::vector PluginGraph::process_profiling_output(const std::vector& profData, const Config& config) const { - std::vector blob(_blobPtr->size()); - blob.assign(reinterpret_cast(_blobPtr->get_ptr()), - reinterpret_cast(_blobPtr->get_ptr()) + _blobPtr->size()); + const auto& blob = _blobPtr->get_ownership_blob(); return _compiler->process_profiling_output(profData, blob, config); } From e26d470710842d1bf346259d88e772c236a85590 Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Fri, 17 Jan 2025 22:14:36 +0200 Subject: [PATCH 30/34] Add fix for new CIP optimization --- .../intel_npu/common/blob_container.hpp | 76 ++++++++++--------- .../src/plugin_compiler_adapter.cpp | 5 +- .../src/compiler_adapter/src/plugin_graph.cpp | 11 ++- .../intel_npu/src/plugin/src/plugin.cpp | 6 +- 4 files changed, 55 insertions(+), 43 deletions(-) diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp index 8cb6fe8b2c44db..b347b457fc7e5e 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp @@ -13,41 +13,42 @@ namespace intel_npu { class BlobContainer { public: - virtual void* get_ptr() = 0; + BlobContainer() = default; - virtual size_t size() const = 0; + BlobContainer(std::vector blob) : _blob(std::move(blob)) {} - virtual bool release_from_memory() = 0; - - virtual std::vector get_ownership_blob() = 0; - - virtual ~BlobContainer() = default; -}; - -class BlobContainerVector : public BlobContainer { -public: - BlobContainerVector(std::vector blob) : _ownershipBlob(std::move(blob)) {} - - void* get_ptr() override { - return reinterpret_cast(_ownershipBlob.data()); + virtual const void* get_ptr() const { + return _blob.data(); } - size_t size() const override { - return _ownershipBlob.size(); + virtual size_t size() const { + return _blob.size(); } - bool release_from_memory() override { - _ownershipBlob.clear(); - _ownershipBlob.shrink_to_fit(); - return true; + virtual bool release_from_memory() const { + if (_shouldDeallocate) { + _blob.clear(); + _blob.shrink_to_fit(); + return true; + } + _shouldDeallocate = true; + return false; } - std::vector get_ownership_blob() override { - return _ownershipBlob; + virtual const std::vector& get_blob() const { + // when unerlying blob object was accessed, + // prevent deallocation on next `release_from_memory` call + _shouldDeallocate = false; + return _blob; } + virtual ~BlobContainer() = default; + +protected: + mutable std::vector _blob; + private: - std::vector _ownershipBlob; + mutable bool _shouldDeallocate = true; }; class BlobContainerAlignedBuffer : public BlobContainer { @@ -55,32 +56,35 @@ class BlobContainerAlignedBuffer : public BlobContainer { BlobContainerAlignedBuffer(const std::shared_ptr& blobSO, size_t ovHeaderOffset, uint64_t blobSize) - : _blobSize(blobSize), + : _size(blobSize), _ovHeaderOffset(ovHeaderOffset), - _ownershipBlob(blobSO) {} + _blobSO(blobSO) {} - void* get_ptr() override { - return _ownershipBlob->get_ptr(_ovHeaderOffset); + const void* get_ptr() const override { + return _blobSO->get_ptr(_ovHeaderOffset); } size_t size() const override { - return _blobSize; + return _size; } - bool release_from_memory() override { + bool release_from_memory() const override { + BlobContainer::release_from_memory(); return false; } - std::vector get_ownership_blob() override { - std::vector blob(_blobSize); - blob.assign(reinterpret_cast(this->get_ptr()), reinterpret_cast(this->get_ptr()) + this->size()); - return blob; + const std::vector& get_blob() const override { + BlobContainer::release_from_memory(); + _blob.resize(_size); + _blob.assign(reinterpret_cast(this->get_ptr()), + reinterpret_cast(this->get_ptr()) + _size); + return _blob; } private: - uint64_t _blobSize; + uint64_t _size; size_t _ovHeaderOffset; - std::shared_ptr _ownershipBlob; + std::shared_ptr _blobSO; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index d10d2e194449aa..6f728ed5271678 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -80,7 +80,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrcompile(model, config); - auto blobPtr = std::make_unique(std::move(networkDesc.compiledNetwork)); + auto blobPtr = std::make_unique(std::move(networkDesc.compiledNetwork)); _logger.debug("compile end"); ze_graph_handle_t graphHandle = nullptr; @@ -110,8 +110,9 @@ std::shared_ptr PluginCompilerAdapter::parse(std::unique_ptrget_ownership_blob(); + const auto& blob = blobPtr->get_blob(); auto networkMeta = _compiler->parse(blob, config); + blobPtr->release_from_memory(); _logger.debug("parse end"); ze_graph_handle_t graphHandle = nullptr; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index 8546f04a84ae95..b1e244db60d988 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -56,8 +56,15 @@ size_t PluginGraph::export_blob(std::ostream& stream) const { std::vector PluginGraph::process_profiling_output(const std::vector& profData, const Config& config) const { - const auto& blob = _blobPtr->get_ownership_blob(); - return _compiler->process_profiling_output(profData, blob, config); + std::vector profilingInfo; + const auto& blob = _blobPtr->get_blob(); + try { + profilingInfo = _compiler->process_profiling_output(profData, blob, config); + } catch (const std::exception& ex) { + _logger.error(ex.what()); + } + _blobPtr->release_from_memory(); + return profilingInfo; } void PluginGraph::set_argument_value(uint32_t argi, const void* argv) const { diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index fcef9b6a12a563..58447afe0a767e 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -686,13 +686,13 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< auto localConfig = merge_configs(_globalConfig, localPropertiesMap); update_log_level(localPropertiesMap); - const auto set_cache_dir = localConfig.get(); + /* const auto set_cache_dir = localConfig.get(); if (!set_cache_dir.empty()) { const auto compilerType = localConfig.get(); if (compilerType == ov::intel_npu::CompilerType::MLIR) { OPENVINO_THROW("Option 'CACHE_DIR' is not supported with MLIR compiler type"); } - } + } */ const auto platform = _backends->getCompilationPlatform(localConfig.get(), localConfig.get()); auto device = _backends->getDevice(localConfig.get()); @@ -856,7 +856,7 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c } _logger.debug("Successfully read %zu bytes into blob.", graphSize); - blobPtr = std::make_unique(std::move(blob)); + blobPtr = std::make_unique(std::move(blob)); } else { blobPtr = std::make_unique(modelBuffer, stream.tellg(), graphSize); } From c647071290cd3c5a40d4e3aa8a89d170aa6ac5ec Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Sun, 19 Jan 2025 23:21:53 +0200 Subject: [PATCH 31/34] Add unit tests --- .../include/intel_npu/common/igraph.hpp | 1 + .../intel_npu/src/common/src/igraph.cpp | 4 + .../intel_npu/tests/unit/CMakeLists.txt | 2 + .../tests/unit/npu/blob_container.cpp | 195 ++++++++++++++++++ 4 files changed, 202 insertions(+) create mode 100644 src/plugins/intel_npu/tests/unit/npu/blob_container.cpp diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index ec4d7091ac6345..4f2583c4f92419 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -36,6 +36,7 @@ class IGraph : public std::enable_shared_from_this { virtual ~IGraph() = default; const NetworkMetadata& get_metadata() const; + const BlobContainer& get_blob_container() const; ze_graph_handle_t get_handle() const; void update_network_name(std::string_view name); diff --git a/src/plugins/intel_npu/src/common/src/igraph.cpp b/src/plugins/intel_npu/src/common/src/igraph.cpp index f641813e44c0e7..5552d77d2c0fe5 100644 --- a/src/plugins/intel_npu/src/common/src/igraph.cpp +++ b/src/plugins/intel_npu/src/common/src/igraph.cpp @@ -27,6 +27,10 @@ const NetworkMetadata& IGraph::get_metadata() const { return _metadata; } +const BlobContainer& IGraph::get_blob_container() const { + return *_blobPtr; +} + ze_graph_handle_t IGraph::get_handle() const { return _handle; } diff --git a/src/plugins/intel_npu/tests/unit/CMakeLists.txt b/src/plugins/intel_npu/tests/unit/CMakeLists.txt index 1097e183369fe4..b552979d8b1a51 100644 --- a/src/plugins/intel_npu/tests/unit/CMakeLists.txt +++ b/src/plugins/intel_npu/tests/unit/CMakeLists.txt @@ -10,6 +10,7 @@ set(MANDATORY_UNIT_TESTS_LIBS "openvino::gtest" "openvino::gtest_main" "openvino::runtime" + "openvino_runtime_s" "openvino::npu_common" "openvino::npu_al" "openvino::npu_logger_utils" @@ -29,6 +30,7 @@ ov_add_test_target( ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/utils/include ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/plugin/include ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/al/include + $ OBJECT_FILES ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/plugin/src/metadata.cpp LINK_LIBRARIES diff --git a/src/plugins/intel_npu/tests/unit/npu/blob_container.cpp b/src/plugins/intel_npu/tests/unit/npu/blob_container.cpp new file mode 100644 index 00000000000000..5aec4ec24b07bf --- /dev/null +++ b/src/plugins/intel_npu/tests/unit/npu/blob_container.cpp @@ -0,0 +1,195 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include "common_test_utils/file_utils.hpp" +#include "common_test_utils/subgraph_builders/2_input_subtract.hpp" +#include "common_test_utils/test_assertions.hpp" +#include "common_test_utils/test_constants.hpp" +#include "dev/core_impl.hpp" +#include "intel_npu/common/icompiled_model.hpp" +#include "openvino/runtime/core.hpp" +#include "openvino/runtime/iasync_infer_request.hpp" +#include "openvino/runtime/intel_npu/properties.hpp" +#include "openvino/runtime/properties.hpp" +#include "openvino/util/file_path.hpp" + +using namespace intel_npu; + +using BlobContainerUnitTests = ::testing::Test; + +namespace { +const char* dummyBlobHeader = "blobwillstartafterspace correctblob!"; +const char* testCacheDir = "blob_container_test_cache_dir"; +const char* testFileName = "blob_container_test.blob"; + +} // namespace + +TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForCacheEnabled) { + auto core = std::make_shared(); + core->register_compile_time_plugins(); + auto model = ov::test::utils::make_2_input_subtract(); + + { + auto compiledModel = core->compile_model(model, + ov::test::utils::DEVICE_NPU, + {ov::cache_dir(testCacheDir), ov::enable_profiling(true)}); + auto inferRequest = compiledModel->create_infer_request(); + inferRequest->infer(); + OV_ASSERT_NO_THROW(auto profilingInfo = inferRequest->get_profiling_info()); + + auto* compiledModelPtr = dynamic_cast(compiledModel._ptr.get()); + OPENVINO_ASSERT(compiledModelPtr != nullptr); + const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container(); + auto* blobContainerAlignedBufferPtr = + dynamic_cast(&blobContainer); + OPENVINO_ASSERT(blobContainerAlignedBufferPtr == nullptr, + "Blob after compilation should not be memory mapped!"); + } + + { + auto compiledModel = core->compile_model(model, + ov::test::utils::DEVICE_NPU, + {ov::cache_dir(testCacheDir), ov::enable_profiling(true)}); + auto inferRequest = compiledModel->create_infer_request(); + inferRequest->infer(); + OV_ASSERT_NO_THROW(auto profilingInfo = inferRequest->get_profiling_info()); + auto outputFile = + std::ofstream(std::filesystem::path(testCacheDir) / testFileName, std::ios::out | std::ios::binary); + OV_ASSERT_NO_THROW(compiledModel->export_model(outputFile)); + + auto* compiledModelPtr = dynamic_cast(compiledModel._ptr.get()); + OPENVINO_ASSERT(compiledModelPtr != nullptr); + const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container(); + auto* blobContainerAlignedBufferPtr = + dynamic_cast(&blobContainer); + OPENVINO_ASSERT(blobContainerAlignedBufferPtr != nullptr, "Cached blob should be memory mapped!"); + } + ov::test::utils::removeDir(testCacheDir); +} + +TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForFStream) { + auto core = std::make_shared(); + core->register_compile_time_plugins(); + auto model = ov::test::utils::make_2_input_subtract(); + + { + auto compiledModel = core->compile_model(model, ov::test::utils::DEVICE_NPU, {ov::enable_profiling(true)}); + auto inferRequest = compiledModel->create_infer_request(); + inferRequest->infer(); + OV_ASSERT_NO_THROW(auto profilingInfo = inferRequest->get_profiling_info()); + auto outputFile = std::ofstream(testFileName, std::ios::out | std::ios::binary); + OV_ASSERT_NO_THROW(compiledModel->export_model(outputFile)); + } + + { + auto inputFile = std::ifstream(testFileName, std::ios::in | std::ios::binary); + auto compiledModel = core->import_model(inputFile, ov::test::utils::DEVICE_NPU, {ov::enable_profiling(true)}); + inputFile.close(); + auto inferRequest = compiledModel->create_infer_request(); + inferRequest->infer(); + OV_ASSERT_NO_THROW(auto profilingInfo = inferRequest->get_profiling_info()); + auto outputFile = std::ofstream(testFileName, std::ios::out | std::ios::binary); + OV_ASSERT_NO_THROW(compiledModel->export_model(outputFile)); + + auto* compiledModelPtr = dynamic_cast(compiledModel._ptr.get()); + OPENVINO_ASSERT(compiledModelPtr != nullptr); + const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container(); + auto* blobContainerAlignedBufferPtr = + dynamic_cast(&blobContainer); + OPENVINO_ASSERT(blobContainerAlignedBufferPtr == nullptr, "Cannot have memory mapped blob for std::fstream!"); + } + ov::test::utils::removeFile(testFileName); +} + +TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForSStream) { + auto core = std::make_shared(); + core->register_compile_time_plugins(); + auto model = ov::test::utils::make_2_input_subtract(); + std::stringstream blobStream; + + { + auto compiledModel = core->compile_model(model, ov::test::utils::DEVICE_NPU, {ov::enable_profiling(true)}); + auto inferRequest = compiledModel->create_infer_request(); + inferRequest->infer(); + OV_ASSERT_NO_THROW(auto profilingInfo = inferRequest->get_profiling_info()); + OV_ASSERT_NO_THROW(compiledModel->export_model(blobStream)); + } + + { + auto compiledModel = core->import_model(blobStream, ov::test::utils::DEVICE_NPU, {ov::enable_profiling(true)}); + blobStream = std::stringstream(); + auto inferRequest = compiledModel->create_infer_request(); + inferRequest->infer(); + OV_ASSERT_NO_THROW(auto profilingInfo = inferRequest->get_profiling_info()); + OV_ASSERT_NO_THROW(compiledModel->export_model(blobStream)); + + auto* compiledModelPtr = dynamic_cast(compiledModel._ptr.get()); + OPENVINO_ASSERT(compiledModelPtr != nullptr); + const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container(); + auto* blobContainerAlignedBufferPtr = + dynamic_cast(&blobContainer); + OPENVINO_ASSERT(blobContainerAlignedBufferPtr == nullptr, + "Cannot have memory mapped blob for std::stringstream!"); + } +} + +TEST_F(BlobContainerUnitTests, isBlobHeaderHandledCorrectly) { + auto core = std::make_shared(); + core->register_compile_time_plugins(); + auto model = ov::test::utils::make_2_input_subtract(); + std::stringstream blobStream; + blobStream << dummyBlobHeader; + + { + auto compiledModel = core->compile_model(model, ov::test::utils::DEVICE_NPU, {ov::enable_profiling(true)}); + auto inferRequest = compiledModel->create_infer_request(); + inferRequest->infer(); + OV_ASSERT_NO_THROW(auto profilingInfo = inferRequest->get_profiling_info()); + auto outputFile = std::ofstream(testFileName, std::ios::out | std::ios::binary); + outputFile << dummyBlobHeader; + OV_ASSERT_NO_THROW(compiledModel->export_model(outputFile)); + OV_ASSERT_NO_THROW(compiledModel->export_model(blobStream)); + } + + { + std::string parseDummyHeader; + std::string blob; + blobStream >> parseDummyHeader; + + EXPECT_THAT(parseDummyHeader, testing::HasSubstr("blobwillstartafterspace")); + auto compiledModel = + core->import_model(blobStream, ov::test::utils::DEVICE_NPU, {ov::intel_npu::defer_weights_load(true)}); + blobStream = {}; + + auto* compiledModelPtr = dynamic_cast(compiledModel._ptr.get()); + OPENVINO_ASSERT(compiledModelPtr != nullptr); + const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container(); + blob.assign(reinterpret_cast(blobContainer.get_ptr()), blobContainer.size()); + EXPECT_THAT(blob, testing::HasSubstr("correctblob!")); + } + + { + std::string parseDummyHeader; + std::string blob; + auto inputFile = std::ifstream(testFileName, std::ios::in | std::ios::binary); + blobStream >> parseDummyHeader; + + EXPECT_THAT(parseDummyHeader, testing::HasSubstr("blobwillstartafterspace")); + auto compiledModel = + core->import_model(blobStream, ov::test::utils::DEVICE_NPU, {ov::intel_npu::defer_weights_load(true)}); + + auto* compiledModelPtr = dynamic_cast(compiledModel._ptr.get()); + OPENVINO_ASSERT(compiledModelPtr != nullptr); + const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container(); + blob.assign(reinterpret_cast(blobContainer.get_ptr()), blobContainer.size()); + EXPECT_THAT(blob, testing::HasSubstr("correctblob!")); + } + + ov::test::utils::removeFile(testFileName); +} From 1801088d72fa568215188cd5d8810357d8e16a8f Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Mon, 20 Jan 2025 18:24:37 +0200 Subject: [PATCH 32/34] Revert changes for `CIP Optimization` --- .../intel_npu/common/blob_container.hpp | 71 ++++++++----------- .../src/plugin_compiler_adapter.cpp | 11 +-- .../src/compiler_adapter/src/plugin_graph.cpp | 13 ++-- .../intel_npu/src/plugin/src/plugin.cpp | 6 +- .../tests/unit/npu/blob_container.cpp | 62 ++++++++++------ 5 files changed, 87 insertions(+), 76 deletions(-) diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp index b347b457fc7e5e..2f6b31aceacd5d 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp @@ -13,50 +13,50 @@ namespace intel_npu { class BlobContainer { public: - BlobContainer() = default; + /** + * @brief Returns the address at the beginning of the blob. + */ + virtual const void* get_ptr() const = 0; - BlobContainer(std::vector blob) : _blob(std::move(blob)) {} + /** + * @brief Size of the blob. + */ + virtual size_t size() const = 0; - virtual const void* get_ptr() const { - return _blob.data(); - } + /** + * @brief Returns true if the blob can be deallocated from memory, false otherwise. + */ + virtual bool release_from_memory() = 0; - virtual size_t size() const { - return _blob.size(); - } + virtual ~BlobContainer() = default; +}; - virtual bool release_from_memory() const { - if (_shouldDeallocate) { - _blob.clear(); - _blob.shrink_to_fit(); - return true; - } - _shouldDeallocate = true; - return false; - } +class BlobContainerVector : public BlobContainer { +public: + BlobContainerVector(std::vector blob) : _blob(std::move(blob)) {} - virtual const std::vector& get_blob() const { - // when unerlying blob object was accessed, - // prevent deallocation on next `release_from_memory` call - _shouldDeallocate = false; - return _blob; + const void* get_ptr() const override { + return reinterpret_cast(_blob.data()); } - virtual ~BlobContainer() = default; + size_t size() const override { + return _blob.size(); + } -protected: - mutable std::vector _blob; + bool release_from_memory() override { + _blob.clear(); + _blob.shrink_to_fit(); + return true; + } private: - mutable bool _shouldDeallocate = true; + std::vector _blob; }; class BlobContainerAlignedBuffer : public BlobContainer { public: - BlobContainerAlignedBuffer(const std::shared_ptr& blobSO, - size_t ovHeaderOffset, - uint64_t blobSize) - : _size(blobSize), + BlobContainerAlignedBuffer(const std::shared_ptr& blobSO, size_t ovHeaderOffset, uint64_t size) + : _size(size), _ovHeaderOffset(ovHeaderOffset), _blobSO(blobSO) {} @@ -68,19 +68,10 @@ class BlobContainerAlignedBuffer : public BlobContainer { return _size; } - bool release_from_memory() const override { - BlobContainer::release_from_memory(); + bool release_from_memory() override { return false; } - const std::vector& get_blob() const override { - BlobContainer::release_from_memory(); - _blob.resize(_size); - _blob.assign(reinterpret_cast(this->get_ptr()), - reinterpret_cast(this->get_ptr()) + _size); - return _blob; - } - private: uint64_t _size; size_t _ovHeaderOffset; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 6f728ed5271678..809e1c88e05a71 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -80,7 +80,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrcompile(model, config); - auto blobPtr = std::make_unique(std::move(networkDesc.compiledNetwork)); + auto blobPtr = std::make_unique(std::move(networkDesc.compiledNetwork)); _logger.debug("compile end"); ze_graph_handle_t graphHandle = nullptr; @@ -110,9 +110,12 @@ std::shared_ptr PluginCompilerAdapter::parse(std::unique_ptrget_blob(); - auto networkMeta = _compiler->parse(blob, config); - blobPtr->release_from_memory(); + std::vector network(blobPtr->size()); + network.assign(reinterpret_cast(blobPtr->get_ptr()), + reinterpret_cast(blobPtr->get_ptr()) + blobPtr->size()); + auto networkMeta = _compiler->parse(network, config); + network.clear(); + network.shrink_to_fit(); _logger.debug("parse end"); ze_graph_handle_t graphHandle = nullptr; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index b1e244db60d988..726a1196b7c88b 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -56,15 +56,10 @@ size_t PluginGraph::export_blob(std::ostream& stream) const { std::vector PluginGraph::process_profiling_output(const std::vector& profData, const Config& config) const { - std::vector profilingInfo; - const auto& blob = _blobPtr->get_blob(); - try { - profilingInfo = _compiler->process_profiling_output(profData, blob, config); - } catch (const std::exception& ex) { - _logger.error(ex.what()); - } - _blobPtr->release_from_memory(); - return profilingInfo; + std::vector blob(_blobPtr->size()); + blob.assign(reinterpret_cast(_blobPtr->get_ptr()), + reinterpret_cast(_blobPtr->get_ptr()) + _blobPtr->size()); + return _compiler->process_profiling_output(profData, blob, config); } void PluginGraph::set_argument_value(uint32_t argi, const void* argv) const { diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 58447afe0a767e..fcef9b6a12a563 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -686,13 +686,13 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< auto localConfig = merge_configs(_globalConfig, localPropertiesMap); update_log_level(localPropertiesMap); - /* const auto set_cache_dir = localConfig.get(); + const auto set_cache_dir = localConfig.get(); if (!set_cache_dir.empty()) { const auto compilerType = localConfig.get(); if (compilerType == ov::intel_npu::CompilerType::MLIR) { OPENVINO_THROW("Option 'CACHE_DIR' is not supported with MLIR compiler type"); } - } */ + } const auto platform = _backends->getCompilationPlatform(localConfig.get(), localConfig.get()); auto device = _backends->getDevice(localConfig.get()); @@ -856,7 +856,7 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c } _logger.debug("Successfully read %zu bytes into blob.", graphSize); - blobPtr = std::make_unique(std::move(blob)); + blobPtr = std::make_unique(std::move(blob)); } else { blobPtr = std::make_unique(modelBuffer, stream.tellg(), graphSize); } diff --git a/src/plugins/intel_npu/tests/unit/npu/blob_container.cpp b/src/plugins/intel_npu/tests/unit/npu/blob_container.cpp index 5aec4ec24b07bf..6732058c0e5820 100644 --- a/src/plugins/intel_npu/tests/unit/npu/blob_container.cpp +++ b/src/plugins/intel_npu/tests/unit/npu/blob_container.cpp @@ -21,14 +21,22 @@ using namespace intel_npu; -using BlobContainerUnitTests = ::testing::Test; - -namespace { -const char* dummyBlobHeader = "blobwillstartafterspace correctblob!"; -const char* testCacheDir = "blob_container_test_cache_dir"; -const char* testFileName = "blob_container_test.blob"; +class BlobContainerUnitTests : public ::testing::Test { +protected: + void TearDown() override { + ov::util::iterate_files(testCacheDir, [](const std::string& file, bool is_dir) { + if (!is_dir) { + ov::test::utils::removeFile(file); + } + }); + ov::test::utils::removeDir(testCacheDir); + ov::test::utils::removeFile(testFileName); + } -} // namespace + const char* dummyBlobHeader = "blobwillstartafterspace "; + const char* testCacheDir = "blob_container_test_cache_dir"; + const char* testFileName = "blob_container_test.blob"; +}; TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForCacheEnabled) { auto core = std::make_shared(); @@ -59,9 +67,12 @@ TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForCacheEnabled) { auto inferRequest = compiledModel->create_infer_request(); inferRequest->infer(); OV_ASSERT_NO_THROW(auto profilingInfo = inferRequest->get_profiling_info()); - auto outputFile = - std::ofstream(std::filesystem::path(testCacheDir) / testFileName, std::ios::out | std::ios::binary); + + auto testCacheDirPath = ov::util::Path(testCacheDir); + auto outputFile = std::ofstream(testCacheDirPath / testFileName, std::ios::out | std::ios::binary); + std::ostringstream blobStream; OV_ASSERT_NO_THROW(compiledModel->export_model(outputFile)); + OV_ASSERT_NO_THROW(compiledModel->export_model(blobStream)); auto* compiledModelPtr = dynamic_cast(compiledModel._ptr.get()); OPENVINO_ASSERT(compiledModelPtr != nullptr); @@ -69,8 +80,13 @@ TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForCacheEnabled) { auto* blobContainerAlignedBufferPtr = dynamic_cast(&blobContainer); OPENVINO_ASSERT(blobContainerAlignedBufferPtr != nullptr, "Cached blob should be memory mapped!"); + + // Expect output stream with metadata to be larger than actual blob size + OPENVINO_ASSERT(outputFile.tellp() > 0 && blobContainer.size() > 0 && + static_cast(outputFile.tellp()) > blobContainer.size()); + OPENVINO_ASSERT(blobStream.tellp() > 0 && blobContainer.size() > 0 && + static_cast(blobStream.tellp()) > blobContainer.size()); } - ov::test::utils::removeDir(testCacheDir); } TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForFStream) { @@ -104,7 +120,6 @@ TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForFStream) { dynamic_cast(&blobContainer); OPENVINO_ASSERT(blobContainerAlignedBufferPtr == nullptr, "Cannot have memory mapped blob for std::fstream!"); } - ov::test::utils::removeFile(testFileName); } TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForSStream) { @@ -161,35 +176,42 @@ TEST_F(BlobContainerUnitTests, isBlobHeaderHandledCorrectly) { std::string parseDummyHeader; std::string blob; blobStream >> parseDummyHeader; + blobStream.get(); - EXPECT_THAT(parseDummyHeader, testing::HasSubstr("blobwillstartafterspace")); auto compiledModel = core->import_model(blobStream, ov::test::utils::DEVICE_NPU, {ov::intel_npu::defer_weights_load(true)}); - blobStream = {}; auto* compiledModelPtr = dynamic_cast(compiledModel._ptr.get()); OPENVINO_ASSERT(compiledModelPtr != nullptr); const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container(); blob.assign(reinterpret_cast(blobContainer.get_ptr()), blobContainer.size()); - EXPECT_THAT(blob, testing::HasSubstr("correctblob!")); + ASSERT_EQ(blobStream.str().substr(std::strlen(dummyBlobHeader), blobContainer.size()), blob); } { std::string parseDummyHeader; std::string blob; + std::string referenceBlob; auto inputFile = std::ifstream(testFileName, std::ios::in | std::ios::binary); - blobStream >> parseDummyHeader; + inputFile >> parseDummyHeader; + inputFile.get(); + + std::streampos currentPos = inputFile.tellg(); + inputFile.seekg(0, std::ios::end); + std::streampos endPos = inputFile.tellg(); + inputFile.seekg(currentPos, std::ios::beg); + referenceBlob.resize(endPos - currentPos); + inputFile.read(&referenceBlob[0], referenceBlob.size()); + inputFile.seekg(currentPos, std::ios::beg); - EXPECT_THAT(parseDummyHeader, testing::HasSubstr("blobwillstartafterspace")); auto compiledModel = - core->import_model(blobStream, ov::test::utils::DEVICE_NPU, {ov::intel_npu::defer_weights_load(true)}); + core->import_model(inputFile, ov::test::utils::DEVICE_NPU, {ov::intel_npu::defer_weights_load(true)}); auto* compiledModelPtr = dynamic_cast(compiledModel._ptr.get()); OPENVINO_ASSERT(compiledModelPtr != nullptr); const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container(); blob.assign(reinterpret_cast(blobContainer.get_ptr()), blobContainer.size()); - EXPECT_THAT(blob, testing::HasSubstr("correctblob!")); + referenceBlob.resize(blobContainer.size()); // exclude metadata + ASSERT_EQ(referenceBlob, blob); } - - ov::test::utils::removeFile(testFileName); } From 02683cf7546205b1aecf96206bfda510d353069a Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Tue, 21 Jan 2025 18:42:42 +0200 Subject: [PATCH 33/34] Remove unit tests due to the need of extending `IGraph` api with `get_blob_container` method --- .../include/intel_npu/common/igraph.hpp | 1 - .../intel_npu/src/common/src/igraph.cpp | 4 - .../intel_npu/tests/unit/CMakeLists.txt | 2 - .../tests/unit/npu/blob_container.cpp | 217 ------------------ 4 files changed, 224 deletions(-) delete mode 100644 src/plugins/intel_npu/tests/unit/npu/blob_container.cpp diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index 4f2583c4f92419..ec4d7091ac6345 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -36,7 +36,6 @@ class IGraph : public std::enable_shared_from_this { virtual ~IGraph() = default; const NetworkMetadata& get_metadata() const; - const BlobContainer& get_blob_container() const; ze_graph_handle_t get_handle() const; void update_network_name(std::string_view name); diff --git a/src/plugins/intel_npu/src/common/src/igraph.cpp b/src/plugins/intel_npu/src/common/src/igraph.cpp index 5552d77d2c0fe5..f641813e44c0e7 100644 --- a/src/plugins/intel_npu/src/common/src/igraph.cpp +++ b/src/plugins/intel_npu/src/common/src/igraph.cpp @@ -27,10 +27,6 @@ const NetworkMetadata& IGraph::get_metadata() const { return _metadata; } -const BlobContainer& IGraph::get_blob_container() const { - return *_blobPtr; -} - ze_graph_handle_t IGraph::get_handle() const { return _handle; } diff --git a/src/plugins/intel_npu/tests/unit/CMakeLists.txt b/src/plugins/intel_npu/tests/unit/CMakeLists.txt index b552979d8b1a51..1097e183369fe4 100644 --- a/src/plugins/intel_npu/tests/unit/CMakeLists.txt +++ b/src/plugins/intel_npu/tests/unit/CMakeLists.txt @@ -10,7 +10,6 @@ set(MANDATORY_UNIT_TESTS_LIBS "openvino::gtest" "openvino::gtest_main" "openvino::runtime" - "openvino_runtime_s" "openvino::npu_common" "openvino::npu_al" "openvino::npu_logger_utils" @@ -30,7 +29,6 @@ ov_add_test_target( ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/utils/include ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/plugin/include ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/al/include - $ OBJECT_FILES ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/plugin/src/metadata.cpp LINK_LIBRARIES diff --git a/src/plugins/intel_npu/tests/unit/npu/blob_container.cpp b/src/plugins/intel_npu/tests/unit/npu/blob_container.cpp deleted file mode 100644 index 6732058c0e5820..00000000000000 --- a/src/plugins/intel_npu/tests/unit/npu/blob_container.cpp +++ /dev/null @@ -1,217 +0,0 @@ -// Copyright (C) 2018-2025 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include -#include - -#include "common_test_utils/file_utils.hpp" -#include "common_test_utils/subgraph_builders/2_input_subtract.hpp" -#include "common_test_utils/test_assertions.hpp" -#include "common_test_utils/test_constants.hpp" -#include "dev/core_impl.hpp" -#include "intel_npu/common/icompiled_model.hpp" -#include "openvino/runtime/core.hpp" -#include "openvino/runtime/iasync_infer_request.hpp" -#include "openvino/runtime/intel_npu/properties.hpp" -#include "openvino/runtime/properties.hpp" -#include "openvino/util/file_path.hpp" - -using namespace intel_npu; - -class BlobContainerUnitTests : public ::testing::Test { -protected: - void TearDown() override { - ov::util::iterate_files(testCacheDir, [](const std::string& file, bool is_dir) { - if (!is_dir) { - ov::test::utils::removeFile(file); - } - }); - ov::test::utils::removeDir(testCacheDir); - ov::test::utils::removeFile(testFileName); - } - - const char* dummyBlobHeader = "blobwillstartafterspace "; - const char* testCacheDir = "blob_container_test_cache_dir"; - const char* testFileName = "blob_container_test.blob"; -}; - -TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForCacheEnabled) { - auto core = std::make_shared(); - core->register_compile_time_plugins(); - auto model = ov::test::utils::make_2_input_subtract(); - - { - auto compiledModel = core->compile_model(model, - ov::test::utils::DEVICE_NPU, - {ov::cache_dir(testCacheDir), ov::enable_profiling(true)}); - auto inferRequest = compiledModel->create_infer_request(); - inferRequest->infer(); - OV_ASSERT_NO_THROW(auto profilingInfo = inferRequest->get_profiling_info()); - - auto* compiledModelPtr = dynamic_cast(compiledModel._ptr.get()); - OPENVINO_ASSERT(compiledModelPtr != nullptr); - const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container(); - auto* blobContainerAlignedBufferPtr = - dynamic_cast(&blobContainer); - OPENVINO_ASSERT(blobContainerAlignedBufferPtr == nullptr, - "Blob after compilation should not be memory mapped!"); - } - - { - auto compiledModel = core->compile_model(model, - ov::test::utils::DEVICE_NPU, - {ov::cache_dir(testCacheDir), ov::enable_profiling(true)}); - auto inferRequest = compiledModel->create_infer_request(); - inferRequest->infer(); - OV_ASSERT_NO_THROW(auto profilingInfo = inferRequest->get_profiling_info()); - - auto testCacheDirPath = ov::util::Path(testCacheDir); - auto outputFile = std::ofstream(testCacheDirPath / testFileName, std::ios::out | std::ios::binary); - std::ostringstream blobStream; - OV_ASSERT_NO_THROW(compiledModel->export_model(outputFile)); - OV_ASSERT_NO_THROW(compiledModel->export_model(blobStream)); - - auto* compiledModelPtr = dynamic_cast(compiledModel._ptr.get()); - OPENVINO_ASSERT(compiledModelPtr != nullptr); - const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container(); - auto* blobContainerAlignedBufferPtr = - dynamic_cast(&blobContainer); - OPENVINO_ASSERT(blobContainerAlignedBufferPtr != nullptr, "Cached blob should be memory mapped!"); - - // Expect output stream with metadata to be larger than actual blob size - OPENVINO_ASSERT(outputFile.tellp() > 0 && blobContainer.size() > 0 && - static_cast(outputFile.tellp()) > blobContainer.size()); - OPENVINO_ASSERT(blobStream.tellp() > 0 && blobContainer.size() > 0 && - static_cast(blobStream.tellp()) > blobContainer.size()); - } -} - -TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForFStream) { - auto core = std::make_shared(); - core->register_compile_time_plugins(); - auto model = ov::test::utils::make_2_input_subtract(); - - { - auto compiledModel = core->compile_model(model, ov::test::utils::DEVICE_NPU, {ov::enable_profiling(true)}); - auto inferRequest = compiledModel->create_infer_request(); - inferRequest->infer(); - OV_ASSERT_NO_THROW(auto profilingInfo = inferRequest->get_profiling_info()); - auto outputFile = std::ofstream(testFileName, std::ios::out | std::ios::binary); - OV_ASSERT_NO_THROW(compiledModel->export_model(outputFile)); - } - - { - auto inputFile = std::ifstream(testFileName, std::ios::in | std::ios::binary); - auto compiledModel = core->import_model(inputFile, ov::test::utils::DEVICE_NPU, {ov::enable_profiling(true)}); - inputFile.close(); - auto inferRequest = compiledModel->create_infer_request(); - inferRequest->infer(); - OV_ASSERT_NO_THROW(auto profilingInfo = inferRequest->get_profiling_info()); - auto outputFile = std::ofstream(testFileName, std::ios::out | std::ios::binary); - OV_ASSERT_NO_THROW(compiledModel->export_model(outputFile)); - - auto* compiledModelPtr = dynamic_cast(compiledModel._ptr.get()); - OPENVINO_ASSERT(compiledModelPtr != nullptr); - const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container(); - auto* blobContainerAlignedBufferPtr = - dynamic_cast(&blobContainer); - OPENVINO_ASSERT(blobContainerAlignedBufferPtr == nullptr, "Cannot have memory mapped blob for std::fstream!"); - } -} - -TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForSStream) { - auto core = std::make_shared(); - core->register_compile_time_plugins(); - auto model = ov::test::utils::make_2_input_subtract(); - std::stringstream blobStream; - - { - auto compiledModel = core->compile_model(model, ov::test::utils::DEVICE_NPU, {ov::enable_profiling(true)}); - auto inferRequest = compiledModel->create_infer_request(); - inferRequest->infer(); - OV_ASSERT_NO_THROW(auto profilingInfo = inferRequest->get_profiling_info()); - OV_ASSERT_NO_THROW(compiledModel->export_model(blobStream)); - } - - { - auto compiledModel = core->import_model(blobStream, ov::test::utils::DEVICE_NPU, {ov::enable_profiling(true)}); - blobStream = std::stringstream(); - auto inferRequest = compiledModel->create_infer_request(); - inferRequest->infer(); - OV_ASSERT_NO_THROW(auto profilingInfo = inferRequest->get_profiling_info()); - OV_ASSERT_NO_THROW(compiledModel->export_model(blobStream)); - - auto* compiledModelPtr = dynamic_cast(compiledModel._ptr.get()); - OPENVINO_ASSERT(compiledModelPtr != nullptr); - const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container(); - auto* blobContainerAlignedBufferPtr = - dynamic_cast(&blobContainer); - OPENVINO_ASSERT(blobContainerAlignedBufferPtr == nullptr, - "Cannot have memory mapped blob for std::stringstream!"); - } -} - -TEST_F(BlobContainerUnitTests, isBlobHeaderHandledCorrectly) { - auto core = std::make_shared(); - core->register_compile_time_plugins(); - auto model = ov::test::utils::make_2_input_subtract(); - std::stringstream blobStream; - blobStream << dummyBlobHeader; - - { - auto compiledModel = core->compile_model(model, ov::test::utils::DEVICE_NPU, {ov::enable_profiling(true)}); - auto inferRequest = compiledModel->create_infer_request(); - inferRequest->infer(); - OV_ASSERT_NO_THROW(auto profilingInfo = inferRequest->get_profiling_info()); - auto outputFile = std::ofstream(testFileName, std::ios::out | std::ios::binary); - outputFile << dummyBlobHeader; - OV_ASSERT_NO_THROW(compiledModel->export_model(outputFile)); - OV_ASSERT_NO_THROW(compiledModel->export_model(blobStream)); - } - - { - std::string parseDummyHeader; - std::string blob; - blobStream >> parseDummyHeader; - blobStream.get(); - - auto compiledModel = - core->import_model(blobStream, ov::test::utils::DEVICE_NPU, {ov::intel_npu::defer_weights_load(true)}); - - auto* compiledModelPtr = dynamic_cast(compiledModel._ptr.get()); - OPENVINO_ASSERT(compiledModelPtr != nullptr); - const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container(); - blob.assign(reinterpret_cast(blobContainer.get_ptr()), blobContainer.size()); - ASSERT_EQ(blobStream.str().substr(std::strlen(dummyBlobHeader), blobContainer.size()), blob); - } - - { - std::string parseDummyHeader; - std::string blob; - std::string referenceBlob; - auto inputFile = std::ifstream(testFileName, std::ios::in | std::ios::binary); - inputFile >> parseDummyHeader; - inputFile.get(); - - std::streampos currentPos = inputFile.tellg(); - inputFile.seekg(0, std::ios::end); - std::streampos endPos = inputFile.tellg(); - inputFile.seekg(currentPos, std::ios::beg); - referenceBlob.resize(endPos - currentPos); - inputFile.read(&referenceBlob[0], referenceBlob.size()); - inputFile.seekg(currentPos, std::ios::beg); - - auto compiledModel = - core->import_model(inputFile, ov::test::utils::DEVICE_NPU, {ov::intel_npu::defer_weights_load(true)}); - - auto* compiledModelPtr = dynamic_cast(compiledModel._ptr.get()); - OPENVINO_ASSERT(compiledModelPtr != nullptr); - const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container(); - blob.assign(reinterpret_cast(blobContainer.get_ptr()), blobContainer.size()); - referenceBlob.resize(blobContainer.size()); // exclude metadata - ASSERT_EQ(referenceBlob, blob); - } -} From 473b488e0a6fd9a374f691821ef68a488bad48a6 Mon Sep 17 00:00:00 2001 From: Mircea-Aurelian Dan Date: Wed, 22 Jan 2025 11:37:47 +0000 Subject: [PATCH 34/34] Add comment regarding `ov::internal::cached_model_buffer` not having corresponding `Config` implementation --- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index fcef9b6a12a563..23e2e04fbe0ba7 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -810,6 +810,8 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c } std::shared_ptr modelBuffer; + // ov::internal::cached_model_buffer has no corresponding "Config" implementation thus we need to remove it from the + // list of properties if (npu_plugin_properties.count(ov::internal::cached_model_buffer.name())) { modelBuffer = npu_plugin_properties.at(ov::internal::cached_model_buffer.name()).as>();