From 1801088d72fa568215188cd5d8810357d8e16a8f Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Mon, 20 Jan 2025 18:24:37 +0200 Subject: [PATCH] Revert changes for `CIP Optimization` --- .../intel_npu/common/blob_container.hpp | 71 ++++++++----------- .../src/plugin_compiler_adapter.cpp | 11 +-- .../src/compiler_adapter/src/plugin_graph.cpp | 13 ++-- .../intel_npu/src/plugin/src/plugin.cpp | 6 +- .../tests/unit/npu/blob_container.cpp | 62 ++++++++++------ 5 files changed, 87 insertions(+), 76 deletions(-) diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp index b347b457fc7e5e..2f6b31aceacd5d 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp @@ -13,50 +13,50 @@ namespace intel_npu { class BlobContainer { public: - BlobContainer() = default; + /** + * @brief Returns the address at the beginning of the blob. + */ + virtual const void* get_ptr() const = 0; - BlobContainer(std::vector blob) : _blob(std::move(blob)) {} + /** + * @brief Size of the blob. + */ + virtual size_t size() const = 0; - virtual const void* get_ptr() const { - return _blob.data(); - } + /** + * @brief Returns true if the blob can be deallocated from memory, false otherwise. + */ + virtual bool release_from_memory() = 0; - virtual size_t size() const { - return _blob.size(); - } + virtual ~BlobContainer() = default; +}; - virtual bool release_from_memory() const { - if (_shouldDeallocate) { - _blob.clear(); - _blob.shrink_to_fit(); - return true; - } - _shouldDeallocate = true; - return false; - } +class BlobContainerVector : public BlobContainer { +public: + BlobContainerVector(std::vector blob) : _blob(std::move(blob)) {} - virtual const std::vector& get_blob() const { - // when unerlying blob object was accessed, - // prevent deallocation on next `release_from_memory` call - _shouldDeallocate = false; - return _blob; + const void* get_ptr() const override { + return reinterpret_cast(_blob.data()); } - virtual ~BlobContainer() = default; + size_t size() const override { + return _blob.size(); + } -protected: - mutable std::vector _blob; + bool release_from_memory() override { + _blob.clear(); + _blob.shrink_to_fit(); + return true; + } private: - mutable bool _shouldDeallocate = true; + std::vector _blob; }; class BlobContainerAlignedBuffer : public BlobContainer { public: - BlobContainerAlignedBuffer(const std::shared_ptr& blobSO, - size_t ovHeaderOffset, - uint64_t blobSize) - : _size(blobSize), + BlobContainerAlignedBuffer(const std::shared_ptr& blobSO, size_t ovHeaderOffset, uint64_t size) + : _size(size), _ovHeaderOffset(ovHeaderOffset), _blobSO(blobSO) {} @@ -68,19 +68,10 @@ class BlobContainerAlignedBuffer : public BlobContainer { return _size; } - bool release_from_memory() const override { - BlobContainer::release_from_memory(); + bool release_from_memory() override { return false; } - const std::vector& get_blob() const override { - BlobContainer::release_from_memory(); - _blob.resize(_size); - _blob.assign(reinterpret_cast(this->get_ptr()), - reinterpret_cast(this->get_ptr()) + _size); - return _blob; - } - private: uint64_t _size; size_t _ovHeaderOffset; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 6f728ed5271678..809e1c88e05a71 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -80,7 +80,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrcompile(model, config); - auto blobPtr = std::make_unique(std::move(networkDesc.compiledNetwork)); + auto blobPtr = std::make_unique(std::move(networkDesc.compiledNetwork)); _logger.debug("compile end"); ze_graph_handle_t graphHandle = nullptr; @@ -110,9 +110,12 @@ std::shared_ptr PluginCompilerAdapter::parse(std::unique_ptrget_blob(); - auto networkMeta = _compiler->parse(blob, config); - blobPtr->release_from_memory(); + std::vector network(blobPtr->size()); + network.assign(reinterpret_cast(blobPtr->get_ptr()), + reinterpret_cast(blobPtr->get_ptr()) + blobPtr->size()); + auto networkMeta = _compiler->parse(network, config); + network.clear(); + network.shrink_to_fit(); _logger.debug("parse end"); ze_graph_handle_t graphHandle = nullptr; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index b1e244db60d988..726a1196b7c88b 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -56,15 +56,10 @@ size_t PluginGraph::export_blob(std::ostream& stream) const { std::vector PluginGraph::process_profiling_output(const std::vector& profData, const Config& config) const { - std::vector profilingInfo; - const auto& blob = _blobPtr->get_blob(); - try { - profilingInfo = _compiler->process_profiling_output(profData, blob, config); - } catch (const std::exception& ex) { - _logger.error(ex.what()); - } - _blobPtr->release_from_memory(); - return profilingInfo; + std::vector blob(_blobPtr->size()); + blob.assign(reinterpret_cast(_blobPtr->get_ptr()), + reinterpret_cast(_blobPtr->get_ptr()) + _blobPtr->size()); + return _compiler->process_profiling_output(profData, blob, config); } void PluginGraph::set_argument_value(uint32_t argi, const void* argv) const { diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 58447afe0a767e..fcef9b6a12a563 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -686,13 +686,13 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< auto localConfig = merge_configs(_globalConfig, localPropertiesMap); update_log_level(localPropertiesMap); - /* const auto set_cache_dir = localConfig.get(); + const auto set_cache_dir = localConfig.get(); if (!set_cache_dir.empty()) { const auto compilerType = localConfig.get(); if (compilerType == ov::intel_npu::CompilerType::MLIR) { OPENVINO_THROW("Option 'CACHE_DIR' is not supported with MLIR compiler type"); } - } */ + } const auto platform = _backends->getCompilationPlatform(localConfig.get(), localConfig.get()); auto device = _backends->getDevice(localConfig.get()); @@ -856,7 +856,7 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c } _logger.debug("Successfully read %zu bytes into blob.", graphSize); - blobPtr = std::make_unique(std::move(blob)); + blobPtr = std::make_unique(std::move(blob)); } else { blobPtr = std::make_unique(modelBuffer, stream.tellg(), graphSize); } diff --git a/src/plugins/intel_npu/tests/unit/npu/blob_container.cpp b/src/plugins/intel_npu/tests/unit/npu/blob_container.cpp index 5aec4ec24b07bf..6732058c0e5820 100644 --- a/src/plugins/intel_npu/tests/unit/npu/blob_container.cpp +++ b/src/plugins/intel_npu/tests/unit/npu/blob_container.cpp @@ -21,14 +21,22 @@ using namespace intel_npu; -using BlobContainerUnitTests = ::testing::Test; - -namespace { -const char* dummyBlobHeader = "blobwillstartafterspace correctblob!"; -const char* testCacheDir = "blob_container_test_cache_dir"; -const char* testFileName = "blob_container_test.blob"; +class BlobContainerUnitTests : public ::testing::Test { +protected: + void TearDown() override { + ov::util::iterate_files(testCacheDir, [](const std::string& file, bool is_dir) { + if (!is_dir) { + ov::test::utils::removeFile(file); + } + }); + ov::test::utils::removeDir(testCacheDir); + ov::test::utils::removeFile(testFileName); + } -} // namespace + const char* dummyBlobHeader = "blobwillstartafterspace "; + const char* testCacheDir = "blob_container_test_cache_dir"; + const char* testFileName = "blob_container_test.blob"; +}; TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForCacheEnabled) { auto core = std::make_shared(); @@ -59,9 +67,12 @@ TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForCacheEnabled) { auto inferRequest = compiledModel->create_infer_request(); inferRequest->infer(); OV_ASSERT_NO_THROW(auto profilingInfo = inferRequest->get_profiling_info()); - auto outputFile = - std::ofstream(std::filesystem::path(testCacheDir) / testFileName, std::ios::out | std::ios::binary); + + auto testCacheDirPath = ov::util::Path(testCacheDir); + auto outputFile = std::ofstream(testCacheDirPath / testFileName, std::ios::out | std::ios::binary); + std::ostringstream blobStream; OV_ASSERT_NO_THROW(compiledModel->export_model(outputFile)); + OV_ASSERT_NO_THROW(compiledModel->export_model(blobStream)); auto* compiledModelPtr = dynamic_cast(compiledModel._ptr.get()); OPENVINO_ASSERT(compiledModelPtr != nullptr); @@ -69,8 +80,13 @@ TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForCacheEnabled) { auto* blobContainerAlignedBufferPtr = dynamic_cast(&blobContainer); OPENVINO_ASSERT(blobContainerAlignedBufferPtr != nullptr, "Cached blob should be memory mapped!"); + + // Expect output stream with metadata to be larger than actual blob size + OPENVINO_ASSERT(outputFile.tellp() > 0 && blobContainer.size() > 0 && + static_cast(outputFile.tellp()) > blobContainer.size()); + OPENVINO_ASSERT(blobStream.tellp() > 0 && blobContainer.size() > 0 && + static_cast(blobStream.tellp()) > blobContainer.size()); } - ov::test::utils::removeDir(testCacheDir); } TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForFStream) { @@ -104,7 +120,6 @@ TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForFStream) { dynamic_cast(&blobContainer); OPENVINO_ASSERT(blobContainerAlignedBufferPtr == nullptr, "Cannot have memory mapped blob for std::fstream!"); } - ov::test::utils::removeFile(testFileName); } TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForSStream) { @@ -161,35 +176,42 @@ TEST_F(BlobContainerUnitTests, isBlobHeaderHandledCorrectly) { std::string parseDummyHeader; std::string blob; blobStream >> parseDummyHeader; + blobStream.get(); - EXPECT_THAT(parseDummyHeader, testing::HasSubstr("blobwillstartafterspace")); auto compiledModel = core->import_model(blobStream, ov::test::utils::DEVICE_NPU, {ov::intel_npu::defer_weights_load(true)}); - blobStream = {}; auto* compiledModelPtr = dynamic_cast(compiledModel._ptr.get()); OPENVINO_ASSERT(compiledModelPtr != nullptr); const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container(); blob.assign(reinterpret_cast(blobContainer.get_ptr()), blobContainer.size()); - EXPECT_THAT(blob, testing::HasSubstr("correctblob!")); + ASSERT_EQ(blobStream.str().substr(std::strlen(dummyBlobHeader), blobContainer.size()), blob); } { std::string parseDummyHeader; std::string blob; + std::string referenceBlob; auto inputFile = std::ifstream(testFileName, std::ios::in | std::ios::binary); - blobStream >> parseDummyHeader; + inputFile >> parseDummyHeader; + inputFile.get(); + + std::streampos currentPos = inputFile.tellg(); + inputFile.seekg(0, std::ios::end); + std::streampos endPos = inputFile.tellg(); + inputFile.seekg(currentPos, std::ios::beg); + referenceBlob.resize(endPos - currentPos); + inputFile.read(&referenceBlob[0], referenceBlob.size()); + inputFile.seekg(currentPos, std::ios::beg); - EXPECT_THAT(parseDummyHeader, testing::HasSubstr("blobwillstartafterspace")); auto compiledModel = - core->import_model(blobStream, ov::test::utils::DEVICE_NPU, {ov::intel_npu::defer_weights_load(true)}); + core->import_model(inputFile, ov::test::utils::DEVICE_NPU, {ov::intel_npu::defer_weights_load(true)}); auto* compiledModelPtr = dynamic_cast(compiledModel._ptr.get()); OPENVINO_ASSERT(compiledModelPtr != nullptr); const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container(); blob.assign(reinterpret_cast(blobContainer.get_ptr()), blobContainer.size()); - EXPECT_THAT(blob, testing::HasSubstr("correctblob!")); + referenceBlob.resize(blobContainer.size()); // exclude metadata + ASSERT_EQ(referenceBlob, blob); } - - ov::test::utils::removeFile(testFileName); }