Clean up code

openvinotoolkit · Nov 13, 2024 · d5bda07 · d5bda07
1 parent 8de22e5
commit d5bda07
Show file tree

Hide file tree

Showing 26 changed files with 67 additions and 402 deletions.
diff --git a/src/inference/dev_api/openvino/runtime/memory_solver.hpp b/src/inference/dev_api/openvino/runtime/memory_solver.hpp
@@ -52,15 +52,13 @@ class MemorySolver {
     struct Box {
         /** Execution order index of first use. The data will be produced here. */
         int start;
-        // intel_cpu::GlobalExecutionIndex start;
 
         /**
          * The execution order index of last use. After that data will be released.
          * -1 is a reserved value for "till to end". The data will be alive to very
          * end of execution.
          */
         int finish;
-        // intel_cpu::GlobalExecutionIndex finish;
 
         /** Size of data. In abstract unit of measure (byte, simd, cache line, ...) */
         int64_t size;

diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp
@@ -158,17 +158,14 @@ CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
                 GraphContext::Ptr ctx;
                 {
                     std::lock_guard<std::mutex> lock{*m_mutex.get()};
-                    MemoryControl* memoryControl = m_networkMemoryControl->createMemoryControlUnit();
                     auto isQuantizedFlag = (m_cfg.lpTransformsMode == Config::On) &&
                                            ov::pass::low_precision::LowPrecision::isFunctionQuantized(m_model);
                     ctx = std::make_shared<GraphContext>(m_cfg,
                                                          m_socketWeights[socketId],
                                                          isQuantizedFlag,
-                                                         memoryControl,
-                                                         m_networkMemoryControl,
+                                                         m_networkMemoryControl->createMemoryControlUnit(),
                                                          streamsExecutor,
-                                                         m_sub_memory_manager,
-                                                         true);
+                                                         m_sub_memory_manager);
                 }
 
                 const std::shared_ptr<const ov::Model> model = m_model;
@@ -356,7 +353,7 @@ void CompiledModel::release_memory() {
     for (auto&& graph : m_graphs) {
         GraphGuard::Lock graph_lock{graph};
         auto ctx = graph_lock._graph.getGraphContext();
-        m_networkMemoryControl->releaseMemory();
+        ctx->getMemoryControl()->releaseMemory();
     }
 }
 

diff --git a/src/plugins/intel_cpu/src/compiled_model.h b/src/plugins/intel_cpu/src/compiled_model.h
@@ -14,7 +14,6 @@
 #include "openvino/runtime/iinfer_request.hpp"
 #include "openvino/runtime/iplugin.hpp"
 #include "openvino/runtime/isync_infer_request.hpp"
-#include "openvino/runtime/threading/thread_local.hpp"
 #include "sub_memory_manager.hpp"
 
 namespace ov {

diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp
@@ -204,24 +204,10 @@ void MemoryBlockWithReuse::setExtBuff(void *ptr, size_t size) {
     m_data = decltype(m_data)(ptr, release);
 }
 
-// class MemoryUsage {
-// public:
-//     MemoryUsage() {}
-
-//     ~MemoryUsage() {
-//         std::cout << "Total memory usage: " << total << "\n";
-//     }
-
-//     int total = 0;
-// };
-
 bool MemoryBlockWithReuse::resize(size_t size) {
-    // static MemoryUsage mu;
-
     constexpr int cacheLineSize = 64;
     bool sizeChanged = false;
     if (size > m_memUpperBound) {
-        // mu.total += size;
         void *ptr = dnnl::impl::malloc(size, cacheLineSize);
         if (!ptr) {
             OPENVINO_THROW("Failed to allocate ", size, " bytes of memory");

diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp
@@ -293,11 +293,6 @@ std::string Edge::name() const {
     std::stringstream result;
 
     result << parentPtr->getName() << " port " << parent_port << " <-> " << childPtr->getName() << " port " << child_port;
-    // result << parentPtr->getName()<< " port " << parent_port
-    //        << " <-> "
-    //        << childPtr->getName() << " port " << child_port
-    //        << " status: "
-    //        << static_cast<int>(getStatus());
 
     return  result.str();
 }

diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp
@@ -7,7 +7,6 @@
 #include <algorithm>
 #include <cstddef>
 #include <cstdlib>
-#include <iostream>
 #include <iterator>
 #include <limits>
 #include <map>
@@ -71,7 +70,7 @@ template<typename NET>
 void Graph::CreateGraph(NET &model, const GraphContext::CPtr context) {
     OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "CreateGraph");
 
-    Init(model, std::make_shared<GraphContext>(context->disableMemoryReuse()));
+    Init(model, context);
 
     Activate();
 }
@@ -83,7 +82,7 @@ void Graph::CreateGraph(const std::vector<NodePtr>& graphNodes,
     if (IsReady())
         ForgetGraphData();
 
-    m_context = std::make_shared<GraphContext>(context->disableMemoryReuse());
+    m_context = context;
     m_stream = dnnl::stream(getEngine());
 
     this->_name = std::move(name);
@@ -285,8 +284,6 @@ static std::tuple<std::vector<NodePtr>, std::vector<size_t>> ExtractExecutableNo
     for (size_t i = 0; i < graphNodes.size(); i++) {
         const auto& graphNode = graphNodes[i];
         if ((!graphNode->isConstant() && CPU_DEBUG_CAPS_ALWAYS_TRUE(!graphNode->canBeSkipped())) || // non-constant executable or
-        // if ((!graphNode->isConstant()) || // non-constant executable or
-        // if ((!graphNode->isConstant() && !graphNode->canBeSkipped()) || // non-constant executable or
             (graphNode->isDynamicNode() && !one_of(graphNode->getType(), Type::Input, Type::Output))) { // dynamic, except inputs / outputs
             /* @todo
              * Revise implementation.
@@ -360,20 +357,11 @@ static void UseExternalOutputMemory(const std::map<std::size_t, NodePtr>& output
 
 void Graph::Activate(const std::vector<MemoryPtr>& externalInputMemory,
                      const std::vector<MemoryPtr>& externalOutputMemory) {
-    // OPENVINO_ASSERT(status == Status::Initialized, "Invalid graph status");
-
-    // const bool hasDynNodes = ProcessDynNodes();
-    // const auto syncNodesInds = hasDynNodes ? IdentifySyncPoints(graphNodes) : std::vector<size_t>{};
+    OPENVINO_ASSERT(status == Status::Initialized, "Invalid graph status");
 
     UseExternalInputMemory(inputNodesMap, externalInputMemory);
     UseExternalOutputMemory(outputNodesMap, externalOutputMemory);
 
-    // std::tie(m_executableGraphNodes, m_executableSyncNodesInds) = ExtractExecutableNodesAndSyncPoints(syncNodesInds, graphNodes);
-
-    // status = hasDynNodes ? (parallel_get_max_threads() > 1 ? Status::ReadyDynamic : Status::ReadyDynamicSeq)
-    //     : Status::ReadyStatic;
-
-    // CPU_DEBUG_CAP_ENABLE(serialize(*this));
     Allocate();
 
     CreatePrimitivesAndExecConstants();
@@ -713,7 +701,7 @@ void Graph::ResolveComplexInplaceConflicts() {
 }
 
 /**
- * Partition the \clusters of Edges, by moving and allocating at the same time
+ * Partition the \clusters of Edges, by moving to the end and allocating at the same time
  * the clusters which cannot be handled as part of generic memory solver algorithm.
  * Such clusters meet one of the following criteria:
  * - base edge of a cluster is already Allocated
@@ -744,7 +732,6 @@ static size_t AllocateStringsAndConstants(EdgeClusters& clusters,
     };
 
     auto allocateConstantEdge = [context](const EdgePtr& edge) {
-        // std::cout << "Allocating constant edge: " << edge->name() << " wc: " << context->getWeightsCache() << "\n";
         if (edge->getParent()->getType() == Type::Input) {
             auto constNode = std::static_pointer_cast<node::Input>(edge->getParent());
             edge->reuse(std::const_pointer_cast<IMemory>(constNode->getMemoryPtr()));
@@ -753,7 +740,7 @@ static size_t AllocateStringsAndConstants(EdgeClusters& clusters,
         }
     };
 
-    auto endOfNotAllocatedPartition =
+    auto notAllocatedPartitionEnd =
         std::partition(clusters.begin(), clusters.end(),
                        [&allocateStringMemory, &allocateConstantEdge, &context](const EdgeCluster& cluster) {
                            if (cluster.empty()) return false;
@@ -764,15 +751,11 @@ static size_t AllocateStringsAndConstants(EdgeClusters& clusters,
 
                            OPENVINO_ASSERT(baseEdgeIt != cluster.end(), "Unexpected cluster state");
 
-                           // const auto& baseEdge = cluster.front();
                            const auto& baseEdge = *baseEdgeIt;
-                           // Skip already allocated cluster
                            if (baseEdge->getStatus() == Edge::Status::Allocated) {
                                return false;
                            }
 
-                           // std::cout << "Processing string/const for base edge: " << baseEdge->name() << "\n";
-
                            // Skip if the baseEdge does not require allocation
                            if (baseEdge->getStatus() != Edge::Status::NeedAllocation) {
                                return true;
@@ -807,22 +790,17 @@ static size_t AllocateStringsAndConstants(EdgeClusters& clusters,
                            return true;
                        });
 
-    return std::distance(clusters.begin(), endOfNotAllocatedPartition);
+    return std::distance(clusters.begin(), notAllocatedPartitionEnd);
 }
 
 static void AllocateBaseEdges(const EdgeClusters& edgeClusters,
                               const MemoryControl::MemorySolution& memorySolution) {
     // attach all the not yet allocated edges to the memory control
     for (auto&& item : memorySolution) {
         int count = 0;
-        // std::cout << "Processing cluster: " << item.first << "\n";
         for (auto&& edge : edgeClusters[item.first]) {
-            // std::cout << "Processing base edge: " << edge->name() << "\n";
             if (edge->getStatus() == Edge::Status::NeedAllocation) {
-                // std::cout << "Allocating edge: " << edge->name() << "\n";
-
                 edge->allocate(item.second);
-
                 // TODO: WA for some test (like strided_slice_test) which use tensors with
                 //       shapes {0}. And it is implicitly converted into {1} tensor.
                 //       Zeroing of input data allow pass tests.
@@ -852,7 +830,6 @@ static void AllocatedReferencingEdges(const EdgeClusters& clusters) {
             }
 
             std::for_each(edges_to_process.rbegin(), edges_to_process.rend(), [](const EdgePtr& edge) {
-                // std::cout << "Processing referencing edge: " << edge->name() << "\n";
                 if (edge->getStatus() == Edge::Status::NotAllocated) {
                     if (edge->inPlace(Edge::LOOK_DOWN)) {
                         edge->getChild()->resolveInPlaceEdges(Edge::LOOK_DOWN);
@@ -861,7 +838,6 @@ static void AllocatedReferencingEdges(const EdgeClusters& clusters) {
                     } else {
                         auto sharedEdge = edge->getSharedEdge();
                         auto sharedEdgeParent = sharedEdge->getParent();
-                        // std::cout << "Allocating edge: " << edge->name() << " Using shared edge: " << sharedEdge->name() << "\n";
                         edge->allocate(sharedEdge->getMemoryPtr()->getMemoryBlock());
                         DEBUG_LOG(*edge, " sharedEdge with ", *sharedEdge);
                     }
@@ -899,23 +875,6 @@ std::vector<size_t> Graph::CreateExecutionGraph() {
     return syncNodesInds;
 }
 
-static void ResolveInOutInPlaceEdgesLegacy(const std::vector<EdgePtr>& edges) {
-    for (const auto& edge : edges) {
-        // std::cout << edge->name() << "\n";
-        if (edge->getStatus() == Edge::Status::Uninitialized) {
-            if (edge->getParent()->getParentEdges().empty() &&
-                one_of(edge->getParent()->getType(), Type::Input, Type::MemoryInput) &&
-                edge->inPlace(Edge::LOOK_UP)) {
-                edge->getParent()->resolveInPlaceEdges(Edge::LOOK_UP);
-            } else if (edge->getChild()->getChildEdges().empty() &&
-                one_of(edge->getChild()->getType(), Type::Output, Type::MemoryOutput) &&
-                edge->inPlace(Edge::LOOK_DOWN)) {
-                edge->getChild()->resolveInPlaceEdges(Edge::LOOK_DOWN);
-            }
-        }
-    }
-}
-
 static void ResolveInOutInPlaceEdges(const std::vector<EdgePtr>& edges) {
     for (const auto& edge : edges) {
         if (edge->getStatus() == Edge::Status::Uninitialized) {
@@ -936,7 +895,6 @@ int Graph::RegisterToAllocationContext(int offset, AllocationContext& context) {
     auto syncNodesInds = CreateExecutionGraph();
 
     ResolveInOutInPlaceEdges(graphEdges);
-    // std::cout << "RegisterToAllocationContext: " << offset << "\n";
 
     // nodes are expected to be topologically sorted
     for (size_t execIndex = 0, j = 0; execIndex < graphNodes.size(); execIndex++) {
@@ -947,7 +905,6 @@ int Graph::RegisterToAllocationContext(int offset, AllocationContext& context) {
         const auto outputExecIndex = offset;
         offset++;
         context.execIndex[node] = {inputExecIndex, outputExecIndex};
-        // std::cout << node->getName() << " - " << "[" << inputExecIndex << "," << outputExecIndex << "] offset " << offset << "\n";
 
         if (j < syncNodesInds.size() && syncNodesInds[j] == execIndex) {
             context.syncPoints.push_back(inputExecIndex);
@@ -960,28 +917,6 @@ int Graph::RegisterToAllocationContext(int offset, AllocationContext& context) {
     return offset - 1;
 }
 
-AllocationContext Graph::CreateAllocationContext(bool global) {
-    AllocationContext allocationContext;
-
-    if (global) {
-        RegisterToAllocationContext(0, allocationContext);
-    } else { // local allocation context. Used for the nodes with inner graph which are not updated yet
-        ResolveInOutInPlaceEdgesLegacy(graphEdges);
-
-        auto syncNodesInds = CreateExecutionGraph();
-
-        for (size_t i = 0; i < graphNodes.size(); i++) {
-            const auto& node = graphNodes[i];
-            allocationContext.execIndex[node] = {i, i};
-        }
-
-        allocationContext.edges = graphEdges;
-        allocationContext.syncPoints = syncNodesInds;
-    }
-
-    return allocationContext;
-}
-
 static void InitEdgeStatus(const std::vector<EdgePtr>& edges) {
     for (auto& edge : edges) edge->init();
 }
@@ -1064,22 +999,14 @@ static MemoryRegions FormMemoryRegions(const EdgeClusters& clusters,
         int64_t boxSize = 0;
         bool isConst = false, isOutput = false, isInput = false;
 
-        // std::cout << "Form memory region for cluster: " << i << "\n";
-
         for (auto &edge : clusters[i]) {
             const auto& parent = edge->getParent();
             const auto& child = edge->getChild();
 
-            // @todo this is can be considered as a property of the node, whether it is going to use input / output memory multiple times
-            // in scope of its execution routine
-            int e_start = parent->getType() == Type::TensorIterator ?  globalExecIndex.at(parent).first : globalExecIndex.at(parent).second;
-            int e_finish = child->getType() == Type::TensorIterator ?  globalExecIndex.at(child).second : globalExecIndex.at(child).first;
-
-            // std::cout << "[" << e_start << " - " << e_finish << "]"
-            //           << edge->name()
-            //           << "\n";
-
-            // int e_finish = edge->getChild()->getExecIndex();
+            // If node uses its input / output memory multiple times in scope of a single execution (i.e TensorIterator)
+            // prolong the lifetime of a memory region till execution is finished
+            int e_start = parent->usesInOutMemoryMultipleTimes() ?  globalExecIndex.at(parent).first : globalExecIndex.at(parent).second;
+            int e_finish = child->usesInOutMemoryMultipleTimes() ?  globalExecIndex.at(child).second : globalExecIndex.at(child).first;
 
             auto&& desc = edge->getDesc();
 
@@ -1140,7 +1067,7 @@ static OutputMemoryBlocks FilterOutDynamicOutputEdges(MemoryRegions& memoryRegio
             if (child->getType() == Type::Output && edge->getStatus() == Edge::Status::NeedAllocation) {
                 auto proxyMemBlock = std::make_shared<ProxyMemoryBlock>();
                 DEBUG_LOG("ProxyMemoryBlock ", proxyMemBlock);
-                // std::cout << "Allocating output edge: " << edge->name() << "\n";
+
                 edge->allocate(proxyMemBlock);
 
                 // Store the output memory blocks.
@@ -1172,7 +1099,7 @@ static OutputMemoryBlocks FilterOutDynamicOutputEdges(MemoryRegions& memoryRegio
  * 2) OutputMemoryBlocks - to allow memory sharing between graph and infer request
  */
 static std::tuple<MemoryControl::MemorySolution, EdgeClusters, OutputMemoryBlocks>
-SolveMemoryReuse(MemoryControl* memoryControl,
+SolveMemoryReuse(const std::shared_ptr<MemoryControl>& memoryControl,
                  const AllocationContext& allocationContext,
                  const GraphContext::CPtr graphContext,
                  const std::map<std::size_t, NodePtr>& outputNodesMap) {
@@ -1197,26 +1124,14 @@ SolveMemoryReuse(MemoryControl* memoryControl,
 }
 
 void Graph::Allocate() {
-    auto globalAllocation = m_context->memoryReuseGlobal();
-
-    if (std::getenv("LOCAL_REUSE")) {
-        globalAllocation = false;
-    }
-
-    // Set up the memory control subsystem.
-    auto memoryControl = globalAllocation ? m_context->getMemoryControl() : m_context->getNetworkMemoryControl()->createMemoryControlUnit();
+    auto memoryControl = m_context->getMemoryControl();
 
-    // memory is already allocated globally
     if (memoryControl->allocated()) {
-        return;
+        return; // memory is already allocated globally
     }
 
-    auto allocationContext = CreateAllocationContext(globalAllocation);
-
-    for (const auto& entry : allocationContext.execIndex) {
-        OPENVINO_ASSERT(entry.second.first >= 0);
-        OPENVINO_ASSERT(entry.second.second >= 0);
-    }
+    AllocationContext allocationContext;
+    RegisterToAllocationContext(0, allocationContext);
 
     const auto& edges = allocationContext.edges;
     InitEdgeStatus(edges);
@@ -1225,21 +1140,12 @@ void Graph::Allocate() {
     EdgeClusters edgeClusters;
     std::tie(solution, edgeClusters, m_outputNodesMemBlocks) = SolveMemoryReuse(memoryControl, allocationContext, m_context, outputNodesMap);
 
-    // std::cout << "### Global edges:" << "\n";
-    // for (const auto& edge : edges) {
-    //     const auto& parent = edge->getParent();
-    //     const auto& child = edge->getChild();
-    //     std::cout << "[" << allocationContext.execIndex[parent].second << " - "
-    //               << (child->getType() == Type::TensorIterator ? allocationContext.execIndex[child].second : allocationContext.execIndex[child].first) << "]"
-    //               << edge->name()
-    //               << "\n";
-    // }
-
     AllocateBaseEdges(edgeClusters, solution);
 
     memoryControl->allocateMemory();
 
     AllocatedReferencingEdges(edgeClusters);
+
     ValidateEdgeStatus(edges);
 }