[onert/cpu] Update ggml usage (#14194)

This commit updates the usage of ggml in Gather and FullyConnected:
- Use a vector for the plan buffer
- Use the general thread number getter

ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh <[email protected]>
Samsung · Oct 10, 2024 · 63d7ff2 · 63d7ff2
1 parent 21d3d02
commit 63d7ff2
Show file tree

Hide file tree

Showing 2 changed files with 5 additions and 9 deletions.
diff --git a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc
@@ -204,14 +204,12 @@ void FullyConnectedLayer::fullyConnectedGGMLWeight()
   }
 
   // get cplan
-  auto cplan = ggml_graph_plan(&graph, _external_context->ruy_context()->max_num_threads());
-  cplan.work_data = (uint8_t *)(malloc(cplan.work_size));
+  auto cplan = ggml_graph_plan(&graph, _external_context->maxNumThreads());
+  std::vector<uint8_t> buf(cplan.work_size);
+  cplan.work_data = buf.data();
 
   // compute
   ggml_graph_compute(&graph, &cplan);
-
-  // free
-  free(cplan.work_data);
 }
 
 void FullyConnectedLayer::fullyConnected16x1Float32()

diff --git a/runtime/onert/backend/cpu/ops/GatherLayer.cc b/runtime/onert/backend/cpu/ops/GatherLayer.cc
@@ -114,13 +114,11 @@ void GatherLayer::runByGGMLQuantInputType()
 
   // get cplan
   auto cplan = ggml_graph_plan(&graph, _ctx->maxNumThreads());
-  cplan.work_data = (uint8_t *)(malloc(cplan.work_size));
+  std::vector<uint8_t> buf(cplan.work_size);
+  cplan.work_data = buf.data();
 
   // compute
   ggml_graph_compute(&graph, &cplan);
-
-  // free
-  free(cplan.work_data);
 }
 
 void GatherLayer::run()