Skip to content

Commit

Permalink
[onert/cpu] Support Q4_0 Gather (#14052)
Browse files Browse the repository at this point in the history
This commit updates CPU backend to support Q4_0 Gather operation.
It includes test for Q4_0.

ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh <[email protected]>
  • Loading branch information
hseok-oh authored Sep 24, 2024
1 parent 4d12964 commit 102163e
Show file tree
Hide file tree
Showing 8 changed files with 243 additions and 4 deletions.
2 changes: 1 addition & 1 deletion runtime/onert/backend/cpu/KernelGenerator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node)

auto fn = std::make_unique<ops::GatherLayer>();

fn->configure(input_tensor, indices_tensor, output_tensor, axis);
fn->configure(input_tensor, indices_tensor, output_tensor, axis, _external_context.get());

_return_fn = std::move(fn);
}
Expand Down
76 changes: 76 additions & 0 deletions runtime/onert/backend/cpu/ops/GGMLHelper.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "GGMLHelper.h"

namespace onert
{
namespace backend
{
namespace cpu
{
namespace ops
{

// Map onert's ir::DataType to the corresponding GGML tensor type.
// Throws std::runtime_error for data types the GGML interop does not cover.
ggml_type getGGMLType(ir::DataType type)
{
  if (type == ir::DataType::FLOAT32)
    return GGML_TYPE_F32;
  if (type == ir::DataType::QUANT_GGML_Q4_0)
    return GGML_TYPE_Q4_0;
  if (type == ir::DataType::QUANT_GGML_Q8_0)
    return GGML_TYPE_Q8_0;
  if (type == ir::DataType::INT32)
    return GGML_TYPE_I32;
  if (type == ir::DataType::INT64)
    return GGML_TYPE_I64;
  throw std::runtime_error("Unsupported data type");
}

// Build a non-owning ggml_tensor view over an onert tensor.
// The dimension order is reversed (GGML stores the innermost dimension
// first) and unused dimensions are padded with 1. The returned tensor
// aliases the onert tensor's buffer; it owns nothing.
struct ggml_tensor getGGMLTensor(const IPortableTensor *tensor)
{
  struct ggml_tensor res;

  res.type = getGGMLType(tensor->data_type());

  // Hoist the shape query out of the loop instead of calling
  // tensor->getShape() on every iteration.
  const auto shape = tensor->getShape();
  const auto rank = shape.rank();
  for (int i = 0; i < GGML_MAX_DIMS; ++i)
  {
    if (i >= rank)
      res.ne[i] = 1;
    else
      res.ne[i] = shape.dim(rank - i - 1);
  }

  // Byte strides: nb[1] accounts for block-quantized types, where ne[0]
  // elements occupy (ne[0] / block_size) * type_size bytes.
  res.nb[0] = ggml_type_size(res.type);
  res.nb[1] = res.nb[0] * (res.ne[0] / ggml_blck_size(res.type));
  for (int i = 2; i < GGML_MAX_DIMS; ++i)
    res.nb[i] = res.nb[i - 1] * res.ne[i - 1];

  // Plain data view: no op, no gradient, data aliases the onert buffer.
  res.op = GGML_OP_NONE;
  res.grad = nullptr;
  res.data = (void *)(tensor->buffer());

  return res;
}

} // namespace ops
} // namespace cpu
} // namespace backend
} // namespace onert
40 changes: 40 additions & 0 deletions runtime/onert/backend/cpu/ops/GGMLHelper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __ONERT_BACKEND_CPU_GGML_HELPER_H__
#define __ONERT_BACKEND_CPU_GGML_HELPER_H__

#include <backend/IPortableTensor.h>

#include <ggml.h>

namespace onert
{
namespace backend
{
namespace cpu
{
namespace ops
{

struct ggml_tensor getGGMLTensor(const IPortableTensor *tensor);

} // namespace ops
} // namespace cpu
} // namespace backend
} // namespace onert

#endif
59 changes: 58 additions & 1 deletion runtime/onert/backend/cpu/ops/GatherLayer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "GatherLayer.h"

#include "GGMLHelper.h"
#include "OperationUtils.h"

#include <cker/operation/Gather.h>

#include <cstdint>
#include <vector>

Expand All @@ -30,12 +31,16 @@ namespace ops
{

void GatherLayer::configure(const IPortableTensor *input, const IPortableTensor *indices,
IPortableTensor *output, int32_t axis)
IPortableTensor *output, int32_t axis, ExternalContext *ctx)
{
_input = input;
_indices = indices;
_axis = axis;
_output = output;
_ctx = ctx;

if (_input->data_type() == OperandType::QUANT_GGML_Q4_0)
ctx->initGgmlContext();
}

template <typename InputType> void GatherLayer::runByInputType()
Expand Down Expand Up @@ -69,6 +74,55 @@ template <typename InputType> void GatherLayer::runByInputType()
}
}

// Execute Gather on a GGML block-quantized input by building a one-node
// ggml graph (GGML_OP_GET_ROWS) and running it with a ggml compute plan.
void GatherLayer::runByGGMLQuantInputType()
{
  // Supporting condition
  // Input: rank 2
  // Indice: rank < 4 or rank 4 with dim(0) = 1, INT32
  // Axis: 0
  if (getShape(_input).DimensionsCount() != 2)
    throw std::runtime_error("Gather: block quantized input tensor must be rank 2");

  // BUGFIX: the guard must use '>=' — with '>' a rank-4 indices tensor was
  // never validated, so rank 4 with dim(0) != 1 slipped through despite the
  // stated supporting condition above.
  const auto indices_shape = getShape(_indices);
  if (indices_shape.DimensionsCount() >= 4 &&
      (indices_shape.DimensionsCount() != 4 || indices_shape.Dims(0) != 1))
    throw std::runtime_error("Gather: invalid indices tensor shape");

  if (_indices->data_type() != ir::DataType::INT32)
    throw std::runtime_error("Gather: indices tensor must be int32 type");

  if (_axis != 0)
    throw std::runtime_error("Gather: axis must be 0");

  // Wrap onert tensors as non-owning ggml views; express gather as a row
  // lookup (GET_ROWS) with the output node referencing input and indices.
  auto input = getGGMLTensor(_input);
  auto indices = getGGMLTensor(_indices);
  auto output = getGGMLTensor(_output);
  {
    output.op = GGML_OP_GET_ROWS;
    output.src[0] = &input;
    output.src[1] = &indices;
  }
  auto *nodes = &output;

  // Single-node compute graph.
  struct ggml_cgraph graph;
  {
    memset(&graph, 0, sizeof(graph));
    graph.n_nodes = 1;
    graph.nodes = &nodes;
  }

  // Plan the computation sized to the backend's thread pool, then run it.
  // The scratch buffer is owned by a vector (RAII) instead of a raw
  // malloc/free pair, so it cannot leak on early exit.
  auto cplan = ggml_graph_plan(&graph, _ctx->ruy_context()->max_num_threads());
  std::vector<uint8_t> work_data(cplan.work_size);
  cplan.work_data = work_data.data();

  ggml_graph_compute(&graph, &cplan);
}

void GatherLayer::run()
{
switch (_input->data_type())
Expand All @@ -82,6 +136,9 @@ void GatherLayer::run()
case OperandType::INT32:
runByInputType<int32_t>();
break;
case OperandType::QUANT_GGML_Q4_0:
runByGGMLQuantInputType();
break;
default:
throw std::runtime_error("Gather: unsupported input data type");
}
Expand Down
8 changes: 6 additions & 2 deletions runtime/onert/backend/cpu/ops/GatherLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#ifndef __ONERT_BACKEND_CPU_OPS_GATHERLAYER_H__
#define __ONERT_BACKEND_CPU_OPS_GATHERLAYER_H__

#include "../ExternalContext.h"

#include <backend/IPortableTensor.h>

#include <exec/IFunction.h>
Expand All @@ -33,26 +35,28 @@ namespace ops
class GatherLayer : public ::onert::exec::IFunction
{
public:
GatherLayer() : _input{nullptr}, _indices{nullptr}, _output{nullptr}, _axis{-1}
GatherLayer() : _input{nullptr}, _indices{nullptr}, _output{nullptr}, _axis{-1}, _ctx{nullptr}
{
// DO NOTHING
}

public:
void configure(const IPortableTensor *input, const IPortableTensor *indices,
IPortableTensor *output, int32_t axis);
IPortableTensor *output, int32_t axis, ExternalContext *ctx);

void run() override;

private:
template <typename OpType> void runByInputType();
void runByGGMLQuantInputType();

private:
const IPortableTensor *_input;
const IPortableTensor *_indices;
IPortableTensor *_output;

int32_t _axis;
ExternalContext *_ctx;
};

} // namespace ops
Expand Down
7 changes: 7 additions & 0 deletions tests/nnfw_api/lib/CircleGen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,13 @@ uint32_t CircleGen::addOperatorFloorMod(const OperatorParams &params)
circle::BuiltinOptions_NONE, 0);
}

// Append a GATHER operator; axis and batch_dims travel in GatherOptions.
uint32_t CircleGen::addOperatorGather(const OperatorParams &params, int axis, int batchdim)
{
  return addOperatorWithOptions(params, circle::BuiltinOperator_GATHER,
                                circle::BuiltinOptions_GatherOptions,
                                circle::CreateGatherOptions(_fbb, axis, batchdim).Union());
}

uint32_t CircleGen::addOperatorGreater(const OperatorParams &params)
{
auto options = circle::CreateLessOptions(_fbb).Union();
Expand Down
1 change: 1 addition & 0 deletions tests/nnfw_api/lib/CircleGen.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ class CircleGen
uint32_t addOperatorFullyConnected(const OperatorParams &params,
circle::FullyConnectedOptionsWeightsFormat weights_format =
circle::FullyConnectedOptionsWeightsFormat_DEFAULT);
uint32_t addOperatorGather(const OperatorParams &params, int axis = 0, int batchdim = 0);
uint32_t addOperatorGreater(const OperatorParams &params);
uint32_t addOperatorGreaterEqual(const OperatorParams &params);
uint32_t addOperatorIf(const OperatorParams &params, uint32_t then_subg, uint32_t else_subg);
Expand Down
54 changes: 54 additions & 0 deletions tests/nnfw_api/src/GenModelTests/one_op_tests/Gather.test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "GenModelTest.h"

#include "common.h"

// Gather a single row out of a Q4_0 block-quantized constant table and
// check the dequantized output matches the original float row exactly.
TEST_F(GenModelTest, OneOp_Gather_Q4_0)
{
CircleGen cgen;

// Fill a 4x32 table. For each entry i:
//  - sign_bit alternates the sign between adjacent entries,
//  - multiple = i / 32 + 1 gives each row its own scale factor (1..4),
//  - base is a small magnitude in [0, 8].
// (presumably chosen so every value survives Q4_0 quantization exactly —
// the test below compares the gathered row against `params` verbatim)
std::vector<float> params(4 * 32);
for (uint32_t i = 0; i < params.size(); i++)
{
uint32_t sign_bit = i % 2;
uint32_t multiple = i / 32 + 1;
uint32_t base = (i / 2) % 8;
if (sign_bit == 0)
base += 1;
params[i] = base * (0.01 * multiple) * (sign_bit ? -1 : 1);
}

// Quantize the table to Q4_0 and register it as a constant model tensor;
// indices are a runtime INT32 input, output is dequantized FLOAT32.
auto input_vector = quantData(params, circle::TensorType::TensorType_GGML_Q4_0);
auto input_buf = cgen.addBuffer(input_vector);
int input = cgen.addTensor({{4, 32}, circle::TensorType::TensorType_GGML_Q4_0, input_buf});
int indice = cgen.addTensor({{1, 1}, circle::TensorType::TensorType_INT32});
int output = cgen.addTensor({{1, 32}, circle::TensorType::TensorType_FLOAT32});

// Default Gather options: axis 0, batch_dims 0.
cgen.addOperatorGather({{input, indice}, {output}});
cgen.setInputsAndOutputs({indice}, {output});

_context = std::make_unique<GenModelTestContext>(cgen.finish());

// Gathering index 2 must return row 2 of the float table, i.e.
// params[64..95] (rows are 32 elements wide).
TestCaseData tc;
tc.addInput<int32_t>({2});
tc.addOutput<float>(std::vector<float>{params.begin() + 64, params.begin() + 96});
_context->addTestCase(tc);
// Only the CPU backend implements Q4_0 Gather in this commit.
_context->setBackends({"cpu"});

SUCCEED();
}

0 comments on commit 102163e

Please sign in to comment.