Skip to content

Commit

Permalink
[onert/cpu] Support Q4_0 Gather (#14052)
Browse files Browse the repository at this point in the history
This commit updates CPU backend to support Q4_0 Gather operation.
It includes test for Q4_0.

ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh <[email protected]>
  • Loading branch information
hseok-oh authored Sep 24, 2024
1 parent 4d12964 commit 102163e
Show file tree
Hide file tree
Showing 8 changed files with 243 additions and 4 deletions.
2 changes: 1 addition & 1 deletion runtime/onert/backend/cpu/KernelGenerator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node)

auto fn = std::make_unique<ops::GatherLayer>();

fn->configure(input_tensor, indices_tensor, output_tensor, axis);
fn->configure(input_tensor, indices_tensor, output_tensor, axis, _external_context.get());

_return_fn = std::move(fn);
}
Expand Down
76 changes: 76 additions & 0 deletions runtime/onert/backend/cpu/ops/GGMLHelper.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "GGMLHelper.h"

namespace onert
{
namespace backend
{
namespace cpu
{
namespace ops
{

// Map onert's ir::DataType to the corresponding GGML tensor type.
// Throws std::runtime_error for data types the GGML interop does not cover.
ggml_type getGGMLType(ir::DataType type)
{
  if (type == ir::DataType::FLOAT32)
    return GGML_TYPE_F32;
  if (type == ir::DataType::QUANT_GGML_Q4_0)
    return GGML_TYPE_Q4_0;
  if (type == ir::DataType::QUANT_GGML_Q8_0)
    return GGML_TYPE_Q8_0;
  if (type == ir::DataType::INT32)
    return GGML_TYPE_I32;
  if (type == ir::DataType::INT64)
    return GGML_TYPE_I64;
  throw std::runtime_error("Unsupported data type");
}

// Build a non-owning ggml_tensor view over an onert tensor.
// The dimension order is reversed (GGML stores the innermost dimension
// first) and unused dimensions are padded with 1. The returned tensor
// aliases the onert tensor's buffer; it owns nothing.
struct ggml_tensor getGGMLTensor(const IPortableTensor *tensor)
{
  struct ggml_tensor res;

  res.type = getGGMLType(tensor->data_type());

  // Hoist the shape query out of the loop instead of calling
  // tensor->getShape() on every iteration.
  const auto shape = tensor->getShape();
  const auto rank = shape.rank();
  for (int i = 0; i < GGML_MAX_DIMS; ++i)
  {
    if (i >= rank)
      res.ne[i] = 1;
    else
      res.ne[i] = shape.dim(rank - i - 1);
  }

  // Byte strides: nb[1] accounts for block-quantized types, where ne[0]
  // elements occupy (ne[0] / block_size) * type_size bytes.
  res.nb[0] = ggml_type_size(res.type);
  res.nb[1] = res.nb[0] * (res.ne[0] / ggml_blck_size(res.type));
  for (int i = 2; i < GGML_MAX_DIMS; ++i)
    res.nb[i] = res.nb[i - 1] * res.ne[i - 1];

  // Plain data view: no op, no gradient, data aliases the onert buffer.
  res.op = GGML_OP_NONE;
  res.grad = nullptr;
  res.data = (void *)(tensor->buffer());

  return res;
}

} // namespace ops
} // namespace cpu
} // namespace backend
} // namespace onert
40 changes: 40 additions & 0 deletions runtime/onert/backend/cpu/ops/GGMLHelper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __ONERT_BACKEND_CPU_GGML_HELPER_H__
#define __ONERT_BACKEND_CPU_GGML_HELPER_H__

#include <backend/IPortableTensor.h>

#include <ggml.h>

namespace onert
{
namespace backend
{
namespace cpu
{
namespace ops
{

struct ggml_tensor getGGMLTensor(const IPortableTensor *tensor);

} // namespace ops
} // namespace cpu
} // namespace backend
} // namespace onert

#endif
59 changes: 58 additions & 1 deletion runtime/onert/backend/cpu/ops/GatherLayer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "GatherLayer.h"

#include "GGMLHelper.h"
#include "OperationUtils.h"

#include <cker/operation/Gather.h>

#include <cstdint>
#include <vector>

Expand All @@ -30,12 +31,16 @@ namespace ops
{

void GatherLayer::configure(const IPortableTensor *input, const IPortableTensor *indices,
IPortableTensor *output, int32_t axis)
IPortableTensor *output, int32_t axis, ExternalContext *ctx)
{
_input = input;
_indices = indices;
_axis = axis;
_output = output;
_ctx = ctx;

if (_input->data_type() == OperandType::QUANT_GGML_Q4_0)
ctx->initGgmlContext();
}

template <typename InputType> void GatherLayer::runByInputType()
Expand Down Expand Up @@ -69,6 +74,55 @@ template <typename InputType> void GatherLayer::runByInputType()
}
}

// Execute Gather on a GGML block-quantized input by building a one-node
// ggml graph (GGML_OP_GET_ROWS) and running it with a ggml compute plan.
void GatherLayer::runByGGMLQuantInputType()
{
  // Supporting condition
  // Input: rank 2
  // Indice: rank < 4 or rank 4 with dim(0) = 1, INT32
  // Axis: 0
  if (getShape(_input).DimensionsCount() != 2)
    throw std::runtime_error("Gather: block quantized input tensor must be rank 2");

  // BUGFIX: the guard must use '>=' — with '>' a rank-4 indices tensor was
  // never validated, so rank 4 with dim(0) != 1 slipped through despite the
  // stated supporting condition above.
  const auto indices_shape = getShape(_indices);
  if (indices_shape.DimensionsCount() >= 4 &&
      (indices_shape.DimensionsCount() != 4 || indices_shape.Dims(0) != 1))
    throw std::runtime_error("Gather: invalid indices tensor shape");

  if (_indices->data_type() != ir::DataType::INT32)
    throw std::runtime_error("Gather: indices tensor must be int32 type");

  if (_axis != 0)
    throw std::runtime_error("Gather: axis must be 0");

  // Wrap onert tensors as non-owning ggml views; express gather as a row
  // lookup (GET_ROWS) with the output node referencing input and indices.
  auto input = getGGMLTensor(_input);
  auto indices = getGGMLTensor(_indices);
  auto output = getGGMLTensor(_output);
  {
    output.op = GGML_OP_GET_ROWS;
    output.src[0] = &input;
    output.src[1] = &indices;
  }
  auto *nodes = &output;

  // Single-node compute graph.
  struct ggml_cgraph graph;
  {
    memset(&graph, 0, sizeof(graph));
    graph.n_nodes = 1;
    graph.nodes = &nodes;
  }

  // Plan the computation sized to the backend's thread pool, then run it.
  // The scratch buffer is owned by a vector (RAII) instead of a raw
  // malloc/free pair, so it cannot leak on early exit.
  auto cplan = ggml_graph_plan(&graph, _ctx->ruy_context()->max_num_threads());
  std::vector<uint8_t> work_data(cplan.work_size);
  cplan.work_data = work_data.data();

  ggml_graph_compute(&graph, &cplan);
}

void GatherLayer::run()
{
switch (_input->data_type())
Expand All @@ -82,6 +136,9 @@ void GatherLayer::run()
case OperandType::INT32:
runByInputType<int32_t>();
break;
case OperandType::QUANT_GGML_Q4_0:
runByGGMLQuantInputType();
break;
default:
throw std::runtime_error("Gather: unsupported input data type");
}
Expand Down
8 changes: 6 additions & 2 deletions runtime/onert/backend/cpu/ops/GatherLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#ifndef __ONERT_BACKEND_CPU_OPS_GATHERLAYER_H__
#define __ONERT_BACKEND_CPU_OPS_GATHERLAYER_H__

#include "../ExternalContext.h"

#include <backend/IPortableTensor.h>

#include <exec/IFunction.h>
Expand All @@ -33,26 +35,28 @@ namespace ops
class GatherLayer : public ::onert::exec::IFunction
{
public:
GatherLayer() : _input{nullptr}, _indices{nullptr}, _output{nullptr}, _axis{-1}
GatherLayer() : _input{nullptr}, _indices{nullptr}, _output{nullptr}, _axis{-1}, _ctx{nullptr}
{
// DO NOTHING
}

public:
void configure(const IPortableTensor *input, const IPortableTensor *indices,
IPortableTensor *output, int32_t axis);
IPortableTensor *output, int32_t axis, ExternalContext *ctx);

void run() override;

private:
template <typename OpType> void runByInputType();
void runByGGMLQuantInputType();

private:
const IPortableTensor *_input;
const IPortableTensor *_indices;
IPortableTensor *_output;

int32_t _axis;
ExternalContext *_ctx;
};

} // namespace ops
Expand Down
7 changes: 7 additions & 0 deletions tests/nnfw_api/lib/CircleGen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,13 @@ uint32_t CircleGen::addOperatorFloorMod(const OperatorParams &params)
circle::BuiltinOptions_NONE, 0);
}

// Append a GATHER operator; axis and batch_dims travel in GatherOptions.
uint32_t CircleGen::addOperatorGather(const OperatorParams &params, int axis, int batchdim)
{
  return addOperatorWithOptions(params, circle::BuiltinOperator_GATHER,
                                circle::BuiltinOptions_GatherOptions,
                                circle::CreateGatherOptions(_fbb, axis, batchdim).Union());
}

uint32_t CircleGen::addOperatorGreater(const OperatorParams &params)
{
auto options = circle::CreateLessOptions(_fbb).Union();
Expand Down
1 change: 1 addition & 0 deletions tests/nnfw_api/lib/CircleGen.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ class CircleGen
uint32_t addOperatorFullyConnected(const OperatorParams &params,
circle::FullyConnectedOptionsWeightsFormat weights_format =
circle::FullyConnectedOptionsWeightsFormat_DEFAULT);
uint32_t addOperatorGather(const OperatorParams &params, int axis = 0, int batchdim = 0);
uint32_t addOperatorGreater(const OperatorParams &params);
uint32_t addOperatorGreaterEqual(const OperatorParams &params);
uint32_t addOperatorIf(const OperatorParams &params, uint32_t then_subg, uint32_t else_subg);
Expand Down
54 changes: 54 additions & 0 deletions tests/nnfw_api/src/GenModelTests/one_op_tests/Gather.test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "GenModelTest.h"

#include "common.h"

// Gather a single row out of a Q4_0 block-quantized constant table and
// check the dequantized output matches the original float row exactly.
TEST_F(GenModelTest, OneOp_Gather_Q4_0)
{
CircleGen cgen;

// Fill a 4x32 table. For each entry i:
//  - sign_bit alternates the sign between adjacent entries,
//  - multiple = i / 32 + 1 gives each row its own scale factor (1..4),
//  - base is a small magnitude in [0, 8].
// (presumably chosen so every value survives Q4_0 quantization exactly —
// the test below compares the gathered row against `params` verbatim)
std::vector<float> params(4 * 32);
for (uint32_t i = 0; i < params.size(); i++)
{
uint32_t sign_bit = i % 2;
uint32_t multiple = i / 32 + 1;
uint32_t base = (i / 2) % 8;
if (sign_bit == 0)
base += 1;
params[i] = base * (0.01 * multiple) * (sign_bit ? -1 : 1);
}

// Quantize the table to Q4_0 and register it as a constant model tensor;
// indices are a runtime INT32 input, output is dequantized FLOAT32.
auto input_vector = quantData(params, circle::TensorType::TensorType_GGML_Q4_0);
auto input_buf = cgen.addBuffer(input_vector);
int input = cgen.addTensor({{4, 32}, circle::TensorType::TensorType_GGML_Q4_0, input_buf});
int indice = cgen.addTensor({{1, 1}, circle::TensorType::TensorType_INT32});
int output = cgen.addTensor({{1, 32}, circle::TensorType::TensorType_FLOAT32});

// Default Gather options: axis 0, batch_dims 0.
cgen.addOperatorGather({{input, indice}, {output}});
cgen.setInputsAndOutputs({indice}, {output});

_context = std::make_unique<GenModelTestContext>(cgen.finish());

// Gathering index 2 must return row 2 of the float table, i.e.
// params[64..95] (rows are 32 elements wide).
TestCaseData tc;
tc.addInput<int32_t>({2});
tc.addOutput<float>(std::vector<float>{params.begin() + 64, params.begin() + 96});
_context->addTestCase(tc);
// Only the CPU backend implements Q4_0 Gather in this commit.
_context->setBackends({"cpu"});

SUCCEED();
}

0 comments on commit 102163e

Please sign in to comment.