Skip to content

Commit

Permalink
[GPU] Update dft to use ngraph shape inference (openvinotoolkit#22567)
Browse files Browse the repository at this point in the history
### Details:
 - Update dft to use ngraph shape inference
 - Single layer test for dynamic shape input

### Tickets:
 - 130775
 - 100218
  • Loading branch information
wilson-seok authored Feb 13, 2024
1 parent b7c8107 commit 777aac9
Show file tree
Hide file tree
Showing 8 changed files with 897 additions and 30 deletions.
43 changes: 43 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/primitives/dft.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,49 @@ struct dft : public primitive_base<dft> {
direction(direction),
mode(mode) {}

/// @brief Constructs DFT primitive for dynamic shape input with two inputs (data and axes).
/// @param id This primitive id.
/// @param input Input primitive id (data).
/// @param axes Input primitive id that provides the axes to perform DFT.
/// @param constant_axes Axes values copied into the `axes` member. May be empty when the
///                      values are not known at construction time.
/// @param direction Direction of DFT operation.
/// @param mode Mode of DFT operation.
/// @param output_padding Output padding forwarded to the base primitive.
/// @note This overload leaves the `signal_size` member empty and initializes `output_shape`
///       with `ov::Shape(0)`.
dft(const primitive_id& id,
const input_info& input,
const input_info& axes,
std::vector<int64_t> constant_axes,
dft_direction direction,
dft_mode mode,
const padding& output_padding = {})
: primitive_base(id, {input, axes}, {output_padding}),
axes(constant_axes),
signal_size({}),
output_shape(ov::Shape(0)),
direction(direction),
mode(mode) {}

/// @brief Constructs DFT primitive for dynamic shape input with three inputs
///        (data, axes and signal size).
/// @param id This primitive id.
/// @param input Input primitive id (data).
/// @param axes Input primitive id that provides the axes to perform DFT.
/// @param signal_size Input primitive id that provides the signal sizes for 'axes'.
/// @param constant_axes Axes values copied into the `axes` member. May be empty when the
///                      values are not known at construction time.
/// @param constant_signal_size Signal size values copied into the `signal_size` member. May be
///                             empty when the values are not known at construction time.
/// @param direction Direction of DFT operation.
/// @param mode Mode of DFT operation.
/// @param output_padding Output padding forwarded to the base primitive.
/// @note This overload initializes `output_shape` with `ov::Shape(0)`.
dft(const primitive_id& id,
const input_info& input,
const input_info& axes,
const input_info& signal_size,
std::vector<int64_t> constant_axes,
std::vector<int64_t> constant_signal_size,
dft_direction direction,
dft_mode mode,
const padding& output_padding = {})
: primitive_base(id, {input, axes, signal_size}, {output_padding}),
axes(constant_axes),
signal_size(constant_signal_size),
output_shape(ov::Shape(0)),
direction(direction),
mode(mode) {}

std::vector<int64_t> axes;
std::vector<int64_t> signal_size;
ov::Shape output_shape;
Expand Down
94 changes: 90 additions & 4 deletions src/plugins/intel_gpu/src/graph/dft.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,18 @@
// SPDX-License-Identifier: Apache-2.0
//

#include <dft_inst.h>
#include <primitive_type_base.h>
#include "dft_inst.h"
#include "primitive_type_base.h"
#include "fft_base_shape_inference.hpp"
#include "rdft_shape_inference.hpp"
#include "irdft_shape_inference.hpp"

#include "json_object.h"

namespace cldnn {
GPU_DEFINE_PRIMITIVE_TYPE_ID(dft)

layout dft_inst::calc_output_layout(const dft_node& node, const kernel_impl_params& impl_param) {
layout dft_inst::calc_output_layout(dft_node const& node, kernel_impl_params const& impl_param) {
const auto primitive = impl_param.typed_desc<dft>();
const auto input_layout = impl_param.get_input_layout();

Expand All @@ -36,7 +39,89 @@ layout dft_inst::calc_output_layout(const dft_node& node, const kernel_impl_para
return {input_layout.data_type, output_format, tensor(output_format, dims_converted)};
}

std::string dft_inst::to_string(const dft_node& node) {
/// @brief Computes the output layout of the dft primitive using the OpenVINO shape inference
///        of DFT / IDFT / RDFT / IRDFT, selected by the primitive's mode and direction.
///
/// Values of the axes (input 1) and signal_size (input 2) are passed to the shape inference
/// through a tensor accessor. They are taken either from the constant values stored in the
/// primitive, or from `impl_param.memory_deps` when the stored values are not usable.
///
/// @tparam ShapeType Shape type used for the inference (instantiated below for ov::PartialShape).
/// @param impl_param Kernel parameters providing the input layouts, memory dependencies,
///                   stream and the primitive descriptor.
/// @return A vector with a single layout: inferred shape, the primitive's output data type
///         (falling back to the data type of input 0) and the format of input 0 adjusted to
///         the rank of the inferred shape.
template<typename ShapeType>
std::vector<layout> dft_inst::calc_output_layouts(dft_node const& /*node*/, kernel_impl_params const& impl_param) {
    const auto primitive = impl_param.typed_desc<dft>();
    const auto input0_layout = impl_param.get_input_layout(0);
    const auto input1_layout = impl_param.get_input_layout(1);

    std::vector<ShapeType> input_shapes = {
        input0_layout.get<ShapeType>(),
        input1_layout.get<ShapeType>()
    };

    if (impl_param.input_layouts.size() == 3)
        input_shapes.push_back(impl_param.get_input_layout(2).get<ShapeType>());

    std::unordered_map<size_t, ov::Tensor> const_data;

    // Runs the shape inference matching mode/direction on the current content of const_data.
    // The tensors in const_data only wrap external pointers, so this must be invoked while
    // the memory behind those pointers is still accessible.
    const auto infer_output_shapes = [&]() -> std::vector<ShapeType> {
        const auto tensor_accessor = ov::make_tensor_accessor(const_data);
        if (primitive->mode == cldnn::dft_mode::complex) {
            if (primitive->direction == cldnn::dft_direction::forward) {
                ov::op::v7::DFT op;
                return ov::op::shape_infer(&op, input_shapes, tensor_accessor);
            }
            ov::op::v7::IDFT op;
            return ov::op::shape_infer(&op, input_shapes, tensor_accessor);
        }
        if (primitive->direction == cldnn::dft_direction::forward) {
            ov::op::v9::RDFT op;
            return ov::op::v9::shape_infer(&op, input_shapes, tensor_accessor);
        }
        ov::op::v9::IRDFT op;
        return ov::op::v9::shape_infer(&op, input_shapes, tensor_accessor);
    };

    std::vector<ShapeType> output_shapes;
    auto& memory_deps = impl_param.memory_deps;

    const bool has_constant_axes = !primitive->axes.empty();
    const bool has_constant_signal_size = !primitive->signal_size.empty();

    if (has_constant_axes && ((impl_param.input_layouts.size() == 2) || has_constant_signal_size)) {
        // Axes (and signal_size, when present) are constants stored in the primitive:
        // wrap the primitive-owned buffers without copying.
        auto axes_ptr = reinterpret_cast<uint8_t*>(const_cast<int64_t*>(primitive->axes.data()));
        ov::Tensor axes_tensor(ov::element::i64, ov::Shape({primitive->axes.size()}), axes_ptr, {});
        const_data.emplace(1, axes_tensor);

        if (has_constant_signal_size) {
            auto signal_size_ptr = reinterpret_cast<uint8_t*>(const_cast<int64_t*>(primitive->signal_size.data()));
            ov::Tensor signal_size_tensor(ov::element::i64, ov::Shape({primitive->signal_size.size()}), signal_size_ptr, {});
            const_data.emplace(2, signal_size_tensor);
        }

        output_shapes = infer_output_shapes();
    } else if (memory_deps.count(1)) {
        // Values come from runtime memory. The locks must stay alive until the shape inference
        // has finished reading the tensors, so the inference is invoked inside the lock scopes.
        auto axes_mem = memory_deps.at(1);
        cldnn::mem_lock<uint8_t, mem_lock_type::read> axes_lock(axes_mem, impl_param.get_stream());
        const_data.emplace(1, make_tensor(axes_mem->get_layout(), axes_lock.data()));

        if (memory_deps.count(2)) {
            auto signal_size_mem = memory_deps.at(2);
            cldnn::mem_lock<uint8_t, mem_lock_type::read> signal_size_lock(signal_size_mem, impl_param.get_stream());
            const_data.emplace(2, make_tensor(signal_size_mem->get_layout(), signal_size_lock.data()));

            output_shapes = infer_output_shapes();
        } else {
            output_shapes = infer_output_shapes();
        }
    } else {
        // No values available: infer from the input shapes only.
        output_shapes = infer_output_shapes();
    }

    auto dt = primitive->get_output_data_type(0).value_or(impl_param.get_input_layout(0).data_type);
    format output_format = format::adjust_to_rank(input0_layout.format, output_shapes[0].size());

    std::vector<layout> layouts;
    layouts.push_back(layout{output_shapes[0], dt, output_format});
    return layouts;
}

template std::vector<layout> dft_inst::calc_output_layouts<ov::PartialShape>(dft_node const& node, kernel_impl_params const& impl_param);

std::string dft_inst::to_string(dft_node const& node) {
auto desc = node.get_primitive();
auto node_info = node.desc_to_json();
json_composite dft_info;
Expand All @@ -53,4 +138,5 @@ std::string dft_inst::to_string(const dft_node& node) {
return os.str();
}

/// @brief Constructs the dft primitive instance by forwarding the network and the node
///        to the parent instance type.
dft_inst::typed_primitive_inst(network& network, dft_node const& node) : parent(network, node) {}
} // namespace cldnn
45 changes: 41 additions & 4 deletions src/plugins/intel_gpu/src/graph/impls/ocl/dft.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,49 @@ struct dft_impl : typed_primitive_impl_ocl<dft> {
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
const auto primitive = impl_param.typed_desc<dft>();
auto params = get_default_params<kernel_selector::dft_params>(impl_param);
params.axes = primitive->axes;
auto& memory_deps = impl_param.memory_deps;

bool allow_new_shape_infer = impl_param.get_program().get_config().get_property(ov::intel_gpu::allow_new_shape_infer);
if (allow_new_shape_infer && primitive->axes.empty() && primitive->signal_size.empty()) {
if (memory_deps.count(1)) {
auto axes_mem = memory_deps.at(1);
cldnn::mem_lock<uint8_t, mem_lock_type::read> axes_lock(axes_mem, impl_param.get_stream());

std::vector<int64_t> axes;
for (size_t i = 0; i < impl_param.get_input_layout(1).count(); i++) {
if (axes_mem->get_layout().data_type == cldnn::data_types::i64) {
axes.push_back(reinterpret_cast<int64_t*>(axes_lock.data())[i]);
} else {
axes.push_back(static_cast<int64_t>(reinterpret_cast<int32_t*>(axes_lock.data())[i]));
}
}
params.axes = axes;
}

if (primitive->signal_size.empty()) {
params.signal_size = std::vector<int64_t>(params.axes.size(), -1);
if (memory_deps.count(2)) {
auto signal_size_mem = memory_deps.at(2);
cldnn::mem_lock<uint8_t, mem_lock_type::read> signal_size_lock(signal_size_mem, impl_param.get_stream());

std::vector<int64_t> signal_size;
for (size_t i = 0; i < impl_param.get_input_layout(2).count(); i++) {
if (signal_size_mem->get_layout().data_type == cldnn::data_types::i64) {
signal_size.push_back(reinterpret_cast<int64_t*>(signal_size_lock.data())[i]);
} else {
signal_size.push_back(static_cast<int64_t>(reinterpret_cast<int32_t*>(signal_size_lock.data())[i]));
}
}
params.signal_size = signal_size;
} else {
params.signal_size = std::vector<int64_t>(params.axes.size(), -1);
}
} else {
params.signal_size = primitive->signal_size;
params.axes = primitive->axes;

if (primitive->signal_size.empty()) {
params.signal_size = std::vector<int64_t>(params.axes.size(), -1);
} else {
params.signal_size = primitive->signal_size;
}
}

if (primitive->direction == dft_direction::inverse) {
Expand Down
26 changes: 23 additions & 3 deletions src/plugins/intel_gpu/src/graph/include/dft_inst.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,36 @@
#include "primitive_inst.h"

namespace cldnn {
/// @brief Program node specialization for the dft primitive.
template <>
struct typed_program_node<dft> : public typed_program_node_base<dft> {
    using parent = typed_program_node_base<dft>;

public:
    using parent::parent;

    /// @brief Returns the dependency at position `idx` (the first one by default).
    program_node& input(size_t idx = 0) const {
        return get_dependency(idx);
    }

    /// @brief Lists the dependency indices whose values are needed for shape inference:
    ///        index 1 always, plus index 2 when the node has exactly three dependencies.
    std::vector<size_t> get_shape_infer_dependencies() const override {
        const bool has_third_dependency = this->get_dependencies().size() == 3;

        std::vector<size_t> shape_infer_deps{1};
        if (has_third_dependency) {
            shape_infer_deps.push_back(2);
        }
        return shape_infer_deps;
    }
};

using dft_node = typed_program_node<dft>;

/// @brief Primitive instance specialization for the dft primitive. Declares the output
///        layout calculation entry points and the string dump helper.
template <>
class typed_primitive_inst<dft> : public typed_primitive_inst_base<dft> {
using parent = typed_primitive_inst_base<dft>;
using parent::parent;

public:
using typed_primitive_inst_base::typed_primitive_inst_base;
// Shape-inference based layout calculation; returns one layout per output.
template<typename ShapeType>
static std::vector<layout> calc_output_layouts(dft_node const& /*node*/, kernel_impl_params const& impl_param);
// Single-layout calculation entry point.
static layout calc_output_layout(dft_node const& node, kernel_impl_params const& impl_param);
// Produces a textual description of the node.
static std::string to_string(dft_node const& node);

// NOTE(review): the two declarations below repeat the ones above with a different const
// placement; this looks like residue of the previous revision of this header — confirm.
static layout calc_output_layout(const dft_node& node, const kernel_impl_params& impl_param);
static std::string to_string(const dft_node& node);
// Constructor taking the owning network and the program node.
typed_primitive_inst(network& network, dft_node const& node);
};

using dft_inst = typed_primitive_inst<dft>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ JitConstants DFTKernelRef::GetJitConstants(const dft_params& params) const {
auto jit = MakeBaseParamsJitConstants(params);
const auto out_rank = params.outputs.front().Dimentions();
const auto out_sizes = params.outputs.front().LogicalDims();
const auto in_rank = params.inputs.front().Dimentions();
const auto in_sizes = params.inputs.front().LogicalDims();
const auto dims_size = in_sizes.size() - 1;
auto signal_sizes = out_sizes;
Expand All @@ -210,6 +211,16 @@ JitConstants DFTKernelRef::GetJitConstants(const dft_params& params) const {
for (size_t i = 0; i < params.axes.size(); ++i) {
// opencl kernels have inverted order of dimensions with respect to axis spec: x is smallest index, b is largest
auto axis = params.axes[i];

// A negative axis must be converted to its non-negative equivalent.
if (axis < 0) {
// Per the op specification, forward RDFT normalizes a negative axis a as r + a; all other cases use (r - 1) + a, where r is the input rank.
if (params.mode == dft_params::Mode::real && params.direction == dft_params::Direction::forward)
axis = out_rank -1 + axis; // (out_rank-1) is in_rank
else
axis = in_rank -1 + axis;
}

auto inverted_axis = dims_size - axis;
auto signal_size = params.signal_size[i];

Expand Down
66 changes: 47 additions & 19 deletions src/plugins/intel_gpu/src/plugin/ops/dft.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,27 +27,55 @@ void createDft(ProgramBuilder& p,
const auto inputs = p.GetInputInfo(op);
const auto layer_name = layer_type_name_ID(op);
const auto& friendly_name = op->get_friendly_name();
const auto& out_shape = op->get_output_shape(0);

auto axes_constant = std::dynamic_pointer_cast<ov::op::v0::Constant>(op->get_input_node_shared_ptr(1));
OPENVINO_ASSERT(axes_constant != nullptr, "[GPU] Unsupported parameter nodes type in ", friendly_name, " (", op->get_type_name(), ")");
auto axes = axes_constant->cast_vector<int64_t>();
uint8_t axis_correction = static_cast<uint8_t>(op->get_input_shape(0).size());
if (direction != cldnn::dft_direction::forward || mode != cldnn::dft_mode::real) {
--axis_correction;
}
ov::util::normalize_axes(op.get(), axis_correction, axes);

std::vector<int64_t> signal_size;
if (op->get_input_size() == 3) {
auto signal_size_constant = std::dynamic_pointer_cast<ov::op::v0::Constant>(op->get_input_node_shared_ptr(2));
OPENVINO_ASSERT(signal_size_constant != nullptr, "[GPU] Unsupported parameter nodes type in ", friendly_name, " (", op->get_type_name(), ")");
signal_size = signal_size_constant->cast_vector<int64_t>();
if (op->is_dynamic() && p.use_new_shape_infer()) {
std::vector<int64_t> axes;
auto axes_constant = std::dynamic_pointer_cast<ov::op::v0::Constant>(op->get_input_node_shared_ptr(1));
if (axes_constant != nullptr) {
axes = axes_constant->cast_vector<int64_t>();
uint8_t axis_correction = static_cast<uint8_t>(op->get_input_partial_shape(0).size());
if (direction != cldnn::dft_direction::forward || mode != cldnn::dft_mode::real) {
--axis_correction;
}
ov::util::normalize_axes(op.get(), axis_correction, axes);
}

if (op->get_input_size() == 3) {
std::vector<int64_t> signal_size;
auto signal_size_constant = std::dynamic_pointer_cast<ov::op::v0::Constant>(op->get_input_node_shared_ptr(2));
if (signal_size_constant != nullptr) {
signal_size = signal_size_constant->cast_vector<int64_t>();
}

const cldnn::dft prim(layer_name, inputs[0], inputs[1], inputs[2], axes, signal_size, direction, mode);
p.add_primitive(*op, prim);
} else {
const cldnn::dft prim(layer_name, inputs[0], inputs[1], axes, direction, mode);
p.add_primitive(*op, prim);
}
} else {
const auto& out_shape = op->get_output_shape(0);

auto axes_constant = std::dynamic_pointer_cast<ov::op::v0::Constant>(op->get_input_node_shared_ptr(1));
OPENVINO_ASSERT(axes_constant != nullptr, "[GPU] Unsupported parameter nodes type in ", friendly_name, " (", op->get_type_name(), ")");
auto axes = axes_constant->cast_vector<int64_t>();
uint8_t axis_correction = static_cast<uint8_t>(op->get_input_shape(0).size());
if (direction != cldnn::dft_direction::forward || mode != cldnn::dft_mode::real) {
--axis_correction;
}
ov::util::normalize_axes(op.get(), axis_correction, axes);

std::vector<int64_t> signal_size;
if (op->get_input_size() == 3) {
auto signal_size_constant = std::dynamic_pointer_cast<ov::op::v0::Constant>(op->get_input_node_shared_ptr(2));
OPENVINO_ASSERT(signal_size_constant != nullptr, "[GPU] Unsupported parameter nodes type in ", friendly_name, " (", op->get_type_name(), ")");
signal_size = signal_size_constant->cast_vector<int64_t>();
}

const cldnn::dft prim(layer_name, inputs.front(), axes, signal_size, out_shape, direction, mode);

p.add_primitive(*op, prim);
}

const cldnn::dft prim(layer_name, inputs.front(), axes, signal_size, out_shape, direction, mode);

p.add_primitive(*op, prim);
}

void CreateDFTOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v7::DFT>& op) {
Expand Down
Loading

0 comments on commit 777aac9

Please sign in to comment.