diff --git a/src/frontends/onnx/frontend/include/openvino/frontend/onnx/decoder.hpp b/src/frontends/onnx/frontend/include/openvino/frontend/onnx/decoder.hpp index 69d1a518018616..9f325991a183b5 100644 --- a/src/frontends/onnx/frontend/include/openvino/frontend/onnx/decoder.hpp +++ b/src/frontends/onnx/frontend/include/openvino/frontend/onnx/decoder.hpp @@ -7,6 +7,7 @@ #include "openvino/core/type/element_type.hpp" #include "openvino/frontend/decoder.hpp" #include "openvino/frontend/onnx/visibility.hpp" +#include "openvino/runtime/aligned_buffer.hpp" namespace ov { namespace frontend { @@ -15,12 +16,8 @@ namespace onnx { struct ONNX_FRONTEND_API TensorMetaInfo { ov::PartialShape m_partial_shape; ov::element::Type m_element_type; - const uint8_t* m_tensor_data; - ov::Any m_tensor_data_any; - size_t m_tensor_data_size; + std::shared_ptr m_buffer; const std::string* m_tensor_name; - std::shared_ptr m_external_location; - bool m_is_raw; }; class ONNX_FRONTEND_API DecoderBase : public ov::frontend::DecoderBase { diff --git a/src/frontends/onnx/frontend/src/core/decoder_proto.hpp b/src/frontends/onnx/frontend/src/core/decoder_proto.hpp index d902b9a7abc4ad..42b4c34a7f732f 100644 --- a/src/frontends/onnx/frontend/src/core/decoder_proto.hpp +++ b/src/frontends/onnx/frontend/src/core/decoder_proto.hpp @@ -66,8 +66,6 @@ class DecoderProtoTensor : public ov::frontend::onnx::DecoderBaseTensor { m_tensor_meta_info.m_tensor_name = &name; m_tensor_meta_info.m_element_type = ov::element::dynamic; m_tensor_meta_info.m_partial_shape = ov::PartialShape::dynamic(); - m_tensor_meta_info.m_tensor_data = nullptr; - m_tensor_meta_info.m_tensor_data_size = 0; } const ov::frontend::onnx::TensorMetaInfo& get_tensor_info() const override { diff --git a/src/frontends/onnx/frontend/src/core/graph_iterator_proto.cpp b/src/frontends/onnx/frontend/src/core/graph_iterator_proto.cpp index 4b55c5f94ed06c..7e03ec218e431f 100644 --- a/src/frontends/onnx/frontend/src/core/graph_iterator_proto.cpp +++ b/src/frontends/onnx/frontend/src/core/graph_iterator_proto.cpp @@ -16,10 +16,12 @@ #include #include "decoder_proto.hpp" +#include "openvino/core/type/element_iterator.hpp" #include "openvino/frontend/graph_iterator.hpp" #include "openvino/frontend/onnx/graph_iterator.hpp" #include "openvino/util/file_util.hpp" #include "openvino/util/wstring_convert_util.hpp" +#include "utils/tensor_external_data.hpp" namespace { // THis is copied from utils/common.hpp @@ -76,95 +78,34 @@ namespace { bool extract_tensor_external_data(ov::frontend::onnx::TensorMetaInfo& tensor_meta_info, const TensorProto* tensor_info, GraphIteratorProto* graph_iterator) { - std::string ext_location{}; - uint64_t ext_data_offset = 0; - uint64_t ext_data_length = 0; - std::string m_sha1_digest{}; // for future use - for (const auto& entry : tensor_info->external_data()) { - if (entry.key() == "location") { - ext_location = ov::util::sanitize_path(entry.value()); - } else if (entry.key() == "offset") { - ext_data_offset = std::stoull(entry.value()); - } else if (entry.key() == "length") { - ext_data_length = std::stoull(entry.value()); - } else if (entry.key() == "checksum") { - m_sha1_digest = entry.value(); - } - } - const auto full_path = - ov::util::get_absolute_file_path(ov::util::path_join({graph_iterator->get_model_dir(), ext_location}).string()); - const int64_t file_size = ov::util::file_size(full_path); - if ((file_size <= 0 && ext_data_length > 0) || - ext_data_offset + ext_data_length > static_cast(file_size)) { - // not_existed_file.data, offset: 4096, 
data_length: 16) - std::stringstream ss; - ss << "Invalid usage of method for externally stored data in file (" << ext_location; - ss << ", offset: " << ext_data_offset << ", data_length: " << ext_data_length << ")"; - throw std::runtime_error(ss.str()); + const auto ext_data = detail::TensorExternalData(*tensor_info); + if (ext_data.data_location() == detail::ORT_MEM_ADDR) { + tensor_meta_info.m_buffer = ext_data.load_external_mem_data(); + } else if (graph_iterator->get_mmap_cache()) { + tensor_meta_info.m_buffer = + ext_data.load_external_mmap_data(graph_iterator->get_model_dir(), graph_iterator->get_mmap_cache()); + } else { + tensor_meta_info.m_buffer = ext_data.load_external_data(graph_iterator->get_model_dir()); } - auto memory_mode = graph_iterator->get_memory_management_mode(); - if (ext_location == "*/_ORT_MEM_ADDR_/*") { - // Specific ONNX Runtime Case when it passes a model with self-managed data - tensor_meta_info.m_is_raw = true; - tensor_meta_info.m_tensor_data = reinterpret_cast(ext_data_offset); - tensor_meta_info.m_tensor_data_size = ext_data_length; - return true; - } else if (memory_mode == External_MMAP) { - auto cache = graph_iterator->get_mmap_cache(); - auto cached_mapped_memory = cache->find(full_path); - std::shared_ptr mapped_memory; - if (cached_mapped_memory != cache->end()) { - mapped_memory = cached_mapped_memory->second; - } else { - mapped_memory = ov::load_mmap_object(full_path); - (*cache)[full_path] = mapped_memory; - } - tensor_meta_info.m_is_raw = true; - tensor_meta_info.m_tensor_data = - static_cast(static_cast(mapped_memory->data() + ext_data_offset)); - tensor_meta_info.m_tensor_data_size = - ext_data_length > 0 ? ext_data_length : static_cast(file_size) - ext_data_length; - return true; - } else if (memory_mode == External_Stream) { - auto cache = graph_iterator->get_stream_cache(); - auto cached_stream = cache->find(full_path); - std::shared_ptr external_data_stream; - if (cached_stream != cache->end()) { - external_data_stream = cached_stream->second; - } else { - external_data_stream = { - new std::ifstream(full_path.c_str(), std::ios::binary | std::ios::in | std::ios::ate), - [](std::ifstream* p) { - p->close(); - delete p; - }}; - (*cache)[full_path] = external_data_stream; - } - - if (external_data_stream->fail() || !external_data_stream->good()) { - throw std::runtime_error("Failed to open external data stream"); - } + return tensor_meta_info.m_buffer != nullptr; +} - tensor_meta_info.m_is_raw = true; - tensor_meta_info.m_tensor_data_size = - ext_data_length > 0 ? 
ext_data_length : static_cast(file_size) - ext_data_length; - uint8_t* data_ptr = graph_iterator->allocate_data(tensor_meta_info.m_tensor_data_size).get(); - tensor_meta_info.m_tensor_data = data_ptr; - - // default value of m_offset is 0 - external_data_stream->seekg(ext_data_offset, std::ios::beg); - - external_data_stream->read(static_cast(static_cast(data_ptr)), - tensor_meta_info.m_tensor_data_size); - return true; - } else if (memory_mode == Internal_MMAP || memory_mode == Internal_Stream) { - tensor_meta_info.m_external_location = std::make_shared(full_path); - tensor_meta_info.m_tensor_data = reinterpret_cast(ext_data_offset); - tensor_meta_info.m_tensor_data_size = ext_data_length; - return true; - } else { - throw std::runtime_error("Unsupported memory management mode"); +template +std::shared_ptr make_buffer_from_container_using_cast(const Container& container) { + auto buffer = std::make_shared(container.size() * sizeof(T)); + T* ptr = buffer->template get_ptr(); + size_t idx = 0; + for (const auto& elem : container) { + ptr[idx++] = static_cast(elem); } + return buffer; +} + +template +std::shared_ptr make_buffer_from_container(const Container& container) { + auto buffer = std::make_shared(container.size() * sizeof(T)); + std::copy(container.begin(), container.end(), buffer->template get_ptr()); + return buffer; } } // namespace @@ -173,8 +114,6 @@ ov::frontend::onnx::TensorMetaInfo extract_tensor_meta_info(const TensorProto* t GraphIteratorProto* graph_iterator) { auto graph_def = graph_iterator->get_graph(); ov::frontend::onnx::TensorMetaInfo tensor_meta_info{}; - tensor_meta_info.m_external_location = nullptr; - tensor_meta_info.m_is_raw = false; if ((tensor_info == nullptr && value_info == nullptr) || graph_def == nullptr) { throw std::runtime_error("Wrong usage"); } @@ -194,13 +133,9 @@ ov::frontend::onnx::TensorMetaInfo extract_tensor_meta_info(const TensorProto* t } const auto& value_type = value_info->type().tensor_type(); if (value_type.has_shape()) { - std::vector dims{}; + std::vector dims; for (const auto& dim : value_type.shape().dim()) { - if (dim.has_dim_value()) { - dims.push_back(dim.dim_value()); - } else { - dims.push_back(-1); - } + dims.push_back(dim.has_dim_value() ? dim.dim_value() : -1); } tensor_meta_info.m_partial_shape = ov::PartialShape{dims}; } else { @@ -212,6 +147,7 @@ ov::frontend::onnx::TensorMetaInfo extract_tensor_meta_info(const TensorProto* t tensor_meta_info.m_element_type = ov::element::dynamic; } } + int tensor_size = 0; if (tensor_info != nullptr) { tensor_meta_info.m_tensor_name = tensor_info->has_name() ? 
&tensor_info->name() : &empty_name; std::vector dims_vec{tensor_info->dims().begin(), tensor_info->dims().end()}; @@ -221,70 +157,155 @@ ov::frontend::onnx::TensorMetaInfo extract_tensor_meta_info(const TensorProto* t if (tensor_info->has_data_location() && tensor_info->data_location() == TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL) { if (extract_tensor_external_data(tensor_meta_info, tensor_info, graph_iterator)) { + auto element_count = tensor_meta_info.m_buffer->size() / tensor_meta_info.m_element_type.size(); + if (ov::element::is_nibble_type(tensor_meta_info.m_element_type)) { + element_count *= 2; // Each byte contains 2 data items, so byte size must be multiplied + } + if (element_count != ov::shape_size(tensor_meta_info.m_partial_shape.get_shape())) { + FRONT_END_THROW( + "The size of the external data file does not match the byte size of an initializer '" + + *tensor_meta_info.m_tensor_name + "' in the model"); + } return tensor_meta_info; } throw std::runtime_error("Unsupported method for externally stored data"); } - switch (tensor_info->data_type()) { - case TensorProto_DataType::TensorProto_DataType_FLOAT: - tensor_meta_info.m_tensor_data = - static_cast(static_cast(tensor_info->float_data().data())); - tensor_meta_info.m_tensor_data_size = tensor_info->float_data_size(); - break; - case TensorProto_DataType::TensorProto_DataType_INT4: - case TensorProto_DataType::TensorProto_DataType_INT8: - case TensorProto_DataType::TensorProto_DataType_INT16: - case TensorProto_DataType::TensorProto_DataType_INT32: - case TensorProto_DataType::TensorProto_DataType_UINT4: - case TensorProto_DataType::TensorProto_DataType_UINT8: - case TensorProto_DataType::TensorProto_DataType_UINT16: - case TensorProto_DataType::TensorProto_DataType_BOOL: - case TensorProto_DataType::TensorProto_DataType_BFLOAT16: - case TensorProto_DataType::TensorProto_DataType_FLOAT16: - case TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN: - case TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2: - tensor_meta_info.m_tensor_data = - static_cast(static_cast(tensor_info->int32_data().data())); - tensor_meta_info.m_tensor_data_size = tensor_info->int32_data_size(); - break; - case TensorProto_DataType::TensorProto_DataType_INT64: - tensor_meta_info.m_tensor_data = - static_cast(static_cast(tensor_info->int64_data().data())); - tensor_meta_info.m_tensor_data_size = tensor_info->int64_data_size(); - break; - case TensorProto_DataType::TensorProto_DataType_UINT32: - case TensorProto_DataType::TensorProto_DataType_UINT64: - tensor_meta_info.m_tensor_data = - static_cast(static_cast(tensor_info->uint64_data().data())); - tensor_meta_info.m_tensor_data_size = tensor_info->uint64_data_size(); - break; - case TensorProto_DataType::TensorProto_DataType_DOUBLE: - tensor_meta_info.m_tensor_data = - static_cast(static_cast(tensor_info->double_data().data())); - tensor_meta_info.m_tensor_data_size = tensor_info->double_data_size(); - break; - case TensorProto_DataType::TensorProto_DataType_STRING: - tensor_meta_info.m_tensor_data_any = - std::vector(tensor_info->string_data().begin(), tensor_info->string_data().end()); - tensor_meta_info.m_tensor_data_size = tensor_info->string_data_size(); - break; - default: - throw std::runtime_error("Unsupported type " + - ::ONNX_NAMESPACE::TensorProto_DataType_Name(tensor_info->data_type())); - break; - } - // Looks like raw_data has bigger priority. 
but not 100% sure - if (tensor_meta_info.m_tensor_data == nullptr && tensor_info->has_raw_data()) { - tensor_meta_info.m_tensor_data = - static_cast(static_cast(tensor_info->raw_data().data())); - tensor_meta_info.m_tensor_data_size = tensor_info->raw_data().size(); - tensor_meta_info.m_is_raw = true; + + if (tensor_info->has_segment()) { + FRONT_END_THROW("Loading segments isn't supported"); + } else if (tensor_info->has_raw_data()) { + tensor_size = + static_cast(tensor_info->raw_data().size() * 8 / tensor_meta_info.m_element_type.bitwidth()); + tensor_meta_info.m_buffer = std::make_shared(tensor_info->raw_data().size()); + std::copy(tensor_info->raw_data().begin(), + tensor_info->raw_data().end(), + tensor_meta_info.m_buffer->get_ptr()); + } else { + switch (tensor_info->data_type()) { + case TensorProto_DataType::TensorProto_DataType_INT32: + tensor_size = tensor_info->int32_data_size(); + tensor_meta_info.m_buffer = make_buffer_from_container(tensor_info->int32_data()); + break; + case TensorProto_DataType::TensorProto_DataType_INT4: + case TensorProto_DataType::TensorProto_DataType_INT8: + tensor_size = tensor_info->int32_data_size(); + tensor_meta_info.m_buffer = make_buffer_from_container_using_cast(tensor_info->int32_data()); + break; + case TensorProto_DataType::TensorProto_DataType_INT16: + tensor_size = tensor_info->int32_data_size(); + tensor_meta_info.m_buffer = make_buffer_from_container_using_cast(tensor_info->int32_data()); + break; + case TensorProto_DataType::TensorProto_DataType_UINT4: + case TensorProto_DataType::TensorProto_DataType_UINT8: + tensor_size = tensor_info->int32_data_size(); + tensor_meta_info.m_buffer = make_buffer_from_container_using_cast(tensor_info->int32_data()); + break; + case TensorProto_DataType::TensorProto_DataType_UINT16: + tensor_size = tensor_info->int32_data_size(); + tensor_meta_info.m_buffer = make_buffer_from_container_using_cast(tensor_info->int32_data()); + break; + case TensorProto_DataType::TensorProto_DataType_BOOL: + tensor_size = tensor_info->int32_data_size(); + tensor_meta_info.m_buffer = make_buffer_from_container_using_cast(tensor_info->int32_data()); + break; + case TensorProto_DataType::TensorProto_DataType_INT64: + tensor_size = tensor_info->int64_data_size(); + tensor_meta_info.m_buffer = make_buffer_from_container(tensor_info->int64_data()); + break; + case TensorProto_DataType::TensorProto_DataType_UINT32: + tensor_size = tensor_info->uint64_data_size(); + tensor_meta_info.m_buffer = make_buffer_from_container_using_cast(tensor_info->uint64_data()); + break; + case TensorProto_DataType::TensorProto_DataType_UINT64: + tensor_size = tensor_info->uint64_data_size(); + tensor_meta_info.m_buffer = make_buffer_from_container(tensor_info->uint64_data()); + break; + case TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN: { + tensor_size = tensor_info->int32_data_size(); + auto data = std::make_shared>(); + data->reserve(tensor_size); + std::transform(tensor_info->int32_data().begin(), + tensor_info->int32_data().end(), + std::back_inserter(*data), + [](int32_t elem) { + return ov::float8_e4m3::from_bits(static_cast(elem)); + }); + tensor_meta_info.m_buffer = + std::make_shared>>>( + reinterpret_cast(data->data()), + data->size() * sizeof(ov::float8_e4m3), + data); + break; + } + case TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2: { + tensor_size = tensor_info->int32_data_size(); + auto data = std::make_shared>(); + data->reserve(tensor_size); + std::transform(tensor_info->int32_data().begin(), + 
tensor_info->int32_data().end(), + std::back_inserter(*data), + [](int32_t elem) { + return ov::float8_e5m2::from_bits(static_cast(elem)); + }); + tensor_meta_info.m_buffer = + std::make_shared>>>( + reinterpret_cast(data->data()), + data->size() * sizeof(ov::float8_e5m2), + data); + break; + } + case TensorProto_DataType::TensorProto_DataType_FLOAT16: { + tensor_size = tensor_info->int32_data_size(); + auto data = std::make_shared>(); + data->reserve(tensor_size); + std::transform(tensor_info->int32_data().begin(), + tensor_info->int32_data().end(), + std::back_inserter(*data), + [](int32_t elem) { + return ov::float16::from_bits(static_cast(elem)); + }); + tensor_meta_info.m_buffer = + std::make_shared>>>( + reinterpret_cast(data->data()), + data->size() * sizeof(ov::float16), + data); + break; + } + case TensorProto_DataType::TensorProto_DataType_BFLOAT16: + tensor_size = tensor_info->int32_data_size(); + tensor_meta_info.m_buffer = + make_buffer_from_container_using_cast(tensor_info->int32_data()); + break; + case TensorProto_DataType::TensorProto_DataType_FLOAT: + tensor_size = tensor_info->float_data_size(); + tensor_meta_info.m_buffer = make_buffer_from_container(tensor_info->float_data()); + break; + case TensorProto_DataType::TensorProto_DataType_DOUBLE: + tensor_size = tensor_info->double_data_size(); + tensor_meta_info.m_buffer = make_buffer_from_container(tensor_info->double_data()); + break; + case TensorProto_DataType::TensorProto_DataType_STRING: { + tensor_size = tensor_info->string_data_size(); + auto data = std::make_shared>(tensor_info->string_data().begin(), + tensor_info->string_data().end()); + tensor_meta_info.m_buffer = + std::make_shared>>>( + reinterpret_cast(data->data()), + data->size() * sizeof(std::string), + data); + break; + } + default: + throw std::runtime_error("Unsupported type " + + ::ONNX_NAMESPACE::TensorProto_DataType_Name(tensor_info->data_type())); + break; + } } } if (tensor_meta_info.m_tensor_name == nullptr) { tensor_meta_info.m_tensor_name = &empty_name; } - if (tensor_meta_info.m_partial_shape == ov::Shape{0} && tensor_meta_info.m_tensor_data_size == 1) { + if (tensor_meta_info.m_partial_shape == ov::Shape{0} && tensor_size == 1) { tensor_meta_info.m_partial_shape = ov::Shape{}; } return tensor_meta_info; diff --git a/src/frontends/onnx/frontend/src/core/node.cpp b/src/frontends/onnx/frontend/src/core/node.cpp index f52a5254eb57ea..c8d90bb0090457 100644 --- a/src/frontends/onnx/frontend/src/core/node.cpp +++ b/src/frontends/onnx/frontend/src/core/node.cpp @@ -797,11 +797,7 @@ Tensor Node::get_attribute_value(const std::string& name) const { tensor_meta_info.m_partial_shape, tensor_meta_info.m_element_type, std::vector{*tensor_meta_info.m_tensor_name}, - tensor_meta_info.m_tensor_data, - tensor_meta_info.m_tensor_data_size, - tensor_meta_info.m_tensor_data_any, - tensor_meta_info.m_external_location, - tensor_meta_info.m_is_raw); + tensor_meta_info.m_buffer); return {tensor_place}; } FRONT_END_NOT_IMPLEMENTED(get_attribute_value); @@ -824,11 +820,7 @@ SparseTensor Node::get_attribute_value(const std::string& name) const { values_meta_info.m_partial_shape, values_meta_info.m_element_type, std::vector{*values_meta_info.m_tensor_name}, - values_meta_info.m_tensor_data, - values_meta_info.m_tensor_data_size, - values_meta_info.m_tensor_data_any, - values_meta_info.m_external_location, - values_meta_info.m_is_raw); + values_meta_info.m_buffer); auto indices_decoder = std::dynamic_pointer_cast(sparse_tensor_info.m_indices); @@ -838,11 +830,7 @@ 
SparseTensor Node::get_attribute_value(const std::string& name) const { indices_meta_info.m_partial_shape, indices_meta_info.m_element_type, std::vector{*indices_meta_info.m_tensor_name}, - indices_meta_info.m_tensor_data, - indices_meta_info.m_tensor_data_size, - indices_meta_info.m_tensor_data_any, - indices_meta_info.m_external_location, - indices_meta_info.m_is_raw); + indices_meta_info.m_buffer); return {values_place, indices_place, sparse_tensor_info.m_partial_shape}; } FRONT_END_NOT_IMPLEMENTED(get_attribute_value); diff --git a/src/frontends/onnx/frontend/src/core/tensor.cpp b/src/frontends/onnx/frontend/src/core/tensor.cpp index 62f7fecdb3428a..86062e356b6863 100644 --- a/src/frontends/onnx/frontend/src/core/tensor.cpp +++ b/src/frontends/onnx/frontend/src/core/tensor.cpp @@ -33,7 +33,9 @@ std::vector Tensor::get_data() const { return get_external_data(); } if (m_tensor_place != nullptr) { - return detail::__get_data(m_tensor_place->get_data(), m_tensor_place->get_data_size()); + std::shared_ptr buffer = m_tensor_place->get_buffer(); + return std::vector(buffer->get_ptr(), + buffer->get_ptr() + buffer->size() / sizeof(double)); } if (m_tensor_proto->has_raw_data()) { return detail::__get_raw_data(m_tensor_proto->raw_data(), m_tensor_proto->data_type()); @@ -50,7 +52,8 @@ std::vector Tensor::get_data() const { return get_external_data(); } if (m_tensor_place != nullptr) { - return detail::__get_data(m_tensor_place->get_data(), m_tensor_place->get_data_size()); + std::shared_ptr buffer = m_tensor_place->get_buffer(); + return std::vector(buffer->get_ptr(), buffer->get_ptr() + buffer->size() / sizeof(float)); } if (m_tensor_proto->has_raw_data()) { return detail::__get_raw_data(m_tensor_proto->raw_data(), m_tensor_proto->data_type()); @@ -67,23 +70,9 @@ std::vector Tensor::get_data() const { return get_external_data(); } if (m_tensor_place != nullptr) { - if (m_tensor_place->is_raw()) { - return detail::__get_data(m_tensor_place->get_data(), - m_tensor_place->get_data_size()); - } - using std::begin; - using std::end; - - const auto& int32_data = std::vector( - static_cast(m_tensor_place->get_data()), - static_cast(m_tensor_place->get_data()) + m_tensor_place->get_data_size()); - std::vector float16_data; - float16_data.reserve(int32_data.size()); - std::transform(begin(int32_data), end(int32_data), std::back_inserter(float16_data), [](int32_t elem) { - return ov::float16::from_bits(static_cast(elem)); - }); - - return detail::__get_data(float16_data); + std::shared_ptr buffer = m_tensor_place->get_buffer(); + return std::vector(buffer->get_ptr(), + buffer->get_ptr() + buffer->size() / sizeof(ov::float16)); } if (m_tensor_proto->has_raw_data()) { return detail::__get_raw_data(m_tensor_proto->raw_data(), m_tensor_proto->data_type()); @@ -110,11 +99,9 @@ std::vector Tensor::get_data() const { return get_external_data(); } if (m_tensor_place != nullptr) { - if (m_tensor_place->is_raw()) { - return detail::__get_data(m_tensor_place->get_data(), - m_tensor_place->get_data_size()); - } - return detail::__get_data(m_tensor_place->get_data(), m_tensor_place->get_data_size()); + std::shared_ptr buffer = m_tensor_place->get_buffer(); + return std::vector(buffer->get_ptr(), + buffer->get_ptr() + buffer->size() / sizeof(ov::bfloat16)); } if (m_tensor_proto->has_raw_data()) { return detail::__get_raw_data(m_tensor_proto->raw_data(), m_tensor_proto->data_type()); @@ -131,10 +118,9 @@ std::vector Tensor::get_data() const { return get_external_data(); } if (m_tensor_place != nullptr) { - if 
(m_tensor_place->is_raw()) { - return detail::__get_data(m_tensor_place->get_data(), m_tensor_place->get_data_size()); - } - return detail::__get_data(m_tensor_place->get_data(), m_tensor_place->get_data_size()); + std::shared_ptr buffer = m_tensor_place->get_buffer(); + return std::vector(buffer->get_ptr(), + buffer->get_ptr() + buffer->size() / sizeof(int8_t)); } if (m_tensor_proto->has_raw_data()) { return detail::__get_raw_data(m_tensor_proto->raw_data(), m_tensor_proto->data_type()); @@ -152,10 +138,9 @@ std::vector Tensor::get_data() const { return get_external_data(); } if (m_tensor_place != nullptr) { - if (m_tensor_place->is_raw()) { - return detail::__get_data(m_tensor_place->get_data(), m_tensor_place->get_data_size()); - } - return detail::__get_data(m_tensor_place->get_data(), m_tensor_place->get_data_size()); + std::shared_ptr buffer = m_tensor_place->get_buffer(); + return std::vector(buffer->get_ptr(), + buffer->get_ptr() + buffer->size() / sizeof(int16_t)); } if (m_tensor_proto->has_raw_data()) { return detail::__get_raw_data(m_tensor_proto->raw_data(), m_tensor_proto->data_type()); @@ -172,7 +157,9 @@ std::vector Tensor::get_data() const { return get_external_data(); } if (m_tensor_place != nullptr) { - return detail::__get_data(m_tensor_place->get_data(), m_tensor_place->get_data_size()); + std::shared_ptr buffer = m_tensor_place->get_buffer(); + return std::vector(buffer->get_ptr(), + buffer->get_ptr() + buffer->size() / sizeof(int32_t)); } if (m_tensor_proto->has_raw_data()) { return detail::__get_raw_data(m_tensor_proto->raw_data(), m_tensor_proto->data_type()); @@ -189,7 +176,9 @@ std::vector Tensor::get_data() const { return get_external_data(); } if (m_tensor_place != nullptr) { - return detail::__get_data(m_tensor_place->get_data(), m_tensor_place->get_data_size()); + std::shared_ptr buffer = m_tensor_place->get_buffer(); + return std::vector(buffer->get_ptr(), + buffer->get_ptr() + buffer->size() / sizeof(int64_t)); } if (m_tensor_proto->has_raw_data()) { return detail::__get_raw_data(m_tensor_proto->raw_data(), m_tensor_proto->data_type()); @@ -206,10 +195,9 @@ std::vector Tensor::get_data() const { return get_external_data(); } if (m_tensor_place != nullptr) { - if (m_tensor_place->is_raw()) { - return detail::__get_data(m_tensor_place->get_data(), m_tensor_place->get_data_size()); - } - return detail::__get_data(m_tensor_place->get_data(), m_tensor_place->get_data_size()); + std::shared_ptr buffer = m_tensor_place->get_buffer(); + return std::vector(buffer->get_ptr(), + buffer->get_ptr() + buffer->size() / sizeof(uint8_t)); } if (m_tensor_proto->has_raw_data()) { return detail::__get_raw_data(m_tensor_proto->raw_data(), m_tensor_proto->data_type()); @@ -227,10 +215,9 @@ std::vector Tensor::get_data() const { return get_external_data(); } if (m_tensor_place != nullptr) { - if (m_tensor_place->is_raw()) { - return detail::__get_data(m_tensor_place->get_data(), m_tensor_place->get_data_size()); - } - return detail::__get_data(m_tensor_place->get_data(), m_tensor_place->get_data_size()); + std::shared_ptr buffer = m_tensor_place->get_buffer(); + return std::vector(buffer->get_ptr(), + buffer->get_ptr() + buffer->size() / sizeof(uint16_t)); } if (m_tensor_proto->has_raw_data()) { return detail::__get_raw_data(m_tensor_proto->raw_data(), m_tensor_proto->data_type()); @@ -247,10 +234,9 @@ std::vector Tensor::get_data() const { return get_external_data(); } if (m_tensor_place != nullptr) { - if (m_tensor_place->is_raw()) { - return 
detail::__get_data(m_tensor_place->get_data(), m_tensor_place->get_data_size()); - } - return detail::__get_data(m_tensor_place->get_data(), m_tensor_place->get_data_size()); + std::shared_ptr buffer = m_tensor_place->get_buffer(); + return std::vector(buffer->get_ptr(), + buffer->get_ptr() + buffer->size() / sizeof(uint32_t)); } if (m_tensor_proto->has_raw_data()) { return detail::__get_raw_data(m_tensor_proto->raw_data(), m_tensor_proto->data_type()); @@ -267,7 +253,9 @@ std::vector Tensor::get_data() const { return get_external_data(); } if (m_tensor_place != nullptr) { - return detail::__get_data(m_tensor_place->get_data(), m_tensor_place->get_data_size()); + std::shared_ptr buffer = m_tensor_place->get_buffer(); + return std::vector(buffer->get_ptr(), + buffer->get_ptr() + buffer->size() / sizeof(uint64_t)); } if (m_tensor_proto->has_raw_data()) { return detail::__get_raw_data(m_tensor_proto->raw_data(), m_tensor_proto->data_type()); @@ -284,12 +272,7 @@ std::vector Tensor::get_data() const { return get_external_data(); } if (m_tensor_place != nullptr) { - if (m_tensor_place->is_raw()) { - return detail::__get_data(m_tensor_place->get_data(), - m_tensor_place->get_data_size()); - } - return detail::__get_data(m_tensor_place->get_data(), - m_tensor_place->get_data_size()); + FRONT_END_NOT_IMPLEMENTED(get_data); } if (m_tensor_proto->has_raw_data()) { return detail::__get_raw_data(m_tensor_proto->raw_data(), m_tensor_proto->data_type()); @@ -316,12 +299,7 @@ std::vector Tensor::get_data() const { return get_external_data(); } if (m_tensor_place != nullptr) { - if (m_tensor_place->is_raw()) { - return detail::__get_data(m_tensor_place->get_data(), - m_tensor_place->get_data_size()); - } - return detail::__get_data(m_tensor_place->get_data(), - m_tensor_place->get_data_size()); + FRONT_END_NOT_IMPLEMENTED(get_data); } if (m_tensor_proto->has_raw_data()) { return detail::__get_raw_data(m_tensor_proto->raw_data(), m_tensor_proto->data_type()); @@ -350,10 +328,8 @@ std::vector Tensor::get_data() const { return get_external_data(); } if (m_tensor_place != nullptr) { - if (m_tensor_place->is_raw()) { - return detail::__get_data(m_tensor_place->get_data(), m_tensor_place->get_data_size()); - } - return detail::__get_data(m_tensor_place->get_data(), m_tensor_place->get_data_size()); + std::shared_ptr buffer = m_tensor_place->get_buffer(); + return std::vector(buffer->get_ptr(), buffer->get_ptr() + buffer->size() / sizeof(char)); } if (m_tensor_proto->has_raw_data()) { return detail::__get_raw_data(m_tensor_proto->raw_data(), m_tensor_proto->data_type()); @@ -370,10 +346,7 @@ std::vector Tensor::get_data() const { FRONT_END_THROW("External strings are not supported"); } if (m_tensor_place != nullptr) { - FRONT_END_GENERAL_CHECK(!m_tensor_place->is_raw(), "Loading strings from raw data isn't supported"); - FRONT_END_GENERAL_CHECK(m_tensor_place->get_data_any().is>(), - "Tensor data type mismatch for strings"); - return m_tensor_place->get_data_any().as>(); + FRONT_END_NOT_IMPLEMENTED(get_data); } if (m_tensor_proto->has_raw_data()) { FRONT_END_THROW("Loading strings from raw data isn't supported"); @@ -390,20 +363,30 @@ std::shared_ptr Tensor::get_ov_constant() const { FRONT_END_THROW("Loading segments isn't supported"); } ov::element::Type ov_type = get_ov_type(); - size_t element_count = get_data_size(); - if (ov::element::is_nibble_type(ov_type)) { - element_count *= 2; // Each byte contains 2 data items - if (shape_size(m_shape) % 2) { - // Odd elements - element_count--; + size_t 
element_count = 0; + if (m_tensor_place != nullptr) { + element_count = ov::shape_size(m_shape); + } else { + element_count = get_data_size(); + if (ov::element::is_nibble_type(ov_type)) { + element_count *= 2; // Each byte contains 2 data items + if (shape_size(m_shape) % 2) { + // Odd elements + element_count--; + } } } - if (has_external_data()) { - const auto ext_data = m_tensor_place != nullptr - ? detail::TensorExternalData(*m_tensor_place->get_data_location(), - reinterpret_cast(m_tensor_place->get_data()), - m_tensor_place->get_data_size()) - : detail::TensorExternalData(*m_tensor_proto); + if (m_tensor_place != nullptr) { + FRONT_END_GENERAL_CHECK(m_tensor_place->get_buffer() != nullptr, + "TensorPlace buffer is null for initializer '" + get_name() + "'"); + auto buffer = m_tensor_place->get_buffer(); + if (buffer->size() * 8 / ov_type.bitwidth() != element_count) { + constant = common::make_failsafe_constant(ov_type); + } else { + constant = std::make_shared(ov_type, m_shape, buffer); + } + } else if (has_external_data()) { + const auto ext_data = detail::TensorExternalData(*m_tensor_proto); if (ext_data.data_location() == detail::ORT_MEM_ADDR) { constant = std::make_shared(ov_type, m_shape, ext_data.load_external_mem_data()); } else if (m_mmap_cache) { @@ -415,10 +398,10 @@ std::shared_ptr Tensor::get_ov_constant() const { constant = std::make_shared(ov_type, m_shape, ext_data.load_external_data(m_model_dir)); } - // ext_data.size() might be zero, need to recalc by using info about actually red data (for byte-size) + // ext_data.size() might be zero, need to recalculate by using info about actually read data (for byte-size) element_count = constant->get_byte_size() / ov_type.size(); if (ov::element::is_nibble_type(ov_type)) { - element_count *= 2; // Each byte contains 2 data items, so byte size must be multiplicated + element_count *= 2; // Each byte contains 2 data items, so byte size must be multiplied } if (element_count != ov::shape_size(m_shape) || (ext_data.size() != 0 && constant->get_byte_size() != ext_data.size())) { @@ -478,62 +461,11 @@ std::shared_ptr Tensor::get_ov_constant() const { "BOOL, BFLOAT16, FLOAT8E4M3FN, FLOAT8E5M2, FLOAT, FLOAT16, DOUBLE, INT4, INT8, INT16, INT32, INT64, " "UINT4, UINT8, UINT16, UINT32, UINT64, STRING"); } - } else if (element_count == shape_size(m_shape) && m_tensor_place != nullptr) { - switch (m_tensor_place->get_element_type()) { - case ov::element::f32: - case ov::element::f64: - case ov::element::i32: - case ov::element::i64: - case ov::element::u32: - case ov::element::u64: - constant = std::make_shared(ov_type, m_shape, get_data_ptr()); - break; - case ov::element::i4: - constant = std::make_shared(ov_type, m_shape, get_data().data()); - break; - case ov::element::i8: - constant = std::make_shared(ov_type, m_shape, get_data().data()); - break; - case ov::element::i16: - constant = std::make_shared(ov_type, m_shape, get_data().data()); - break; - case ov::element::u4: - constant = std::make_shared(ov_type, m_shape, get_data().data()); - break; - case ov::element::u8: - constant = std::make_shared(ov_type, m_shape, get_data().data()); - break; - case ov::element::u16: - constant = std::make_shared(ov_type, m_shape, get_data().data()); - break; - case ov::element::boolean: - constant = std::make_shared(ov_type, m_shape, get_data().data()); - break; - case ov::element::bf16: - constant = std::make_shared(ov_type, m_shape, get_data().data()); - break; - case ov::element::f16: - constant = std::make_shared(ov_type, m_shape, 
get_data().data()); - break; - case ov::element::f8e4m3: - constant = std::make_shared(ov_type, m_shape, get_data().data()); - break; - case ov::element::f8e5m2: - constant = std::make_shared(ov_type, m_shape, get_data().data()); - break; - case ov::element::string: - constant = std::make_shared(ov_type, m_shape, get_data().data()); - break; - default: - ONNX_UNSUPPORTED_DATA_TYPE( - m_tensor_proto->data_type(), - "BOOL, BFLOAT16, FLOAT8E4M3FN, FLOAT8E5M2, FLOAT, FLOAT16, DOUBLE, INT4, INT8, INT16, INT32, INT64, " - "UINT4, UINT8, UINT16, UINT32, UINT64, STRING"); - } } else if (element_count == 0 && m_shape.size() == 0) { constant = common::make_failsafe_constant(ov_type); } else { - FRONT_END_THROW("Tensor shape doesn't match data size"); + FRONT_END_THROW("Tensor shape doesn't match data size: " + std::to_string(element_count) + " vs " + + std::to_string(ov::shape_size(m_shape))); } if (m_tensor_proto != nullptr && m_tensor_proto->has_name()) { diff --git a/src/frontends/onnx/frontend/src/core/tensor.hpp b/src/frontends/onnx/frontend/src/core/tensor.hpp index 28a6cbe3773b1c..14d6dae4876916 100644 --- a/src/frontends/onnx/frontend/src/core/tensor.hpp +++ b/src/frontends/onnx/frontend/src/core/tensor.hpp @@ -52,19 +52,6 @@ inline std::vector __get_data(const Container& container) { #endif } -template -inline std::vector __get_data(const void* data, const size_t data_size) { -#if defined(_MSC_VER) -# pragma warning(push) -# pragma warning(disable : 4267) -# pragma warning(disable : 4244) -#endif - return std::vector(static_cast(data), static_cast(data) + data_size); -#if defined(_MSC_VER) -# pragma warning(pop) -#endif -} - template inline std::vector __get_raw_data(const std::string& raw_data, int onnx_data_type) { auto it = reinterpret_cast(raw_data.data()); @@ -83,18 +70,10 @@ class TensorONNXPlace : public ov::frontend::onnx::TensorPlace { const ov::PartialShape& pshape, ov::element::Type type, const std::vector& names, - const void* data, - const size_t data_size, - const ov::Any& data_any, - std::shared_ptr data_location, - const bool is_raw) + const std::shared_ptr& buffer) : ov::frontend::onnx::TensorPlace(input_model, pshape, type, names), m_input_model(input_model), - m_data(data), - m_data_any(data_any), - m_data_size(data_size), - m_data_location(data_location), - m_is_raw(is_raw) {}; + m_buffer(buffer) {}; void translate(ov::Output& output); @@ -119,24 +98,8 @@ class TensorONNXPlace : public ov::frontend::onnx::TensorPlace { m_output_idx = idx; } - const void* get_data() const { - return m_data; - } - - size_t get_data_size() const { - return m_data_size; - } - - const ov::Any get_data_any() const { - return m_data_any; - } - - std::shared_ptr get_data_location() const { - return m_data_location; - } - - bool is_raw() const { - return m_is_raw; + std::shared_ptr get_buffer() const { + return m_buffer; } detail::MappedMemoryHandles get_mmap_cache(); @@ -145,11 +108,7 @@ class TensorONNXPlace : public ov::frontend::onnx::TensorPlace { protected: int64_t m_input_idx = -1, m_output_idx = -1; const ov::frontend::InputModel& m_input_model; - const void* m_data; - ov::Any m_data_any; - size_t m_data_size; - std::shared_ptr m_data_location; - bool m_is_raw; + std::shared_ptr m_buffer; }; class Tensor { @@ -298,7 +257,7 @@ class Tensor { private: bool has_external_data() const { if (m_tensor_place != nullptr) { - return m_tensor_place->get_data_location() != nullptr; + return false; } return m_tensor_proto->has_data_location() && m_tensor_proto->data_location() == 
TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL; @@ -306,11 +265,10 @@ class Tensor { template std::vector get_external_data() const { - const auto ext_data = m_tensor_place != nullptr - ? detail::TensorExternalData(*m_tensor_place->get_data_location(), - reinterpret_cast(m_tensor_place->get_data()), - m_tensor_place->get_data_size()) - : detail::TensorExternalData(*m_tensor_proto); + if (m_tensor_place != nullptr) { + FRONT_END_NOT_IMPLEMENTED(get_external_data); + } + const auto ext_data = detail::TensorExternalData(*m_tensor_proto); std::shared_ptr buffer = nullptr; if (ext_data.data_location() == detail::ORT_MEM_ADDR) { buffer = ext_data.load_external_mem_data(); @@ -327,7 +285,7 @@ class Tensor { FRONT_END_THROW("Unexpected usage of method for externally stored data"); } if (m_tensor_place != nullptr) { - return m_tensor_place->get_data(); + FRONT_END_NOT_IMPLEMENTED(get_data_ptr); } if (m_tensor_proto->has_raw_data()) { @@ -350,12 +308,7 @@ class Tensor { size_t get_data_size() const { if (m_tensor_place != nullptr) { - if (m_tensor_place->is_raw()) { - return m_tensor_place->get_data_size() / - get_onnx_data_size(ov_to_onnx_data_type(m_tensor_place->get_element_type())); - } else { - return m_tensor_place->get_data_size(); - } + FRONT_END_NOT_IMPLEMENTED(get_data_size); } if (has_external_data()) { const auto ext_data = detail::TensorExternalData(*m_tensor_proto); diff --git a/src/frontends/onnx/frontend/src/input_model.cpp b/src/frontends/onnx/frontend/src/input_model.cpp index 6f96df6e04133f..43a5610fd15b11 100644 --- a/src/frontends/onnx/frontend/src/input_model.cpp +++ b/src/frontends/onnx/frontend/src/input_model.cpp @@ -651,11 +651,7 @@ std::shared_ptr decode_tensor_place( tensor_meta_info.m_partial_shape, tensor_meta_info.m_element_type, std::vector{*tensor_meta_info.m_tensor_name}, - tensor_meta_info.m_tensor_data, - tensor_meta_info.m_tensor_data_size, - tensor_meta_info.m_tensor_data_any, - tensor_meta_info.m_external_location, - tensor_meta_info.m_is_raw); + tensor_meta_info.m_buffer); return tensor_place; } @@ -688,7 +684,7 @@ void InputModel::InputModelONNXImpl::load_model() { tensor_place->set_input_index(tensor_decoder->get_input_idx()); tensor_place->set_output_index(tensor_decoder->get_output_idx()); // Constant with data has been found - if (tensor_place->get_data() != nullptr) + if (tensor_place->get_buffer() != nullptr) continue; auto name = tensor_place->get_names()[0]; if (m_tensor_places.count(name) == 0) { diff --git a/src/frontends/onnx/frontend/src/translate_session.cpp b/src/frontends/onnx/frontend/src/translate_session.cpp index 7d13cba166f69a..ec8076a0e8c231 100644 --- a/src/frontends/onnx/frontend/src/translate_session.cpp +++ b/src/frontends/onnx/frontend/src/translate_session.cpp @@ -81,7 +81,7 @@ void TranslateSession::translate_graph(const ov::frontend::InputModel::Ptr& inpu auto create_const_or_param = [&](const std::string& name, const std::shared_ptr& input_tensor) { std::shared_ptr node; - if (input_tensor->get_data_location() != nullptr || input_tensor->get_data() != nullptr) { + if (input_tensor->get_buffer() != nullptr) { Tensor tensor = Tensor(input_tensor); node = tensor.get_ov_constant(); } else if (input_tensor->get_partial_shape() == PartialShape{0}) { // empty constant diff --git a/src/frontends/onnx/tests/CMakeLists.txt b/src/frontends/onnx/tests/CMakeLists.txt index 54127c56e93bbc..876bf0c179529a 100644 --- a/src/frontends/onnx/tests/CMakeLists.txt +++ b/src/frontends/onnx/tests/CMakeLists.txt @@ -88,6 +88,7 @@ set(SRC 
     skip_tests_config.cpp
     ../frontend/src/core/graph_iterator_proto.cpp
     ../frontend/src/core/decoder_proto.cpp
+    ../frontend/src/utils/tensor_external_data.cpp
 )
 
 foreach(src IN LISTS SRC MULTI_TEST_SRC)
@@ -129,7 +130,10 @@ set_property(TEST ov_onnx_frontend_tests PROPERTY LABELS OV UNIT ONNX_FE)
 
 add_dependencies(ov_onnx_frontend_tests openvino_template_extension)
 
-target_include_directories(ov_onnx_frontend_tests PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
+target_include_directories(ov_onnx_frontend_tests PRIVATE
+    "${CMAKE_CURRENT_SOURCE_DIR}"
+    "${CMAKE_CURRENT_SOURCE_DIR}/../frontend/src"
+)
 
 target_compile_definitions(ov_onnx_frontend_tests PRIVATE
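
// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch: how the buffer-centric flow the
// diff introduces fits together. Initializer bytes end up in an
// ov::AlignedBuffer (either freshly allocated and filled with an element-wise
// cast, as make_buffer_from_container_using_cast does, or as an
// ov::SharedBuffer that keeps a backing std::vector alive, as the
// float16/float8 branches do), and that buffer is handed straight to an
// ov::op::v0::Constant. The helper names copy_to_buffer and wrap_vector are
// assumptions made for this sketch; only AlignedBuffer, SharedBuffer and the
// Constant-from-buffer overload come from the OpenVINO API used in the patch.

#include <memory>
#include <vector>

#include "openvino/op/constant.hpp"
#include "openvino/runtime/aligned_buffer.hpp"
#include "openvino/runtime/shared_buffer.hpp"

// Copy values into a new buffer, narrowing each element to T (protobuf stores
// e.g. INT8 initializers inside int32_data, so each element is cast down).
template <typename T, typename Container>
std::shared_ptr<ov::AlignedBuffer> copy_to_buffer(const Container& values) {
    auto buffer = std::make_shared<ov::AlignedBuffer>(values.size() * sizeof(T));
    T* dst = buffer->get_ptr<T>();
    size_t i = 0;
    for (const auto& v : values) {
        dst[i++] = static_cast<T>(v);
    }
    return buffer;
}

// Expose an existing vector as a buffer without copying; the SharedBuffer owns
// the shared_ptr, so the storage lives as long as any consumer of the buffer.
template <typename T>
std::shared_ptr<ov::AlignedBuffer> wrap_vector(const std::shared_ptr<std::vector<T>>& data) {
    return std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<T>>>>(
        reinterpret_cast<char*>(data->data()),
        data->size() * sizeof(T),
        data);
}

int main() {
    // An INT8 initializer whose values arrived as int32 protobuf entries.
    std::vector<int32_t> int32_storage{1, -2, 3, -4};
    auto buffer = copy_to_buffer<int8_t>(int32_storage);

    // The Constant is built directly on top of the buffer, with no extra copy,
    // mirroring what Tensor::get_ov_constant() now does with the TensorPlace buffer.
    auto constant = std::make_shared<ov::op::v0::Constant>(ov::element::i8, ov::Shape{4}, buffer);
    return constant->get_byte_size() == buffer->size() ? 0 : 1;
}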