diff --git a/components/core/src/clp/ffi/ir_stream/Serializer.cpp b/components/core/src/clp/ffi/ir_stream/Serializer.cpp index 01215eb9d..71925f567 100644 --- a/components/core/src/clp/ffi/ir_stream/Serializer.cpp +++ b/components/core/src/clp/ffi/ir_stream/Serializer.cpp @@ -16,6 +16,7 @@ #include "../../ir/types.hpp" #include "../../time_types.hpp" +#include "../../TransactionManager.hpp" #include "../../type_utils.hpp" #include "../encoding_methods.hpp" #include "../SchemaTree.hpp" @@ -145,6 +146,16 @@ template vector& output_buf ) -> bool; +/** + * Checks whether the given msgpack array can be serialized in the key-value pair IR format. + * @param array + * @return true if the array is serializable + * @return false if: + * - Any object inside the array has its type unsupported (`BIN` or `EXT`). + * - Any object inside the array is `MAP` and it has non-string keys. + */ +[[nodiscard]] auto is_msgpack_array_serializable(msgpack::object const& array) -> bool; + auto get_schema_tree_node_type_from_msgpack_val(msgpack::object const& val ) -> optional { optional ret_val; @@ -225,11 +236,56 @@ auto serialize_value_array( string& logtype_buf, vector& output_buf ) -> bool { + if (false == is_msgpack_array_serializable(val)) { + return false; + } std::ostringstream oss; oss << val; logtype_buf.clear(); return serialize_clp_string(oss.str(), logtype_buf, output_buf); } + +auto is_msgpack_array_serializable(msgpack::object const& array) -> bool { + vector validation_stack{&array}; + while (false == validation_stack.empty()) { + auto const* curr{validation_stack.back()}; + validation_stack.pop_back(); + if (msgpack::type::MAP == curr->type) { + // Validate maps + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) + auto const& as_map{curr->via.map}; + for (auto const& [key, value] : span{as_map.ptr, as_map.size}) { + if (msgpack::type::STR != key.type) { + return false; + } + if (msgpack::type::MAP == value.type || msgpack::type::ARRAY == value.type) { + validation_stack.push_back(&value); + } + } + continue; + } + + // Validate arrays + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) + auto const& as_array{curr->via.array}; + for (auto const& obj : span{as_array.ptr, as_array.size}) { + switch (obj.type) { + case msgpack::type::BIN: + case msgpack::type::EXT: + // Unsupported types + return false; + case msgpack::type::ARRAY: + case msgpack::type::MAP: + validation_stack.push_back(&obj); + break; + default: + break; + } + } + } + + return true; +} } // namespace template @@ -282,86 +338,11 @@ auto Serializer::serialize_msgpack_map(msgpack::object_map c return true; } - m_schema_tree.take_snapshot(); m_schema_tree_node_buf.clear(); m_key_group_buf.clear(); m_value_group_buf.clear(); - // Traverse the map using DFS iteratively - bool failure{false}; - vector dfs_stack; - dfs_stack.emplace_back( - SchemaTree::cRootId, - span{msgpack_map.ptr, msgpack_map.size} - ); - while (false == dfs_stack.empty()) { - auto& curr{dfs_stack.back()}; - if (false == curr.has_next_child()) { - // Visited all children, so pop node - dfs_stack.pop_back(); - continue; - } - - // Convert the current value's type to its corresponding schema-tree node type - auto const& [key, val]{curr.get_next_child()}; - auto const opt_schema_tree_node_type{get_schema_tree_node_type_from_msgpack_val(val)}; - if (false == opt_schema_tree_node_type.has_value()) { - failure = true; - break; - } - auto const schema_tree_node_type{opt_schema_tree_node_type.value()}; - - SchemaTree::NodeLocator const locator{ - curr.get_schema_tree_node_id(), - key.as(), - schema_tree_node_type - }; - - // Get the schema-tree node that corresponds with the current kv-pair, or add it if it - // doesn't exist. - auto opt_schema_tree_node_id{m_schema_tree.try_get_node_id(locator)}; - if (false == opt_schema_tree_node_id.has_value()) { - opt_schema_tree_node_id.emplace(m_schema_tree.insert_node(locator)); - if (false == serialize_schema_tree_node(locator)) { - failure = true; - break; - } - } - auto const schema_tree_node_id{opt_schema_tree_node_id.value()}; - - if (msgpack::type::MAP == val.type) { - // Serialize map - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) - auto const& inner_map{val.via.map}; - auto const inner_map_size(inner_map.size); - if (0 == inner_map_size) { - // Value is an empty map, so we can serialize it immediately - if (false == serialize_key(schema_tree_node_id)) { - failure = true; - break; - } - serialize_value_empty_object(m_value_group_buf); - } else { - // Add map for DFS iteration - dfs_stack.emplace_back( - schema_tree_node_id, - span{inner_map.ptr, inner_map_size} - ); - } - } else { - // Serialize primitive - if (false - == (serialize_key(schema_tree_node_id) && serialize_val(val, schema_tree_node_type) - )) - { - failure = true; - break; - } - } - } - - if (failure) { - m_schema_tree.revert(); + if (false == serialize_msgpack_map_using_dfs(msgpack_map)) { return false; } @@ -485,6 +466,92 @@ auto Serializer::serialize_val( return true; } +template +auto Serializer::serialize_msgpack_map_using_dfs( + msgpack::object_map const& msgpack_map +) -> bool { + m_schema_tree.take_snapshot(); + TransactionManager revert_manager{ + []() noexcept -> void {}, + [&]() noexcept -> void { m_schema_tree.revert(); } + }; + + vector dfs_stack; + dfs_stack.emplace_back( + SchemaTree::cRootId, + span{msgpack_map.ptr, msgpack_map.size} + ); + while (false == dfs_stack.empty()) { + auto& curr{dfs_stack.back()}; + if (false == curr.has_next_child()) { + // Visited all children, so pop node + dfs_stack.pop_back(); + continue; + } + + // Convert the current value's type to its corresponding schema-tree node type + auto const& [key, val]{curr.get_next_child()}; + if (msgpack::type::STR != key.type) { + // A map containing non-string keys is not serializable + return false; + } + + auto const opt_schema_tree_node_type{get_schema_tree_node_type_from_msgpack_val(val)}; + if (false == opt_schema_tree_node_type.has_value()) { + return false; + } + auto const schema_tree_node_type{opt_schema_tree_node_type.value()}; + + SchemaTree::NodeLocator const locator{ + curr.get_schema_tree_node_id(), + key.as(), + schema_tree_node_type + }; + + // Get the schema-tree node that corresponds with the current kv-pair, or add it if it + // doesn't exist. + auto opt_schema_tree_node_id{m_schema_tree.try_get_node_id(locator)}; + if (false == opt_schema_tree_node_id.has_value()) { + opt_schema_tree_node_id.emplace(m_schema_tree.insert_node(locator)); + if (false == serialize_schema_tree_node(locator)) { + return false; + } + } + auto const schema_tree_node_id{opt_schema_tree_node_id.value()}; + + if (msgpack::type::MAP == val.type) { + // Serialize map + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) + auto const& inner_map{val.via.map}; + auto const inner_map_size(inner_map.size); + if (0 != inner_map_size) { + // Add map for DFS iteration + dfs_stack.emplace_back( + schema_tree_node_id, + span{inner_map.ptr, inner_map_size} + ); + } else { + // Value is an empty map, so we can serialize it immediately + if (false == serialize_key(schema_tree_node_id)) { + return false; + } + serialize_value_empty_object(m_value_group_buf); + } + continue; + } + + // Serialize primitive + if (false + == (serialize_key(schema_tree_node_id) && serialize_val(val, schema_tree_node_type))) + { + return false; + } + } + + revert_manager.mark_success(); + return true; +} + // Explicitly declare template specializations so that we can define the template methods in this // file template auto Serializer::create( @@ -524,4 +591,11 @@ template auto Serializer::serialize_val( msgpack::object const& val, SchemaTree::Node::Type schema_tree_node_type ) -> bool; + +template auto Serializer::serialize_msgpack_map_using_dfs( + msgpack::object_map const& msgpack_map +) -> bool; +template auto Serializer::serialize_msgpack_map_using_dfs( + msgpack::object_map const& msgpack_map +) -> bool; } // namespace clp::ffi::ir_stream diff --git a/components/core/src/clp/ffi/ir_stream/Serializer.hpp b/components/core/src/clp/ffi/ir_stream/Serializer.hpp index 292b360a3..e66058e05 100644 --- a/components/core/src/clp/ffi/ir_stream/Serializer.hpp +++ b/components/core/src/clp/ffi/ir_stream/Serializer.hpp @@ -116,6 +116,14 @@ class Serializer { [[nodiscard]] auto serialize_val(msgpack::object const& val, SchemaTree::Node::Type schema_tree_node_type) -> bool; + /** + * Serializes the given msgpack map using depth-first search (DFS). + * @param msgpack_map + * @return Whether the serialization succeeded. + */ + [[nodiscard]] auto serialize_msgpack_map_using_dfs(msgpack::object_map const& msgpack_map + ) -> bool; + UtcOffset m_curr_utc_offset{0}; Buffer m_ir_buf; SchemaTree m_schema_tree; diff --git a/components/core/tests/test-ir_encoding_methods.cpp b/components/core/tests/test-ir_encoding_methods.cpp index b4f0257c1..1e995924f 100644 --- a/components/core/tests/test-ir_encoding_methods.cpp +++ b/components/core/tests/test-ir_encoding_methods.cpp @@ -1,8 +1,11 @@ #include #include +#include #include +#include #include #include +#include #include #include #include @@ -208,6 +211,19 @@ template */ [[nodiscard]] auto count_num_leaves(nlohmann::json const& root) -> size_t; +/** + * Unpacks and asserts the serialization of the msgpack bytes fails. + * @tparam encoded_variable_t + * @param buffer A buffer containing msgpack byte sequence. + * @param serializer + * @return Whether serialization failed with the underlying IR buffer being empty. + */ +template +[[nodiscard]] auto unpack_and_assert_serialization_failure( + std::stringstream& buffer, + Serializer& serializer +) -> bool; + template [[nodiscard]] auto serialize_log_events( vector const& log_events, @@ -357,6 +373,30 @@ auto count_num_leaves(nlohmann::json const& root) -> size_t { return num_leaves; } + +template +auto unpack_and_assert_serialization_failure( + std::stringstream& buffer, + Serializer& serializer +) -> bool { + string msgpack_bytes{buffer.str()}; + buffer.str({}); + buffer.clear(); + auto const msgpack_obj_handle{msgpack::unpack(msgpack_bytes.data(), msgpack_bytes.size())}; + auto const msgpack_obj{msgpack_obj_handle.get()}; + if (msgpack::type::MAP != msgpack_obj.type) { + return false; + } + if (serializer.serialize_msgpack_map(msgpack_obj.via.map)) { + // Serialization should fail + return false; + } + if (false == serializer.get_ir_buf_view().empty()) { + // Serialization buffer should be empty + return false; + } + return true; +} } // namespace /** @@ -1284,3 +1324,59 @@ TEMPLATE_TEST_CASE( output_buf )); } + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +TEMPLATE_TEST_CASE( + "ffi_ir_stream_Serializer_serialize_invalid_msgpack", + "[clp][ffi][ir_stream][Serializer]", + four_byte_encoded_variable_t, + eight_byte_encoded_variable_t +) { + auto result{Serializer::create()}; + REQUIRE((false == result.has_error())); + + std::stringstream msgpack_serialization_buffer; + auto& serializer{result.value()}; + serializer.clear_ir_buf(); + + auto assert_invalid_serialization = [&](auto invalid_input) -> bool { + std::map const invalid_map{{"valid_key", invalid_input}}; + msgpack::pack(msgpack_serialization_buffer, invalid_map); + return unpack_and_assert_serialization_failure(msgpack_serialization_buffer, serializer); + }; + + std::map const map_with_integer_keys{{0, 0}, {1, 1}, {2, 2}}; + REQUIRE(assert_invalid_serialization(map_with_integer_keys)); + + std::map const map_with_invalid_submap{ + {"valid_key", map_with_integer_keys} + }; + REQUIRE(assert_invalid_serialization(map_with_invalid_submap)); + + std::tuple> const array_with_invalid_type{0, {0x00, 0x00, 0x00}}; + REQUIRE(assert_invalid_serialization(array_with_invalid_type)); + + std::tuple const subarray_with_invalid_type{ + 0, + array_with_invalid_type + }; + REQUIRE(assert_invalid_serialization(subarray_with_invalid_type)); + + std::tuple const array_with_invalid_map{ + 0, + map_with_integer_keys + }; + REQUIRE(assert_invalid_serialization(array_with_invalid_map)); + + std::tuple const subarray_with_invalid_map{ + 0, + array_with_invalid_map + }; + REQUIRE(assert_invalid_serialization(subarray_with_invalid_map)); + + std::tuple const array_with_invalid_submap{ + 0, + map_with_invalid_submap + }; + REQUIRE(assert_invalid_serialization(array_with_invalid_submap)); +}