From ab3e83f57e779e91dd022624c0ec4cf9428613f2 Mon Sep 17 00:00:00 2001 From: LinZhihao-723 Date: Thu, 31 Oct 2024 22:24:14 -0400 Subject: [PATCH 1/6] Fix serializer key validation --- .../core/src/clp/ffi/ir_stream/Serializer.cpp | 225 ++++++++++++------ .../core/src/clp/ffi/ir_stream/Serializer.hpp | 8 + .../core/tests/test-ir_encoding_methods.cpp | 96 ++++++++ 3 files changed, 255 insertions(+), 74 deletions(-) diff --git a/components/core/src/clp/ffi/ir_stream/Serializer.cpp b/components/core/src/clp/ffi/ir_stream/Serializer.cpp index 01215eb9d..bd0eeb64f 100644 --- a/components/core/src/clp/ffi/ir_stream/Serializer.cpp +++ b/components/core/src/clp/ffi/ir_stream/Serializer.cpp @@ -16,6 +16,7 @@ #include "../../ir/types.hpp" #include "../../time_types.hpp" +#include "../../TransactionManager.hpp" #include "../../type_utils.hpp" #include "../encoding_methods.hpp" #include "../SchemaTree.hpp" @@ -145,6 +146,15 @@ template vector& output_buf ) -> bool; +/** + * Validates whehther the msgpack array is valid to be serialized. + * @param array + * @return true if the array if valid, or false on errors: + * - The array element has an unsupported type. + * - The array contains a map with non-string keys. + */ +[[nodiscard]] auto validate_msgpack_array(msgpack::object const& array) -> bool; + auto get_schema_tree_node_type_from_msgpack_val(msgpack::object const& val ) -> optional { optional ret_val; @@ -225,11 +235,59 @@ auto serialize_value_array( string& logtype_buf, vector& output_buf ) -> bool { + if (false == validate_msgpack_array(val)) { + return false; + } std::ostringstream oss; oss << val; logtype_buf.clear(); return serialize_clp_string(oss.str(), logtype_buf, output_buf); } + +auto validate_msgpack_array(msgpack::object const& array) -> bool { + vector validation_stack{&array}; + while (false == validation_stack.empty()) { + auto const* curr{validation_stack.back()}; + validation_stack.pop_back(); + if (msgpack::type::MAP == curr->type) { + // Validate maps + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) + auto const& as_map{curr->via.map}; + std::span const map_view{as_map.ptr, as_map.size}; + for (auto const& [key, value] : map_view) { + if (msgpack::type::STR != key.type) { + return false; + } + if (msgpack::type::MAP == value.type || msgpack::type::ARRAY == value.type) { + validation_stack.push_back(&value); + } + } + continue; + } + + // Validate arrays + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) + auto const& as_array{curr->via.array}; + std::span const array_view{as_array.ptr, as_array.size}; + for (auto const& obj : array_view) { + auto const obj_type{obj.type}; + switch (obj_type) { + case msgpack::type::BIN: + case msgpack::type::EXT: + // Unsupported types + return false; + case msgpack::type::ARRAY: + case msgpack::type::MAP: + validation_stack.push_back(&obj); + break; + default: + break; + } + } + } + + return true; +} } // namespace template @@ -287,81 +345,12 @@ auto Serializer::serialize_msgpack_map(msgpack::object_map c m_key_group_buf.clear(); m_value_group_buf.clear(); - // Traverse the map using DFS iteratively - bool failure{false}; - vector dfs_stack; - dfs_stack.emplace_back( - SchemaTree::cRootId, - span{msgpack_map.ptr, msgpack_map.size} - ); - while (false == dfs_stack.empty()) { - auto& curr{dfs_stack.back()}; - if (false == curr.has_next_child()) { - // Visited all children, so pop node - dfs_stack.pop_back(); - continue; - } - - // Convert the current value's type to its corresponding schema-tree node type - auto const& [key, val]{curr.get_next_child()}; - auto const opt_schema_tree_node_type{get_schema_tree_node_type_from_msgpack_val(val)}; - if (false == opt_schema_tree_node_type.has_value()) { - failure = true; - break; - } - auto const schema_tree_node_type{opt_schema_tree_node_type.value()}; - - SchemaTree::NodeLocator const locator{ - curr.get_schema_tree_node_id(), - key.as(), - schema_tree_node_type - }; - - // Get the schema-tree node that corresponds with the current kv-pair, or add it if it - // doesn't exist. - auto opt_schema_tree_node_id{m_schema_tree.try_get_node_id(locator)}; - if (false == opt_schema_tree_node_id.has_value()) { - opt_schema_tree_node_id.emplace(m_schema_tree.insert_node(locator)); - if (false == serialize_schema_tree_node(locator)) { - failure = true; - break; - } - } - auto const schema_tree_node_id{opt_schema_tree_node_id.value()}; - - if (msgpack::type::MAP == val.type) { - // Serialize map - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) - auto const& inner_map{val.via.map}; - auto const inner_map_size(inner_map.size); - if (0 == inner_map_size) { - // Value is an empty map, so we can serialize it immediately - if (false == serialize_key(schema_tree_node_id)) { - failure = true; - break; - } - serialize_value_empty_object(m_value_group_buf); - } else { - // Add map for DFS iteration - dfs_stack.emplace_back( - schema_tree_node_id, - span{inner_map.ptr, inner_map_size} - ); - } - } else { - // Serialize primitive - if (false - == (serialize_key(schema_tree_node_id) && serialize_val(val, schema_tree_node_type) - )) - { - failure = true; - break; - } - } - } + TransactionManager revert_manager{ + []() noexcept -> void {}, + [&]() noexcept -> void { m_schema_tree.revert(); } + }; - if (failure) { - m_schema_tree.revert(); + if (false == serialize_msgpack_map_using_dfs(msgpack_map)) { return false; } @@ -372,6 +361,8 @@ auto Serializer::serialize_msgpack_map(msgpack::object_map c ); m_ir_buf.insert(m_ir_buf.cend(), m_key_group_buf.cbegin(), m_key_group_buf.cend()); m_ir_buf.insert(m_ir_buf.cend(), m_value_group_buf.cbegin(), m_value_group_buf.cend()); + + revert_manager.mark_success(); return true; } @@ -485,6 +476,85 @@ auto Serializer::serialize_val( return true; } +template +auto Serializer::serialize_msgpack_map_using_dfs( + msgpack::object_map const& msgpack_map +) -> bool { + vector dfs_stack; + dfs_stack.emplace_back( + SchemaTree::cRootId, + span{msgpack_map.ptr, msgpack_map.size} + ); + while (false == dfs_stack.empty()) { + auto& curr{dfs_stack.back()}; + if (false == curr.has_next_child()) { + // Visited all children, so pop node + dfs_stack.pop_back(); + continue; + } + + // Convert the current value's type to its corresponding schema-tree node type + auto const& [key, val]{curr.get_next_child()}; + if (msgpack::type::STR != key.type) { + // All keys must be string type + return false; + } + + auto const opt_schema_tree_node_type{get_schema_tree_node_type_from_msgpack_val(val)}; + if (false == opt_schema_tree_node_type.has_value()) { + return false; + } + auto const schema_tree_node_type{opt_schema_tree_node_type.value()}; + + SchemaTree::NodeLocator const locator{ + curr.get_schema_tree_node_id(), + key.as(), + schema_tree_node_type + }; + + // Get the schema-tree node that corresponds with the current kv-pair, or add it if it + // doesn't exist. + auto opt_schema_tree_node_id{m_schema_tree.try_get_node_id(locator)}; + if (false == opt_schema_tree_node_id.has_value()) { + opt_schema_tree_node_id.emplace(m_schema_tree.insert_node(locator)); + if (false == serialize_schema_tree_node(locator)) { + return false; + } + } + auto const schema_tree_node_id{opt_schema_tree_node_id.value()}; + + if (msgpack::type::MAP == val.type) { + // Serialize map + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) + auto const& inner_map{val.via.map}; + auto const inner_map_size(inner_map.size); + if (0 != inner_map_size) { + // Add map for DFS iteration + dfs_stack.emplace_back( + schema_tree_node_id, + span{inner_map.ptr, inner_map_size} + ); + } else { + // Value is an empty map, so we can serialize it immediately + if (false == serialize_key(schema_tree_node_id)) { + return false; + } + serialize_value_empty_object(m_value_group_buf); + } + continue; + } + + // Serialize primitive + if (false + == (serialize_key(schema_tree_node_id) && serialize_val(val, schema_tree_node_type))) + { + return false; + } + } + + return true; +} + // Explicitly declare template specializations so that we can define the template methods in this // file template auto Serializer::create( @@ -524,4 +594,11 @@ template auto Serializer::serialize_val( msgpack::object const& val, SchemaTree::Node::Type schema_tree_node_type ) -> bool; + +template auto Serializer::serialize_msgpack_map_using_dfs( + msgpack::object_map const& msgpack_map +) -> bool; +template auto Serializer::serialize_msgpack_map_using_dfs( + msgpack::object_map const& msgpack_map +) -> bool; } // namespace clp::ffi::ir_stream diff --git a/components/core/src/clp/ffi/ir_stream/Serializer.hpp b/components/core/src/clp/ffi/ir_stream/Serializer.hpp index 292b360a3..e66058e05 100644 --- a/components/core/src/clp/ffi/ir_stream/Serializer.hpp +++ b/components/core/src/clp/ffi/ir_stream/Serializer.hpp @@ -116,6 +116,14 @@ class Serializer { [[nodiscard]] auto serialize_val(msgpack::object const& val, SchemaTree::Node::Type schema_tree_node_type) -> bool; + /** + * Serializes the given msgpack map using depth-first search (DFS). + * @param msgpack_map + * @return Whether the serialization succeeded. + */ + [[nodiscard]] auto serialize_msgpack_map_using_dfs(msgpack::object_map const& msgpack_map + ) -> bool; + UtcOffset m_curr_utc_offset{0}; Buffer m_ir_buf; SchemaTree m_schema_tree; diff --git a/components/core/tests/test-ir_encoding_methods.cpp b/components/core/tests/test-ir_encoding_methods.cpp index b4f0257c1..c673a8087 100644 --- a/components/core/tests/test-ir_encoding_methods.cpp +++ b/components/core/tests/test-ir_encoding_methods.cpp @@ -1,8 +1,11 @@ #include #include +#include #include +#include #include #include +#include #include #include #include @@ -208,6 +211,19 @@ template */ [[nodiscard]] auto count_num_leaves(nlohmann::json const& root) -> size_t; +/** + * Unpacks and asserts the serialization of the msgpack bytes fails. + * @tparam encoded_variable_t + * @param buffer + * @param serializer + * @return Whether serialization failed with the underlying IR buffer being empty. + */ +template +[[nodiscard]] auto unpack_and_assert_serialization_failure( + std::stringstream& buffer, + Serializer& serializer +) -> bool; + template [[nodiscard]] auto serialize_log_events( vector const& log_events, @@ -357,6 +373,30 @@ auto count_num_leaves(nlohmann::json const& root) -> size_t { return num_leaves; } + +template +auto unpack_and_assert_serialization_failure( + std::stringstream& buffer, + Serializer& serializer +) -> bool { + string msgpack_bytes{buffer.str()}; + buffer.str(""); + buffer.clear(); + auto const msgpack_obj_handle{msgpack::unpack(msgpack_bytes.data(), msgpack_bytes.size())}; + auto const msgpack_obj{msgpack_obj_handle.get()}; + if (msgpack::type::MAP != msgpack_obj.type) { + return false; + } + if (serializer.serialize_msgpack_map(msgpack_obj.via.map)) { + // Serialization should fail + return false; + } + if (false == serializer.get_ir_buf_view().empty()) { + // Serialization buffer should be empty + return false; + } + return true; +} } // namespace /** @@ -1284,3 +1324,59 @@ TEMPLATE_TEST_CASE( output_buf )); } + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +TEMPLATE_TEST_CASE( + "ffi_ir_stream_Serializer_serialize_invalid_msgpack", + "[clp][ffi][ir_stream][Serializer]", + four_byte_encoded_variable_t, + eight_byte_encoded_variable_t +) { + auto result{Serializer::create()}; + REQUIRE((false == result.has_error())); + + std::stringstream msgpack_serialization_buffer; + auto& serializer{result.value()}; + serializer.clear_ir_buf(); + + auto assert_invalid_serialization = [&](auto invalid_input) -> bool { + std::map const invalid_map{{"valid_key", invalid_input}}; + msgpack::pack(msgpack_serialization_buffer, invalid_map); + return unpack_and_assert_serialization_failure(msgpack_serialization_buffer, serializer); + }; + + std::map const map_with_integer_keys{{0, 0}, {1, 1}, {2, 2}}; + REQUIRE(assert_invalid_serialization(map_with_integer_keys)); + + std::map const map_with_invalid_submap{ + {"valid_key", map_with_integer_keys} + }; + REQUIRE(assert_invalid_serialization(map_with_invalid_submap)); + + std::tuple> const array_with_invalid_type{0, {0x00, 0x00, 0x00}}; + REQUIRE(assert_invalid_serialization(array_with_invalid_type)); + + std::tuple const subarray_with_invalid_type{ + 0, + array_with_invalid_type + }; + REQUIRE(assert_invalid_serialization(subarray_with_invalid_type)); + + std::tuple const array_with_invalid_map{ + 0, + map_with_integer_keys + }; + REQUIRE(assert_invalid_serialization(array_with_invalid_map)); + + std::tuple const subarray_with_invalid_map{ + 0, + array_with_invalid_map + }; + REQUIRE(assert_invalid_serialization(subarray_with_invalid_type)); + + std::tuple const array_with_invalid_submap{ + 0, + map_with_invalid_submap + }; + REQUIRE(assert_invalid_serialization(array_with_invalid_submap)); +} From e12a8722f8fdc960d09d99da77e94ed3e7b2f5fc Mon Sep 17 00:00:00 2001 From: LinZhihao-723 Date: Thu, 31 Oct 2024 22:40:00 -0400 Subject: [PATCH 2/6] Renaming... --- .../core/src/clp/ffi/ir_stream/Serializer.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/components/core/src/clp/ffi/ir_stream/Serializer.cpp b/components/core/src/clp/ffi/ir_stream/Serializer.cpp index bd0eeb64f..db580fc9f 100644 --- a/components/core/src/clp/ffi/ir_stream/Serializer.cpp +++ b/components/core/src/clp/ffi/ir_stream/Serializer.cpp @@ -147,13 +147,14 @@ template ) -> bool; /** - * Validates whehther the msgpack array is valid to be serialized. + * Checks whether the given msgpack array can be serialized in the key-value pair IR format. * @param array - * @return true if the array if valid, or false on errors: - * - The array element has an unsupported type. - * - The array contains a map with non-string keys. + * @return true if the array is serializable + * @return false if: + * - Any object inside the array has its type unsupported (`BIN` or `EXT`). + * - Any object inside the array is `MAP` and it has non-string keys. */ -[[nodiscard]] auto validate_msgpack_array(msgpack::object const& array) -> bool; +[[nodiscard]] auto is_msgpack_array_serializable(msgpack::object const& array) -> bool; auto get_schema_tree_node_type_from_msgpack_val(msgpack::object const& val ) -> optional { @@ -235,7 +236,7 @@ auto serialize_value_array( string& logtype_buf, vector& output_buf ) -> bool { - if (false == validate_msgpack_array(val)) { + if (false == is_msgpack_array_serializable(val)) { return false; } std::ostringstream oss; @@ -244,7 +245,7 @@ auto serialize_value_array( return serialize_clp_string(oss.str(), logtype_buf, output_buf); } -auto validate_msgpack_array(msgpack::object const& array) -> bool { +auto is_msgpack_array_serializable(msgpack::object const& array) -> bool { vector validation_stack{&array}; while (false == validation_stack.empty()) { auto const* curr{validation_stack.back()}; @@ -496,7 +497,7 @@ auto Serializer::serialize_msgpack_map_using_dfs( // Convert the current value's type to its corresponding schema-tree node type auto const& [key, val]{curr.get_next_child()}; if (msgpack::type::STR != key.type) { - // All keys must be string type + // A map containing non-string keys is not serializable return false; } From 2d351abf6bc7d10fd007267876ae9b10642374f6 Mon Sep 17 00:00:00 2001 From: LinZhihao-723 Date: Thu, 31 Oct 2024 22:43:19 -0400 Subject: [PATCH 3/6] Move around... --- .../core/src/clp/ffi/ir_stream/Serializer.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/components/core/src/clp/ffi/ir_stream/Serializer.cpp b/components/core/src/clp/ffi/ir_stream/Serializer.cpp index db580fc9f..744f39f91 100644 --- a/components/core/src/clp/ffi/ir_stream/Serializer.cpp +++ b/components/core/src/clp/ffi/ir_stream/Serializer.cpp @@ -341,16 +341,10 @@ auto Serializer::serialize_msgpack_map(msgpack::object_map c return true; } - m_schema_tree.take_snapshot(); m_schema_tree_node_buf.clear(); m_key_group_buf.clear(); m_value_group_buf.clear(); - TransactionManager revert_manager{ - []() noexcept -> void {}, - [&]() noexcept -> void { m_schema_tree.revert(); } - }; - if (false == serialize_msgpack_map_using_dfs(msgpack_map)) { return false; } @@ -362,8 +356,6 @@ auto Serializer::serialize_msgpack_map(msgpack::object_map c ); m_ir_buf.insert(m_ir_buf.cend(), m_key_group_buf.cbegin(), m_key_group_buf.cend()); m_ir_buf.insert(m_ir_buf.cend(), m_value_group_buf.cbegin(), m_value_group_buf.cend()); - - revert_manager.mark_success(); return true; } @@ -481,6 +473,12 @@ template auto Serializer::serialize_msgpack_map_using_dfs( msgpack::object_map const& msgpack_map ) -> bool { + m_schema_tree.take_snapshot(); + TransactionManager revert_manager{ + []() noexcept -> void {}, + [&]() noexcept -> void { m_schema_tree.revert(); } + }; + vector dfs_stack; dfs_stack.emplace_back( SchemaTree::cRootId, @@ -553,6 +551,7 @@ auto Serializer::serialize_msgpack_map_using_dfs( } } + revert_manager.mark_success(); return true; } From 1ade58b87815abda90266bd1f21329b6c91eb70d Mon Sep 17 00:00:00 2001 From: LinZhihao-723 Date: Thu, 31 Oct 2024 22:46:00 -0400 Subject: [PATCH 4/6] Nit fix --- components/core/tests/test-ir_encoding_methods.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/core/tests/test-ir_encoding_methods.cpp b/components/core/tests/test-ir_encoding_methods.cpp index c673a8087..c18bfa680 100644 --- a/components/core/tests/test-ir_encoding_methods.cpp +++ b/components/core/tests/test-ir_encoding_methods.cpp @@ -214,7 +214,7 @@ template /** * Unpacks and asserts the serialization of the msgpack bytes fails. * @tparam encoded_variable_t - * @param buffer + * @param buffer A buffer containing msgpack byte sequence. * @param serializer * @return Whether serialization failed with the underlying IR buffer being empty. */ @@ -380,7 +380,7 @@ auto unpack_and_assert_serialization_failure( Serializer& serializer ) -> bool { string msgpack_bytes{buffer.str()}; - buffer.str(""); + buffer.str({}); buffer.clear(); auto const msgpack_obj_handle{msgpack::unpack(msgpack_bytes.data(), msgpack_bytes.size())}; auto const msgpack_obj{msgpack_obj_handle.get()}; From ec37d356038c1d1232a1a8e650e1f20ad30321bc Mon Sep 17 00:00:00 2001 From: LinZhihao-723 Date: Thu, 31 Oct 2024 22:49:53 -0400 Subject: [PATCH 5/6] Nit refactoring --- components/core/src/clp/ffi/ir_stream/Serializer.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/components/core/src/clp/ffi/ir_stream/Serializer.cpp b/components/core/src/clp/ffi/ir_stream/Serializer.cpp index 744f39f91..71925f567 100644 --- a/components/core/src/clp/ffi/ir_stream/Serializer.cpp +++ b/components/core/src/clp/ffi/ir_stream/Serializer.cpp @@ -254,8 +254,7 @@ auto is_msgpack_array_serializable(msgpack::object const& array) -> bool { // Validate maps // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) auto const& as_map{curr->via.map}; - std::span const map_view{as_map.ptr, as_map.size}; - for (auto const& [key, value] : map_view) { + for (auto const& [key, value] : span{as_map.ptr, as_map.size}) { if (msgpack::type::STR != key.type) { return false; } @@ -269,10 +268,8 @@ auto is_msgpack_array_serializable(msgpack::object const& array) -> bool { // Validate arrays // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) auto const& as_array{curr->via.array}; - std::span const array_view{as_array.ptr, as_array.size}; - for (auto const& obj : array_view) { - auto const obj_type{obj.type}; - switch (obj_type) { + for (auto const& obj : span{as_array.ptr, as_array.size}) { + switch (obj.type) { case msgpack::type::BIN: case msgpack::type::EXT: // Unsupported types From a28079a823d45731bd9e1ea8ca318e14c1a6db41 Mon Sep 17 00:00:00 2001 From: LinZhihao-723 Date: Thu, 31 Oct 2024 23:27:51 -0400 Subject: [PATCH 6/6] Fix rabit's comment --- components/core/tests/test-ir_encoding_methods.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/core/tests/test-ir_encoding_methods.cpp b/components/core/tests/test-ir_encoding_methods.cpp index c18bfa680..1e995924f 100644 --- a/components/core/tests/test-ir_encoding_methods.cpp +++ b/components/core/tests/test-ir_encoding_methods.cpp @@ -1372,7 +1372,7 @@ TEMPLATE_TEST_CASE( 0, array_with_invalid_map }; - REQUIRE(assert_invalid_serialization(subarray_with_invalid_type)); + REQUIRE(assert_invalid_serialization(subarray_with_invalid_map)); std::tuple const array_with_invalid_submap{ 0,