-
Notifications
You must be signed in to change notification settings - Fork 71
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(ffi): Fix Serializer
to invalidate msgpack maps with non-string keys and msgpack arrays with unsupported types.
#570
Open
LinZhihao-723
wants to merge
6
commits into
y-scope:main
Choose a base branch
from
LinZhihao-723:serializer_fix
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
ab3e83f
Fix serializer key validation
LinZhihao-723 e12a872
Renaming...
LinZhihao-723 2d351ab
Move around...
LinZhihao-723 1ade58b
Nit fix
LinZhihao-723 ec37d35
Nit refactoring
LinZhihao-723 a28079a
Fix rabit's comment
LinZhihao-723 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ | |
|
||
#include "../../ir/types.hpp" | ||
#include "../../time_types.hpp" | ||
#include "../../TransactionManager.hpp" | ||
#include "../../type_utils.hpp" | ||
#include "../encoding_methods.hpp" | ||
#include "../SchemaTree.hpp" | ||
|
@@ -145,6 +146,16 @@ template <typename encoded_variable_t> | |
vector<int8_t>& output_buf | ||
) -> bool; | ||
|
||
/** | ||
* Checks whether the given msgpack array can be serialized in the key-value pair IR format. | ||
* @param array | ||
* @return true if the array is serializable | ||
* @return false if: | ||
* - Any object inside the array has its type unsupported (`BIN` or `EXT`). | ||
* - Any object inside the array is `MAP` and it has non-string keys. | ||
*/ | ||
[[nodiscard]] auto is_msgpack_array_serializable(msgpack::object const& array) -> bool; | ||
|
||
auto get_schema_tree_node_type_from_msgpack_val(msgpack::object const& val | ||
) -> optional<SchemaTree::Node::Type> { | ||
optional<SchemaTree::Node::Type> ret_val; | ||
|
@@ -225,11 +236,56 @@ auto serialize_value_array( | |
string& logtype_buf, | ||
vector<int8_t>& output_buf | ||
) -> bool { | ||
if (false == is_msgpack_array_serializable(val)) { | ||
return false; | ||
} | ||
std::ostringstream oss; | ||
oss << val; | ||
logtype_buf.clear(); | ||
return serialize_clp_string<encoded_variable_t>(oss.str(), logtype_buf, output_buf); | ||
} | ||
|
||
auto is_msgpack_array_serializable(msgpack::object const& array) -> bool { | ||
vector<msgpack::object const*> validation_stack{&array}; | ||
while (false == validation_stack.empty()) { | ||
auto const* curr{validation_stack.back()}; | ||
validation_stack.pop_back(); | ||
if (msgpack::type::MAP == curr->type) { | ||
// Validate maps | ||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) | ||
auto const& as_map{curr->via.map}; | ||
for (auto const& [key, value] : span{as_map.ptr, as_map.size}) { | ||
if (msgpack::type::STR != key.type) { | ||
return false; | ||
} | ||
if (msgpack::type::MAP == value.type || msgpack::type::ARRAY == value.type) { | ||
validation_stack.push_back(&value); | ||
} | ||
} | ||
continue; | ||
} | ||
|
||
// Validate arrays | ||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) | ||
auto const& as_array{curr->via.array}; | ||
for (auto const& obj : span{as_array.ptr, as_array.size}) { | ||
switch (obj.type) { | ||
case msgpack::type::BIN: | ||
case msgpack::type::EXT: | ||
// Unsupported types | ||
return false; | ||
case msgpack::type::ARRAY: | ||
case msgpack::type::MAP: | ||
validation_stack.push_back(&obj); | ||
break; | ||
default: | ||
break; | ||
} | ||
} | ||
} | ||
|
||
return true; | ||
} | ||
LinZhihao-723 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} // namespace | ||
|
||
template <typename encoded_variable_t> | ||
|
@@ -282,86 +338,11 @@ auto Serializer<encoded_variable_t>::serialize_msgpack_map(msgpack::object_map c | |
return true; | ||
} | ||
|
||
m_schema_tree.take_snapshot(); | ||
m_schema_tree_node_buf.clear(); | ||
m_key_group_buf.clear(); | ||
m_value_group_buf.clear(); | ||
|
||
// Traverse the map using DFS iteratively | ||
bool failure{false}; | ||
vector<MsgpackMapIterator> dfs_stack; | ||
dfs_stack.emplace_back( | ||
SchemaTree::cRootId, | ||
span<MsgpackMapIterator::Child>{msgpack_map.ptr, msgpack_map.size} | ||
); | ||
while (false == dfs_stack.empty()) { | ||
auto& curr{dfs_stack.back()}; | ||
if (false == curr.has_next_child()) { | ||
// Visited all children, so pop node | ||
dfs_stack.pop_back(); | ||
continue; | ||
} | ||
|
||
// Convert the current value's type to its corresponding schema-tree node type | ||
auto const& [key, val]{curr.get_next_child()}; | ||
auto const opt_schema_tree_node_type{get_schema_tree_node_type_from_msgpack_val(val)}; | ||
if (false == opt_schema_tree_node_type.has_value()) { | ||
failure = true; | ||
break; | ||
} | ||
auto const schema_tree_node_type{opt_schema_tree_node_type.value()}; | ||
|
||
SchemaTree::NodeLocator const locator{ | ||
curr.get_schema_tree_node_id(), | ||
key.as<string_view>(), | ||
schema_tree_node_type | ||
}; | ||
|
||
// Get the schema-tree node that corresponds with the current kv-pair, or add it if it | ||
// doesn't exist. | ||
auto opt_schema_tree_node_id{m_schema_tree.try_get_node_id(locator)}; | ||
if (false == opt_schema_tree_node_id.has_value()) { | ||
opt_schema_tree_node_id.emplace(m_schema_tree.insert_node(locator)); | ||
if (false == serialize_schema_tree_node(locator)) { | ||
failure = true; | ||
break; | ||
} | ||
} | ||
auto const schema_tree_node_id{opt_schema_tree_node_id.value()}; | ||
|
||
if (msgpack::type::MAP == val.type) { | ||
// Serialize map | ||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) | ||
auto const& inner_map{val.via.map}; | ||
auto const inner_map_size(inner_map.size); | ||
if (0 == inner_map_size) { | ||
// Value is an empty map, so we can serialize it immediately | ||
if (false == serialize_key(schema_tree_node_id)) { | ||
failure = true; | ||
break; | ||
} | ||
serialize_value_empty_object(m_value_group_buf); | ||
} else { | ||
// Add map for DFS iteration | ||
dfs_stack.emplace_back( | ||
schema_tree_node_id, | ||
span<MsgpackMapIterator::Child>{inner_map.ptr, inner_map_size} | ||
); | ||
} | ||
} else { | ||
// Serialize primitive | ||
if (false | ||
== (serialize_key(schema_tree_node_id) && serialize_val(val, schema_tree_node_type) | ||
)) | ||
{ | ||
failure = true; | ||
break; | ||
} | ||
} | ||
} | ||
|
||
if (failure) { | ||
m_schema_tree.revert(); | ||
if (false == serialize_msgpack_map_using_dfs(msgpack_map)) { | ||
return false; | ||
} | ||
|
||
|
@@ -485,6 +466,92 @@ auto Serializer<encoded_variable_t>::serialize_val( | |
return true; | ||
} | ||
|
||
template <typename encoded_variable_t> | ||
auto Serializer<encoded_variable_t>::serialize_msgpack_map_using_dfs( | ||
msgpack::object_map const& msgpack_map | ||
) -> bool { | ||
m_schema_tree.take_snapshot(); | ||
TransactionManager revert_manager{ | ||
[]() noexcept -> void {}, | ||
[&]() noexcept -> void { m_schema_tree.revert(); } | ||
}; | ||
|
||
vector<MsgpackMapIterator> dfs_stack; | ||
dfs_stack.emplace_back( | ||
SchemaTree::cRootId, | ||
span<MsgpackMapIterator::Child>{msgpack_map.ptr, msgpack_map.size} | ||
); | ||
while (false == dfs_stack.empty()) { | ||
auto& curr{dfs_stack.back()}; | ||
if (false == curr.has_next_child()) { | ||
// Visited all children, so pop node | ||
dfs_stack.pop_back(); | ||
continue; | ||
} | ||
|
||
// Convert the current value's type to its corresponding schema-tree node type | ||
auto const& [key, val]{curr.get_next_child()}; | ||
if (msgpack::type::STR != key.type) { | ||
// A map containing non-string keys is not serializable | ||
return false; | ||
} | ||
Comment on lines
+494
to
+497
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function is split from the previous
|
||
|
||
auto const opt_schema_tree_node_type{get_schema_tree_node_type_from_msgpack_val(val)}; | ||
if (false == opt_schema_tree_node_type.has_value()) { | ||
return false; | ||
} | ||
auto const schema_tree_node_type{opt_schema_tree_node_type.value()}; | ||
|
||
SchemaTree::NodeLocator const locator{ | ||
curr.get_schema_tree_node_id(), | ||
key.as<string_view>(), | ||
schema_tree_node_type | ||
}; | ||
|
||
// Get the schema-tree node that corresponds with the current kv-pair, or add it if it | ||
// doesn't exist. | ||
auto opt_schema_tree_node_id{m_schema_tree.try_get_node_id(locator)}; | ||
if (false == opt_schema_tree_node_id.has_value()) { | ||
opt_schema_tree_node_id.emplace(m_schema_tree.insert_node(locator)); | ||
if (false == serialize_schema_tree_node(locator)) { | ||
return false; | ||
} | ||
} | ||
auto const schema_tree_node_id{opt_schema_tree_node_id.value()}; | ||
|
||
if (msgpack::type::MAP == val.type) { | ||
// Serialize map | ||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) | ||
auto const& inner_map{val.via.map}; | ||
auto const inner_map_size(inner_map.size); | ||
if (0 != inner_map_size) { | ||
// Add map for DFS iteration | ||
dfs_stack.emplace_back( | ||
schema_tree_node_id, | ||
span<MsgpackMapIterator::Child>{inner_map.ptr, inner_map_size} | ||
); | ||
} else { | ||
// Value is an empty map, so we can serialize it immediately | ||
if (false == serialize_key(schema_tree_node_id)) { | ||
return false; | ||
} | ||
serialize_value_empty_object(m_value_group_buf); | ||
} | ||
continue; | ||
} | ||
|
||
// Serialize primitive | ||
if (false | ||
== (serialize_key(schema_tree_node_id) && serialize_val(val, schema_tree_node_type))) | ||
{ | ||
return false; | ||
} | ||
} | ||
|
||
revert_manager.mark_success(); | ||
return true; | ||
} | ||
|
||
// Explicitly declare template specializations so that we can define the template methods in this | ||
// file | ||
template auto Serializer<eight_byte_encoded_variable_t>::create( | ||
|
@@ -524,4 +591,11 @@ template auto Serializer<four_byte_encoded_variable_t>::serialize_val( | |
msgpack::object const& val, | ||
SchemaTree::Node::Type schema_tree_node_type | ||
) -> bool; | ||
|
||
template auto Serializer<eight_byte_encoded_variable_t>::serialize_msgpack_map_using_dfs( | ||
msgpack::object_map const& msgpack_map | ||
) -> bool; | ||
template auto Serializer<four_byte_encoded_variable_t>::serialize_msgpack_map_using_dfs( | ||
msgpack::object_map const& msgpack_map | ||
) -> bool; | ||
} // namespace clp::ffi::ir_stream |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not necessarily but can we treat it as an
Array<number>
?