forked from y-scope/clp
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
clp-s: Add support for projecting of a subset of columns during searc…
…h. (y-scope#510) Co-authored-by: wraymo <[email protected]> Co-authored-by: Kirk Rodrigues <[email protected]>
- Loading branch information
1 parent
035449a
commit 7826d73
Showing
11 changed files
with
234 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
#include "Projection.hpp" | ||
|
||
#include <algorithm> | ||
|
||
#include "SearchUtils.hpp" | ||
|
||
namespace clp_s::search { | ||
void Projection::add_column(std::shared_ptr<ColumnDescriptor> column) { | ||
if (column->is_unresolved_descriptor()) { | ||
throw OperationFailed(ErrorCodeBadParam, __FILE__, __LINE__); | ||
} | ||
if (ProjectionMode::ReturnAllColumns == m_projection_mode) { | ||
throw OperationFailed(ErrorCodeUnsupported, __FILE__, __LINE__); | ||
} | ||
if (m_selected_columns.end() | ||
!= std::find_if( | ||
m_selected_columns.begin(), | ||
m_selected_columns.end(), | ||
[column](auto const& rhs) -> bool { return *column == *rhs; } | ||
)) | ||
{ | ||
// no duplicate columns in projection | ||
throw OperationFailed(ErrorCodeBadParam, __FILE__, __LINE__); | ||
} | ||
m_selected_columns.push_back(column); | ||
} | ||
|
||
void Projection::resolve_columns(std::shared_ptr<SchemaTree> tree) { | ||
for (auto& column : m_selected_columns) { | ||
resolve_column(tree, column); | ||
} | ||
} | ||
|
||
void Projection::resolve_column( | ||
std::shared_ptr<SchemaTree> tree, | ||
std::shared_ptr<ColumnDescriptor> column | ||
) { | ||
/** | ||
* Ideally we would reuse the code from SchemaMatch for resolving columns, but unfortunately we | ||
* can not. | ||
* | ||
* The main reason is that here we don't want to allow projection to travel inside unstructured | ||
* objects -- it may be possible to support such a thing in the future, but it poses some extra | ||
* challenges (e.g. deciding what to do when projecting repeated elements in a structure). | ||
* | ||
* It would be possible to create code that can handle our use-case and SchemaMatch's use-case | ||
* in an elegant way, but it's a significant refactor. In particular, if we extend our column | ||
* type system to be one-per-token instead of one-per-column we can make it so that intermediate | ||
* tokens will not match certain kinds of MPT nodes (like the node for structured arrays). | ||
* | ||
* In light of that we implement a simple version of column resolution here that does exactly | ||
* what we need. | ||
*/ | ||
|
||
auto cur_node_id = tree->get_root_node_id(); | ||
auto it = column->descriptor_begin(); | ||
while (it != column->descriptor_end()) { | ||
bool matched_any{false}; | ||
auto cur_it = it++; | ||
bool last_token = it == column->descriptor_end(); | ||
auto const& cur_node = tree->get_node(cur_node_id); | ||
for (int32_t child_node_id : cur_node.get_children_ids()) { | ||
auto const& child_node = tree->get_node(child_node_id); | ||
|
||
// Intermediate nodes must be objects | ||
if (false == last_token && child_node.get_type() != NodeType::Object) { | ||
continue; | ||
} | ||
|
||
if (child_node.get_key_name() != cur_it->get_token()) { | ||
continue; | ||
} | ||
|
||
matched_any = true; | ||
if (last_token && column->matches_type(node_to_literal_type(child_node.get_type()))) { | ||
m_matching_nodes.insert(child_node_id); | ||
} else if (false == last_token) { | ||
cur_node_id = child_node_id; | ||
break; | ||
} | ||
} | ||
|
||
if (false == matched_any) { | ||
break; | ||
} | ||
} | ||
} | ||
} // namespace clp_s::search |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
#ifndef CLP_S_SEARCH_PROJECTION_HPP | ||
#define CLP_S_SEARCH_PROJECTION_HPP | ||
|
||
#include <vector> | ||
|
||
#include <absl/container/flat_hash_set.h> | ||
|
||
#include "../SchemaTree.hpp" | ||
#include "../TraceableException.hpp" | ||
#include "ColumnDescriptor.hpp" | ||
|
||
namespace clp_s::search { | ||
/**
 * Selects how a Projection decides which columns belong in the output.
 */
enum ProjectionMode : uint8_t {
    // Every column is included in the output
    ReturnAllColumns = 0,
    // Only columns that were explicitly selected are included in the output
    ReturnSelectedColumns = 1
};
|
||
/** | ||
* This class describes the set of columns that should be included in the projected results. | ||
* | ||
* After adding columns and before calling matches_node the caller is responsible for calling | ||
* resolve_columns. | ||
*/ | ||
class Projection { | ||
public: | ||
class OperationFailed : public TraceableException { | ||
public: | ||
// Constructors | ||
OperationFailed(ErrorCode error_code, char const* const filename, int line_number) | ||
: TraceableException(error_code, filename, line_number) {} | ||
}; | ||
|
||
explicit Projection(ProjectionMode mode) : m_projection_mode{mode} {} | ||
|
||
/** | ||
* Adds a column to the set of columns that should be included in the projected results | ||
* @param column | ||
* @throws OperationFailed if `column` contains a wildcard | ||
* @throws OperationFailed if this instance of Projection is in mode ReturnAllColumns | ||
* @throws OperationFailed if `column` is identical to a previously added column | ||
*/ | ||
void add_column(std::shared_ptr<ColumnDescriptor> column); | ||
|
||
/** | ||
* Resolves all columns for the purpose of projection. This key resolution implementation is | ||
* more limited than the one in schema matching. In particular, this version of key resolution | ||
* only allows resolving keys that do not contain wildcards and does not allow resolving to | ||
* objects within arrays. | ||
* | ||
* Note: we could try to generalize column resolution code/move it to the schema tree. It is | ||
* probably best to write a simpler version dedicated to projection for now since types are | ||
* leaf-only. The type-per-token idea solves this problem (in the absence of wildcards). | ||
* | ||
* @param tree | ||
*/ | ||
void resolve_columns(std::shared_ptr<SchemaTree> tree); | ||
|
||
/** | ||
* Checks whether a column corresponding to given leaf node should be included in the output | ||
* @param node_id | ||
* @return true if the column should be included in the output, false otherwise | ||
*/ | ||
bool matches_node(int32_t node_id) const { | ||
return ProjectionMode::ReturnAllColumns == m_projection_mode | ||
|| m_matching_nodes.contains(node_id); | ||
} | ||
|
||
private: | ||
/** | ||
* Resolves an individual column as described by the `resolve_columns` method. | ||
* @param tree | ||
* @param column | ||
*/ | ||
void resolve_column(std::shared_ptr<SchemaTree> tree, std::shared_ptr<ColumnDescriptor> column); | ||
|
||
std::vector<std::shared_ptr<ColumnDescriptor>> m_selected_columns; | ||
absl::flat_hash_set<int32_t> m_matching_nodes; | ||
ProjectionMode m_projection_mode{ProjectionMode::ReturnAllColumns}; | ||
}; | ||
} // namespace clp_s::search | ||
|
||
#endif // CLP_S_SEARCH_PROJECTION_HPP |