Skip to content

Commit

Permalink
core-clp: Add EncodedTextAst class to represent parsed and encoded …
Browse files Browse the repository at this point in the history
…unstructured text strings. (y-scope#495)
  • Loading branch information
LinZhihao-723 authored Jul 30, 2024
1 parent 09fb0b7 commit b37bcd5
Show file tree
Hide file tree
Showing 9 changed files with 84 additions and 24 deletions.
1 change: 1 addition & 0 deletions components/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ set(SOURCE_FILES_unitTest
src/clp/Grep.cpp
src/clp/Grep.hpp
src/clp/ir/constants.hpp
src/clp/ir/EncodedTextAst.hpp
src/clp/ir/LogEvent.hpp
src/clp/ir/LogEventDeserializer.cpp
src/clp/ir/LogEventDeserializer.hpp
Expand Down
9 changes: 5 additions & 4 deletions components/core/src/clp/EncodedVariableInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,8 @@ void EncodedVariableInterpreter::encode_and_add_to_dictionary(
size_t& raw_num_bytes
) {
logtype_dict_entry.clear();
logtype_dict_entry.reserve_constant_length(log_event.get_logtype().length());
auto const& log_message = log_event.get_message();
logtype_dict_entry.reserve_constant_length(log_message.get_logtype().length());

raw_num_bytes = 0;

Expand Down Expand Up @@ -284,9 +285,9 @@ void EncodedVariableInterpreter::encode_and_add_to_dictionary(
};

ffi::ir_stream::generic_decode_message<false>(
log_event.get_logtype(),
log_event.get_encoded_vars(),
log_event.get_dict_vars(),
log_message.get_logtype(),
log_message.get_encoded_vars(),
log_message.get_dict_vars(),
constant_handler,
encoded_int_handler,
encoded_float_handler,
Expand Down
1 change: 1 addition & 0 deletions components/core/src/clp/clg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ set(
../GlobalSQLiteMetadataDB.hpp
../Grep.cpp
../Grep.hpp
../ir/EncodedTextAst.hpp
../ir/LogEvent.hpp
../ir/parsing.cpp
../ir/parsing.hpp
Expand Down
1 change: 1 addition & 0 deletions components/core/src/clp/clo/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ set(
../FileWriter.hpp
../Grep.cpp
../Grep.hpp
../ir/EncodedTextAst.hpp
../ir/LogEvent.hpp
../ir/LogEventSerializer.cpp
../ir/LogEventSerializer.hpp
Expand Down
1 change: 1 addition & 0 deletions components/core/src/clp/clp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ set(
../GlobalSQLiteMetadataDB.cpp
../GlobalSQLiteMetadataDB.hpp
../ir/constants.hpp
../ir/EncodedTextAst.hpp
../ir/LogEvent.hpp
../ir/LogEventDeserializer.cpp
../ir/LogEventDeserializer.hpp
Expand Down
61 changes: 61 additions & 0 deletions components/core/src/clp/ir/EncodedTextAst.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#ifndef CLP_IR_ENCODEDTEXTAST_HPP
#define CLP_IR_ENCODEDTEXTAST_HPP

#include <string>
#include <utility>
#include <vector>

#include "types.hpp"

namespace clp::ir {
/**
 * Holds a parsed and encoded unstructured text string as three components: the logtype string,
 * the dictionary variables (`dict_vars`), and the encoded variables (`encoded_vars`).
 *
 * Instances are move-only: copy construction and copy assignment are deleted, while move
 * construction and move assignment are defaulted.
 * @tparam encoded_variable_t The type of encoded variables in the string.
 */
template <typename encoded_variable_t>
class EncodedTextAst {
public:
    // Constructors
    /**
     * Builds the AST by taking ownership of its three components. Each argument is accepted by
     * value and then moved into the corresponding member.
     * @param logtype
     * @param dict_vars
     * @param encoded_vars
     */
    explicit EncodedTextAst(
            std::string logtype,
            std::vector<std::string> dict_vars,
            std::vector<encoded_variable_t> encoded_vars
    )
            : m_logtype{std::move(logtype)},
              m_dict_vars{std::move(dict_vars)},
              m_encoded_vars{std::move(encoded_vars)} {}

    // Copying is disabled
    EncodedTextAst(EncodedTextAst const&) = delete;
    auto operator=(EncodedTextAst const&) -> EncodedTextAst& = delete;

    // Moving uses the compiler-generated member-wise implementations
    EncodedTextAst(EncodedTextAst&&) = default;
    auto operator=(EncodedTextAst&&) -> EncodedTextAst& = default;

    // Destructor
    ~EncodedTextAst() = default;

    // Accessors
    /**
     * @return A read-only reference to the stored logtype.
     */
    [[nodiscard]] auto get_logtype() const -> std::string const& {
        return m_logtype;
    }

    /**
     * @return A read-only reference to the stored dictionary variables.
     */
    [[nodiscard]] auto get_dict_vars() const -> std::vector<std::string> const& {
        return m_dict_vars;
    }

    /**
     * @return A read-only reference to the stored encoded variables.
     */
    [[nodiscard]] auto get_encoded_vars() const -> std::vector<encoded_variable_t> const& {
        return m_encoded_vars;
    }

private:
    // Data members (declaration order matches the constructor's initializer list)
    std::string m_logtype;
    std::vector<std::string> m_dict_vars;
    std::vector<encoded_variable_t> m_encoded_vars;
};

// Convenience alias: `EncodedTextAst` instantiated with `eight_byte_encoded_variable_t` as the
// encoded-variable type.
using EightByteEncodedTextAst = EncodedTextAst<eight_byte_encoded_variable_t>;
// Convenience alias: `EncodedTextAst` instantiated with `four_byte_encoded_variable_t` as the
// encoded-variable type.
using FourByteEncodedTextAst = EncodedTextAst<four_byte_encoded_variable_t>;
} // namespace clp::ir

#endif // CLP_IR_ENCODEDTEXTAST_HPP
25 changes: 7 additions & 18 deletions components/core/src/clp/ir/LogEvent.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
#define CLP_IR_LOGEVENT_HPP

#include <string>
#include <utility>
#include <vector>

#include "../Defs.h"
#include "EncodedTextAst.hpp"
#include "time_types.hpp"
#include "types.hpp"

Expand All @@ -20,38 +21,26 @@ class LogEvent {
LogEvent(
epoch_time_ms_t timestamp,
UtcOffset utc_offset,
std::string logtype,
std::vector<std::string> dict_vars,
std::vector<encoded_variable_t> encoded_vars
EncodedTextAst<encoded_variable_t> message
)
: m_timestamp{timestamp},
m_utc_offset{utc_offset},
m_logtype{std::move(logtype)},
m_dict_vars{std::move(dict_vars)},
m_encoded_vars{std::move(encoded_vars)} {}
m_message{std::move(message)} {}

// Methods
[[nodiscard]] auto get_timestamp() const -> epoch_time_ms_t { return m_timestamp; }

[[nodiscard]] auto get_utc_offset() const -> UtcOffset { return m_utc_offset; }

[[nodiscard]] auto get_logtype() const -> std::string const& { return m_logtype; }

[[nodiscard]] auto get_dict_vars() const -> std::vector<std::string> const& {
return m_dict_vars;
}

[[nodiscard]] auto get_encoded_vars() const -> std::vector<encoded_variable_t> const& {
return m_encoded_vars;
[[nodiscard]] auto get_message() const -> EncodedTextAst<encoded_variable_t> const& {
return m_message;
}

private:
// Variables
epoch_time_ms_t m_timestamp{0};
UtcOffset m_utc_offset{0};
std::string m_logtype;
std::vector<std::string> m_dict_vars;
std::vector<encoded_variable_t> m_encoded_vars;
EncodedTextAst<encoded_variable_t> m_message;
};
} // namespace clp::ir

Expand Down
7 changes: 6 additions & 1 deletion components/core/src/clp/ir/LogEventDeserializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "../ffi/ir_stream/decoding_methods.hpp"
#include "../ffi/ir_stream/protocol_constants.hpp"
#include "EncodedTextAst.hpp"
#include "types.hpp"

namespace clp::ir {
Expand Down Expand Up @@ -124,7 +125,11 @@ auto LogEventDeserializer<encoded_variable_t>::deserialize_log_event(
timestamp = m_prev_msg_timestamp;
}

return LogEvent<encoded_variable_t>{timestamp, m_utc_offset, logtype, dict_vars, encoded_vars};
return LogEvent<encoded_variable_t>{
timestamp,
m_utc_offset,
EncodedTextAst<encoded_variable_t>{logtype, dict_vars, encoded_vars}
};
}

// Explicitly declare template specializations so that we can define the template methods in this
Expand Down
2 changes: 1 addition & 1 deletion components/core/tests/test-ir_encoding_methods.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -912,7 +912,7 @@ TEMPLATE_TEST_CASE(
REQUIRE(log_event.get_utc_offset() == ref_log_event.get_utc_offset());
// We only compare the logtype since decoding messages from logtype + variables is not yet
// supported by our public interfaces
REQUIRE(log_event.get_logtype() == encoded_logtypes.at(log_event_idx));
REQUIRE(log_event.get_message().get_logtype() == encoded_logtypes.at(log_event_idx));
++log_event_idx;
}
auto result = log_event_deserializer.deserialize_log_event();
Expand Down

0 comments on commit b37bcd5

Please sign in to comment.