From 424517334bbc81b2f2f30ae8d406b6cff52eafb0 Mon Sep 17 00:00:00 2001 From: davemarco <83603688+davemarco@users.noreply.github.com> Date: Thu, 10 Oct 2024 15:46:22 -0400 Subject: [PATCH] Add log-level filtering methods and associated refactoring: (#12) - Move log-level parsing from the decode method to the deserialization method. - Remove unused range parameters from the deserialize method. --- src/clp_ffi_js/constants.hpp | 25 +++- src/clp_ffi_js/ir/LogEventWithLevel.hpp | 41 ++++++ src/clp_ffi_js/ir/StreamReader.cpp | 161 ++++++++++++++++-------- src/clp_ffi_js/ir/StreamReader.hpp | 45 +++++-- 4 files changed, 202 insertions(+), 70 deletions(-) create mode 100644 src/clp_ffi_js/ir/LogEventWithLevel.hpp diff --git a/src/clp_ffi_js/constants.hpp b/src/clp_ffi_js/constants.hpp index 9bf336a9..045227da 100644 --- a/src/clp_ffi_js/constants.hpp +++ b/src/clp_ffi_js/constants.hpp @@ -2,14 +2,31 @@ #define CLP_FFI_JS_CONSTANTS_HPP #include -#include +#include #include namespace clp_ffi_js { -constexpr size_t cLogLevelNone{0}; -constexpr size_t cValidLogLevelsBeginIdx{cLogLevelNone + 1}; +/** + * Enum of known log levels. + */ +enum class LogLevel : std::uint8_t { + NONE = 0, // This isn't a valid log level. + TRACE, + DEBUG, + INFO, + WARN, + ERROR, + FATAL, +}; +constexpr LogLevel cValidLogLevelsBeginIdx{LogLevel::TRACE}; + +/** + * Strings corresponding to `LogLevel`. + * + * NOTE: These must be kept in sync manually. + */ constexpr std::array cLogLevelNames{ - "NONE", // This should not be used. + "NONE", // This isn't a valid log level. "TRACE", "DEBUG", "INFO", diff --git a/src/clp_ffi_js/ir/LogEventWithLevel.hpp b/src/clp_ffi_js/ir/LogEventWithLevel.hpp new file mode 100644 index 00000000..22a00af3 --- /dev/null +++ b/src/clp_ffi_js/ir/LogEventWithLevel.hpp @@ -0,0 +1,41 @@ +#ifndef CLP_FFI_JS_IR_LOGEVENTWITHLEVEL_HPP +#define CLP_FFI_JS_IR_LOGEVENTWITHLEVEL_HPP + +#include + +#include +#include +#include +#include + +#include + +namespace clp_ffi_js::ir { +/** + * A class derived from `clp::ir::LogEvent` with an additional member for the log level. + * + * NOTE: Once we move to the next IR format, this class should no longer be necessary since each + * IR log event will contain a set of key-value pairs, one of which should be the log level. + * @tparam encoded_variable_t The type of encoded variables in the event + */ +template +class LogEventWithLevel : public clp::ir::LogEvent { +public: + // Constructors + LogEventWithLevel( + clp::ir::epoch_time_ms_t timestamp, + clp::UtcOffset utc_offset, + clp::ir::EncodedTextAst message, + LogLevel log_level + ) + : clp::ir::LogEvent{timestamp, utc_offset, std::move(message)}, + m_log_level{log_level} {} + + [[nodiscard]] auto get_log_level() const -> LogLevel { return m_log_level; } + +private: + LogLevel m_log_level; +}; +} // namespace clp_ffi_js::ir + +#endif // CLP_FFI_JS_IR_LOGEVENTWITHLEVEL_HPP diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/StreamReader.cpp index b5682a69..b9c86b6b 100644 --- a/src/clp_ffi_js/ir/StreamReader.cpp +++ b/src/clp_ffi_js/ir/StreamReader.cpp @@ -5,27 +5,29 @@ #include #include #include -#include +#include #include #include #include +#include #include #include #include #include #include -#include #include #include #include #include +#include #include #include #include #include #include +#include #include using namespace std::literals::string_literals; @@ -99,25 +101,47 @@ auto StreamReader::get_num_events_buffered() const -> size_t { return m_encoded_log_events.size(); } -auto StreamReader::deserialize_range(size_t begin_idx, size_t end_idx) -> size_t { - constexpr size_t cFullRangeEndIdx{0}; - if (0 != begin_idx || cFullRangeEndIdx != end_idx) { - throw ClpFfiJsException{ - clp::ErrorCode::ErrorCode_Unsupported, - __FILENAME__, - __LINE__, - "Partial range deserialization is not yet supported." - }; +auto StreamReader::get_filtered_log_event_map() const -> FilteredLogEventMapTsType { + if (false == m_filtered_log_event_map.has_value()) { + return FilteredLogEventMapTsType{emscripten::val::null()}; } - if (nullptr != m_stream_reader_data_context) { - constexpr size_t cDefaultNumReservedLogEvents{500'000}; - m_encoded_log_events.reserve(cDefaultNumReservedLogEvents); - while (true) { - auto result{m_stream_reader_data_context->get_deserializer().deserialize_log_event()}; - if (false == result.has_error()) { - m_encoded_log_events.emplace_back(std::move(result.value())); - continue; - } + + return FilteredLogEventMapTsType{emscripten::val::array(m_filtered_log_event_map.value())}; +} + +void StreamReader::filter_log_events(emscripten::val const& log_level_filter) { + if (log_level_filter.isNull()) { + m_filtered_log_event_map.reset(); + return; + } + + m_filtered_log_event_map.emplace(); + auto filter_levels{emscripten::vecFromJSArray>(log_level_filter + )}; + for (size_t log_event_idx = 0; log_event_idx < m_encoded_log_events.size(); ++log_event_idx) { + auto const& log_event = m_encoded_log_events[log_event_idx]; + if (std::ranges::find( + filter_levels, + clp::enum_to_underlying_type(log_event.get_log_level()) + ) + != filter_levels.end()) + { + m_filtered_log_event_map->emplace_back(log_event_idx); + } + } +} + +auto StreamReader::deserialize_stream() -> size_t { + if (nullptr == m_stream_reader_data_context) { + return m_encoded_log_events.size(); + } + + constexpr size_t cDefaultNumReservedLogEvents{500'000}; + m_encoded_log_events.reserve(cDefaultNumReservedLogEvents); + + while (true) { + auto result{m_stream_reader_data_context->get_deserializer().deserialize_log_event()}; + if (result.has_error()) { auto const error{result.error()}; if (std::errc::no_message_available == error) { break; @@ -133,51 +157,76 @@ auto StreamReader::deserialize_range(size_t begin_idx, size_t end_idx) -> size_t "Failed to deserialize: "s + error.category().name() + ":" + error.message() }; } - m_stream_reader_data_context.reset(nullptr); - } + auto const& log_event = result.value(); + auto const& message = log_event.get_message(); + auto const& logtype = message.get_logtype(); + constexpr size_t cLogLevelPositionInMessages{1}; + LogLevel log_level{LogLevel::NONE}; + if (logtype.length() > cLogLevelPositionInMessages) { + // NOLINTNEXTLINE(readability-qualified-auto) + auto const log_level_name_it{std::find_if( + cLogLevelNames.begin() + static_cast(cValidLogLevelsBeginIdx), + cLogLevelNames.end(), + [&](std::string_view level) { + return logtype.substr(cLogLevelPositionInMessages).starts_with(level); + } + )}; + if (log_level_name_it != cLogLevelNames.end()) { + log_level = static_cast( + std::distance(cLogLevelNames.begin(), log_level_name_it) + ); + } + } + + auto log_viewer_event{LogEventWithLevel( + log_event.get_timestamp(), + log_event.get_utc_offset(), + message, + log_level + )}; + m_encoded_log_events.emplace_back(std::move(log_viewer_event)); + } + m_stream_reader_data_context.reset(nullptr); return m_encoded_log_events.size(); } -auto StreamReader::decode_range(size_t begin_idx, size_t end_idx) const -> DecodedResultsTsType { - if (m_encoded_log_events.size() < end_idx || begin_idx >= end_idx) { - return DecodedResultsTsType(emscripten::val::null()); +auto StreamReader::decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const + -> DecodedResultsTsType { + if (use_filter && false == m_filtered_log_event_map.has_value()) { + return DecodedResultsTsType{emscripten::val::null()}; + } + + size_t length{0}; + if (use_filter) { + length = m_filtered_log_event_map->size(); + } else { + length = m_encoded_log_events.size(); + } + if (length < end_idx || begin_idx > end_idx) { + return DecodedResultsTsType{emscripten::val::null()}; } - std::span const log_events_span{ - m_encoded_log_events.begin() - + static_cast(begin_idx), - m_encoded_log_events.begin() - + static_cast(end_idx) - }; std::string message; constexpr size_t cDefaultReservedMessageLength{512}; message.reserve(cDefaultReservedMessageLength); - size_t log_num{begin_idx + 1}; auto const results{emscripten::val::array()}; - for (auto const& log_event : log_events_span) { - message.clear(); + + for (size_t i = begin_idx; i < end_idx; ++i) { + size_t log_event_idx{0}; + if (use_filter) { + log_event_idx = m_filtered_log_event_map->at(i); + } else { + log_event_idx = i; + } + auto const& log_event{m_encoded_log_events[log_event_idx]}; auto const parsed{log_event.get_message().decode_and_unparse()}; if (false == parsed.has_value()) { SPDLOG_ERROR("Failed to decode message."); break; } - message.append(parsed.value()); - - constexpr size_t cLogLevelPositionInMessages{1}; - size_t log_level{cLogLevelNone}; - // NOLINTNEXTLINE(readability-qualified-auto) - auto const log_level_name_it{std::find_if( - cLogLevelNames.begin() + cValidLogLevelsBeginIdx, - cLogLevelNames.end(), - [&](std::string_view level) { - return message.substr(cLogLevelPositionInMessages).starts_with(level); - } - )}; - if (log_level_name_it != cLogLevelNames.end()) { - log_level = std::distance(cLogLevelNames.begin(), log_level_name_it); - } + message = parsed.value(); m_ts_pattern.insert_formatted_timestamp(log_event.get_timestamp(), message); @@ -186,10 +235,9 @@ auto StreamReader::decode_range(size_t begin_idx, size_t end_idx) const -> Decod results.as_handle(), message.c_str(), log_event.get_timestamp(), - log_level, - log_num + log_event.get_log_level(), + log_event_idx + 1 ); - ++log_num; } return DecodedResultsTsType(results); @@ -211,6 +259,8 @@ EMSCRIPTEN_BINDINGS(ClpIrStreamReader) { emscripten::register_type( "Array<[string, number, number, number]>" ); + emscripten::register_type("number[] | null"); + emscripten::class_("ClpIrStreamReader") .constructor( &clp_ffi_js::ir::StreamReader::create, @@ -220,7 +270,12 @@ EMSCRIPTEN_BINDINGS(ClpIrStreamReader) { "getNumEventsBuffered", &clp_ffi_js::ir::StreamReader::get_num_events_buffered ) - .function("deserializeRange", &clp_ffi_js::ir::StreamReader::deserialize_range) + .function( + "getFilteredLogEventMap", + &clp_ffi_js::ir::StreamReader::get_filtered_log_event_map + ) + .function("filterLogEvents", &clp_ffi_js::ir::StreamReader::filter_log_events) + .function("deserializeStream", &clp_ffi_js::ir::StreamReader::deserialize_stream) .function("decodeRange", &clp_ffi_js::ir::StreamReader::decode_range); } } // namespace diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 15feda71..dec6c360 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -3,19 +3,21 @@ #include #include +#include #include -#include #include #include #include #include +#include #include namespace clp_ffi_js::ir { EMSCRIPTEN_DECLARE_VAL_TYPE(DataArrayTsType); EMSCRIPTEN_DECLARE_VAL_TYPE(DecodedResultsTsType); +EMSCRIPTEN_DECLARE_VAL_TYPE(FilteredLogEventMapTsType); /** * Class to deserialize and decode Zstandard-compressed CLP IR streams as well as format decoded @@ -23,6 +25,12 @@ EMSCRIPTEN_DECLARE_VAL_TYPE(DecodedResultsTsType); */ class StreamReader { public: + /** + * Mapping between an index in the filtered log events collection to an index in the unfiltered + * log events collection. + */ + using FilteredLogEventsMap = std::optional>; + /** * Creates a StreamReader to read from the given array. * @@ -50,32 +58,42 @@ class StreamReader { [[nodiscard]] auto get_num_events_buffered() const -> size_t; /** - * Deserializes and buffers log events in the range `[beginIdx, endIdx)`. After the stream has - * been exhausted, it will be deallocated. + * @return The filtered log events map. + */ + [[nodiscard]] auto get_filtered_log_event_map() const -> FilteredLogEventMapTsType; + + /** + * Generates a filtered collection from all log events. * - * NOTE: Currently, this class only supports deserializing the full range of log events in the - * stream. + * @param log_level_filter Array of selected log levels + */ + void filter_log_events(emscripten::val const& log_level_filter); + + /** + * Deserializes all log events in the stream. After the stream has been exhausted, it will be + * deallocated. * - * @param begin_idx - * @param end_idx * @return The number of successfully deserialized ("valid") log events. */ - [[nodiscard]] auto deserialize_range(size_t begin_idx, size_t end_idx) -> size_t; + [[nodiscard]] auto deserialize_stream() -> size_t; /** - * Decodes the deserialized log events in the range `[beginIdx, endIdx)`. + * Decodes log events in the range `[beginIdx, endIdx)` of the filtered or unfiltered + * (depending on the value of `useFilter`) log events collection. * * @param begin_idx * @param end_idx + * @param use_filter Whether to decode from the filtered or unfiltered log events collection. * @return An array where each element is a decoded log event represented by an array of: * - The log event's message * - The log event's timestamp as milliseconds since the Unix epoch * - The log event's log level as an integer that indexes into `cLogLevelNames` * - The log event's number (1-indexed) in the stream - * @return null if any log event in the range doesn't exist (e.g., the range exceeds the number - * of log events in the file). + * @return null if any log event in the range doesn't exist (e.g. the range exceeds the number + * of log events in the collection). */ - [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx) const -> DecodedResultsTsType; + [[nodiscard]] auto + decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType; private: // Constructor @@ -83,9 +101,10 @@ class StreamReader { stream_reader_data_context); // Variables - std::vector> m_encoded_log_events; + std::vector> m_encoded_log_events; std::unique_ptr> m_stream_reader_data_context; + FilteredLogEventsMap m_filtered_log_event_map; clp::TimestampPattern m_ts_pattern; }; } // namespace clp_ffi_js::ir