From acd45637e135dd7e42c18d95bc55cbad0e42c54c Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Tue, 1 Oct 2024 22:16:24 +0800 Subject: [PATCH 01/18] Add initial code for IRv2 decoder. --- CMakeLists.txt | 12 ++- src/clp_ffi_js/ir/StreamReader.cpp | 74 ++++--------------- src/clp_ffi_js/ir/StreamReader.hpp | 12 +-- src/clp_ffi_js/ir/StreamReaderDataContext.hpp | 18 +++-- src/submodules/clp | 2 +- 5 files changed, 38 insertions(+), 80 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a4b55372..d8bd1749 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -88,7 +88,10 @@ if(CMAKE_BUILD_TYPE MATCHES "Release") -sENVIRONMENT=worker ) else() - set(CLP_FFI_JS_EXTRA_LINKER_FLAGS -sENVIRONMENT=node) + set(CLP_FFI_JS_EXTRA_LINKER_FLAGS + -sENVIRONMENT=node + -sNO_DISABLE_EXCEPTION_CATCHING + ) endif() message( "CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}: Extra linker flags: ${CLP_FFI_JS_EXTRA_LINKER_FLAGS}" @@ -128,12 +131,15 @@ set(CLP_FFI_JS_SRC_MAIN src/clp_ffi_js/ir/StreamReader.cpp) set(CLP_FFI_JS_SRC_CLP_CORE src/submodules/clp/components/core/src/clp/ffi/ir_stream/decoding_methods.cpp + src/submodules/clp/components/core/src/clp/ffi/ir_stream/Deserializer.cpp + src/submodules/clp/components/core/src/clp/ffi/ir_stream/ir_unit_deserialization_methods.cpp + src/submodules/clp/components/core/src/clp/ffi/ir_stream/utils.cpp + src/submodules/clp/components/core/src/clp/ffi/KeyValuePairLogEvent.cpp + src/submodules/clp/components/core/src/clp/ffi/SchemaTree.cpp src/submodules/clp/components/core/src/clp/ir/EncodedTextAst.cpp - src/submodules/clp/components/core/src/clp/ir/LogEventDeserializer.cpp src/submodules/clp/components/core/src/clp/ReadOnlyMemoryMappedFile.cpp src/submodules/clp/components/core/src/clp/ReaderInterface.cpp src/submodules/clp/components/core/src/clp/streaming_compression/zstd/Decompressor.cpp - src/submodules/clp/components/core/src/clp/TimestampPattern.cpp ) set(CLP_FFI_JS_SRC_FMT src/submodules/fmt/src/format.cc) diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/StreamReader.cpp index b5682a69..90a5fc8e 100644 --- a/src/clp_ffi_js/ir/StreamReader.cpp +++ b/src/clp_ffi_js/ir/StreamReader.cpp @@ -15,8 +15,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -46,31 +46,8 @@ auto StreamReader::create(DataArrayTsType const& data_array) -> StreamReader { auto zstd_decompressor{std::make_unique()}; zstd_decompressor->open(data_buffer.data(), length); - bool is_four_bytes_encoding{true}; - if (auto const err{ - clp::ffi::ir_stream::get_encoding_type(*zstd_decompressor, is_four_bytes_encoding) - }; - clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success != err) - { - SPDLOG_CRITICAL("Failed to decode encoding type, err={}", err); - throw ClpFfiJsException{ - clp::ErrorCode::ErrorCode_MetadataCorrupted, - __FILENAME__, - __LINE__, - "Failed to decode encoding type." - }; - } - if (false == is_four_bytes_encoding) { - throw ClpFfiJsException{ - clp::ErrorCode::ErrorCode_Unsupported, - __FILENAME__, - __LINE__, - "IR stream uses unsupported encoding." - }; - } - auto result{ - clp::ir::LogEventDeserializer::create(*zstd_decompressor) + clp::ffi::ir_stream::Deserializer::create(*zstd_decompressor) }; if (result.has_error()) { auto const error_code{result.error()}; @@ -87,7 +64,7 @@ auto StreamReader::create(DataArrayTsType const& data_array) -> StreamReader { }; } - StreamReaderDataContext stream_reader_data_context{ + StreamReaderDataContext stream_reader_data_context{ std::move(data_buffer), std::move(zstd_decompressor), std::move(result.value()) @@ -112,8 +89,9 @@ auto StreamReader::deserialize_range(size_t begin_idx, size_t end_idx) -> size_t if (nullptr != m_stream_reader_data_context) { constexpr size_t cDefaultNumReservedLogEvents{500'000}; m_encoded_log_events.reserve(cDefaultNumReservedLogEvents); + auto &reader{m_stream_reader_data_context->get_reader()}; while (true) { - auto result{m_stream_reader_data_context->get_deserializer().deserialize_log_event()}; + auto result{m_stream_reader_data_context->get_deserializer().deserialize_to_next_log_event(reader)}; if (false == result.has_error()) { m_encoded_log_events.emplace_back(std::move(result.value())); continue; @@ -150,43 +128,19 @@ auto StreamReader::decode_range(size_t begin_idx, size_t end_idx) const -> Decod m_encoded_log_events.begin() + static_cast(end_idx) }; - std::string message; - constexpr size_t cDefaultReservedMessageLength{512}; - message.reserve(cDefaultReservedMessageLength); size_t log_num{begin_idx + 1}; auto const results{emscripten::val::array()}; for (auto const& log_event : log_events_span) { - message.clear(); - - auto const parsed{log_event.get_message().decode_and_unparse()}; - if (false == parsed.has_value()) { + auto const json{log_event.serialize_to_json()}; + if (false == json.has_value()) { SPDLOG_ERROR("Failed to decode message."); break; } - message.append(parsed.value()); - - constexpr size_t cLogLevelPositionInMessages{1}; - size_t log_level{cLogLevelNone}; - // NOLINTNEXTLINE(readability-qualified-auto) - auto const log_level_name_it{std::find_if( - cLogLevelNames.begin() + cValidLogLevelsBeginIdx, - cLogLevelNames.end(), - [&](std::string_view level) { - return message.substr(cLogLevelPositionInMessages).starts_with(level); - } - )}; - if (log_level_name_it != cLogLevelNames.end()) { - log_level = std::distance(cLogLevelNames.begin(), log_level_name_it); - } - - m_ts_pattern.insert_formatted_timestamp(log_event.get_timestamp(), message); EM_ASM( - { Emval.toValue($0).push([UTF8ToString($1), $2, $3, $4]); }, + { Emval.toValue($0).push([UTF8ToString($1), $2]); }, results.as_handle(), - message.c_str(), - log_event.get_timestamp(), - log_level, + json.value().dump().c_str(), log_num ); ++log_num; @@ -196,20 +150,18 @@ auto StreamReader::decode_range(size_t begin_idx, size_t end_idx) const -> Decod } StreamReader::StreamReader( - StreamReaderDataContext&& stream_reader_data_context + StreamReaderDataContext&& stream_reader_data_context ) - : m_stream_reader_data_context{std::make_unique< - StreamReaderDataContext>( + : m_stream_reader_data_context{std::make_unique( std::move(stream_reader_data_context) - )}, - m_ts_pattern{m_stream_reader_data_context->get_deserializer().get_timestamp_pattern()} {} + )} {} } // namespace clp_ffi_js::ir namespace { EMSCRIPTEN_BINDINGS(ClpIrStreamReader) { emscripten::register_type("Uint8Array"); emscripten::register_type( - "Array<[string, number, number, number]>" + "Array<[string, number]>" ); emscripten::class_("ClpIrStreamReader") .constructor( diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 15feda71..6e0d84c5 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -6,8 +6,7 @@ #include #include -#include -#include +#include #include #include @@ -79,14 +78,11 @@ class StreamReader { private: // Constructor - explicit StreamReader(StreamReaderDataContext&& - stream_reader_data_context); + explicit StreamReader(StreamReaderDataContext&& stream_reader_data_context); // Variables - std::vector> m_encoded_log_events; - std::unique_ptr> - m_stream_reader_data_context; - clp::TimestampPattern m_ts_pattern; + std::vector m_encoded_log_events; + std::unique_ptr m_stream_reader_data_context; }; } // namespace clp_ffi_js::ir diff --git a/src/clp_ffi_js/ir/StreamReaderDataContext.hpp b/src/clp_ffi_js/ir/StreamReaderDataContext.hpp index 091b0b05..3feef3b7 100644 --- a/src/clp_ffi_js/ir/StreamReaderDataContext.hpp +++ b/src/clp_ffi_js/ir/StreamReaderDataContext.hpp @@ -5,25 +5,22 @@ #include #include -#include -#include #include +#include namespace clp_ffi_js::ir { /** * The data context for a `StreamReader`. It encapsulates a chain of the following resources: * A `clp::ir::LogEventDeserializer` that reads from a * `clp::streaming_compression::zstd::Decompressor`, which in turn reads from a `clp::Array`. - * @tparam encoded_variable_t Type of encoded variables encoded in the stream. */ -template class StreamReaderDataContext { public: // Constructors StreamReaderDataContext( clp::Array&& data_buffer, std::unique_ptr&& zstd_decompressor, - clp::ir::LogEventDeserializer deserializer + clp::ffi::ir_stream::Deserializer deserializer ) : m_data_buffer{std::move(data_buffer)}, m_zstd_decompressor{std::move(zstd_decompressor)}, @@ -41,17 +38,24 @@ class StreamReaderDataContext { ~StreamReaderDataContext() = default; // Methods + /** + * @return A reference to the reader. + */ + [[nodiscard]] auto get_reader() const -> clp::streaming_compression::zstd::Decompressor& { + return *m_zstd_decompressor; + } + /** * @return A reference to the deserializer. */ - [[nodiscard]] auto get_deserializer() -> clp::ir::LogEventDeserializer& { + [[nodiscard]] auto get_deserializer() -> clp::ffi::ir_stream::Deserializer& { return m_deserializer; } private: clp::Array m_data_buffer; std::unique_ptr m_zstd_decompressor; - clp::ir::LogEventDeserializer m_deserializer; + clp::ffi::ir_stream::Deserializer m_deserializer; }; } // namespace clp_ffi_js::ir diff --git a/src/submodules/clp b/src/submodules/clp index 86299ca2..086c2b79 160000 --- a/src/submodules/clp +++ b/src/submodules/clp @@ -1 +1 @@ -Subproject commit 86299ca2907565e09cb10c2ddd3661ad1ceb6cb0 +Subproject commit 086c2b79edd91440b78c2c048ecdf957865e2d34 From d8839664f9fce6f997425f84ab3ddbf883dd3c50 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Tue, 8 Oct 2024 19:33:37 +0800 Subject: [PATCH 02/18] Revert changes to enable exception catching linker flags - Update CMakeLists.txt --- CMakeLists.txt | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d8bd1749..ef70c324 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -88,10 +88,7 @@ if(CMAKE_BUILD_TYPE MATCHES "Release") -sENVIRONMENT=worker ) else() - set(CLP_FFI_JS_EXTRA_LINKER_FLAGS - -sENVIRONMENT=node - -sNO_DISABLE_EXCEPTION_CATCHING - ) + set(CLP_FFI_JS_EXTRA_LINKER_FLAGS -sENVIRONMENT=node) endif() message( "CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}: Extra linker flags: ${CLP_FFI_JS_EXTRA_LINKER_FLAGS}" From 322b60a6c7ff3d5ddb45191cf819b04cbaa26f28 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Tue, 8 Oct 2024 20:10:25 +0800 Subject: [PATCH 03/18] Switch clp to OSS' main. --- src/submodules/clp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/submodules/clp b/src/submodules/clp index 086c2b79..54962a07 160000 --- a/src/submodules/clp +++ b/src/submodules/clp @@ -1 +1 @@ -Subproject commit 086c2b79edd91440b78c2c048ecdf957865e2d34 +Subproject commit 54962a0708daa443c9bdfb3462ed464b65b62886 From ab37228d1b16b72ba8e31806c78b0bbb2251a89b Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Tue, 8 Oct 2024 20:18:31 +0800 Subject: [PATCH 04/18] Add code to parse and validate the CLP IR version. --- src/clp_ffi_js/ir/StreamReader.cpp | 53 ++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/StreamReader.cpp index 90a5fc8e..10abe351 100644 --- a/src/clp_ffi_js/ir/StreamReader.cpp +++ b/src/clp_ffi_js/ir/StreamReader.cpp @@ -46,6 +46,59 @@ auto StreamReader::create(DataArrayTsType const& data_array) -> StreamReader { auto zstd_decompressor{std::make_unique()}; zstd_decompressor->open(data_buffer.data(), length); + bool is_four_byte_encoding{}; + auto const get_encoding_type_result{ + clp::ffi::ir_stream::get_encoding_type(*zstd_decompressor, is_four_byte_encoding)}; + if (clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success != get_encoding_type_result) { + SPDLOG_CRITICAL( + "Failed to get encoding type: {}", + get_encoding_type_result + ); + throw ClpFfiJsException{ + clp::ErrorCode::ErrorCode_Failure, + __FILENAME__, + __LINE__, + "Failed to get encoding type." + }; + } + clp::ffi::ir_stream::encoded_tag_t metadata_type{}; + std::vector metadata_bytes; + auto const deserialize_preamble_result{ + clp::ffi::ir_stream::deserialize_preamble(*zstd_decompressor, + metadata_type, + metadata_bytes)}; + if (clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success != deserialize_preamble_result) { + SPDLOG_CRITICAL( + "Failed to deserialize preamble for version reading: {}", + deserialize_preamble_result + ); + throw ClpFfiJsException{ + clp::ErrorCode::ErrorCode_Failure, + __FILENAME__, + __LINE__, + "Failed to deserialize preamble for version reading." + }; + } + std::string_view const metadata_view{ + clp::size_checked_pointer_cast(metadata_bytes.data()), + metadata_bytes.size() + }; + nlohmann::json const metadata = nlohmann::json::parse(metadata_view); + auto const &version{metadata.at(clp::ffi::ir_stream::cProtocol::Metadata::VersionKey)}; + if (version == "v0.0.0") { + SPDLOG_CRITICAL("this is irv1; gg"); + throw ClpFfiJsException{ + clp::ErrorCode::ErrorCode_Failure, + __FILENAME__, + __LINE__, + "this is irv1; gg." + }; + } + SPDLOG_INFO("The version is {}", version); + + // Seek from the beginning of the file since the metadata bytes have been consumed but the Deserializer()'s factory + // function does not expect so. + zstd_decompressor->seek_from_begin(0); auto result{ clp::ffi::ir_stream::Deserializer::create(*zstd_decompressor) }; From f2ed47ef242ea19f2f2aff5c9df520c09cef0639 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Tue, 8 Oct 2024 20:53:56 +0800 Subject: [PATCH 05/18] Templatize StreamReaderDataContext. --- src/clp_ffi_js/ir/StreamReader.cpp | 4 ++-- src/clp_ffi_js/ir/StreamReader.hpp | 6 ++++-- src/clp_ffi_js/ir/StreamReaderDataContext.hpp | 11 +++++++---- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/StreamReader.cpp index 10abe351..a16577d7 100644 --- a/src/clp_ffi_js/ir/StreamReader.cpp +++ b/src/clp_ffi_js/ir/StreamReader.cpp @@ -203,9 +203,9 @@ auto StreamReader::decode_range(size_t begin_idx, size_t end_idx) const -> Decod } StreamReader::StreamReader( - StreamReaderDataContext&& stream_reader_data_context + StreamReaderDataContext&& stream_reader_data_context ) - : m_stream_reader_data_context{std::make_unique( + : m_stream_reader_data_context{std::make_unique>( std::move(stream_reader_data_context) )} {} } // namespace clp_ffi_js::ir diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 6e0d84c5..a7e7e4a1 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -77,12 +77,14 @@ class StreamReader { [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx) const -> DecodedResultsTsType; private: + using deserializer_t = clp::ffi::ir_stream::Deserializer; + // Constructor - explicit StreamReader(StreamReaderDataContext&& stream_reader_data_context); + explicit StreamReader(StreamReaderDataContext&& stream_reader_data_context); // Variables std::vector m_encoded_log_events; - std::unique_ptr m_stream_reader_data_context; + std::unique_ptr> m_stream_reader_data_context; }; } // namespace clp_ffi_js::ir diff --git a/src/clp_ffi_js/ir/StreamReaderDataContext.hpp b/src/clp_ffi_js/ir/StreamReaderDataContext.hpp index 3feef3b7..0add0926 100644 --- a/src/clp_ffi_js/ir/StreamReaderDataContext.hpp +++ b/src/clp_ffi_js/ir/StreamReaderDataContext.hpp @@ -11,16 +11,19 @@ namespace clp_ffi_js::ir { /** * The data context for a `StreamReader`. It encapsulates a chain of the following resources: - * A `clp::ir::LogEventDeserializer` that reads from a + * A `clp::ir::LogEventDeserializer` / `clp::ffi::ir_stream::Deserializer` that reads from a * `clp::streaming_compression::zstd::Decompressor`, which in turn reads from a `clp::Array`. + * +* @tparam deserializer_t Type of deserializer for decoding the stream. */ +template class StreamReaderDataContext { public: // Constructors StreamReaderDataContext( clp::Array&& data_buffer, std::unique_ptr&& zstd_decompressor, - clp::ffi::ir_stream::Deserializer deserializer + deserializer_t deserializer ) : m_data_buffer{std::move(data_buffer)}, m_zstd_decompressor{std::move(zstd_decompressor)}, @@ -48,14 +51,14 @@ class StreamReaderDataContext { /** * @return A reference to the deserializer. */ - [[nodiscard]] auto get_deserializer() -> clp::ffi::ir_stream::Deserializer& { + [[nodiscard]] auto get_deserializer() -> deserializer_t& { return m_deserializer; } private: clp::Array m_data_buffer; std::unique_ptr m_zstd_decompressor; - clp::ffi::ir_stream::Deserializer m_deserializer; + deserializer_t m_deserializer; }; } // namespace clp_ffi_js::ir From 9062027bcdc22f8a7ffc19a78f79ab20fe9ce54a Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Tue, 8 Oct 2024 21:04:05 +0800 Subject: [PATCH 06/18] Rename IRv2 specialized StreamReader -> KVPairIRStreamReader. --- CMakeLists.txt | 2 +- ...eamReader.cpp => KVPairIRStreamReader.cpp} | 26 +++++++++---------- ...eamReader.hpp => KVPairIRStreamReader.hpp} | 22 ++++++++-------- 3 files changed, 25 insertions(+), 25 deletions(-) rename src/clp_ffi_js/ir/{StreamReader.cpp => KVPairIRStreamReader.cpp} (88%) rename src/clp_ffi_js/ir/{StreamReader.hpp => KVPairIRStreamReader.hpp} (80%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6a48fa4c..44c2586a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,7 +112,7 @@ target_include_directories( target_include_directories(${CLP_FFI_JS_BIN_NAME} PRIVATE src/) -set(CLP_FFI_JS_SRC_MAIN src/clp_ffi_js/ir/StreamReader.cpp) +set(CLP_FFI_JS_SRC_MAIN src/clp_ffi_js/ir/KVPairIRStreamReader.cpp) set(CLP_FFI_JS_SRC_CLP_CORE src/submodules/clp/components/core/src/clp/ffi/ir_stream/decoding_methods.cpp diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp similarity index 88% rename from src/clp_ffi_js/ir/StreamReader.cpp rename to src/clp_ffi_js/ir/KVPairIRStreamReader.cpp index a16577d7..53bf9867 100644 --- a/src/clp_ffi_js/ir/StreamReader.cpp +++ b/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp @@ -1,4 +1,4 @@ -#include "StreamReader.hpp" +#include "KVPairIRStreamReader.hpp" #include #include @@ -32,9 +32,9 @@ using namespace std::literals::string_literals; using clp::ir::four_byte_encoded_variable_t; namespace clp_ffi_js::ir { -auto StreamReader::create(DataArrayTsType const& data_array) -> StreamReader { +auto KVPairIRStreamReader::create(DataArrayTsType const& data_array) -> KVPairIRStreamReader { auto const length{data_array["length"].as()}; - SPDLOG_INFO("StreamReader::create: got buffer of length={}", length); + SPDLOG_INFO("KVPairIRStreamReader::create: got buffer of length={}", length); // Copy array from JavaScript to C++ clp::Array data_buffer{length}; @@ -122,14 +122,14 @@ auto StreamReader::create(DataArrayTsType const& data_array) -> StreamReader { std::move(zstd_decompressor), std::move(result.value()) }; - return StreamReader{std::move(stream_reader_data_context)}; + return KVPairIRStreamReader{std::move(stream_reader_data_context)}; } -auto StreamReader::get_num_events_buffered() const -> size_t { +auto KVPairIRStreamReader::get_num_events_buffered() const -> size_t { return m_encoded_log_events.size(); } -auto StreamReader::deserialize_range(size_t begin_idx, size_t end_idx) -> size_t { +auto KVPairIRStreamReader::deserialize_range(size_t begin_idx, size_t end_idx) -> size_t { constexpr size_t cFullRangeEndIdx{0}; if (0 != begin_idx || cFullRangeEndIdx != end_idx) { throw ClpFfiJsException{ @@ -170,7 +170,7 @@ auto StreamReader::deserialize_range(size_t begin_idx, size_t end_idx) -> size_t return m_encoded_log_events.size(); } -auto StreamReader::decode_range(size_t begin_idx, size_t end_idx) const -> DecodedResultsTsType { +auto KVPairIRStreamReader::decode_range(size_t begin_idx, size_t end_idx) const -> DecodedResultsTsType { if (m_encoded_log_events.size() < end_idx || begin_idx >= end_idx) { return DecodedResultsTsType(emscripten::val::null()); } @@ -202,7 +202,7 @@ auto StreamReader::decode_range(size_t begin_idx, size_t end_idx) const -> Decod return DecodedResultsTsType(results); } -StreamReader::StreamReader( +KVPairIRStreamReader::KVPairIRStreamReader( StreamReaderDataContext&& stream_reader_data_context ) : m_stream_reader_data_context{std::make_unique>( @@ -216,16 +216,16 @@ EMSCRIPTEN_BINDINGS(ClpIrStreamReader) { emscripten::register_type( "Array<[string, number]>" ); - emscripten::class_("ClpIrStreamReader") + emscripten::class_("ClpKVPairIRStreamReader") .constructor( - &clp_ffi_js::ir::StreamReader::create, + &clp_ffi_js::ir::KVPairIRStreamReader::create, emscripten::return_value_policy::take_ownership() ) .function( "getNumEventsBuffered", - &clp_ffi_js::ir::StreamReader::get_num_events_buffered + &clp_ffi_js::ir::KVPairIRStreamReader::get_num_events_buffered ) - .function("deserializeRange", &clp_ffi_js::ir::StreamReader::deserialize_range) - .function("decodeRange", &clp_ffi_js::ir::StreamReader::decode_range); + .function("deserializeRange", &clp_ffi_js::ir::KVPairIRStreamReader::deserialize_range) + .function("decodeRange", &clp_ffi_js::ir::KVPairIRStreamReader::decode_range); } } // namespace diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp similarity index 80% rename from src/clp_ffi_js/ir/StreamReader.hpp rename to src/clp_ffi_js/ir/KVPairIRStreamReader.hpp index a7e7e4a1..5fecc80c 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp @@ -1,5 +1,5 @@ -#ifndef CLP_FFI_JS_IR_STREAM_READER_HPP -#define CLP_FFI_JS_IR_STREAM_READER_HPP +#ifndef CLP_FFI_JS_KV_PAIR_IR_STREAM_READER_HPP +#define CLP_FFI_JS_KV_PAIR_IR_STREAM_READER_HPP #include #include @@ -20,7 +20,7 @@ EMSCRIPTEN_DECLARE_VAL_TYPE(DecodedResultsTsType); * Class to deserialize and decode Zstandard-compressed CLP IR streams as well as format decoded * log events. */ -class StreamReader { +class KVPairIRStreamReader { public: /** * Creates a StreamReader to read from the given array. @@ -29,19 +29,19 @@ class StreamReader { * @return The created instance. * @throw ClpFfiJsException if any error occurs. */ - [[nodiscard]] static auto create(DataArrayTsType const& data_array) -> StreamReader; + [[nodiscard]] static auto create(DataArrayTsType const& data_array) -> KVPairIRStreamReader; // Destructor - ~StreamReader() = default; + ~KVPairIRStreamReader() = default; // Disable copy constructor and assignment operator - StreamReader(StreamReader const&) = delete; - auto operator=(StreamReader const&) -> StreamReader& = delete; + KVPairIRStreamReader(KVPairIRStreamReader const&) = delete; + auto operator=(KVPairIRStreamReader const&) -> KVPairIRStreamReader& = delete; // Define default move constructor - StreamReader(StreamReader&&) = default; + KVPairIRStreamReader(KVPairIRStreamReader&&) = default; // Delete move assignment operator since it's also disabled in `clp::ir::LogEventDeserializer`. - auto operator=(StreamReader&&) -> StreamReader& = delete; + auto operator=(KVPairIRStreamReader&&) -> KVPairIRStreamReader& = delete; /** * @return The number of events buffered. @@ -80,7 +80,7 @@ class StreamReader { using deserializer_t = clp::ffi::ir_stream::Deserializer; // Constructor - explicit StreamReader(StreamReaderDataContext&& stream_reader_data_context); + explicit KVPairIRStreamReader(StreamReaderDataContext&& stream_reader_data_context); // Variables std::vector m_encoded_log_events; @@ -88,4 +88,4 @@ class StreamReader { }; } // namespace clp_ffi_js::ir -#endif // CLP_FFI_JS_IR_STREAM_READER_HPP +#endif // CLP_FFI_JS_KV_PAIR_IR_STREAM_READER_HPP From 0f2596260971b6d361b77a97857ecfab1bbd76b3 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Thu, 10 Oct 2024 22:14:29 +0800 Subject: [PATCH 07/18] Create a new StreamReader base class and refactor KVPairIRStreamReader to inherit from it. --- CMakeLists.txt | 5 +- src/clp_ffi_js/ir/KVPairIRStreamReader.cpp | 85 ++++----------- src/clp_ffi_js/ir/KVPairIRStreamReader.hpp | 27 +++-- src/clp_ffi_js/ir/StreamReader.cpp | 101 ++++++++++++++++++ src/clp_ffi_js/ir/StreamReader.hpp | 77 +++++++++++++ src/clp_ffi_js/ir/StreamReaderDataContext.hpp | 1 + 6 files changed, 218 insertions(+), 78 deletions(-) create mode 100644 src/clp_ffi_js/ir/StreamReader.cpp create mode 100644 src/clp_ffi_js/ir/StreamReader.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 44c2586a..f02e063e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,7 +112,10 @@ target_include_directories( target_include_directories(${CLP_FFI_JS_BIN_NAME} PRIVATE src/) -set(CLP_FFI_JS_SRC_MAIN src/clp_ffi_js/ir/KVPairIRStreamReader.cpp) +set(CLP_FFI_JS_SRC_MAIN + src/clp_ffi_js/ir/KVPairIRStreamReader.cpp + src/clp_ffi_js/ir/StreamReader.cpp +) set(CLP_FFI_JS_SRC_CLP_CORE src/submodules/clp/components/core/src/clp/ffi/ir_stream/decoding_methods.cpp diff --git a/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp b/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp index 53bf9867..c4f37d6c 100644 --- a/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp +++ b/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp @@ -27,6 +27,7 @@ #include #include #include +#include using namespace std::literals::string_literals; using clp::ir::four_byte_encoded_variable_t; @@ -46,59 +47,6 @@ auto KVPairIRStreamReader::create(DataArrayTsType const& data_array) -> KVPairIR auto zstd_decompressor{std::make_unique()}; zstd_decompressor->open(data_buffer.data(), length); - bool is_four_byte_encoding{}; - auto const get_encoding_type_result{ - clp::ffi::ir_stream::get_encoding_type(*zstd_decompressor, is_four_byte_encoding)}; - if (clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success != get_encoding_type_result) { - SPDLOG_CRITICAL( - "Failed to get encoding type: {}", - get_encoding_type_result - ); - throw ClpFfiJsException{ - clp::ErrorCode::ErrorCode_Failure, - __FILENAME__, - __LINE__, - "Failed to get encoding type." - }; - } - clp::ffi::ir_stream::encoded_tag_t metadata_type{}; - std::vector metadata_bytes; - auto const deserialize_preamble_result{ - clp::ffi::ir_stream::deserialize_preamble(*zstd_decompressor, - metadata_type, - metadata_bytes)}; - if (clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success != deserialize_preamble_result) { - SPDLOG_CRITICAL( - "Failed to deserialize preamble for version reading: {}", - deserialize_preamble_result - ); - throw ClpFfiJsException{ - clp::ErrorCode::ErrorCode_Failure, - __FILENAME__, - __LINE__, - "Failed to deserialize preamble for version reading." - }; - } - std::string_view const metadata_view{ - clp::size_checked_pointer_cast(metadata_bytes.data()), - metadata_bytes.size() - }; - nlohmann::json const metadata = nlohmann::json::parse(metadata_view); - auto const &version{metadata.at(clp::ffi::ir_stream::cProtocol::Metadata::VersionKey)}; - if (version == "v0.0.0") { - SPDLOG_CRITICAL("this is irv1; gg"); - throw ClpFfiJsException{ - clp::ErrorCode::ErrorCode_Failure, - __FILENAME__, - __LINE__, - "this is irv1; gg." - }; - } - SPDLOG_INFO("The version is {}", version); - - // Seek from the beginning of the file since the metadata bytes have been consumed but the Deserializer()'s factory - // function does not expect so. - zstd_decompressor->seek_from_begin(0); auto result{ clp::ffi::ir_stream::Deserializer::create(*zstd_decompressor) }; @@ -129,16 +77,15 @@ auto KVPairIRStreamReader::get_num_events_buffered() const -> size_t { return m_encoded_log_events.size(); } -auto KVPairIRStreamReader::deserialize_range(size_t begin_idx, size_t end_idx) -> size_t { - constexpr size_t cFullRangeEndIdx{0}; - if (0 != begin_idx || cFullRangeEndIdx != end_idx) { - throw ClpFfiJsException{ - clp::ErrorCode::ErrorCode_Unsupported, - __FILENAME__, - __LINE__, - "Partial range deserialization is not yet supported." - }; - } +auto KVPairIRStreamReader::get_filtered_log_event_map() const -> FilteredLogEventMapTsType { + return FilteredLogEventMapTsType(emscripten::val::null()); +} + +auto KVPairIRStreamReader::filter_log_events(emscripten::val const &log_level_filter) -> void { + +} + +auto KVPairIRStreamReader::deserialize_stream() -> size_t { if (nullptr != m_stream_reader_data_context) { constexpr size_t cDefaultNumReservedLogEvents{500'000}; m_encoded_log_events.reserve(cDefaultNumReservedLogEvents); @@ -170,7 +117,7 @@ auto KVPairIRStreamReader::deserialize_range(size_t begin_idx, size_t end_idx) - return m_encoded_log_events.size(); } -auto KVPairIRStreamReader::decode_range(size_t begin_idx, size_t end_idx) const -> DecodedResultsTsType { +auto KVPairIRStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType { if (m_encoded_log_events.size() < end_idx || begin_idx >= end_idx) { return DecodedResultsTsType(emscripten::val::null()); } @@ -216,7 +163,7 @@ EMSCRIPTEN_BINDINGS(ClpIrStreamReader) { emscripten::register_type( "Array<[string, number]>" ); - emscripten::class_("ClpKVPairIRStreamReader") + emscripten::class_>("ClpKVPairIRStreamReader") .constructor( &clp_ffi_js::ir::KVPairIRStreamReader::create, emscripten::return_value_policy::take_ownership() @@ -225,7 +172,13 @@ EMSCRIPTEN_BINDINGS(ClpIrStreamReader) { "getNumEventsBuffered", &clp_ffi_js::ir::KVPairIRStreamReader::get_num_events_buffered ) - .function("deserializeRange", &clp_ffi_js::ir::KVPairIRStreamReader::deserialize_range) + .function("deserializeStream", &clp_ffi_js::ir::KVPairIRStreamReader::deserialize_stream) .function("decodeRange", &clp_ffi_js::ir::KVPairIRStreamReader::decode_range); + + emscripten::class_("ClpStreamReader") + .constructor( + &clp_ffi_js::ir::StreamReader::create, + emscripten::return_value_policy::take_ownership() + ); } } // namespace diff --git a/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp b/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp index 5fecc80c..fe6ad93b 100644 --- a/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp +++ b/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp @@ -11,16 +11,14 @@ #include #include +#include namespace clp_ffi_js::ir { -EMSCRIPTEN_DECLARE_VAL_TYPE(DataArrayTsType); -EMSCRIPTEN_DECLARE_VAL_TYPE(DecodedResultsTsType); - /** * Class to deserialize and decode Zstandard-compressed CLP IR streams as well as format decoded * log events. */ -class KVPairIRStreamReader { +class KVPairIRStreamReader: public StreamReader { public: /** * Creates a StreamReader to read from the given array. @@ -32,7 +30,7 @@ class KVPairIRStreamReader { [[nodiscard]] static auto create(DataArrayTsType const& data_array) -> KVPairIRStreamReader; // Destructor - ~KVPairIRStreamReader() = default; + ~KVPairIRStreamReader() override = default; // Disable copy constructor and assignment operator KVPairIRStreamReader(KVPairIRStreamReader const&) = delete; @@ -46,8 +44,11 @@ class KVPairIRStreamReader { /** * @return The number of events buffered. */ - [[nodiscard]] auto get_num_events_buffered() const -> size_t; + [[nodiscard]] auto get_num_events_buffered() const -> size_t override; + + [[nodiscard]] auto get_filtered_log_event_map() const -> FilteredLogEventMapTsType override; + auto filter_log_events(emscripten::val const &log_level_filter) -> void override; /** * Deserializes and buffers log events in the range `[beginIdx, endIdx)`. After the stream has * been exhausted, it will be deallocated. @@ -59,7 +60,7 @@ class KVPairIRStreamReader { * @param end_idx * @return The number of successfully deserialized ("valid") log events. */ - [[nodiscard]] auto deserialize_range(size_t begin_idx, size_t end_idx) -> size_t; + [[nodiscard]] auto deserialize_stream() -> size_t override; /** * Decodes the deserialized log events in the range `[beginIdx, endIdx)`. @@ -74,13 +75,17 @@ class KVPairIRStreamReader { * @return null if any log event in the range doesn't exist (e.g., the range exceeds the number * of log events in the file). */ - [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx) const -> DecodedResultsTsType; + [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType override; + + + using deserializer_t = clp::ffi::ir_stream::Deserializer; + + // Constructor + explicit KVPairIRStreamReader(StreamReaderDataContext&& stream_reader_data_context); private: - using deserializer_t = clp::ffi::ir_stream::Deserializer; - // Constructor - explicit KVPairIRStreamReader(StreamReaderDataContext&& stream_reader_data_context); + // Variables std::vector m_encoded_log_events; diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/StreamReader.cpp new file mode 100644 index 00000000..be3b0f4d --- /dev/null +++ b/src/clp_ffi_js/ir/StreamReader.cpp @@ -0,0 +1,101 @@ +#include "StreamReader.hpp" +#include "KVPairIRStreamReader.hpp" + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace clp_ffi_js::ir { + auto StreamReader::create(DataArrayTsType const& data_array) -> std::unique_ptr { + auto const length{data_array["length"].as()}; + SPDLOG_INFO("KVPairIRStreamReader::create: got buffer of length={}", length); + + // Copy array from JavaScript to C++ + clp::Array data_buffer{length}; + // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast) + emscripten::val::module_property("HEAPU8") + .call("set", data_array, reinterpret_cast(data_buffer.data())); + // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast) + + auto zstd_decompressor{std::make_unique()}; + zstd_decompressor->open(data_buffer.data(), length); + + bool is_four_byte_encoding{}; + auto const get_encoding_type_result{ + clp::ffi::ir_stream::get_encoding_type(*zstd_decompressor, is_four_byte_encoding)}; + if (clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success != get_encoding_type_result) { + SPDLOG_CRITICAL( + "Failed to get encoding type: {}", + get_encoding_type_result + ); + throw ClpFfiJsException{ + clp::ErrorCode::ErrorCode_Failure, + __FILENAME__, + __LINE__, + "Failed to get encoding type." + }; + } + clp::ffi::ir_stream::encoded_tag_t metadata_type{}; + std::vector metadata_bytes; + auto const deserialize_preamble_result{ + clp::ffi::ir_stream::deserialize_preamble(*zstd_decompressor, + metadata_type, + metadata_bytes)}; + if (clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success != deserialize_preamble_result) { + SPDLOG_CRITICAL( + "Failed to deserialize preamble for version reading: {}", + deserialize_preamble_result + ); + throw ClpFfiJsException{ + clp::ErrorCode::ErrorCode_Failure, + __FILENAME__, + __LINE__, + "Failed to deserialize preamble for version reading." + }; + } + std::string_view const metadata_view{ + clp::size_checked_pointer_cast(metadata_bytes.data()), + metadata_bytes.size() + }; + nlohmann::json const metadata = nlohmann::json::parse(metadata_view); + auto const &version{metadata.at(clp::ffi::ir_stream::cProtocol::Metadata::VersionKey)}; + if (version == "v0.0.0") { + SPDLOG_CRITICAL("this is irv1; gg"); + throw ClpFfiJsException{ + clp::ErrorCode::ErrorCode_Failure, + __FILENAME__, + __LINE__, + "this is irv1; gg." + }; + } + SPDLOG_INFO("The version is {}", version); + + return std::make_unique(KVPairIRStreamReader::create(std::move(data_array))); + } +} + diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp new file mode 100644 index 00000000..f09d7a35 --- /dev/null +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -0,0 +1,77 @@ +#ifndef CLP_FFI_JS_IR_STREAM_READER_HPP +#define CLP_FFI_JS_IR_STREAM_READER_HPP + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +namespace clp_ffi_js::ir { + EMSCRIPTEN_DECLARE_VAL_TYPE(DataArrayTsType); + EMSCRIPTEN_DECLARE_VAL_TYPE(DecodedResultsTsType); + EMSCRIPTEN_DECLARE_VAL_TYPE(FilteredLogEventMapTsType); + + /** + * Class to deserialize and decode Zstandard-compressed CLP IR streams as well as format decoded + * log events. + */ + class StreamReader { + public: + virtual ~StreamReader() = default; + + [[nodiscard]] static auto create(DataArrayTsType const& data_array) -> std::unique_ptr; + + /** + * @return The number of events buffered. + */ + [[nodiscard]] virtual auto get_num_events_buffered() const -> size_t = 0; + + /** + * @return The filtered log events map. + */ + [[nodiscard]] virtual auto get_filtered_log_event_map() const -> FilteredLogEventMapTsType = 0; + + /** + * Generates a filtered collection from all log events. + * + * @param log_level_filter Array of selected log levels + */ + virtual auto filter_log_events(emscripten::val const &log_level_filter) -> void = 0; + + /** + * Deserializes all log events in the file. After the stream has been exhausted, it will be + * deallocated. + * + * @return The number of successfully deserialized ("valid") log events. + */ + [[nodiscard]] virtual auto deserialize_stream() -> size_t = 0; + + /** + * Decodes log events in the range `[beginIdx, endIdx)` of the filtered or unfiltered + * (depending on the value of `useFilter`) log events collection. + * + * @param begin_idx + * @param end_idx + * @param use_filter If true, decode from the filtered log events collection; otherwise, decode + * from the unfiltered one. + * @return An array where each element is a decoded log event represented by an array of: + * - The log event's message + * - The log event's timestamp as milliseconds since the Unix epoch + * - The log event's log level as an integer that indexes into `cLogLevelNames` + * - The log event's number (1-indexed) in the stream + * @return null if any log event in the range doesn't exist (e.g. the range exceeds the number + * of log events in the collection). + */ + [[nodiscard]] virtual auto + decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType = 0; + }; +} // namespace clp_ffi_js::ir + +#endif // CLP_FFI_JS_IR_STREAM_READER_HPP diff --git a/src/clp_ffi_js/ir/StreamReaderDataContext.hpp b/src/clp_ffi_js/ir/StreamReaderDataContext.hpp index 0add0926..eace4d85 100644 --- a/src/clp_ffi_js/ir/StreamReaderDataContext.hpp +++ b/src/clp_ffi_js/ir/StreamReaderDataContext.hpp @@ -19,6 +19,7 @@ namespace clp_ffi_js::ir { template class StreamReaderDataContext { public: + // Constructors StreamReaderDataContext( clp::Array&& data_buffer, From 80f059b422bf82295e977ee938baed5b62bfd021 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Thu, 10 Oct 2024 22:41:30 +0800 Subject: [PATCH 08/18] Revert KVPairIRStreamReader(StreamReaderDataContext&& stream_reader_data_context) back to private. --- src/clp_ffi_js/ir/KVPairIRStreamReader.hpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp b/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp index fe6ad93b..cc2bd8e2 100644 --- a/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp +++ b/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp @@ -77,15 +77,11 @@ class KVPairIRStreamReader: public StreamReader { */ [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType override; - - using deserializer_t = clp::ffi::ir_stream::Deserializer; - - // Constructor - explicit KVPairIRStreamReader(StreamReaderDataContext&& stream_reader_data_context); - private: + using deserializer_t = clp::ffi::ir_stream::Deserializer; - + // Constructor + explicit KVPairIRStreamReader(StreamReaderDataContext&& stream_reader_data_context); // Variables std::vector m_encoded_log_events; From 9be08c423e6db5957fa136f1877afa693ae174a9 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Fri, 11 Oct 2024 03:21:39 +0800 Subject: [PATCH 09/18] Reformat code. --- src/clp_ffi_js/ir/KVPairIRStreamReader.cpp | 49 ++++---- src/clp_ffi_js/ir/KVPairIRStreamReader.hpp | 19 ++-- src/clp_ffi_js/ir/StreamReader.cpp | 97 ++++++++-------- src/clp_ffi_js/ir/StreamReader.hpp | 105 +++++++++--------- src/clp_ffi_js/ir/StreamReaderDataContext.hpp | 9 +- 5 files changed, 135 insertions(+), 144 deletions(-) diff --git a/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp b/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp index c4f37d6c..09bc80a6 100644 --- a/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp +++ b/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp @@ -1,22 +1,13 @@ #include "KVPairIRStreamReader.hpp" -#include -#include #include -#include -#include #include #include -#include -#include #include #include #include #include -#include -#include -#include #include #include #include @@ -25,9 +16,8 @@ #include #include -#include -#include #include +#include using namespace std::literals::string_literals; using clp::ir::four_byte_encoded_variable_t; @@ -47,9 +37,7 @@ auto KVPairIRStreamReader::create(DataArrayTsType const& data_array) -> KVPairIR auto zstd_decompressor{std::make_unique()}; zstd_decompressor->open(data_buffer.data(), length); - auto result{ - clp::ffi::ir_stream::Deserializer::create(*zstd_decompressor) - }; + auto result{clp::ffi::ir_stream::Deserializer::create(*zstd_decompressor)}; if (result.has_error()) { auto const error_code{result.error()}; SPDLOG_CRITICAL( @@ -81,17 +69,16 @@ auto KVPairIRStreamReader::get_filtered_log_event_map() const -> FilteredLogEven return FilteredLogEventMapTsType(emscripten::val::null()); } -auto KVPairIRStreamReader::filter_log_events(emscripten::val const &log_level_filter) -> void { - -} +auto KVPairIRStreamReader::filter_log_events(emscripten::val const& log_level_filter) -> void {} auto KVPairIRStreamReader::deserialize_stream() -> size_t { if (nullptr != m_stream_reader_data_context) { constexpr size_t cDefaultNumReservedLogEvents{500'000}; m_encoded_log_events.reserve(cDefaultNumReservedLogEvents); - auto &reader{m_stream_reader_data_context->get_reader()}; + auto& reader{m_stream_reader_data_context->get_reader()}; while (true) { - auto result{m_stream_reader_data_context->get_deserializer().deserialize_to_next_log_event(reader)}; + auto result{m_stream_reader_data_context->get_deserializer() + .deserialize_to_next_log_event(reader)}; if (false == result.has_error()) { m_encoded_log_events.emplace_back(std::move(result.value())); continue; @@ -117,7 +104,8 @@ auto KVPairIRStreamReader::deserialize_stream() -> size_t { return m_encoded_log_events.size(); } -auto KVPairIRStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType { +auto KVPairIRStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const + -> DecodedResultsTsType { if (m_encoded_log_events.size() < end_idx || begin_idx >= end_idx) { return DecodedResultsTsType(emscripten::val::null()); } @@ -160,10 +148,10 @@ KVPairIRStreamReader::KVPairIRStreamReader( namespace { EMSCRIPTEN_BINDINGS(ClpIrStreamReader) { emscripten::register_type("Uint8Array"); - emscripten::register_type( - "Array<[string, number]>" - ); - emscripten::class_>("ClpKVPairIRStreamReader") + emscripten::register_type("Array<[string, number]>"); + emscripten::class_< + clp_ffi_js::ir::KVPairIRStreamReader, + emscripten::base>("ClpKVPairIRStreamReader") .constructor( &clp_ffi_js::ir::KVPairIRStreamReader::create, emscripten::return_value_policy::take_ownership() @@ -172,13 +160,16 @@ EMSCRIPTEN_BINDINGS(ClpIrStreamReader) { "getNumEventsBuffered", &clp_ffi_js::ir::KVPairIRStreamReader::get_num_events_buffered ) - .function("deserializeStream", &clp_ffi_js::ir::KVPairIRStreamReader::deserialize_stream) + .function( + "deserializeStream", + &clp_ffi_js::ir::KVPairIRStreamReader::deserialize_stream + ) .function("decodeRange", &clp_ffi_js::ir::KVPairIRStreamReader::decode_range); emscripten::class_("ClpStreamReader") - .constructor( - &clp_ffi_js::ir::StreamReader::create, - emscripten::return_value_policy::take_ownership() - ); + .constructor( + &clp_ffi_js::ir::StreamReader::create, + emscripten::return_value_policy::take_ownership() + ); } } // namespace diff --git a/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp b/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp index cc2bd8e2..deb0ec59 100644 --- a/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp +++ b/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp @@ -5,20 +5,20 @@ #include #include -#include #include +#include #include #include -#include #include +#include namespace clp_ffi_js::ir { /** * Class to deserialize and decode Zstandard-compressed CLP IR streams as well as format decoded * log events. */ -class KVPairIRStreamReader: public StreamReader { +class KVPairIRStreamReader : public StreamReader { public: /** * Creates a StreamReader to read from the given array. @@ -44,11 +44,11 @@ class KVPairIRStreamReader: public StreamReader { /** * @return The number of events buffered. */ - [[nodiscard]] auto get_num_events_buffered() const -> size_t override; + [[nodiscard]] auto get_num_events_buffered() const -> size_t override; - [[nodiscard]] auto get_filtered_log_event_map() const -> FilteredLogEventMapTsType override; + [[nodiscard]] auto get_filtered_log_event_map() const -> FilteredLogEventMapTsType override; - auto filter_log_events(emscripten::val const &log_level_filter) -> void override; + auto filter_log_events(emscripten::val const& log_level_filter) -> void override; /** * Deserializes and buffers log events in the range `[beginIdx, endIdx)`. After the stream has * been exhausted, it will be deallocated. @@ -75,13 +75,16 @@ class KVPairIRStreamReader: public StreamReader { * @return null if any log event in the range doesn't exist (e.g., the range exceeds the number * of log events in the file). */ - [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType override; + [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const + -> DecodedResultsTsType override; private: using deserializer_t = clp::ffi::ir_stream::Deserializer; // Constructor - explicit KVPairIRStreamReader(StreamReaderDataContext&& stream_reader_data_context); + explicit KVPairIRStreamReader( + StreamReaderDataContext&& stream_reader_data_context + ); // Variables std::vector m_encoded_log_events; diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/StreamReader.cpp index be3b0f4d..14524896 100644 --- a/src/clp_ffi_js/ir/StreamReader.cpp +++ b/src/clp_ffi_js/ir/StreamReader.cpp @@ -1,6 +1,4 @@ #include "StreamReader.hpp" -#include "KVPairIRStreamReader.hpp" - #include #include @@ -17,8 +15,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -30,72 +28,73 @@ #include #include +#include "KVPairIRStreamReader.hpp" + namespace clp_ffi_js::ir { - auto StreamReader::create(DataArrayTsType const& data_array) -> std::unique_ptr { - auto const length{data_array["length"].as()}; - SPDLOG_INFO("KVPairIRStreamReader::create: got buffer of length={}", length); +auto StreamReader::create(DataArrayTsType const& data_array) -> std::unique_ptr { + auto const length{data_array["length"].as()}; + SPDLOG_INFO("KVPairIRStreamReader::create: got buffer of length={}", length); - // Copy array from JavaScript to C++ - clp::Array data_buffer{length}; - // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast) - emscripten::val::module_property("HEAPU8") - .call("set", data_array, reinterpret_cast(data_buffer.data())); - // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast) + // Copy array from JavaScript to C++ + clp::Array data_buffer{length}; + // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast) + emscripten::val::module_property("HEAPU8") + .call("set", data_array, reinterpret_cast(data_buffer.data())); + // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast) - auto zstd_decompressor{std::make_unique()}; - zstd_decompressor->open(data_buffer.data(), length); + auto zstd_decompressor{std::make_unique()}; + zstd_decompressor->open(data_buffer.data(), length); - bool is_four_byte_encoding{}; - auto const get_encoding_type_result{ - clp::ffi::ir_stream::get_encoding_type(*zstd_decompressor, is_four_byte_encoding)}; - if (clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success != get_encoding_type_result) { - SPDLOG_CRITICAL( - "Failed to get encoding type: {}", - get_encoding_type_result - ); - throw ClpFfiJsException{ + bool is_four_byte_encoding{}; + auto const get_encoding_type_result{ + clp::ffi::ir_stream::get_encoding_type(*zstd_decompressor, is_four_byte_encoding) + }; + if (clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success != get_encoding_type_result) { + SPDLOG_CRITICAL("Failed to get encoding type: {}", get_encoding_type_result); + throw ClpFfiJsException{ clp::ErrorCode::ErrorCode_Failure, __FILENAME__, __LINE__, "Failed to get encoding type." - }; - } - clp::ffi::ir_stream::encoded_tag_t metadata_type{}; - std::vector metadata_bytes; - auto const deserialize_preamble_result{ - clp::ffi::ir_stream::deserialize_preamble(*zstd_decompressor, + }; + } + clp::ffi::ir_stream::encoded_tag_t metadata_type{}; + std::vector metadata_bytes; + auto const deserialize_preamble_result{clp::ffi::ir_stream::deserialize_preamble( + *zstd_decompressor, metadata_type, - metadata_bytes)}; - if (clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success != deserialize_preamble_result) { - SPDLOG_CRITICAL( + metadata_bytes + )}; + if (clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success != deserialize_preamble_result) { + SPDLOG_CRITICAL( "Failed to deserialize preamble for version reading: {}", deserialize_preamble_result - ); - throw ClpFfiJsException{ + ); + throw ClpFfiJsException{ clp::ErrorCode::ErrorCode_Failure, __FILENAME__, __LINE__, "Failed to deserialize preamble for version reading." - }; - } - std::string_view const metadata_view{ + }; + } + std::string_view const metadata_view{ clp::size_checked_pointer_cast(metadata_bytes.data()), metadata_bytes.size() - }; - nlohmann::json const metadata = nlohmann::json::parse(metadata_view); - auto const &version{metadata.at(clp::ffi::ir_stream::cProtocol::Metadata::VersionKey)}; - if (version == "v0.0.0") { - SPDLOG_CRITICAL("this is irv1; gg"); - throw ClpFfiJsException{ + }; + nlohmann::json const metadata = nlohmann::json::parse(metadata_view); + auto const& version{metadata.at(clp::ffi::ir_stream::cProtocol::Metadata::VersionKey)}; + if (version == "v0.0.0") { + SPDLOG_CRITICAL("this is irv1; gg"); + throw ClpFfiJsException{ clp::ErrorCode::ErrorCode_Failure, __FILENAME__, __LINE__, "this is irv1; gg." - }; - } - SPDLOG_INFO("The version is {}", version); - - return std::make_unique(KVPairIRStreamReader::create(std::move(data_array))); + }; } -} + SPDLOG_INFO("The version is {}", version); + return std::make_unique(KVPairIRStreamReader::create(std::move(data_array) + )); +} +} // namespace clp_ffi_js::ir diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index f09d7a35..4cdecd0d 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -14,64 +14,65 @@ #include namespace clp_ffi_js::ir { - EMSCRIPTEN_DECLARE_VAL_TYPE(DataArrayTsType); - EMSCRIPTEN_DECLARE_VAL_TYPE(DecodedResultsTsType); - EMSCRIPTEN_DECLARE_VAL_TYPE(FilteredLogEventMapTsType); +EMSCRIPTEN_DECLARE_VAL_TYPE(DataArrayTsType); +EMSCRIPTEN_DECLARE_VAL_TYPE(DecodedResultsTsType); +EMSCRIPTEN_DECLARE_VAL_TYPE(FilteredLogEventMapTsType); - /** - * Class to deserialize and decode Zstandard-compressed CLP IR streams as well as format decoded - * log events. - */ - class StreamReader { - public: - virtual ~StreamReader() = default; +/** + * Class to deserialize and decode Zstandard-compressed CLP IR streams as well as format decoded + * log events. + */ +class StreamReader { +public: + virtual ~StreamReader() = default; - [[nodiscard]] static auto create(DataArrayTsType const& data_array) -> std::unique_ptr; + [[nodiscard]] static auto create(DataArrayTsType const& data_array + ) -> std::unique_ptr; - /** - * @return The number of events buffered. - */ - [[nodiscard]] virtual auto get_num_events_buffered() const -> size_t = 0; + /** + * @return The number of events buffered. + */ + [[nodiscard]] virtual auto get_num_events_buffered() const -> size_t = 0; - /** - * @return The filtered log events map. - */ - [[nodiscard]] virtual auto get_filtered_log_event_map() const -> FilteredLogEventMapTsType = 0; + /** + * @return The filtered log events map. + */ + [[nodiscard]] virtual auto get_filtered_log_event_map() const -> FilteredLogEventMapTsType = 0; - /** - * Generates a filtered collection from all log events. - * - * @param log_level_filter Array of selected log levels - */ - virtual auto filter_log_events(emscripten::val const &log_level_filter) -> void = 0; + /** + * Generates a filtered collection from all log events. + * + * @param log_level_filter Array of selected log levels + */ + virtual auto filter_log_events(emscripten::val const& log_level_filter) -> void = 0; - /** - * Deserializes all log events in the file. After the stream has been exhausted, it will be - * deallocated. - * - * @return The number of successfully deserialized ("valid") log events. - */ - [[nodiscard]] virtual auto deserialize_stream() -> size_t = 0; + /** + * Deserializes all log events in the file. After the stream has been exhausted, it will be + * deallocated. + * + * @return The number of successfully deserialized ("valid") log events. + */ + [[nodiscard]] virtual auto deserialize_stream() -> size_t = 0; - /** - * Decodes log events in the range `[beginIdx, endIdx)` of the filtered or unfiltered - * (depending on the value of `useFilter`) log events collection. - * - * @param begin_idx - * @param end_idx - * @param use_filter If true, decode from the filtered log events collection; otherwise, decode - * from the unfiltered one. - * @return An array where each element is a decoded log event represented by an array of: - * - The log event's message - * - The log event's timestamp as milliseconds since the Unix epoch - * - The log event's log level as an integer that indexes into `cLogLevelNames` - * - The log event's number (1-indexed) in the stream - * @return null if any log event in the range doesn't exist (e.g. the range exceeds the number - * of log events in the collection). - */ - [[nodiscard]] virtual auto - decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType = 0; - }; -} // namespace clp_ffi_js::ir + /** + * Decodes log events in the range `[beginIdx, endIdx)` of the filtered or unfiltered + * (depending on the value of `useFilter`) log events collection. + * + * @param begin_idx + * @param end_idx + * @param use_filter If true, decode from the filtered log events collection; otherwise, decode + * from the unfiltered one. + * @return An array where each element is a decoded log event represented by an array of: + * - The log event's message + * - The log event's timestamp as milliseconds since the Unix epoch + * - The log event's log level as an integer that indexes into `cLogLevelNames` + * - The log event's number (1-indexed) in the stream + * @return null if any log event in the range doesn't exist (e.g. the range exceeds the number + * of log events in the collection). + */ + [[nodiscard]] virtual auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const + -> DecodedResultsTsType = 0; +}; +} // namespace clp_ffi_js::ir #endif // CLP_FFI_JS_IR_STREAM_READER_HPP diff --git a/src/clp_ffi_js/ir/StreamReaderDataContext.hpp b/src/clp_ffi_js/ir/StreamReaderDataContext.hpp index eace4d85..bf7867a1 100644 --- a/src/clp_ffi_js/ir/StreamReaderDataContext.hpp +++ b/src/clp_ffi_js/ir/StreamReaderDataContext.hpp @@ -1,12 +1,12 @@ #ifndef CLP_FFI_JS_IR_STREAMREADERDATACONTEXT_HPP #define CLP_FFI_JS_IR_STREAMREADERDATACONTEXT_HPP +#include #include #include #include #include -#include namespace clp_ffi_js::ir { /** @@ -14,12 +14,11 @@ namespace clp_ffi_js::ir { * A `clp::ir::LogEventDeserializer` / `clp::ffi::ir_stream::Deserializer` that reads from a * `clp::streaming_compression::zstd::Decompressor`, which in turn reads from a `clp::Array`. * -* @tparam deserializer_t Type of deserializer for decoding the stream. + * @tparam deserializer_t Type of deserializer for decoding the stream. */ template class StreamReaderDataContext { public: - // Constructors StreamReaderDataContext( clp::Array&& data_buffer, @@ -52,9 +51,7 @@ class StreamReaderDataContext { /** * @return A reference to the deserializer. */ - [[nodiscard]] auto get_deserializer() -> deserializer_t& { - return m_deserializer; - } + [[nodiscard]] auto get_deserializer() -> deserializer_t& { return m_deserializer; } private: clp::Array m_data_buffer; From 7842d29d643cdcc5ad4db152e0bc8d717c803e15 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Fri, 11 Oct 2024 03:25:31 +0800 Subject: [PATCH 10/18] Optimize imports. --- src/clp_ffi_js/ir/KVPairIRStreamReader.cpp | 6 +++++- src/clp_ffi_js/ir/KVPairIRStreamReader.hpp | 2 +- src/clp_ffi_js/ir/StreamReader.cpp | 16 ++++------------ src/clp_ffi_js/ir/StreamReader.hpp | 6 ------ src/clp_ffi_js/ir/StreamReaderDataContext.hpp | 1 - 5 files changed, 10 insertions(+), 21 deletions(-) diff --git a/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp b/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp index 09bc80a6..656a7f91 100644 --- a/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp +++ b/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp @@ -1,8 +1,12 @@ #include "KVPairIRStreamReader.hpp" +#include #include +#include +#include #include #include +#include #include #include @@ -104,7 +108,7 @@ auto KVPairIRStreamReader::deserialize_stream() -> size_t { return m_encoded_log_events.size(); } -auto KVPairIRStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const +auto KVPairIRStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool /*use_filter*/) const -> DecodedResultsTsType { if (m_encoded_log_events.size() < end_idx || begin_idx >= end_idx) { return DecodedResultsTsType(emscripten::val::null()); diff --git a/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp b/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp index deb0ec59..0e1ed8a9 100644 --- a/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp +++ b/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp @@ -2,11 +2,11 @@ #define CLP_FFI_JS_KV_PAIR_IR_STREAM_READER_HPP #include +#include #include #include #include -#include #include #include diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/StreamReader.cpp index 14524896..b30efe98 100644 --- a/src/clp_ffi_js/ir/StreamReader.cpp +++ b/src/clp_ffi_js/ir/StreamReader.cpp @@ -1,32 +1,25 @@ #include "StreamReader.hpp" -#include #include #include -#include +#include +#include #include -#include #include #include -#include +#include #include #include #include #include #include -#include -#include -#include #include #include -#include #include #include #include -#include -#include #include "KVPairIRStreamReader.hpp" @@ -94,7 +87,6 @@ auto StreamReader::create(DataArrayTsType const& data_array) -> std::unique_ptr< } SPDLOG_INFO("The version is {}", version); - return std::make_unique(KVPairIRStreamReader::create(std::move(data_array) - )); + return std::make_unique(KVPairIRStreamReader::create(data_array)); } } // namespace clp_ffi_js::ir diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 4cdecd0d..15865527 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -3,16 +3,10 @@ #include #include -#include -#include -#include -#include #include #include -#include - namespace clp_ffi_js::ir { EMSCRIPTEN_DECLARE_VAL_TYPE(DataArrayTsType); EMSCRIPTEN_DECLARE_VAL_TYPE(DecodedResultsTsType); diff --git a/src/clp_ffi_js/ir/StreamReaderDataContext.hpp b/src/clp_ffi_js/ir/StreamReaderDataContext.hpp index bf7867a1..04a90a1a 100644 --- a/src/clp_ffi_js/ir/StreamReaderDataContext.hpp +++ b/src/clp_ffi_js/ir/StreamReaderDataContext.hpp @@ -1,7 +1,6 @@ #ifndef CLP_FFI_JS_IR_STREAMREADERDATACONTEXT_HPP #define CLP_FFI_JS_IR_STREAMREADERDATACONTEXT_HPP -#include #include #include From 5b49ec06efc48612a235d452972f7ef4248ebb3c Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Fri, 11 Oct 2024 04:20:39 +0800 Subject: [PATCH 11/18] Rename the original StreamReader -> IrStreamReader and adapt it to the interfaces of the newly proposed StreamReader. --- CMakeLists.txt | 4 + src/clp_ffi_js/bindings.cpp | 54 +++++ src/clp_ffi_js/ir/IrStreamReader.cpp | 254 ++++++++++++++++++++ src/clp_ffi_js/ir/IrStreamReader.hpp | 110 +++++++++ src/clp_ffi_js/ir/KVPairIRStreamReader.cpp | 28 --- src/clp_ffi_js/ir/StreamReader.cpp | 265 +++------------------ src/clp_ffi_js/ir/StreamReader.hpp | 64 +---- 7 files changed, 469 insertions(+), 310 deletions(-) create mode 100644 src/clp_ffi_js/bindings.cpp create mode 100644 src/clp_ffi_js/ir/IrStreamReader.cpp create mode 100644 src/clp_ffi_js/ir/IrStreamReader.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index f02e063e..8e5b2c64 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -113,7 +113,9 @@ target_include_directories( target_include_directories(${CLP_FFI_JS_BIN_NAME} PRIVATE src/) set(CLP_FFI_JS_SRC_MAIN + src/clp_ffi_js/bindings.cpp src/clp_ffi_js/ir/KVPairIRStreamReader.cpp + src/clp_ffi_js/ir/IrStreamReader.cpp src/clp_ffi_js/ir/StreamReader.cpp ) @@ -125,9 +127,11 @@ set(CLP_FFI_JS_SRC_CLP_CORE src/submodules/clp/components/core/src/clp/ffi/KeyValuePairLogEvent.cpp src/submodules/clp/components/core/src/clp/ffi/SchemaTree.cpp src/submodules/clp/components/core/src/clp/ir/EncodedTextAst.cpp + src/submodules/clp/components/core/src/clp/ir/LogEventDeserializer.cpp src/submodules/clp/components/core/src/clp/ReadOnlyMemoryMappedFile.cpp src/submodules/clp/components/core/src/clp/ReaderInterface.cpp src/submodules/clp/components/core/src/clp/streaming_compression/zstd/Decompressor.cpp + src/submodules/clp/components/core/src/clp/TimestampPattern.cpp ) set(CLP_FFI_JS_SRC_FMT src/submodules/fmt/src/format.cc) diff --git a/src/clp_ffi_js/bindings.cpp b/src/clp_ffi_js/bindings.cpp new file mode 100644 index 00000000..8af72db4 --- /dev/null +++ b/src/clp_ffi_js/bindings.cpp @@ -0,0 +1,54 @@ +#include +#include "clp_ffi_js/ir/StreamReader.hpp" +#include "clp_ffi_js/ir/KVPairIRStreamReader.hpp" +#include "clp_ffi_js/ir/IrStreamReader.hpp" + +namespace { + EMSCRIPTEN_BINDINGS(ClpIrStreamReader) { + emscripten::register_type("Uint8Array"); + emscripten::register_type( + "Array<[string, number, number, number]>" + ); + emscripten::register_type("number[] | null"); + + emscripten::class_>("ClpIrStreamReader") + .constructor( + &clp_ffi_js::ir::IrStreamReader::create, + emscripten::return_value_policy::take_ownership() + ) + .function( + "getNumEventsBuffered", + &clp_ffi_js::ir::IrStreamReader::get_num_events_buffered + ) + .function( + "getFilteredLogEventMap", + &clp_ffi_js::ir::IrStreamReader::get_filtered_log_event_map + ) + .function("filterLogEvents", &clp_ffi_js::ir::IrStreamReader::filter_log_events) + .function("deserializeStream", &clp_ffi_js::ir::IrStreamReader::deserialize_stream) + .function("decodeRange", &clp_ffi_js::ir::IrStreamReader::decode_range); + + emscripten::class_>("ClpKVPairIRStreamReader") + .constructor( + &clp_ffi_js::ir::KVPairIRStreamReader::create, + emscripten::return_value_policy::take_ownership() + ) + .function( + "getNumEventsBuffered", + &clp_ffi_js::ir::KVPairIRStreamReader::get_num_events_buffered + ) + .function( + "deserializeStream", + &clp_ffi_js::ir::KVPairIRStreamReader::deserialize_stream + ) + .function("decodeRange", &clp_ffi_js::ir::KVPairIRStreamReader::decode_range); + + emscripten::class_("ClpStreamReader") + .constructor( + &clp_ffi_js::ir::StreamReader::create, + emscripten::return_value_policy::take_ownership() + ); + } +} // namespace diff --git a/src/clp_ffi_js/ir/IrStreamReader.cpp b/src/clp_ffi_js/ir/IrStreamReader.cpp new file mode 100644 index 00000000..a7041d4a --- /dev/null +++ b/src/clp_ffi_js/ir/IrStreamReader.cpp @@ -0,0 +1,254 @@ +#include "IrStreamReader.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +using namespace std::literals::string_literals; +using clp::ir::four_byte_encoded_variable_t; + +namespace clp_ffi_js::ir { +auto IrStreamReader::create(DataArrayTsType const& data_array) -> IrStreamReader { + auto const length{data_array["length"].as()}; + SPDLOG_INFO("IrStreamReader::create: got buffer of length={}", length); + + // Copy array from JavaScript to C++ + clp::Array data_buffer{length}; + // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast) + emscripten::val::module_property("HEAPU8") + .call("set", data_array, reinterpret_cast(data_buffer.data())); + // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast) + + auto zstd_decompressor{std::make_unique()}; + zstd_decompressor->open(data_buffer.data(), length); + + bool is_four_bytes_encoding{true}; + if (auto const err{ + clp::ffi::ir_stream::get_encoding_type(*zstd_decompressor, is_four_bytes_encoding) + }; + clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success != err) + { + SPDLOG_CRITICAL("Failed to decode encoding type, err={}", err); + throw ClpFfiJsException{ + clp::ErrorCode::ErrorCode_MetadataCorrupted, + __FILENAME__, + __LINE__, + "Failed to decode encoding type." + }; + } + if (false == is_four_bytes_encoding) { + throw ClpFfiJsException{ + clp::ErrorCode::ErrorCode_Unsupported, + __FILENAME__, + __LINE__, + "IR stream uses unsupported encoding." + }; + } + + auto result{ + clp::ir::LogEventDeserializer::create(*zstd_decompressor) + }; + if (result.has_error()) { + auto const error_code{result.error()}; + SPDLOG_CRITICAL( + "Failed to create deserializer: {}:{}", + error_code.category().name(), + error_code.message() + ); + throw ClpFfiJsException{ + clp::ErrorCode::ErrorCode_Failure, + __FILENAME__, + __LINE__, + "Failed to create deserializer" + }; + } + + StreamReaderDataContext> stream_reader_data_context{ + std::move(data_buffer), + std::move(zstd_decompressor), + std::move(result.value()) + }; + return IrStreamReader{std::move(stream_reader_data_context)}; +} + +auto IrStreamReader::get_num_events_buffered() const -> size_t { + return m_encoded_log_events.size(); +} + +auto IrStreamReader::get_filtered_log_event_map() const -> FilteredLogEventMapTsType { + if (false == m_filtered_log_event_map.has_value()) { + return FilteredLogEventMapTsType{emscripten::val::null()}; + } + + return FilteredLogEventMapTsType{emscripten::val::array(m_filtered_log_event_map.value())}; +} + +void IrStreamReader::filter_log_events(emscripten::val const& log_level_filter) { + if (log_level_filter.isNull()) { + m_filtered_log_event_map.reset(); + return; + } + + m_filtered_log_event_map.emplace(); + auto filter_levels{emscripten::vecFromJSArray>(log_level_filter + )}; + for (size_t log_event_idx = 0; log_event_idx < m_encoded_log_events.size(); ++log_event_idx) { + auto const& log_event = m_encoded_log_events[log_event_idx]; + if (std::ranges::find( + filter_levels, + clp::enum_to_underlying_type(log_event.get_log_level()) + ) + != filter_levels.end()) + { + m_filtered_log_event_map->emplace_back(log_event_idx); + } + } +} + +auto IrStreamReader::deserialize_stream() -> size_t { + if (nullptr == m_stream_reader_data_context) { + return m_encoded_log_events.size(); + } + + constexpr size_t cDefaultNumReservedLogEvents{500'000}; + m_encoded_log_events.reserve(cDefaultNumReservedLogEvents); + + while (true) { + auto result{m_stream_reader_data_context->get_deserializer().deserialize_log_event()}; + if (result.has_error()) { + auto const error{result.error()}; + if (std::errc::no_message_available == error) { + break; + } + if (std::errc::result_out_of_range == error) { + SPDLOG_ERROR("File contains an incomplete IR stream"); + break; + } + throw ClpFfiJsException{ + clp::ErrorCode::ErrorCode_Corrupt, + __FILENAME__, + __LINE__, + "Failed to deserialize: "s + error.category().name() + ":" + error.message() + }; + } + auto const& log_event = result.value(); + auto const& message = log_event.get_message(); + + auto const& logtype = message.get_logtype(); + constexpr size_t cLogLevelPositionInMessages{1}; + LogLevel log_level{LogLevel::NONE}; + if (logtype.length() > cLogLevelPositionInMessages) { + // NOLINTNEXTLINE(readability-qualified-auto) + auto const log_level_name_it{std::find_if( + cLogLevelNames.begin() + static_cast(cValidLogLevelsBeginIdx), + cLogLevelNames.end(), + [&](std::string_view level) { + return logtype.substr(cLogLevelPositionInMessages).starts_with(level); + } + )}; + if (log_level_name_it != cLogLevelNames.end()) { + log_level = static_cast( + std::distance(cLogLevelNames.begin(), log_level_name_it) + ); + } + } + + auto log_viewer_event{LogEventWithLevel( + log_event.get_timestamp(), + log_event.get_utc_offset(), + message, + log_level + )}; + m_encoded_log_events.emplace_back(std::move(log_viewer_event)); + } + m_stream_reader_data_context.reset(nullptr); + return m_encoded_log_events.size(); +} + +auto IrStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const + -> DecodedResultsTsType { + if (use_filter && false == m_filtered_log_event_map.has_value()) { + return DecodedResultsTsType{emscripten::val::null()}; + } + + size_t length{0}; + if (use_filter) { + length = m_filtered_log_event_map->size(); + } else { + length = m_encoded_log_events.size(); + } + if (length < end_idx || begin_idx > end_idx) { + return DecodedResultsTsType{emscripten::val::null()}; + } + + std::string message; + constexpr size_t cDefaultReservedMessageLength{512}; + message.reserve(cDefaultReservedMessageLength); + auto const results{emscripten::val::array()}; + + for (size_t i = begin_idx; i < end_idx; ++i) { + size_t log_event_idx{0}; + if (use_filter) { + log_event_idx = m_filtered_log_event_map->at(i); + } else { + log_event_idx = i; + } + auto const& log_event{m_encoded_log_events[log_event_idx]}; + + auto const parsed{log_event.get_message().decode_and_unparse()}; + if (false == parsed.has_value()) { + SPDLOG_ERROR("Failed to decode message."); + break; + } + message = parsed.value(); + + m_ts_pattern.insert_formatted_timestamp(log_event.get_timestamp(), message); + + EM_ASM( + { Emval.toValue($0).push([UTF8ToString($1), $2, $3, $4]); }, + results.as_handle(), + message.c_str(), + log_event.get_timestamp(), + log_event.get_log_level(), + log_event_idx + 1 + ); + } + + return DecodedResultsTsType(results); +} + + IrStreamReader::IrStreamReader( + StreamReaderDataContext>&& stream_reader_data_context +) + : m_stream_reader_data_context{std::make_unique< + StreamReaderDataContext>>( + std::move(stream_reader_data_context) + )}, + m_ts_pattern{m_stream_reader_data_context->get_deserializer().get_timestamp_pattern()} {} +} // namespace clp_ffi_js::ir diff --git a/src/clp_ffi_js/ir/IrStreamReader.hpp b/src/clp_ffi_js/ir/IrStreamReader.hpp new file mode 100644 index 00000000..e7494217 --- /dev/null +++ b/src/clp_ffi_js/ir/IrStreamReader.hpp @@ -0,0 +1,110 @@ +#ifndef CLP_FFI_JS_IR_IR_STREAM_READER_HPP +#define CLP_FFI_JS_IR_IR_STREAM_READER_HPP + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace clp_ffi_js::ir { +/** + * Class to deserialize and decode Zstandard-compressed CLP IR streams as well as format decoded + * log events. + */ +class IrStreamReader: public StreamReader { +public: + /** + * Mapping between an index in the filtered log events collection to an index in the unfiltered + * log events collection. + */ + using FilteredLogEventsMap = std::optional>; + + /** + * Creates a IrStreamReader to read from the given array. + * + * @param data_array An array containing a Zstandard-compressed IR stream. + * @return The created instance. + * @throw ClpFfiJsException if any error occurs. + */ + [[nodiscard]] static auto create(DataArrayTsType const& data_array) -> IrStreamReader; + + // Destructor + ~IrStreamReader() = default; + + // Disable copy constructor and assignment operator + IrStreamReader(IrStreamReader const&) = delete; + auto operator=(IrStreamReader const&) -> IrStreamReader& = delete; + + // Define default move constructor + IrStreamReader(IrStreamReader&&) = default; + // Delete move assignment operator since it's also disabled in `clp::ir::LogEventDeserializer`. + auto operator=(IrStreamReader&&) -> IrStreamReader& = delete; + + /** + * @return The number of events buffered. + */ + [[nodiscard]] auto get_num_events_buffered() const -> size_t; + + /** + * @return The filtered log events map. + */ + [[nodiscard]] auto get_filtered_log_event_map() const -> FilteredLogEventMapTsType; + + /** + * Generates a filtered collection from all log events. + * + * @param log_level_filter Array of selected log levels + */ + void filter_log_events(emscripten::val const& log_level_filter); + + /** + * Deserializes all log events in the stream. After the stream has been exhausted, it will be + * deallocated. + * + * @return The number of successfully deserialized ("valid") log events. + */ + [[nodiscard]] auto deserialize_stream() -> size_t; + + /** + * Decodes log events in the range `[beginIdx, endIdx)` of the filtered or unfiltered + * (depending on the value of `useFilter`) log events collection. + * + * @param begin_idx + * @param end_idx + * @param use_filter Whether to decode from the filtered or unfiltered log events collection. + * @return An array where each element is a decoded log event represented by an array of: + * - The log event's message + * - The log event's timestamp as milliseconds since the Unix epoch + * - The log event's log level as an integer that indexes into `cLogLevelNames` + * - The log event's number (1-indexed) in the stream + * @return null if any log event in the range doesn't exist (e.g. the range exceeds the number + * of log events in the collection). + */ + [[nodiscard]] auto + decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType; + +private: + // Constructor + explicit IrStreamReader(StreamReaderDataContext>&& + stream_reader_data_context); + + // Variables + std::vector> m_encoded_log_events; + std::unique_ptr>> + m_stream_reader_data_context; + FilteredLogEventsMap m_filtered_log_event_map; + clp::TimestampPattern m_ts_pattern; +}; +} // namespace clp_ffi_js::ir + +#endif // CLP_FFI_JS_IR_IR_STREAM_READER_HPP diff --git a/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp b/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp index 656a7f91..5ea5acbb 100644 --- a/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp +++ b/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp @@ -149,31 +149,3 @@ KVPairIRStreamReader::KVPairIRStreamReader( )} {} } // namespace clp_ffi_js::ir -namespace { -EMSCRIPTEN_BINDINGS(ClpIrStreamReader) { - emscripten::register_type("Uint8Array"); - emscripten::register_type("Array<[string, number]>"); - emscripten::class_< - clp_ffi_js::ir::KVPairIRStreamReader, - emscripten::base>("ClpKVPairIRStreamReader") - .constructor( - &clp_ffi_js::ir::KVPairIRStreamReader::create, - emscripten::return_value_policy::take_ownership() - ) - .function( - "getNumEventsBuffered", - &clp_ffi_js::ir::KVPairIRStreamReader::get_num_events_buffered - ) - .function( - "deserializeStream", - &clp_ffi_js::ir::KVPairIRStreamReader::deserialize_stream - ) - .function("decodeRange", &clp_ffi_js::ir::KVPairIRStreamReader::decode_range); - - emscripten::class_("ClpStreamReader") - .constructor( - &clp_ffi_js::ir::StreamReader::create, - emscripten::return_value_policy::take_ownership() - ); -} -} // namespace diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/StreamReader.cpp index b9c86b6b..5ee5fb08 100644 --- a/src/clp_ffi_js/ir/StreamReader.cpp +++ b/src/clp_ffi_js/ir/StreamReader.cpp @@ -1,42 +1,32 @@ #include "StreamReader.hpp" -#include #include #include -#include +#include +#include #include -#include #include #include -#include -#include +#include #include #include #include #include #include -#include -#include #include #include -#include -#include #include #include #include -#include -#include -#include - -using namespace std::literals::string_literals; -using clp::ir::four_byte_encoded_variable_t; +#include +#include namespace clp_ffi_js::ir { -auto StreamReader::create(DataArrayTsType const& data_array) -> StreamReader { +auto StreamReader::create(DataArrayTsType const& data_array) -> std::unique_ptr { auto const length{data_array["length"].as()}; - SPDLOG_INFO("StreamReader::create: got buffer of length={}", length); + SPDLOG_INFO("KVPairIRStreamReader::create: got buffer of length={}", length); // Copy array from JavaScript to C++ clp::Array data_buffer{length}; @@ -48,234 +38,49 @@ auto StreamReader::create(DataArrayTsType const& data_array) -> StreamReader { auto zstd_decompressor{std::make_unique()}; zstd_decompressor->open(data_buffer.data(), length); - bool is_four_bytes_encoding{true}; - if (auto const err{ - clp::ffi::ir_stream::get_encoding_type(*zstd_decompressor, is_four_bytes_encoding) - }; - clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success != err) - { - SPDLOG_CRITICAL("Failed to decode encoding type, err={}", err); - throw ClpFfiJsException{ - clp::ErrorCode::ErrorCode_MetadataCorrupted, - __FILENAME__, - __LINE__, - "Failed to decode encoding type." - }; - } - if (false == is_four_bytes_encoding) { + bool is_four_byte_encoding{}; + auto const get_encoding_type_result{ + clp::ffi::ir_stream::get_encoding_type(*zstd_decompressor, is_four_byte_encoding) + }; + if (clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success != get_encoding_type_result) { + SPDLOG_CRITICAL("Failed to get encoding type: {}", get_encoding_type_result); throw ClpFfiJsException{ - clp::ErrorCode::ErrorCode_Unsupported, + clp::ErrorCode::ErrorCode_Failure, __FILENAME__, __LINE__, - "IR stream uses unsupported encoding." + "Failed to get encoding type." }; } - - auto result{ - clp::ir::LogEventDeserializer::create(*zstd_decompressor) - }; - if (result.has_error()) { - auto const error_code{result.error()}; + clp::ffi::ir_stream::encoded_tag_t metadata_type{}; + std::vector metadata_bytes; + auto const deserialize_preamble_result{clp::ffi::ir_stream::deserialize_preamble( + *zstd_decompressor, + metadata_type, + metadata_bytes + )}; + if (clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success != deserialize_preamble_result) { SPDLOG_CRITICAL( - "Failed to create deserializer: {}:{}", - error_code.category().name(), - error_code.message() + "Failed to deserialize preamble for version reading: {}", + deserialize_preamble_result ); throw ClpFfiJsException{ clp::ErrorCode::ErrorCode_Failure, __FILENAME__, __LINE__, - "Failed to create deserializer" + "Failed to deserialize preamble for version reading." }; } - - StreamReaderDataContext stream_reader_data_context{ - std::move(data_buffer), - std::move(zstd_decompressor), - std::move(result.value()) + std::string_view const metadata_view{ + clp::size_checked_pointer_cast(metadata_bytes.data()), + metadata_bytes.size() }; - return StreamReader{std::move(stream_reader_data_context)}; -} - -auto StreamReader::get_num_events_buffered() const -> size_t { - return m_encoded_log_events.size(); -} - -auto StreamReader::get_filtered_log_event_map() const -> FilteredLogEventMapTsType { - if (false == m_filtered_log_event_map.has_value()) { - return FilteredLogEventMapTsType{emscripten::val::null()}; - } - - return FilteredLogEventMapTsType{emscripten::val::array(m_filtered_log_event_map.value())}; -} - -void StreamReader::filter_log_events(emscripten::val const& log_level_filter) { - if (log_level_filter.isNull()) { - m_filtered_log_event_map.reset(); - return; + nlohmann::json const metadata = nlohmann::json::parse(metadata_view); + auto const& version{metadata.at(clp::ffi::ir_stream::cProtocol::Metadata::VersionKey)}; + SPDLOG_INFO("The version is {}", version); + if (version == "v0.0.0") { + return std::make_unique(IrStreamReader::create(data_array)); } - m_filtered_log_event_map.emplace(); - auto filter_levels{emscripten::vecFromJSArray>(log_level_filter - )}; - for (size_t log_event_idx = 0; log_event_idx < m_encoded_log_events.size(); ++log_event_idx) { - auto const& log_event = m_encoded_log_events[log_event_idx]; - if (std::ranges::find( - filter_levels, - clp::enum_to_underlying_type(log_event.get_log_level()) - ) - != filter_levels.end()) - { - m_filtered_log_event_map->emplace_back(log_event_idx); - } - } + return std::make_unique(KVPairIRStreamReader::create(data_array)); } - -auto StreamReader::deserialize_stream() -> size_t { - if (nullptr == m_stream_reader_data_context) { - return m_encoded_log_events.size(); - } - - constexpr size_t cDefaultNumReservedLogEvents{500'000}; - m_encoded_log_events.reserve(cDefaultNumReservedLogEvents); - - while (true) { - auto result{m_stream_reader_data_context->get_deserializer().deserialize_log_event()}; - if (result.has_error()) { - auto const error{result.error()}; - if (std::errc::no_message_available == error) { - break; - } - if (std::errc::result_out_of_range == error) { - SPDLOG_ERROR("File contains an incomplete IR stream"); - break; - } - throw ClpFfiJsException{ - clp::ErrorCode::ErrorCode_Corrupt, - __FILENAME__, - __LINE__, - "Failed to deserialize: "s + error.category().name() + ":" + error.message() - }; - } - auto const& log_event = result.value(); - auto const& message = log_event.get_message(); - - auto const& logtype = message.get_logtype(); - constexpr size_t cLogLevelPositionInMessages{1}; - LogLevel log_level{LogLevel::NONE}; - if (logtype.length() > cLogLevelPositionInMessages) { - // NOLINTNEXTLINE(readability-qualified-auto) - auto const log_level_name_it{std::find_if( - cLogLevelNames.begin() + static_cast(cValidLogLevelsBeginIdx), - cLogLevelNames.end(), - [&](std::string_view level) { - return logtype.substr(cLogLevelPositionInMessages).starts_with(level); - } - )}; - if (log_level_name_it != cLogLevelNames.end()) { - log_level = static_cast( - std::distance(cLogLevelNames.begin(), log_level_name_it) - ); - } - } - - auto log_viewer_event{LogEventWithLevel( - log_event.get_timestamp(), - log_event.get_utc_offset(), - message, - log_level - )}; - m_encoded_log_events.emplace_back(std::move(log_viewer_event)); - } - m_stream_reader_data_context.reset(nullptr); - return m_encoded_log_events.size(); -} - -auto StreamReader::decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const - -> DecodedResultsTsType { - if (use_filter && false == m_filtered_log_event_map.has_value()) { - return DecodedResultsTsType{emscripten::val::null()}; - } - - size_t length{0}; - if (use_filter) { - length = m_filtered_log_event_map->size(); - } else { - length = m_encoded_log_events.size(); - } - if (length < end_idx || begin_idx > end_idx) { - return DecodedResultsTsType{emscripten::val::null()}; - } - - std::string message; - constexpr size_t cDefaultReservedMessageLength{512}; - message.reserve(cDefaultReservedMessageLength); - auto const results{emscripten::val::array()}; - - for (size_t i = begin_idx; i < end_idx; ++i) { - size_t log_event_idx{0}; - if (use_filter) { - log_event_idx = m_filtered_log_event_map->at(i); - } else { - log_event_idx = i; - } - auto const& log_event{m_encoded_log_events[log_event_idx]}; - - auto const parsed{log_event.get_message().decode_and_unparse()}; - if (false == parsed.has_value()) { - SPDLOG_ERROR("Failed to decode message."); - break; - } - message = parsed.value(); - - m_ts_pattern.insert_formatted_timestamp(log_event.get_timestamp(), message); - - EM_ASM( - { Emval.toValue($0).push([UTF8ToString($1), $2, $3, $4]); }, - results.as_handle(), - message.c_str(), - log_event.get_timestamp(), - log_event.get_log_level(), - log_event_idx + 1 - ); - } - - return DecodedResultsTsType(results); -} - -StreamReader::StreamReader( - StreamReaderDataContext&& stream_reader_data_context -) - : m_stream_reader_data_context{std::make_unique< - StreamReaderDataContext>( - std::move(stream_reader_data_context) - )}, - m_ts_pattern{m_stream_reader_data_context->get_deserializer().get_timestamp_pattern()} {} } // namespace clp_ffi_js::ir - -namespace { -EMSCRIPTEN_BINDINGS(ClpIrStreamReader) { - emscripten::register_type("Uint8Array"); - emscripten::register_type( - "Array<[string, number, number, number]>" - ); - emscripten::register_type("number[] | null"); - - emscripten::class_("ClpIrStreamReader") - .constructor( - &clp_ffi_js::ir::StreamReader::create, - emscripten::return_value_policy::take_ownership() - ) - .function( - "getNumEventsBuffered", - &clp_ffi_js::ir::StreamReader::get_num_events_buffered - ) - .function( - "getFilteredLogEventMap", - &clp_ffi_js::ir::StreamReader::get_filtered_log_event_map - ) - .function("filterLogEvents", &clp_ffi_js::ir::StreamReader::filter_log_events) - .function("deserializeStream", &clp_ffi_js::ir::StreamReader::deserialize_stream) - .function("decodeRange", &clp_ffi_js::ir::StreamReader::decode_range); -} -} // namespace diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index dec6c360..15865527 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -3,17 +3,10 @@ #include #include -#include -#include -#include -#include #include #include -#include -#include - namespace clp_ffi_js::ir { EMSCRIPTEN_DECLARE_VAL_TYPE(DataArrayTsType); EMSCRIPTEN_DECLARE_VAL_TYPE(DecodedResultsTsType); @@ -25,57 +18,35 @@ EMSCRIPTEN_DECLARE_VAL_TYPE(FilteredLogEventMapTsType); */ class StreamReader { public: - /** - * Mapping between an index in the filtered log events collection to an index in the unfiltered - * log events collection. - */ - using FilteredLogEventsMap = std::optional>; + virtual ~StreamReader() = default; - /** - * Creates a StreamReader to read from the given array. - * - * @param data_array An array containing a Zstandard-compressed IR stream. - * @return The created instance. - * @throw ClpFfiJsException if any error occurs. - */ - [[nodiscard]] static auto create(DataArrayTsType const& data_array) -> StreamReader; - - // Destructor - ~StreamReader() = default; - - // Disable copy constructor and assignment operator - StreamReader(StreamReader const&) = delete; - auto operator=(StreamReader const&) -> StreamReader& = delete; - - // Define default move constructor - StreamReader(StreamReader&&) = default; - // Delete move assignment operator since it's also disabled in `clp::ir::LogEventDeserializer`. - auto operator=(StreamReader&&) -> StreamReader& = delete; + [[nodiscard]] static auto create(DataArrayTsType const& data_array + ) -> std::unique_ptr; /** * @return The number of events buffered. */ - [[nodiscard]] auto get_num_events_buffered() const -> size_t; + [[nodiscard]] virtual auto get_num_events_buffered() const -> size_t = 0; /** * @return The filtered log events map. */ - [[nodiscard]] auto get_filtered_log_event_map() const -> FilteredLogEventMapTsType; + [[nodiscard]] virtual auto get_filtered_log_event_map() const -> FilteredLogEventMapTsType = 0; /** * Generates a filtered collection from all log events. * * @param log_level_filter Array of selected log levels */ - void filter_log_events(emscripten::val const& log_level_filter); + virtual auto filter_log_events(emscripten::val const& log_level_filter) -> void = 0; /** - * Deserializes all log events in the stream. After the stream has been exhausted, it will be + * Deserializes all log events in the file. After the stream has been exhausted, it will be * deallocated. * * @return The number of successfully deserialized ("valid") log events. */ - [[nodiscard]] auto deserialize_stream() -> size_t; + [[nodiscard]] virtual auto deserialize_stream() -> size_t = 0; /** * Decodes log events in the range `[beginIdx, endIdx)` of the filtered or unfiltered @@ -83,7 +54,8 @@ class StreamReader { * * @param begin_idx * @param end_idx - * @param use_filter Whether to decode from the filtered or unfiltered log events collection. + * @param use_filter If true, decode from the filtered log events collection; otherwise, decode + * from the unfiltered one. * @return An array where each element is a decoded log event represented by an array of: * - The log event's message * - The log event's timestamp as milliseconds since the Unix epoch @@ -92,20 +64,8 @@ class StreamReader { * @return null if any log event in the range doesn't exist (e.g. the range exceeds the number * of log events in the collection). */ - [[nodiscard]] auto - decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType; - -private: - // Constructor - explicit StreamReader(StreamReaderDataContext&& - stream_reader_data_context); - - // Variables - std::vector> m_encoded_log_events; - std::unique_ptr> - m_stream_reader_data_context; - FilteredLogEventsMap m_filtered_log_event_map; - clp::TimestampPattern m_ts_pattern; + [[nodiscard]] virtual auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const + -> DecodedResultsTsType = 0; }; } // namespace clp_ffi_js::ir From ff7321ba9f375fa996bd282b5000eb4bccc76d37 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Fri, 11 Oct 2024 19:31:35 +0800 Subject: [PATCH 12/18] Add log level filtering to KVPairIRStreamReader. --- src/clp_ffi_js/bindings.cpp | 6 ++ src/clp_ffi_js/ir/IrStreamReader.cpp | 17 +++--- src/clp_ffi_js/ir/IrStreamReader.hpp | 12 +--- src/clp_ffi_js/ir/KVPairIRStreamReader.cpp | 65 +++++++++++++++++++--- src/clp_ffi_js/ir/KVPairIRStreamReader.hpp | 11 +++- src/clp_ffi_js/ir/LogEventWithLevel.hpp | 15 ++--- src/clp_ffi_js/ir/StreamReader.cpp | 6 +- src/clp_ffi_js/ir/StreamReader.hpp | 9 ++- 8 files changed, 98 insertions(+), 43 deletions(-) diff --git a/src/clp_ffi_js/bindings.cpp b/src/clp_ffi_js/bindings.cpp index 8af72db4..7bff9e70 100644 --- a/src/clp_ffi_js/bindings.cpp +++ b/src/clp_ffi_js/bindings.cpp @@ -10,6 +10,7 @@ namespace { "Array<[string, number, number, number]>" ); emscripten::register_type("number[] | null"); + emscripten::register_type("interface{logLevelKey?: string, timestampKey?: string}"); emscripten::class_>("ClpIrStreamReader") @@ -39,6 +40,11 @@ namespace { "getNumEventsBuffered", &clp_ffi_js::ir::KVPairIRStreamReader::get_num_events_buffered ) + .function( + "getFilteredLogEventMap", + &clp_ffi_js::ir::KVPairIRStreamReader::get_filtered_log_event_map + ) + .function("filterLogEvents", &clp_ffi_js::ir::KVPairIRStreamReader::filter_log_events) .function( "deserializeStream", &clp_ffi_js::ir::KVPairIRStreamReader::deserialize_stream diff --git a/src/clp_ffi_js/ir/IrStreamReader.cpp b/src/clp_ffi_js/ir/IrStreamReader.cpp index a7041d4a..470cbb2d 100644 --- a/src/clp_ffi_js/ir/IrStreamReader.cpp +++ b/src/clp_ffi_js/ir/IrStreamReader.cpp @@ -34,7 +34,7 @@ using namespace std::literals::string_literals; using clp::ir::four_byte_encoded_variable_t; namespace clp_ffi_js::ir { -auto IrStreamReader::create(DataArrayTsType const& data_array) -> IrStreamReader { +auto IrStreamReader::create(DataArrayTsType const& data_array, ReaderOptions const& reader_options) -> IrStreamReader { auto const length{data_array["length"].as()}; SPDLOG_INFO("IrStreamReader::create: got buffer of length={}", length); @@ -94,7 +94,7 @@ auto IrStreamReader::create(DataArrayTsType const& data_array) -> IrStreamReader std::move(zstd_decompressor), std::move(result.value()) }; - return IrStreamReader{std::move(stream_reader_data_context)}; + return IrStreamReader{std::move(stream_reader_data_context), std::move(reader_options)}; } auto IrStreamReader::get_num_events_buffered() const -> size_t { @@ -179,10 +179,8 @@ auto IrStreamReader::deserialize_stream() -> size_t { } } - auto log_viewer_event{LogEventWithLevel( - log_event.get_timestamp(), - log_event.get_utc_offset(), - message, + auto log_viewer_event{LogEventWithLevel>( + std::move(log_event), log_level )}; m_encoded_log_events.emplace_back(std::move(log_viewer_event)); @@ -219,7 +217,8 @@ auto IrStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool use_fil } else { log_event_idx = i; } - auto const& log_event{m_encoded_log_events[log_event_idx]}; + auto const& log_event_with_level{m_encoded_log_events[log_event_idx]}; + auto const& log_event{log_event_with_level.get_log_event()}; auto const parsed{log_event.get_message().decode_and_unparse()}; if (false == parsed.has_value()) { @@ -235,7 +234,7 @@ auto IrStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool use_fil results.as_handle(), message.c_str(), log_event.get_timestamp(), - log_event.get_log_level(), + log_event_with_level.get_log_level(), log_event_idx + 1 ); } @@ -244,7 +243,7 @@ auto IrStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool use_fil } IrStreamReader::IrStreamReader( - StreamReaderDataContext>&& stream_reader_data_context + StreamReaderDataContext>&& stream_reader_data_context, ReaderOptions const& reader_options ) : m_stream_reader_data_context{std::make_unique< StreamReaderDataContext>>( diff --git a/src/clp_ffi_js/ir/IrStreamReader.hpp b/src/clp_ffi_js/ir/IrStreamReader.hpp index e7494217..b31ea67b 100644 --- a/src/clp_ffi_js/ir/IrStreamReader.hpp +++ b/src/clp_ffi_js/ir/IrStreamReader.hpp @@ -23,12 +23,6 @@ namespace clp_ffi_js::ir { */ class IrStreamReader: public StreamReader { public: - /** - * Mapping between an index in the filtered log events collection to an index in the unfiltered - * log events collection. - */ - using FilteredLogEventsMap = std::optional>; - /** * Creates a IrStreamReader to read from the given array. * @@ -36,7 +30,7 @@ class IrStreamReader: public StreamReader { * @return The created instance. * @throw ClpFfiJsException if any error occurs. */ - [[nodiscard]] static auto create(DataArrayTsType const& data_array) -> IrStreamReader; + [[nodiscard]] static auto create(DataArrayTsType const& data_array, ReaderOptions const& reader_options) -> IrStreamReader; // Destructor ~IrStreamReader() = default; @@ -96,10 +90,10 @@ class IrStreamReader: public StreamReader { private: // Constructor explicit IrStreamReader(StreamReaderDataContext>&& - stream_reader_data_context); + stream_reader_data_context, ReaderOptions const& reader_options); // Variables - std::vector> m_encoded_log_events; + std::vector>> m_encoded_log_events; std::unique_ptr>> m_stream_reader_data_context; FilteredLogEventsMap m_filtered_log_event_map; diff --git a/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp b/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp index 5ea5acbb..ad29e838 100644 --- a/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp +++ b/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp @@ -27,7 +27,7 @@ using namespace std::literals::string_literals; using clp::ir::four_byte_encoded_variable_t; namespace clp_ffi_js::ir { -auto KVPairIRStreamReader::create(DataArrayTsType const& data_array) -> KVPairIRStreamReader { +auto KVPairIRStreamReader::create(DataArrayTsType const& data_array, ReaderOptions const& reader_options) -> KVPairIRStreamReader { auto const length{data_array["length"].as()}; SPDLOG_INFO("KVPairIRStreamReader::create: got buffer of length={}", length); @@ -62,7 +62,7 @@ auto KVPairIRStreamReader::create(DataArrayTsType const& data_array) -> KVPairIR std::move(zstd_decompressor), std::move(result.value()) }; - return KVPairIRStreamReader{std::move(stream_reader_data_context)}; + return KVPairIRStreamReader{std::move(stream_reader_data_context), std::move(reader_options)}; } auto KVPairIRStreamReader::get_num_events_buffered() const -> size_t { @@ -70,21 +70,66 @@ auto KVPairIRStreamReader::get_num_events_buffered() const -> size_t { } auto KVPairIRStreamReader::get_filtered_log_event_map() const -> FilteredLogEventMapTsType { - return FilteredLogEventMapTsType(emscripten::val::null()); + if (false == m_filtered_log_event_map.has_value()) { + return FilteredLogEventMapTsType{emscripten::val::null()}; + } + + return FilteredLogEventMapTsType{emscripten::val::array(m_filtered_log_event_map.value())}; } -auto KVPairIRStreamReader::filter_log_events(emscripten::val const& log_level_filter) -> void {} +auto KVPairIRStreamReader::filter_log_events(emscripten::val const& log_level_filter) -> void { + if (log_level_filter.isNull()) { + m_filtered_log_event_map.reset(); + return; + } + + m_filtered_log_event_map.emplace(); + auto filter_levels{emscripten::vecFromJSArray>(log_level_filter + )}; + for (size_t log_event_idx = 0; log_event_idx < m_encoded_log_events.size(); ++log_event_idx) { + auto const& log_event = m_encoded_log_events[log_event_idx]; + if (std::ranges::find( + filter_levels, + clp::enum_to_underlying_type(log_event.get_log_level()) + ) + != filter_levels.end()) + { + m_filtered_log_event_map->emplace_back(log_event_idx); + } + } +} auto KVPairIRStreamReader::deserialize_stream() -> size_t { if (nullptr != m_stream_reader_data_context) { constexpr size_t cDefaultNumReservedLogEvents{500'000}; m_encoded_log_events.reserve(cDefaultNumReservedLogEvents); auto& reader{m_stream_reader_data_context->get_reader()}; + clp::ffi::SchemaTreeNode::id_t log_level_node_id{clp::ffi::SchemaTree::cRootId}; while (true) { auto result{m_stream_reader_data_context->get_deserializer() .deserialize_to_next_log_event(reader)}; if (false == result.has_error()) { - m_encoded_log_events.emplace_back(std::move(result.value())); + LogLevel log_level{LogLevel::NONE}; + if (clp::ffi::SchemaTree::cRootId == log_level_node_id) { + auto const log_level_node_id_result{result.value().get_schema_tree().try_get_node_id( + {clp::ffi::SchemaTree::cRootId, m_log_level_key, clp::ffi::SchemaTreeNode::Type::Str} + )}; + if (log_level_node_id_result.has_value()) { + log_level_node_id = log_level_node_id_result.value(); + } + } else { + auto const &log_level_value{result.value().get_node_id_value_pairs().at(log_level_node_id)}; + if (log_level_value.has_value() && + "short_string" == log_level_value.value().get_immutable_view()) { + log_level = LogLevel::ERROR; + } + } + + auto log_event_with_level {LogEventWithLevel( + std::move(result.value()), + log_level + )}; + m_encoded_log_events.emplace_back(std::move(log_event_with_level)); continue; } auto const error{result.error()}; @@ -122,8 +167,8 @@ auto KVPairIRStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool / }; size_t log_num{begin_idx + 1}; auto const results{emscripten::val::array()}; - for (auto const& log_event : log_events_span) { - auto const json{log_event.serialize_to_json()}; + for (auto const& log_event_with_level : log_events_span) { + auto const json{log_event_with_level.get_log_event().serialize_to_json()}; if (false == json.has_value()) { SPDLOG_ERROR("Failed to decode message."); break; @@ -142,10 +187,12 @@ auto KVPairIRStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool / } KVPairIRStreamReader::KVPairIRStreamReader( - StreamReaderDataContext&& stream_reader_data_context + StreamReaderDataContext&& stream_reader_data_context, ReaderOptions const& reader_options ) : m_stream_reader_data_context{std::make_unique>( std::move(stream_reader_data_context) - )} {} + )}, + m_log_level_key{reader_options["logLevelKey"].as()}, + m_timestamp_key{reader_options["timestampKey"].as()} {} } // namespace clp_ffi_js::ir diff --git a/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp b/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp index 0e1ed8a9..687e6207 100644 --- a/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp +++ b/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -27,7 +28,7 @@ class KVPairIRStreamReader : public StreamReader { * @return The created instance. * @throw ClpFfiJsException if any error occurs. */ - [[nodiscard]] static auto create(DataArrayTsType const& data_array) -> KVPairIRStreamReader; + [[nodiscard]] static auto create(DataArrayTsType const& data_array, ReaderOptions const& reader_options) -> KVPairIRStreamReader; // Destructor ~KVPairIRStreamReader() override = default; @@ -83,12 +84,16 @@ class KVPairIRStreamReader : public StreamReader { // Constructor explicit KVPairIRStreamReader( - StreamReaderDataContext&& stream_reader_data_context + StreamReaderDataContext&& stream_reader_data_context, ReaderOptions const& reader_options ); // Variables - std::vector m_encoded_log_events; + std::vector> m_encoded_log_events; std::unique_ptr> m_stream_reader_data_context; + + std::string m_log_level_key; + std::string m_timestamp_key; + FilteredLogEventsMap m_filtered_log_event_map; }; } // namespace clp_ffi_js::ir diff --git a/src/clp_ffi_js/ir/LogEventWithLevel.hpp b/src/clp_ffi_js/ir/LogEventWithLevel.hpp index 22a00af3..95e4b834 100644 --- a/src/clp_ffi_js/ir/LogEventWithLevel.hpp +++ b/src/clp_ffi_js/ir/LogEventWithLevel.hpp @@ -18,22 +18,19 @@ namespace clp_ffi_js::ir { * IR log event will contain a set of key-value pairs, one of which should be the log level. * @tparam encoded_variable_t The type of encoded variables in the event */ -template -class LogEventWithLevel : public clp::ir::LogEvent { +template +class LogEventWithLevel { public: // Constructors - LogEventWithLevel( - clp::ir::epoch_time_ms_t timestamp, - clp::UtcOffset utc_offset, - clp::ir::EncodedTextAst message, - LogLevel log_level - ) - : clp::ir::LogEvent{timestamp, utc_offset, std::move(message)}, + explicit LogEventWithLevel(log_event_t log_event, LogLevel log_level) + : m_log_event{std::move(log_event)}, m_log_level{log_level} {} + [[nodiscard]] auto get_log_event() const -> const log_event_t& { return m_log_event; } [[nodiscard]] auto get_log_level() const -> LogLevel { return m_log_level; } private: + log_event_t m_log_event; LogLevel m_log_level; }; } // namespace clp_ffi_js::ir diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/StreamReader.cpp index 5ee5fb08..414a2b85 100644 --- a/src/clp_ffi_js/ir/StreamReader.cpp +++ b/src/clp_ffi_js/ir/StreamReader.cpp @@ -24,7 +24,7 @@ #include namespace clp_ffi_js::ir { -auto StreamReader::create(DataArrayTsType const& data_array) -> std::unique_ptr { +auto StreamReader::create(DataArrayTsType const& data_array, ReaderOptions const& reader_options) -> std::unique_ptr { auto const length{data_array["length"].as()}; SPDLOG_INFO("KVPairIRStreamReader::create: got buffer of length={}", length); @@ -78,9 +78,9 @@ auto StreamReader::create(DataArrayTsType const& data_array) -> std::unique_ptr< auto const& version{metadata.at(clp::ffi::ir_stream::cProtocol::Metadata::VersionKey)}; SPDLOG_INFO("The version is {}", version); if (version == "v0.0.0") { - return std::make_unique(IrStreamReader::create(data_array)); + return std::make_unique(IrStreamReader::create(data_array, std::move(reader_options))); } - return std::make_unique(KVPairIRStreamReader::create(data_array)); + return std::make_unique(KVPairIRStreamReader::create(data_array, std::move(reader_options))); } } // namespace clp_ffi_js::ir diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 15865527..248ea659 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -11,6 +11,7 @@ namespace clp_ffi_js::ir { EMSCRIPTEN_DECLARE_VAL_TYPE(DataArrayTsType); EMSCRIPTEN_DECLARE_VAL_TYPE(DecodedResultsTsType); EMSCRIPTEN_DECLARE_VAL_TYPE(FilteredLogEventMapTsType); +EMSCRIPTEN_DECLARE_VAL_TYPE(ReaderOptions); /** * Class to deserialize and decode Zstandard-compressed CLP IR streams as well as format decoded @@ -18,9 +19,15 @@ EMSCRIPTEN_DECLARE_VAL_TYPE(FilteredLogEventMapTsType); */ class StreamReader { public: + /** + * Mapping between an index in the filtered log events collection to an index in the unfiltered + * log events collection. + */ + using FilteredLogEventsMap = std::optional>; + virtual ~StreamReader() = default; - [[nodiscard]] static auto create(DataArrayTsType const& data_array + [[nodiscard]] static auto create(DataArrayTsType const& data_array, ReaderOptions const& reader_options ) -> std::unique_ptr; /** From 98b0b704e17c1b9f831c7a2d79d0e6c5aff2e16a Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Fri, 11 Oct 2024 19:34:01 +0800 Subject: [PATCH 13/18] Rename IrStreamReader -> IRStreamReader. --- CMakeLists.txt | 2 +- src/clp_ffi_js/bindings.cpp | 16 +++++++-------- ...{IrStreamReader.cpp => IRStreamReader.cpp} | 20 +++++++++---------- ...{IrStreamReader.hpp => IRStreamReader.hpp} | 18 ++++++++--------- src/clp_ffi_js/ir/StreamReader.cpp | 4 ++-- 5 files changed, 30 insertions(+), 30 deletions(-) rename src/clp_ffi_js/ir/{IrStreamReader.cpp => IRStreamReader.cpp} (93%) rename src/clp_ffi_js/ir/{IrStreamReader.hpp => IRStreamReader.hpp} (87%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e5b2c64..51b279e7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -115,7 +115,7 @@ target_include_directories(${CLP_FFI_JS_BIN_NAME} PRIVATE src/) set(CLP_FFI_JS_SRC_MAIN src/clp_ffi_js/bindings.cpp src/clp_ffi_js/ir/KVPairIRStreamReader.cpp - src/clp_ffi_js/ir/IrStreamReader.cpp + src/clp_ffi_js/ir/IRStreamReader.cpp src/clp_ffi_js/ir/StreamReader.cpp ) diff --git a/src/clp_ffi_js/bindings.cpp b/src/clp_ffi_js/bindings.cpp index 7bff9e70..f39ded8f 100644 --- a/src/clp_ffi_js/bindings.cpp +++ b/src/clp_ffi_js/bindings.cpp @@ -1,7 +1,7 @@ #include #include "clp_ffi_js/ir/StreamReader.hpp" #include "clp_ffi_js/ir/KVPairIRStreamReader.hpp" -#include "clp_ffi_js/ir/IrStreamReader.hpp" +#include "clp_ffi_js/ir/IRStreamReader.hpp" namespace { EMSCRIPTEN_BINDINGS(ClpIrStreamReader) { @@ -12,23 +12,23 @@ namespace { emscripten::register_type("number[] | null"); emscripten::register_type("interface{logLevelKey?: string, timestampKey?: string}"); - emscripten::class_>("ClpIrStreamReader") .constructor( - &clp_ffi_js::ir::IrStreamReader::create, + &clp_ffi_js::ir::IRStreamReader::create, emscripten::return_value_policy::take_ownership() ) .function( "getNumEventsBuffered", - &clp_ffi_js::ir::IrStreamReader::get_num_events_buffered + &clp_ffi_js::ir::IRStreamReader::get_num_events_buffered ) .function( "getFilteredLogEventMap", - &clp_ffi_js::ir::IrStreamReader::get_filtered_log_event_map + &clp_ffi_js::ir::IRStreamReader::get_filtered_log_event_map ) - .function("filterLogEvents", &clp_ffi_js::ir::IrStreamReader::filter_log_events) - .function("deserializeStream", &clp_ffi_js::ir::IrStreamReader::deserialize_stream) - .function("decodeRange", &clp_ffi_js::ir::IrStreamReader::decode_range); + .function("filterLogEvents", &clp_ffi_js::ir::IRStreamReader::filter_log_events) + .function("deserializeStream", &clp_ffi_js::ir::IRStreamReader::deserialize_stream) + .function("decodeRange", &clp_ffi_js::ir::IRStreamReader::decode_range); emscripten::class_>("ClpKVPairIRStreamReader") diff --git a/src/clp_ffi_js/ir/IrStreamReader.cpp b/src/clp_ffi_js/ir/IRStreamReader.cpp similarity index 93% rename from src/clp_ffi_js/ir/IrStreamReader.cpp rename to src/clp_ffi_js/ir/IRStreamReader.cpp index 470cbb2d..bb8ee5ce 100644 --- a/src/clp_ffi_js/ir/IrStreamReader.cpp +++ b/src/clp_ffi_js/ir/IRStreamReader.cpp @@ -1,4 +1,4 @@ -#include "IrStreamReader.hpp" +#include "IRStreamReader.hpp" #include #include @@ -34,9 +34,9 @@ using namespace std::literals::string_literals; using clp::ir::four_byte_encoded_variable_t; namespace clp_ffi_js::ir { -auto IrStreamReader::create(DataArrayTsType const& data_array, ReaderOptions const& reader_options) -> IrStreamReader { +auto IRStreamReader::create(DataArrayTsType const& data_array, ReaderOptions const& reader_options) -> IRStreamReader { auto const length{data_array["length"].as()}; - SPDLOG_INFO("IrStreamReader::create: got buffer of length={}", length); + SPDLOG_INFO("IRStreamReader::create: got buffer of length={}", length); // Copy array from JavaScript to C++ clp::Array data_buffer{length}; @@ -94,14 +94,14 @@ auto IrStreamReader::create(DataArrayTsType const& data_array, ReaderOptions con std::move(zstd_decompressor), std::move(result.value()) }; - return IrStreamReader{std::move(stream_reader_data_context), std::move(reader_options)}; + return IRStreamReader{std::move(stream_reader_data_context), std::move(reader_options)}; } -auto IrStreamReader::get_num_events_buffered() const -> size_t { +auto IRStreamReader::get_num_events_buffered() const -> size_t { return m_encoded_log_events.size(); } -auto IrStreamReader::get_filtered_log_event_map() const -> FilteredLogEventMapTsType { +auto IRStreamReader::get_filtered_log_event_map() const -> FilteredLogEventMapTsType { if (false == m_filtered_log_event_map.has_value()) { return FilteredLogEventMapTsType{emscripten::val::null()}; } @@ -109,7 +109,7 @@ auto IrStreamReader::get_filtered_log_event_map() const -> FilteredLogEventMapTs return FilteredLogEventMapTsType{emscripten::val::array(m_filtered_log_event_map.value())}; } -void IrStreamReader::filter_log_events(emscripten::val const& log_level_filter) { +void IRStreamReader::filter_log_events(emscripten::val const& log_level_filter) { if (log_level_filter.isNull()) { m_filtered_log_event_map.reset(); return; @@ -131,7 +131,7 @@ void IrStreamReader::filter_log_events(emscripten::val const& log_level_filter) } } -auto IrStreamReader::deserialize_stream() -> size_t { +auto IRStreamReader::deserialize_stream() -> size_t { if (nullptr == m_stream_reader_data_context) { return m_encoded_log_events.size(); } @@ -189,7 +189,7 @@ auto IrStreamReader::deserialize_stream() -> size_t { return m_encoded_log_events.size(); } -auto IrStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const +auto IRStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType { if (use_filter && false == m_filtered_log_event_map.has_value()) { return DecodedResultsTsType{emscripten::val::null()}; @@ -242,7 +242,7 @@ auto IrStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool use_fil return DecodedResultsTsType(results); } - IrStreamReader::IrStreamReader( + IRStreamReader::IRStreamReader( StreamReaderDataContext>&& stream_reader_data_context, ReaderOptions const& reader_options ) : m_stream_reader_data_context{std::make_unique< diff --git a/src/clp_ffi_js/ir/IrStreamReader.hpp b/src/clp_ffi_js/ir/IRStreamReader.hpp similarity index 87% rename from src/clp_ffi_js/ir/IrStreamReader.hpp rename to src/clp_ffi_js/ir/IRStreamReader.hpp index b31ea67b..dd37135f 100644 --- a/src/clp_ffi_js/ir/IrStreamReader.hpp +++ b/src/clp_ffi_js/ir/IRStreamReader.hpp @@ -21,28 +21,28 @@ namespace clp_ffi_js::ir { * Class to deserialize and decode Zstandard-compressed CLP IR streams as well as format decoded * log events. */ -class IrStreamReader: public StreamReader { +class IRStreamReader: public StreamReader { public: /** - * Creates a IrStreamReader to read from the given array. + * Creates a IRStreamReader to read from the given array. * * @param data_array An array containing a Zstandard-compressed IR stream. * @return The created instance. * @throw ClpFfiJsException if any error occurs. */ - [[nodiscard]] static auto create(DataArrayTsType const& data_array, ReaderOptions const& reader_options) -> IrStreamReader; + [[nodiscard]] static auto create(DataArrayTsType const& data_array, ReaderOptions const& reader_options) -> IRStreamReader; // Destructor - ~IrStreamReader() = default; + ~IRStreamReader() = default; // Disable copy constructor and assignment operator - IrStreamReader(IrStreamReader const&) = delete; - auto operator=(IrStreamReader const&) -> IrStreamReader& = delete; + IRStreamReader(IRStreamReader const&) = delete; + auto operator=(IRStreamReader const&) -> IRStreamReader& = delete; // Define default move constructor - IrStreamReader(IrStreamReader&&) = default; + IRStreamReader(IRStreamReader&&) = default; // Delete move assignment operator since it's also disabled in `clp::ir::LogEventDeserializer`. - auto operator=(IrStreamReader&&) -> IrStreamReader& = delete; + auto operator=(IRStreamReader&&) -> IRStreamReader& = delete; /** * @return The number of events buffered. @@ -89,7 +89,7 @@ class IrStreamReader: public StreamReader { private: // Constructor - explicit IrStreamReader(StreamReaderDataContext>&& + explicit IRStreamReader(StreamReaderDataContext>&& stream_reader_data_context, ReaderOptions const& reader_options); // Variables diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/StreamReader.cpp index 414a2b85..a1feeb5e 100644 --- a/src/clp_ffi_js/ir/StreamReader.cpp +++ b/src/clp_ffi_js/ir/StreamReader.cpp @@ -20,7 +20,7 @@ #include #include -#include +#include #include namespace clp_ffi_js::ir { @@ -78,7 +78,7 @@ auto StreamReader::create(DataArrayTsType const& data_array, ReaderOptions const auto const& version{metadata.at(clp::ffi::ir_stream::cProtocol::Metadata::VersionKey)}; SPDLOG_INFO("The version is {}", version); if (version == "v0.0.0") { - return std::make_unique(IrStreamReader::create(data_array, std::move(reader_options))); + return std::make_unique(IRStreamReader::create(data_array, std::move(reader_options))); } return std::make_unique(KVPairIRStreamReader::create(data_array, std::move(reader_options))); From f080eced6a3ad67812f5db6a0e4f165859f3e28c Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Fri, 1 Nov 2024 05:02:26 -0400 Subject: [PATCH 14/18] Update to latest CLP commit. --- CMakeLists.txt | 2 +- src/clp_ffi_js/ir/KVPairIRStreamReader.cpp | 86 +++++++++++++--------- src/clp_ffi_js/ir/KVPairIRStreamReader.hpp | 35 ++++++++- src/submodules/clp | 2 +- 4 files changed, 86 insertions(+), 39 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 51b279e7..53adc553 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,6 +89,7 @@ target_link_options( -sEXPORT_ES6 -sMODULARIZE -sWASM_BIGINT + -fwasm-exceptions --emit-tsd ${CLP_FFI_JS_BIN_NAME}.d.ts ) target_link_libraries(${CLP_FFI_JS_BIN_NAME} PRIVATE embind) @@ -121,7 +122,6 @@ set(CLP_FFI_JS_SRC_MAIN set(CLP_FFI_JS_SRC_CLP_CORE src/submodules/clp/components/core/src/clp/ffi/ir_stream/decoding_methods.cpp - src/submodules/clp/components/core/src/clp/ffi/ir_stream/Deserializer.cpp src/submodules/clp/components/core/src/clp/ffi/ir_stream/ir_unit_deserialization_methods.cpp src/submodules/clp/components/core/src/clp/ffi/ir_stream/utils.cpp src/submodules/clp/components/core/src/clp/ffi/KeyValuePairLogEvent.cpp diff --git a/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp b/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp index ad29e838..b0fe27d6 100644 --- a/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp +++ b/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp @@ -41,7 +41,11 @@ auto KVPairIRStreamReader::create(DataArrayTsType const& data_array, ReaderOptio auto zstd_decompressor{std::make_unique()}; zstd_decompressor->open(data_buffer.data(), length); - auto result{clp::ffi::ir_stream::Deserializer::create(*zstd_decompressor)}; + // FIXME: Note we create a vector to store log events in IrUnitHandler at the moment. We should create a + // std::vector to be shared by IrUnitHandler() and KVPairIRStreamReader + // so that even after we free the m_stream_reader_data_context in the KVPairIRStreamReader instance (after we + // deserialize and stored all events in deserialize_stream), we can still access the stored log events. + auto result{clp::ffi::ir_stream::Deserializer::create(*zstd_decompressor, IrUnitHandler())}; if (result.has_error()) { auto const error_code{result.error()}; SPDLOG_CRITICAL( @@ -66,7 +70,8 @@ auto KVPairIRStreamReader::create(DataArrayTsType const& data_array, ReaderOptio } auto KVPairIRStreamReader::get_num_events_buffered() const -> size_t { - return m_encoded_log_events.size(); + // FIXME: we should not access the vector in ir_unit_handler. See above for reasons. + return m_stream_reader_data_context->get_deserializer().get_ir_unit_handler().get_deserialized_log_events().size(); } auto KVPairIRStreamReader::get_filtered_log_event_map() const -> FilteredLogEventMapTsType { @@ -104,36 +109,40 @@ auto KVPairIRStreamReader::deserialize_stream() -> size_t { constexpr size_t cDefaultNumReservedLogEvents{500'000}; m_encoded_log_events.reserve(cDefaultNumReservedLogEvents); auto& reader{m_stream_reader_data_context->get_reader()}; - clp::ffi::SchemaTreeNode::id_t log_level_node_id{clp::ffi::SchemaTree::cRootId}; + clp::ffi::SchemaTree::Node::id_t log_level_node_id{clp::ffi::SchemaTree::cRootId}; while (true) { - auto result{m_stream_reader_data_context->get_deserializer() - .deserialize_to_next_log_event(reader)}; + auto result{m_stream_reader_data_context->get_deserializer().deserialize_next_ir_unit(reader)}; if (false == result.has_error()) { - LogLevel log_level{LogLevel::NONE}; - if (clp::ffi::SchemaTree::cRootId == log_level_node_id) { - auto const log_level_node_id_result{result.value().get_schema_tree().try_get_node_id( - {clp::ffi::SchemaTree::cRootId, m_log_level_key, clp::ffi::SchemaTreeNode::Type::Str} - )}; - if (log_level_node_id_result.has_value()) { - log_level_node_id = log_level_node_id_result.value(); - } - } else { - auto const &log_level_value{result.value().get_node_id_value_pairs().at(log_level_node_id)}; - if (log_level_value.has_value() && - "short_string" == log_level_value.value().get_immutable_view()) { - log_level = LogLevel::ERROR; - } - } - - auto log_event_with_level {LogEventWithLevel( - std::move(result.value()), - log_level - )}; - m_encoded_log_events.emplace_back(std::move(log_event_with_level)); + // FIXME: revisit: we don't really need to parse log levels from key-value-pair log events since the + // levels are assumed to be stored directly as some node as a integer (which depends on our loglib + // implementations) and can be directly access via a given node_id. +// auto &log_events{m_stream_reader_data_context->get_deserializer().get_ir_unit_handler().get_deserialized_log_events()}; +// auto &last_event{log_events[log_events.size() - 1]}; +// LogLevel log_level{LogLevel::NONE}; +// if (clp::ffi::SchemaTree::cRootId == log_level_node_id) { +// auto const log_level_node_id_result{last_event.get_schema_tree().try_get_node_id( +// {clp::ffi::SchemaTree::cRootId, m_log_level_key, clp::ffi::SchemaTree::Node::Type::Str} +// )}; +// if (log_level_node_id_result.has_value()) { +// log_level_node_id = log_level_node_id_result.value(); +// } +// } else { +// auto const &log_level_value{last_event.get_node_id_value_pairs().at(log_level_node_id)}; +// if (log_level_value.has_value() && +// "short_string" == log_level_value.value().get_immutable_view()) { +// log_level = LogLevel::ERROR; +// } +// } +// +// auto log_event_with_level {LogEventWithLevel( +// std::move(last_event), +// log_level +// )}; +// m_encoded_log_events.emplace_back(std::move(log_event_with_level)); continue; } auto const error{result.error()}; - if (std::errc::no_message_available == error) { + if (std::errc::no_message_available == error || std::errc::operation_not_permitted == error) { break; } if (std::errc::result_out_of_range == error) { @@ -147,28 +156,33 @@ auto KVPairIRStreamReader::deserialize_stream() -> size_t { "Failed to deserialize: "s + error.category().name() + ":" + error.message() }; } - m_stream_reader_data_context.reset(nullptr); + // FIXME: we should really free this after we decode all events. See the FIXME in ::create() about why we + // commented out below line. +// m_stream_reader_data_context.reset(nullptr); } - return m_encoded_log_events.size(); + return m_stream_reader_data_context->get_deserializer().get_ir_unit_handler().get_deserialized_log_events().size(); } auto KVPairIRStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool /*use_filter*/) const -> DecodedResultsTsType { - if (m_encoded_log_events.size() < end_idx || begin_idx >= end_idx) { + // FIXME: we should not access the vector from the ir_unit_handler. See above for reasons. + if (m_stream_reader_data_context->get_deserializer().get_ir_unit_handler().get_deserialized_log_events().size() < end_idx || begin_idx >= end_idx) { return DecodedResultsTsType(emscripten::val::null()); } std::span const log_events_span{ - m_encoded_log_events.begin() - + static_cast(begin_idx), - m_encoded_log_events.begin() - + static_cast(end_idx) + m_stream_reader_data_context->get_deserializer().get_ir_unit_handler().get_deserialized_log_events().begin() + + static_cast::difference_type>(begin_idx), + m_stream_reader_data_context->get_deserializer().get_ir_unit_handler().get_deserialized_log_events().begin() + + static_cast::difference_type>(end_idx) }; size_t log_num{begin_idx + 1}; auto const results{emscripten::val::array()}; - for (auto const& log_event_with_level : log_events_span) { - auto const json{log_event_with_level.get_log_event().serialize_to_json()}; +// for (auto const& log_event_with_level : log_events_span) { +// auto const json{log_event_with_level.get_log_event().serialize_to_json()}; + for (auto const& log_event : log_events_span) { + auto const json{log_event.serialize_to_json()}; if (false == json.has_value()) { SPDLOG_ERROR("Failed to decode message."); break; diff --git a/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp b/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp index 687e6207..ee81f133 100644 --- a/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp +++ b/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp @@ -15,6 +15,39 @@ #include namespace clp_ffi_js::ir { + +class IrUnitHandler { +public: + // Implements `clp::ffi::ir_stream::IrUnitHandlerInterface` interface + [[nodiscard]] auto handle_log_event(clp::ffi::KeyValuePairLogEvent&& log_event) -> clp::ffi::ir_stream::IRErrorCode { + m_deserialized_log_events.emplace_back(std::move(log_event)); + return clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success; + } + [[nodiscard]] static auto handle_utc_offset_change( + [[maybe_unused]] clp::UtcOffset utc_offset_old, + [[maybe_unused]] clp::UtcOffset utc_offset_new + ) -> clp::ffi::ir_stream::IRErrorCode { + return clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success; + } + [[nodiscard]] static auto handle_schema_tree_node_insertion( + [[maybe_unused]] clp::ffi::SchemaTree::NodeLocator schema_tree_node_locator + ) -> clp::ffi::ir_stream::IRErrorCode { + return clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success; + } + [[nodiscard]] auto handle_end_of_stream() -> clp::ffi::ir_stream::IRErrorCode { + m_is_complete = true; + return clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success; + } + // Methods + [[nodiscard]] auto is_complete() const -> bool { return m_is_complete; } + [[nodiscard]] auto get_deserialized_log_events() const -> std::vector const & { + return m_deserialized_log_events; + } +private: + std::vector m_deserialized_log_events; + bool m_is_complete{false}; +}; + /** * Class to deserialize and decode Zstandard-compressed CLP IR streams as well as format decoded * log events. @@ -80,7 +113,7 @@ class KVPairIRStreamReader : public StreamReader { -> DecodedResultsTsType override; private: - using deserializer_t = clp::ffi::ir_stream::Deserializer; + using deserializer_t = clp::ffi::ir_stream::Deserializer; // Constructor explicit KVPairIRStreamReader( diff --git a/src/submodules/clp b/src/submodules/clp index 54962a07..b085cc4a 160000 --- a/src/submodules/clp +++ b/src/submodules/clp @@ -1 +1 @@ -Subproject commit 54962a0708daa443c9bdfb3462ed464b65b62886 +Subproject commit b085cc4a068d20618c2c4b89b8d9bf92e9aff047 From 7c529d883fcc6449e96c96a55ef4f063ee1e7146 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 6 Nov 2024 15:53:01 -0500 Subject: [PATCH 15/18] Upgrade clp submodule commit to 9f6a6ced4da504f6ba3c131efa26fd5b30c6f533 --- src/submodules/clp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/submodules/clp b/src/submodules/clp index b085cc4a..51931f69 160000 --- a/src/submodules/clp +++ b/src/submodules/clp @@ -1 +1 @@ -Subproject commit b085cc4a068d20618c2c4b89b8d9bf92e9aff047 +Subproject commit 51931f69bff0d7bdd8d8488465453e4d3f6be90b From 6c0ecdea3edcf080c1ef53629fca73432d2906da Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 6 Nov 2024 17:07:32 -0500 Subject: [PATCH 16/18] Add tree node id parsing for timestampKey and logLevelKey. --- src/clp_ffi_js/ir/KVPairIRStreamReader.cpp | 117 +++++++++++---------- src/clp_ffi_js/ir/KVPairIRStreamReader.hpp | 56 ++++++++-- 2 files changed, 111 insertions(+), 62 deletions(-) diff --git a/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp b/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp index b0fe27d6..63bc4ead 100644 --- a/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp +++ b/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp @@ -27,7 +27,10 @@ using namespace std::literals::string_literals; using clp::ir::four_byte_encoded_variable_t; namespace clp_ffi_js::ir { -auto KVPairIRStreamReader::create(DataArrayTsType const& data_array, ReaderOptions const& reader_options) -> KVPairIRStreamReader { +auto KVPairIRStreamReader::create( + DataArrayTsType const& data_array, + ReaderOptions const& reader_options +) -> KVPairIRStreamReader { auto const length{data_array["length"].as()}; SPDLOG_INFO("KVPairIRStreamReader::create: got buffer of length={}", length); @@ -41,11 +44,18 @@ auto KVPairIRStreamReader::create(DataArrayTsType const& data_array, ReaderOptio auto zstd_decompressor{std::make_unique()}; zstd_decompressor->open(data_buffer.data(), length); - // FIXME: Note we create a vector to store log events in IrUnitHandler at the moment. We should create a - // std::vector to be shared by IrUnitHandler() and KVPairIRStreamReader - // so that even after we free the m_stream_reader_data_context in the KVPairIRStreamReader instance (after we - // deserialize and stored all events in deserialize_stream), we can still access the stored log events. - auto result{clp::ffi::ir_stream::Deserializer::create(*zstd_decompressor, IrUnitHandler())}; + // FIXME: Note we create a vector to store log events in IrUnitHandler at the moment. We should + // create a std::vector to be shared by IrUnitHandler() and KVPairIRStreamReader + // so that even after we free the m_stream_reader_data_context in the KVPairIRStreamReader + // instance (after we deserialize and stored all events in deserialize_stream), we can still + // access the stored log events. + auto result{clp::ffi::ir_stream::Deserializer::create( + *zstd_decompressor, + IrUnitHandler( + reader_options["logLevelKey"].as(), + reader_options["timestampKey"].as() + ) + )}; if (result.has_error()) { auto const error_code{result.error()}; SPDLOG_CRITICAL( @@ -66,12 +76,15 @@ auto KVPairIRStreamReader::create(DataArrayTsType const& data_array, ReaderOptio std::move(zstd_decompressor), std::move(result.value()) }; - return KVPairIRStreamReader{std::move(stream_reader_data_context), std::move(reader_options)}; + return KVPairIRStreamReader{std::move(stream_reader_data_context)}; } auto KVPairIRStreamReader::get_num_events_buffered() const -> size_t { // FIXME: we should not access the vector in ir_unit_handler. See above for reasons. - return m_stream_reader_data_context->get_deserializer().get_ir_unit_handler().get_deserialized_log_events().size(); + return m_stream_reader_data_context->get_deserializer() + .get_ir_unit_handler() + .get_deserialized_log_events() + .size(); } auto KVPairIRStreamReader::get_filtered_log_event_map() const -> FilteredLogEventMapTsType { @@ -94,9 +107,9 @@ auto KVPairIRStreamReader::filter_log_events(emscripten::val const& log_level_fi for (size_t log_event_idx = 0; log_event_idx < m_encoded_log_events.size(); ++log_event_idx) { auto const& log_event = m_encoded_log_events[log_event_idx]; if (std::ranges::find( - filter_levels, - clp::enum_to_underlying_type(log_event.get_log_level()) - ) + filter_levels, + clp::enum_to_underlying_type(log_event.get_log_level()) + ) != filter_levels.end()) { m_filtered_log_event_map->emplace_back(log_event_idx); @@ -111,38 +124,16 @@ auto KVPairIRStreamReader::deserialize_stream() -> size_t { auto& reader{m_stream_reader_data_context->get_reader()}; clp::ffi::SchemaTree::Node::id_t log_level_node_id{clp::ffi::SchemaTree::cRootId}; while (true) { - auto result{m_stream_reader_data_context->get_deserializer().deserialize_next_ir_unit(reader)}; + auto result{m_stream_reader_data_context->get_deserializer().deserialize_next_ir_unit( + reader + )}; if (false == result.has_error()) { - // FIXME: revisit: we don't really need to parse log levels from key-value-pair log events since the - // levels are assumed to be stored directly as some node as a integer (which depends on our loglib - // implementations) and can be directly access via a given node_id. -// auto &log_events{m_stream_reader_data_context->get_deserializer().get_ir_unit_handler().get_deserialized_log_events()}; -// auto &last_event{log_events[log_events.size() - 1]}; -// LogLevel log_level{LogLevel::NONE}; -// if (clp::ffi::SchemaTree::cRootId == log_level_node_id) { -// auto const log_level_node_id_result{last_event.get_schema_tree().try_get_node_id( -// {clp::ffi::SchemaTree::cRootId, m_log_level_key, clp::ffi::SchemaTree::Node::Type::Str} -// )}; -// if (log_level_node_id_result.has_value()) { -// log_level_node_id = log_level_node_id_result.value(); -// } -// } else { -// auto const &log_level_value{last_event.get_node_id_value_pairs().at(log_level_node_id)}; -// if (log_level_value.has_value() && -// "short_string" == log_level_value.value().get_immutable_view()) { -// log_level = LogLevel::ERROR; -// } -// } -// -// auto log_event_with_level {LogEventWithLevel( -// std::move(last_event), -// log_level -// )}; -// m_encoded_log_events.emplace_back(std::move(log_event_with_level)); continue; } auto const error{result.error()}; - if (std::errc::no_message_available == error || std::errc::operation_not_permitted == error) { + if (std::errc::no_message_available == error + || std::errc::operation_not_permitted == error) + { break; } if (std::errc::result_out_of_range == error) { @@ -156,31 +147,50 @@ auto KVPairIRStreamReader::deserialize_stream() -> size_t { "Failed to deserialize: "s + error.category().name() + ":" + error.message() }; } - // FIXME: we should really free this after we decode all events. See the FIXME in ::create() about why we - // commented out below line. -// m_stream_reader_data_context.reset(nullptr); + // FIXME: we should really free this after we decode all events. See the FIXME in ::create() + // about why we commented out below line. + // m_stream_reader_data_context.reset(nullptr); } - return m_stream_reader_data_context->get_deserializer().get_ir_unit_handler().get_deserialized_log_events().size(); + return m_stream_reader_data_context->get_deserializer() + .get_ir_unit_handler() + .get_deserialized_log_events() + .size(); } auto KVPairIRStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool /*use_filter*/) const -> DecodedResultsTsType { // FIXME: we should not access the vector from the ir_unit_handler. See above for reasons. - if (m_stream_reader_data_context->get_deserializer().get_ir_unit_handler().get_deserialized_log_events().size() < end_idx || begin_idx >= end_idx) { + if (m_stream_reader_data_context->get_deserializer() + .get_ir_unit_handler() + .get_deserialized_log_events() + .size() + < end_idx + || begin_idx >= end_idx) + { return DecodedResultsTsType(emscripten::val::null()); } std::span const log_events_span{ - m_stream_reader_data_context->get_deserializer().get_ir_unit_handler().get_deserialized_log_events().begin() - + static_cast::difference_type>(begin_idx), - m_stream_reader_data_context->get_deserializer().get_ir_unit_handler().get_deserialized_log_events().begin() - + static_cast::difference_type>(end_idx) + m_stream_reader_data_context->get_deserializer() + .get_ir_unit_handler() + .get_deserialized_log_events() + .begin() + + static_cast::difference_type>( + begin_idx + ), + m_stream_reader_data_context->get_deserializer() + .get_ir_unit_handler() + .get_deserialized_log_events() + .begin() + + static_cast::difference_type>( + end_idx + ) }; size_t log_num{begin_idx + 1}; auto const results{emscripten::val::array()}; -// for (auto const& log_event_with_level : log_events_span) { -// auto const json{log_event_with_level.get_log_event().serialize_to_json()}; + // for (auto const& log_event_with_level : log_events_span) { + // auto const json{log_event_with_level.get_log_event().serialize_to_json()}; for (auto const& log_event : log_events_span) { auto const json{log_event.serialize_to_json()}; if (false == json.has_value()) { @@ -201,12 +211,9 @@ auto KVPairIRStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool / } KVPairIRStreamReader::KVPairIRStreamReader( - StreamReaderDataContext&& stream_reader_data_context, ReaderOptions const& reader_options + StreamReaderDataContext&& stream_reader_data_context ) : m_stream_reader_data_context{std::make_unique>( std::move(stream_reader_data_context) - )}, - m_log_level_key{reader_options["logLevelKey"].as()}, - m_timestamp_key{reader_options["timestampKey"].as()} {} + )} {} } // namespace clp_ffi_js::ir - diff --git a/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp b/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp index ee81f133..65adc4b8 100644 --- a/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp +++ b/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp @@ -3,47 +3,88 @@ #include #include +#include #include #include #include #include #include +#include #include #include #include namespace clp_ffi_js::ir { +using parsed_tree_node_id_t = std::optional; class IrUnitHandler { public: + IrUnitHandler(std::string log_level_key, std::string timestamp_key) + : m_log_level_key{std::move(log_level_key)}, + m_timestamp_key{std::move(timestamp_key)} {} + // Implements `clp::ffi::ir_stream::IrUnitHandlerInterface` interface - [[nodiscard]] auto handle_log_event(clp::ffi::KeyValuePairLogEvent&& log_event) -> clp::ffi::ir_stream::IRErrorCode { + [[nodiscard]] auto handle_log_event(clp::ffi::KeyValuePairLogEvent&& log_event + ) -> clp::ffi::ir_stream::IRErrorCode { m_deserialized_log_events.emplace_back(std::move(log_event)); return clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success; } + [[nodiscard]] static auto handle_utc_offset_change( [[maybe_unused]] clp::UtcOffset utc_offset_old, [[maybe_unused]] clp::UtcOffset utc_offset_new ) -> clp::ffi::ir_stream::IRErrorCode { return clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success; } - [[nodiscard]] static auto handle_schema_tree_node_insertion( + + [[nodiscard]] auto handle_schema_tree_node_insertion( [[maybe_unused]] clp::ffi::SchemaTree::NodeLocator schema_tree_node_locator ) -> clp::ffi::ir_stream::IRErrorCode { + ++m_current_node_id; + auto const& key_name{schema_tree_node_locator.get_key_name()}; + SPDLOG_DEBUG("m_current_node_id={}, key_name={}", m_current_node_id, key_name); + + if (m_log_level_key == key_name) { + m_level_node_id.emplace(m_current_node_id); + } else if (m_timestamp_key == key_name) { + m_timestamp_node_id.emplace(m_current_node_id); + } + return clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success; } + [[nodiscard]] auto handle_end_of_stream() -> clp::ffi::ir_stream::IRErrorCode { m_is_complete = true; return clp::ffi::ir_stream::IRErrorCode::IRErrorCode_Success; } + // Methods [[nodiscard]] auto is_complete() const -> bool { return m_is_complete; } - [[nodiscard]] auto get_deserialized_log_events() const -> std::vector const & { + + [[nodiscard]] auto get_deserialized_log_events( + ) const -> std::vector const& { return m_deserialized_log_events; } + + [[nodiscard]] auto get_level_node_id() const -> parsed_tree_node_id_t { + return m_level_node_id; + } + + [[nodiscard]] auto get_timestamp_node_id() const -> parsed_tree_node_id_t { + return m_timestamp_node_id; + } + private: + std::string m_log_level_key; + std::string m_timestamp_key; + + // the root node has id=0 + clp::ffi::SchemaTree::Node::id_t m_current_node_id; + parsed_tree_node_id_t m_level_node_id; + parsed_tree_node_id_t m_timestamp_node_id; + std::vector m_deserialized_log_events; bool m_is_complete{false}; }; @@ -61,7 +102,10 @@ class KVPairIRStreamReader : public StreamReader { * @return The created instance. * @throw ClpFfiJsException if any error occurs. */ - [[nodiscard]] static auto create(DataArrayTsType const& data_array, ReaderOptions const& reader_options) -> KVPairIRStreamReader; + [[nodiscard]] static auto create( + DataArrayTsType const& data_array, + ReaderOptions const& reader_options + ) -> KVPairIRStreamReader; // Destructor ~KVPairIRStreamReader() override = default; @@ -117,15 +161,13 @@ class KVPairIRStreamReader : public StreamReader { // Constructor explicit KVPairIRStreamReader( - StreamReaderDataContext&& stream_reader_data_context, ReaderOptions const& reader_options + StreamReaderDataContext&& stream_reader_data_context ); // Variables std::vector> m_encoded_log_events; std::unique_ptr> m_stream_reader_data_context; - std::string m_log_level_key; - std::string m_timestamp_key; FilteredLogEventsMap m_filtered_log_event_map; }; } // namespace clp_ffi_js::ir From 40908f09d6589e87a0f3d67f28995c89b42394d6 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 6 Nov 2024 18:02:18 -0500 Subject: [PATCH 17/18] Complete log level filtering. --- src/clp_ffi_js/ir/KVPairIRStreamReader.cpp | 112 ++++++++++----------- src/clp_ffi_js/ir/KVPairIRStreamReader.hpp | 20 ++-- 2 files changed, 69 insertions(+), 63 deletions(-) diff --git a/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp b/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp index 63bc4ead..38608c20 100644 --- a/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp +++ b/src/clp_ffi_js/ir/KVPairIRStreamReader.cpp @@ -44,14 +44,12 @@ auto KVPairIRStreamReader::create( auto zstd_decompressor{std::make_unique()}; zstd_decompressor->open(data_buffer.data(), length); - // FIXME: Note we create a vector to store log events in IrUnitHandler at the moment. We should - // create a std::vector to be shared by IrUnitHandler() and KVPairIRStreamReader - // so that even after we free the m_stream_reader_data_context in the KVPairIRStreamReader - // instance (after we deserialize and stored all events in deserialize_stream), we can still - // access the stored log events. + auto deserialized_log_events{std::make_shared>()}; + auto result{clp::ffi::ir_stream::Deserializer::create( *zstd_decompressor, IrUnitHandler( + *deserialized_log_events, reader_options["logLevelKey"].as(), reader_options["timestampKey"].as() ) @@ -76,15 +74,14 @@ auto KVPairIRStreamReader::create( std::move(zstd_decompressor), std::move(result.value()) }; - return KVPairIRStreamReader{std::move(stream_reader_data_context)}; + return KVPairIRStreamReader{ + std::move(stream_reader_data_context), + std::move(deserialized_log_events) + }; } auto KVPairIRStreamReader::get_num_events_buffered() const -> size_t { - // FIXME: we should not access the vector in ir_unit_handler. See above for reasons. - return m_stream_reader_data_context->get_deserializer() - .get_ir_unit_handler() - .get_deserialized_log_events() - .size(); + return m_deserialized_log_events->size(); } auto KVPairIRStreamReader::get_filtered_log_event_map() const -> FilteredLogEventMapTsType { @@ -96,7 +93,7 @@ auto KVPairIRStreamReader::get_filtered_log_event_map() const -> FilteredLogEven } auto KVPairIRStreamReader::filter_log_events(emscripten::val const& log_level_filter) -> void { - if (log_level_filter.isNull()) { + if (log_level_filter.isNull() || false == m_level_node_id.has_value()) { m_filtered_log_event_map.reset(); return; } @@ -104,11 +101,15 @@ auto KVPairIRStreamReader::filter_log_events(emscripten::val const& log_level_fi m_filtered_log_event_map.emplace(); auto filter_levels{emscripten::vecFromJSArray>(log_level_filter )}; - for (size_t log_event_idx = 0; log_event_idx < m_encoded_log_events.size(); ++log_event_idx) { - auto const& log_event = m_encoded_log_events[log_event_idx]; + for (size_t log_event_idx = 0; log_event_idx < m_deserialized_log_events->size(); + ++log_event_idx) + { + auto const& log_event{m_deserialized_log_events->at(log_event_idx)}; + auto const& id_value_pairs{log_event.get_node_id_value_pairs()}; + auto const& log_level{id_value_pairs.at(m_level_node_id.value())}; if (std::ranges::find( filter_levels, - clp::enum_to_underlying_type(log_event.get_log_level()) + log_level.value().get_immutable_view() ) != filter_levels.end()) { @@ -120,7 +121,7 @@ auto KVPairIRStreamReader::filter_log_events(emscripten::val const& log_level_fi auto KVPairIRStreamReader::deserialize_stream() -> size_t { if (nullptr != m_stream_reader_data_context) { constexpr size_t cDefaultNumReservedLogEvents{500'000}; - m_encoded_log_events.reserve(cDefaultNumReservedLogEvents); + m_deserialized_log_events->reserve(cDefaultNumReservedLogEvents); auto& reader{m_stream_reader_data_context->get_reader()}; clp::ffi::SchemaTree::Node::id_t log_level_node_id{clp::ffi::SchemaTree::cRootId}; while (true) { @@ -147,73 +148,70 @@ auto KVPairIRStreamReader::deserialize_stream() -> size_t { "Failed to deserialize: "s + error.category().name() + ":" + error.message() }; } - // FIXME: we should really free this after we decode all events. See the FIXME in ::create() - // about why we commented out below line. - // m_stream_reader_data_context.reset(nullptr); + m_stream_reader_data_context.reset(nullptr); } - return m_stream_reader_data_context->get_deserializer() - .get_ir_unit_handler() - .get_deserialized_log_events() - .size(); + return m_deserialized_log_events->size(); } -auto KVPairIRStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool /*use_filter*/) const +auto KVPairIRStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType { - // FIXME: we should not access the vector from the ir_unit_handler. See above for reasons. - if (m_stream_reader_data_context->get_deserializer() - .get_ir_unit_handler() - .get_deserialized_log_events() - .size() - < end_idx - || begin_idx >= end_idx) - { + if (use_filter && false == m_filtered_log_event_map.has_value()) { + return DecodedResultsTsType{emscripten::val::null()}; + } + + size_t length + = use_filter ? m_filtered_log_event_map->size() : m_deserialized_log_events->size(); + if (length < end_idx || begin_idx > end_idx) { return DecodedResultsTsType(emscripten::val::null()); } - std::span const log_events_span{ - m_stream_reader_data_context->get_deserializer() - .get_ir_unit_handler() - .get_deserialized_log_events() - .begin() - + static_cast::difference_type>( - begin_idx - ), - m_stream_reader_data_context->get_deserializer() - .get_ir_unit_handler() - .get_deserialized_log_events() - .begin() - + static_cast::difference_type>( - end_idx - ) - }; - size_t log_num{begin_idx + 1}; auto const results{emscripten::val::array()}; - // for (auto const& log_event_with_level : log_events_span) { - // auto const json{log_event_with_level.get_log_event().serialize_to_json()}; - for (auto const& log_event : log_events_span) { + for (size_t i = begin_idx; i < end_idx; ++i) { + size_t log_event_idx = use_filter ? m_filtered_log_event_map->at(i) : i; + auto const& log_event{m_deserialized_log_events->at(log_event_idx)}; + auto const json{log_event.serialize_to_json()}; if (false == json.has_value()) { SPDLOG_ERROR("Failed to decode message."); break; } + auto const& id_value_pairs{log_event.get_node_id_value_pairs()}; + clp::ffi::value_int_t log_level{static_cast(LogLevel::NONE)}; + if (m_level_node_id.has_value()) { + auto const& log_level_pair{id_value_pairs.at(m_level_node_id.value())}; + log_level = log_level_pair.has_value() + ? log_level_pair.value().get_immutable_view() + : static_cast(LogLevel::NONE); + } + clp::ffi::value_int_t timestamp{0}; + if (m_timestamp_node_id.has_value()) { + auto const& timestamp_pair{id_value_pairs.at(m_timestamp_node_id.value())}; + timestamp = timestamp_pair.has_value() + ? timestamp_pair.value().get_immutable_view() + : 0; + } + EM_ASM( - { Emval.toValue($0).push([UTF8ToString($1), $2]); }, + { Emval.toValue($0).push([UTF8ToString($1), $2, $3, $4]); }, results.as_handle(), json.value().dump().c_str(), - log_num + log_level, + timestamp, + log_event_idx + 1 ); - ++log_num; } return DecodedResultsTsType(results); } KVPairIRStreamReader::KVPairIRStreamReader( - StreamReaderDataContext&& stream_reader_data_context + StreamReaderDataContext&& stream_reader_data_context, + std::shared_ptr> deserialized_log_events ) : m_stream_reader_data_context{std::make_unique>( std::move(stream_reader_data_context) - )} {} + )}, + m_deserialized_log_events{std::move(deserialized_log_events)} {} } // namespace clp_ffi_js::ir diff --git a/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp b/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp index 65adc4b8..5e9f1492 100644 --- a/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp +++ b/src/clp_ffi_js/ir/KVPairIRStreamReader.hpp @@ -21,8 +21,13 @@ using parsed_tree_node_id_t = std::optional; class IrUnitHandler { public: - IrUnitHandler(std::string log_level_key, std::string timestamp_key) - : m_log_level_key{std::move(log_level_key)}, + IrUnitHandler( + std::vector& deserialized_log_events, + std::string log_level_key, + std::string timestamp_key + ) + : m_deserialized_log_events{deserialized_log_events}, + m_log_level_key{std::move(log_level_key)}, m_timestamp_key{std::move(timestamp_key)} {} // Implements `clp::ffi::ir_stream::IrUnitHandlerInterface` interface @@ -85,7 +90,7 @@ class IrUnitHandler { parsed_tree_node_id_t m_level_node_id; parsed_tree_node_id_t m_timestamp_node_id; - std::vector m_deserialized_log_events; + std::vector& m_deserialized_log_events; bool m_is_complete{false}; }; @@ -160,14 +165,17 @@ class KVPairIRStreamReader : public StreamReader { using deserializer_t = clp::ffi::ir_stream::Deserializer; // Constructor - explicit KVPairIRStreamReader( - StreamReaderDataContext&& stream_reader_data_context + KVPairIRStreamReader( + StreamReaderDataContext&& stream_reader_data_context, + std::shared_ptr> deserialized_log_events ); // Variables - std::vector> m_encoded_log_events; + std::shared_ptr> m_deserialized_log_events; std::unique_ptr> m_stream_reader_data_context; + parsed_tree_node_id_t m_level_node_id; + parsed_tree_node_id_t m_timestamp_node_id; FilteredLogEventsMap m_filtered_log_event_map; }; } // namespace clp_ffi_js::ir From e411420b6212baf1859ad11a2e6aa4273d3063c4 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 6 Nov 2024 18:43:34 -0500 Subject: [PATCH 18/18] Update yscope-dev-utils version. --- tools/yscope-dev-utils | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/yscope-dev-utils b/tools/yscope-dev-utils index 159768c7..ad576e43 160000 --- a/tools/yscope-dev-utils +++ b/tools/yscope-dev-utils @@ -1 +1 @@ -Subproject commit 159768c7d171595ed2cba17b758c10043a2efe96 +Subproject commit ad576e43c1a43d7a6afde79fc9c3c952b7bf28bd