y-scope · davemarco · Dec 18, 2024 · Nov 9, 2024 · Nov 9, 2024 · Nov 9, 2024
@@ -84,6 +84,7 @@ endif()
 set(CLP_FFI_JS_SRC_MAIN
     src/clp_ffi_js/ir/StreamReader.cpp
     src/clp_ffi_js/ir/StructuredIrStreamReader.cpp
+    src/clp_ffi_js/ir/StructuredIrUnitHandler.cpp
     src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp
 )
 

@@ -4,6 +4,7 @@
 #include <array>
 #include <cstdint>
 #include <string_view>
+#include <type_utils.hpp>
 
 namespace clp_ffi_js {
 /**
@@ -17,6 +18,7 @@ enum class LogLevel : std::uint8_t {
     WARN,
     ERROR,
     FATAL,
+    LENGTH,  // This isn't a valid log level.
 };
-};
+    Length
+};
-};
+    Length
+};
 constexpr LogLevel cValidLogLevelsBeginIdx{LogLevel::TRACE};
 
@@ -25,15 +27,16 @@ constexpr LogLevel cValidLogLevelsBeginIdx{LogLevel::TRACE};
  *
  * NOTE: These must be kept in sync manually.
  */
-constexpr std::array<std::string_view, 7> cLogLevelNames{
-        "NONE",  // This isn't a valid log level.
-        "TRACE",
-        "DEBUG",
-        "INFO",
-        "WARN",
-        "ERROR",
-        "FATAL",
-};
+constexpr std::array<std::string_view, clp::enum_to_underlying_type(LogLevel::LENGTH)>
+        cLogLevelNames{
+                "NONE",  // This isn't a valid log level.
+                "TRACE",
+                "DEBUG",
+                "INFO",
+                "WARN",
+                "ERROR",
+                "FATAL",
+        };
 }  // namespace clp_ffi_js
 
 #endif  // CLP_FFI_JS_CONSTANTS_HPP
@@ -4,6 +4,7 @@
 #include <concepts>
 #include <utility>
 
+#include <clp/ffi/KeyValuePairLogEvent.hpp>
 #include <clp/ir/LogEvent.hpp>
 #include <clp/ir/types.hpp>
 
@@ -12,6 +13,7 @@
 namespace clp_ffi_js::ir {
 using clp::ir::four_byte_encoded_variable_t;
 using UnstructuredLogEvent = clp::ir::LogEvent<four_byte_encoded_variable_t>;
+using StructuredLogEvent = clp::ffi::KeyValuePairLogEvent;
 
 /**
  * A templated class that extends a log event type with processed versions of some of its fields,
@@ -21,7 +23,8 @@ using UnstructuredLogEvent = clp::ir::LogEvent<four_byte_encoded_variable_t>;
  * @tparam LogEvent The type of the log event.
  */
 template <typename LogEvent>
-requires std::same_as<LogEvent, UnstructuredLogEvent>
+requires std::same_as<LogEvent, UnstructuredLogEvent> || std::same_as<LogEvent, StructuredLogEvent>
+
 class LogEventWithFilterData {
 public:
     // Constructor

@@ -117,7 +117,9 @@ EMSCRIPTEN_BINDINGS(ClpStreamReader) {
     // JS types used as inputs
     emscripten::register_type<clp_ffi_js::ir::DataArrayTsType>("Uint8Array");
     emscripten::register_type<clp_ffi_js::ir::LogLevelFilterTsType>("number[] | null");
-    emscripten::register_type<clp_ffi_js::ir::ReaderOptions>("{timestampKey: string} | null");
+    emscripten::register_type<clp_ffi_js::ir::ReaderOptions>(
+            "{logLevelKey: string, timestampKey: string} | null"
+    );
 
     // JS types used as outputs
     emscripten::enum_<clp_ffi_js::ir::StreamType>("IrStreamType")

@@ -4,6 +4,8 @@
 #include <cstddef>
 #include <cstdint>
 #include <memory>
+#include <optional>
+#include <vector>
 
 #include <clp/streaming_compression/zstd/Decompressor.hpp>
 #include <emscripten/val.h>
@@ -23,6 +25,12 @@ enum class StreamType : uint8_t {
     Unstructured,
 };
 
+/**
+ * Mapping from an index in the filtered log events collection to an index in the unfiltered
+ * log events collection.
+ */
+using FilteredLogEventsMap = std::optional<std::vector<size_t>>;
+
 /**
  * Class to deserialize and decode Zstandard-compressed CLP IR streams as well as format decoded
  * log events.

@@ -7,45 +7,37 @@
 #include <string_view>
 #include <system_error>
 #include <utility>
-#include <vector>
 
 #include <clp/Array.hpp>
 #include <clp/ErrorCode.hpp>
 #include <clp/ffi/ir_stream/Deserializer.hpp>
-#include <clp/ffi/KeyValuePairLogEvent.hpp>
-#include <clp/ffi/Value.hpp>
-#include <clp/ir/types.hpp>
 #include <clp/TraceableException.hpp>
-#include <emscripten/em_asm.h>
 #include <emscripten/val.h>
 #include <spdlog/spdlog.h>
 
 #include <clp_ffi_js/ClpFfiJsException.hpp>
-#include <clp_ffi_js/constants.hpp>
 #include <clp_ffi_js/ir/StreamReader.hpp>
 #include <clp_ffi_js/ir/StreamReaderDataContext.hpp>
+#include <clp_ffi_js/ir/StructuredIrUnitHandler.hpp>
+#include <clp_ffi_js/ir/utils.hpp>
 
 namespace clp_ffi_js::ir {
 namespace {
-constexpr std::string_view cEmptyJsonStr{"{}"};
-constexpr std::string_view cLogLevelFilteringNotSupportedErrorMsg{
-        "Log level filtering is not yet supported in this reader."
-};
+constexpr std::string_view cReaderOptionsLogLevelKey{"logLevelKey"};
 constexpr std::string_view cReaderOptionsTimestampKey{"timestampKey"};
 }  // namespace
 
-using clp::ir::four_byte_encoded_variable_t;
-
 auto StructuredIrStreamReader::create(
         std::unique_ptr<ZstdDecompressor>&& zstd_decompressor,
         clp::Array<char> data_array,
         ReaderOptions const& reader_options
 ) -> StructuredIrStreamReader {
-    auto deserialized_log_events{std::make_shared<std::vector<clp::ffi::KeyValuePairLogEvent>>()};
+    auto deserialized_log_events{std::make_shared<StructuredLogEvents>()};
     auto result{StructuredIrDeserializer::create(
             *zstd_decompressor,
-            IrUnitHandler{
+            StructuredIrUnitHandler{
                     deserialized_log_events,
+                    reader_options[cReaderOptionsLogLevelKey.data()].as<std::string>(),
                     reader_options[cReaderOptionsTimestampKey.data()].as<std::string>()
             }
     )};
@@ -75,15 +67,19 @@ auto StructuredIrStreamReader::get_num_events_buffered() const -> size_t {
 }
 
 auto StructuredIrStreamReader::get_filtered_log_event_map() const -> FilteredLogEventMapTsType {
-    SPDLOG_ERROR(cLogLevelFilteringNotSupportedErrorMsg);
-    return FilteredLogEventMapTsType{emscripten::val::null()};
+    if (false == m_filtered_log_event_map.has_value()) {
+        return FilteredLogEventMapTsType{emscripten::val::null()};
+    }
+
+    return FilteredLogEventMapTsType{emscripten::val::array(m_filtered_log_event_map.value())};
 }
 
 void StructuredIrStreamReader::filter_log_events(LogLevelFilterTsType const& log_level_filter) {
-    if (log_level_filter.isNull()) {
-        return;
-    }
-    SPDLOG_ERROR(cLogLevelFilteringNotSupportedErrorMsg);
+    generic_filter_log_events(
+            m_filtered_log_event_map,
+            log_level_filter,
+            *m_deserialized_log_events
+    );
 }
 
 auto StructuredIrStreamReader::deserialize_stream() -> size_t {
@@ -94,16 +90,14 @@ auto StructuredIrStreamReader::deserialize_stream() -> size_t {
     constexpr size_t cDefaultNumReservedLogEvents{500'000};
     m_deserialized_log_events->reserve(cDefaultNumReservedLogEvents);
     auto& reader{m_stream_reader_data_context->get_reader()};
-    while (true) {
-        auto result{m_stream_reader_data_context->get_deserializer().deserialize_next_ir_unit(reader
-        )};
+    auto& deserializer = m_stream_reader_data_context->get_deserializer();
+
+    while (false == deserializer.is_stream_completed()) {
+        auto result{deserializer.deserialize_next_ir_unit(reader)};
         if (false == result.has_error()) {
             continue;
         }
         auto const error{result.error()};
-        if (std::errc::operation_not_permitted == error) {
-            break;
-        }
         if (std::errc::result_out_of_range == error) {
-    while (false == deserializer.is_stream_completed()) {
-        auto result{deserializer.deserialize_next_ir_unit(reader)};
-        if (false == result.has_error()) {
-            continue;
-        }
-        auto const error{result.error()};
-        if (std::errc::operation_not_permitted == error) {
-            break;
-        }
-        if (std::errc::result_out_of_range == error) {
+    while (true) {
+        auto result{deserializer.deserialize_next_ir_unit(reader)};
+        if (result.has_error()) {
+            if (std::errc::result_out_of_range == result.error()) {
+                SPDLOG_ERROR("File contains an incomplete IR stream");
+                break;
+            }
+            throw ClpFfiJsException{
+                clp::ErrorCode::ErrorCode_Corrupt,
+                __FILENAME__,
+                __LINE__,
+                std::format(
+                    "Failed to deserialize IR unit: {}:{}",
+                    result.error().category().name(),
+                    result.error().message()
+                )
+            };
+        }
-    while (false == deserializer.is_stream_completed()) {
-        auto result{deserializer.deserialize_next_ir_unit(reader)};
-        if (false == result.has_error()) {
-            continue;
-        }
-        auto const error{result.error()};
-        if (std::errc::operation_not_permitted == error) {
-            break;
-        }
-        if (std::errc::result_out_of_range == error) {
+    while (true) {
+        auto result{deserializer.deserialize_next_ir_unit(reader)};
+        if (result.has_error()) {
+            if (std::errc::result_out_of_range == result.error()) {
+                SPDLOG_ERROR("File contains an incomplete IR stream");
+                break;
+            }
+            throw ClpFfiJsException{
+                clp::ErrorCode::ErrorCode_Corrupt,
+                __FILENAME__,
+                __LINE__,
+                std::format(
+                    "Failed to deserialize IR unit: {}:{}",
+                    result.error().category().name(),
+                    result.error().message()
+                )
+            };
+        }
             SPDLOG_ERROR("File contains an incomplete IR stream");
             break;
@@ -119,72 +113,27 @@ auto StructuredIrStreamReader::deserialize_stream() -> size_t {
                 )
         };
     }
-    m_timestamp_node_id = m_stream_reader_data_context->get_deserializer()
-                                  .get_ir_unit_handler()
-                                  .get_timestamp_node_id();
     m_stream_reader_data_context.reset(nullptr);
     return m_deserialized_log_events->size();
 }
 
 auto StructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const
         -> DecodedResultsTsType {
-    if (use_filter) {
-        SPDLOG_ERROR(cLogLevelFilteringNotSupportedErrorMsg);
-        return DecodedResultsTsType{emscripten::val::null()};
-    }
-
-    if (m_deserialized_log_events->size() < end_idx || begin_idx > end_idx) {
-        return DecodedResultsTsType{emscripten::val::null()};
-    }
-
-    auto const results{emscripten::val::array()};
-
-    for (size_t log_event_idx = begin_idx; log_event_idx < end_idx; ++log_event_idx) {
-        auto const& log_event{m_deserialized_log_events->at(log_event_idx)};
-
-        auto const json_result{log_event.serialize_to_json()};
-        std::string json_str{cEmptyJsonStr};
-        if (false == json_result.has_value()) {
-            auto error_code{json_result.error()};
-            SPDLOG_ERROR(
-                    "Failed to deserialize log event to JSON: {}:{}",
-                    error_code.category().name(),
-                    error_code.message()
-            );
-        } else {
-            json_str = json_result.value().dump();
-        }
-
-        auto const& id_value_pairs{log_event.get_node_id_value_pairs()};
-        clp::ffi::value_int_t timestamp{0};
-        if (m_timestamp_node_id.has_value()) {
-            auto const& timestamp_pair{id_value_pairs.at(m_timestamp_node_id.value())};
-            if (timestamp_pair.has_value()) {
-                if (timestamp_pair->is<clp::ffi::value_int_t>()) {
-                    timestamp = timestamp_pair.value().get_immutable_view<clp::ffi::value_int_t>();
-                } else {
-                    // TODO: Add support for parsing timestamp values of string type.
-                    SPDLOG_ERROR("Unable to parse timestamp for log_event_idx={}", log_event_idx);
-                }
-            }
-        }
-
-        EM_ASM(
-                { Emval.toValue($0).push([UTF8ToString($1), $2, $3, $4]); },
-                results.as_handle(),
-                json_str.c_str(),
-                timestamp,
-                LogLevel::NONE,
-                log_event_idx + 1
-        );
-    }
-
-    return DecodedResultsTsType(results);
+    return generic_decode_range(
+            begin_idx,
+            end_idx,
+            m_filtered_log_event_map,
+            *m_deserialized_log_events,
+            use_filter,
+            // The `ts_pattern` argument is unused by `generic_decode_range<StructuredLogEvents>`;
+            // a default-constructed pattern is passed only to conform to the shared interface.
+            clp::TimestampPattern()
+    );
 }
 
 StructuredIrStreamReader::StructuredIrStreamReader(
         StreamReaderDataContext<StructuredIrDeserializer>&& stream_reader_data_context,
-        std::shared_ptr<std::vector<clp::ffi::KeyValuePairLogEvent>> deserialized_log_events
+        std::shared_ptr<StructuredLogEvents> deserialized_log_events
 )
         : m_deserialized_log_events{std::move(deserialized_log_events)},
           m_stream_reader_data_context{