From dedb75853ec92eccbc49c4bfd7c5fd8a512b3669 Mon Sep 17 00:00:00 2001 From: hoophalab <200652805+hoophalab@users.noreply.github.com> Date: Mon, 11 Aug 2025 16:41:33 -0400 Subject: [PATCH 1/7] feat!: Add KQL filtering support. --- CMakeLists.txt | 36 ++++- src/clp_ffi_js/ir/StreamReader.hpp | 4 +- .../ir/StructuredIrStreamReader.cpp | 61 +++++++- .../ir/StructuredIrStreamReader.hpp | 5 +- src/clp_ffi_js/ir/StructuredIrUnitHandler.cpp | 6 +- src/clp_ffi_js/ir/StructuredIrUnitHandler.hpp | 2 +- .../ir/UnstructuredIrStreamReader.cpp | 8 +- .../ir/UnstructuredIrStreamReader.hpp | 5 +- src/clp_ffi_js/ir/query_methods.cpp | 137 ++++++++++++++++++ src/clp_ffi_js/ir/query_methods.hpp | 14 ++ taskfiles/deps.yaml | 64 +++++++- 11 files changed, 324 insertions(+), 18 deletions(-) create mode 100644 src/clp_ffi_js/ir/query_methods.cpp create mode 100644 src/clp_ffi_js/ir/query_methods.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f656426..fd688985 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -102,6 +102,7 @@ execute_process( set(CLP_FFI_JS_SRC_MAIN src/clp_ffi_js/ir/decoding_methods.cpp + src/clp_ffi_js/ir/query_methods.cpp src/clp_ffi_js/ir/StreamReader.cpp src/clp_ffi_js/ir/StructuredIrStreamReader.cpp src/clp_ffi_js/ir/StructuredIrUnitHandler.cpp @@ -113,6 +114,8 @@ set(CLP_FFI_JS_SRC_CLP_CORE ${CLP_FFI_JS_CLP_SOURCE_DIRECTORY}/components/core/src/clp/ffi/ir_stream/decoding_methods.cpp ${CLP_FFI_JS_CLP_SOURCE_DIRECTORY}/components/core/src/clp/ffi/ir_stream/ir_unit_deserialization_methods.cpp ${CLP_FFI_JS_CLP_SOURCE_DIRECTORY}/components/core/src/clp/ffi/ir_stream/search/ErrorCode.cpp + ${CLP_FFI_JS_CLP_SOURCE_DIRECTORY}/components/core/src/clp/ffi/ir_stream/search/QueryHandlerImpl.cpp + ${CLP_FFI_JS_CLP_SOURCE_DIRECTORY}/components/core/src/clp/ffi/ir_stream/search/utils.cpp ${CLP_FFI_JS_CLP_SOURCE_DIRECTORY}/components/core/src/clp/ffi/ir_stream/utils.cpp ${CLP_FFI_JS_CLP_SOURCE_DIRECTORY}/components/core/src/clp/ffi/KeyValuePairLogEvent.cpp 
${CLP_FFI_JS_CLP_SOURCE_DIRECTORY}/components/core/src/clp/ffi/SchemaTree.cpp @@ -144,6 +147,27 @@ set(CLP_FFI_JS_SUPPORTED_ENVIRONMENTS "List of supported environments." ) +set(CLP_BUILD_CLP_STRING_UTILS ON) +add_subdirectory(${CLP_FFI_JS_CLP_SOURCE_DIRECTORY}/components/core/src/clp/string_utils) + +set(CLP_BUILD_EXECUTABLES OFF) +set(CLP_BUILD_CLP_S_ARCHIVEREADER OFF) +set(CLP_BUILD_CLP_S_ARCHIVEWRITER OFF) +set(CLP_BUILD_CLP_S_CLP_DEPENDENCIES OFF) +set(CLP_BUILD_CLP_S_IO OFF) +set(CLP_BUILD_CLP_S_JSONCONSTRUCTOR OFF) +set(CLP_BUILD_CLP_S_REDUCER_DEPENDENCIES OFF) +set(CLP_BUILD_CLP_S_SEARCH OFF) +set(CLP_BUILD_CLP_S_SEARCH_AST ON) +set(CLP_BUILD_CLP_S_SEARCH_KQL ON) +set(CLP_BUILD_CLP_S_TIMESTAMPPATTERN ON) +add_subdirectory(${CLP_FFI_JS_CLP_SOURCE_DIRECTORY}/components/core/src/clp_s) + +add_subdirectory(${CMAKE_SOURCE_DIR}/build/deps/antlr-runtime-src) +add_subdirectory(${CMAKE_SOURCE_DIR}/build/deps/date-src) +add_subdirectory(${CMAKE_SOURCE_DIR}/build/deps/simdjson-src) +add_subdirectory(${CMAKE_SOURCE_DIR}/build/deps/spdlog-src) + foreach(env ${CLP_FFI_JS_SUPPORTED_ENVIRONMENTS}) set(CLP_FFI_JS_BIN_NAME "ClpFfiJs-${env}") add_executable(${CLP_FFI_JS_BIN_NAME}) @@ -154,7 +178,16 @@ foreach(env ${CLP_FFI_JS_SUPPORTED_ENVIRONMENTS}) target_compile_options(${CLP_FFI_JS_BIN_NAME} PRIVATE ${CLP_FFI_JS_COMMON_COMPILE_OPTIONS}) # Set up link options - target_link_libraries(${CLP_FFI_JS_BIN_NAME} PRIVATE embind) + target_link_libraries(${CLP_FFI_JS_BIN_NAME} + PRIVATE + antlr4_static + clp_s::search::ast + clp_s::search::kql + embind + simdjson::simdjson + spdlog::spdlog + string_utils + ) set(CLP_FFI_JS_LINK_OPTIONS ${CLP_FFI_JS_COMMON_LINK_OPTIONS} --emit-tsd=${CLP_FFI_JS_BIN_NAME}.d.ts @@ -184,7 +217,6 @@ Link options: ${CLP_FFI_JS_LINK_OPTIONS}." 
${CLP_FFI_JS_CLP_SOURCE_DIRECTORY}/components/core/src/clp ${CLP_FFI_JS_FMT_SOURCE_DIRECTORY}/include ${CLP_FFI_JS_NLOHMANN_JSON_SOURCE_DIRECTORY}/include - ${CLP_FFI_JS_SPDLOG_SOURCE_DIRECTORY}/include ${CLP_FFI_JS_YSTDLIB_SOURCE_DIRECTORY}/src ${CLP_FFI_JS_ZSTD_SOURCE_DIRECTORY}/lib ) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index d9606157..a6269a91 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -104,7 +104,9 @@ class StreamReader { * * @param log_level_filter Array of selected log levels */ - virtual void filter_log_events(LogLevelFilterTsType const& log_level_filter) = 0; + virtual void + filter_log_events(LogLevelFilterTsType const& log_level_filter, std::string const& kql_filter) + = 0; /** * Deserializes all log events in the stream. diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp index d1564526..bd58a91c 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp @@ -28,6 +28,8 @@ #include #include +#include "clp_ffi_js/ir/query_methods.hpp" + namespace clp_ffi_js::ir { namespace { constexpr std::string_view cEmptyJsonStr{"{}"}; @@ -139,12 +141,58 @@ auto StructuredIrStreamReader::get_filtered_log_event_map() const -> FilteredLog return FilteredLogEventMapTsType{emscripten::val::array(m_filtered_log_event_map.value())}; } -void StructuredIrStreamReader::filter_log_events(LogLevelFilterTsType const& log_level_filter) { - generic_filter_log_events( - m_filtered_log_event_map, - log_level_filter, - *m_deserialized_log_events - ); +void StructuredIrStreamReader::filter_log_events( + LogLevelFilterTsType const& log_level_filter, + std::string const& kql_filter +) { + m_filtered_log_event_map = std::nullopt; + + if (0 != kql_filter.size()) { + auto& reader{m_stream_reader_data_context->get_reader()}; + reader.seek_from_begin(0); + auto indexes{query_index(reader, 
kql_filter)}; + m_filtered_log_event_map = std::make_optional(std::move(indexes)); + } + + if (false == log_level_filter.isNull()) { + std::vector filtered_log_event_map; + + auto filter_levels{ + emscripten::vecFromJSArray>(log_level_filter) + }; + + auto fn = [&](size_t log_event_idx) { + auto const& log_event{m_deserialized_log_events->at(log_event_idx)}; + if (std::ranges::find( + filter_levels, + clp::enum_to_underlying_type(log_event.get_log_level()) + ) + != filter_levels.end()) + { + filtered_log_event_map.emplace_back(log_event_idx); + } + }; + + if (m_filtered_log_event_map.has_value()) { + for (auto log_event_idx : m_filtered_log_event_map.value()) { + fn(log_event_idx); + } + } else { + for (size_t log_event_idx = 0; log_event_idx < m_deserialized_log_events->size(); + ++log_event_idx) + { + fn(log_event_idx); + } + } + + m_filtered_log_event_map = std::make_optional(std::move(filtered_log_event_map)); + } + + if (m_filtered_log_event_map.has_value() + && m_filtered_log_event_map->size() == m_deserialized_log_events->size()) + { + m_filtered_log_event_map = std::nullopt; + } } auto StructuredIrStreamReader::deserialize_stream() -> size_t { @@ -178,7 +226,6 @@ auto StructuredIrStreamReader::deserialize_stream() -> size_t { ) }; } - m_stream_reader_data_context.reset(nullptr); return m_deserialized_log_events->size(); } diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp index adf5ef69..06c6a5e0 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp @@ -63,7 +63,10 @@ class StructuredIrStreamReader : public StreamReader { [[nodiscard]] auto get_filtered_log_event_map() const -> FilteredLogEventMapTsType override; - void filter_log_events(LogLevelFilterTsType const& log_level_filter) override; + void filter_log_events( + LogLevelFilterTsType const& log_level_filter, + std::string const& kql_filter + ) override; /** * @see 
StreamReader::deserialize_stream diff --git a/src/clp_ffi_js/ir/StructuredIrUnitHandler.cpp b/src/clp_ffi_js/ir/StructuredIrUnitHandler.cpp index e783111d..c05904f1 100644 --- a/src/clp_ffi_js/ir/StructuredIrUnitHandler.cpp +++ b/src/clp_ffi_js/ir/StructuredIrUnitHandler.cpp @@ -129,8 +129,10 @@ auto StructuredIrUnitHandler::SchemaTreeFullBranch::match( return true; } -auto StructuredIrUnitHandler::handle_log_event(StructuredLogEvent&& log_event) - -> clp::ffi::ir_stream::IRErrorCode { +auto StructuredIrUnitHandler::handle_log_event( + StructuredLogEvent&& log_event, + [[maybe_unused]] size_t log_event_ix +) -> clp::ffi::ir_stream::IRErrorCode { auto const timestamp = get_timestamp(log_event); auto const log_level = get_log_level(log_event); auto const utc_offset = get_utc_offset(log_event); diff --git a/src/clp_ffi_js/ir/StructuredIrUnitHandler.hpp b/src/clp_ffi_js/ir/StructuredIrUnitHandler.hpp index 535d7a01..e243d05e 100644 --- a/src/clp_ffi_js/ir/StructuredIrUnitHandler.hpp +++ b/src/clp_ffi_js/ir/StructuredIrUnitHandler.hpp @@ -106,7 +106,7 @@ class StructuredIrUnitHandler { * @param log_event * @return IRErrorCode::IRErrorCode_Success */ - [[nodiscard]] auto handle_log_event(StructuredLogEvent&& log_event) + [[nodiscard]] auto handle_log_event(StructuredLogEvent&& log_event, size_t log_event_ix) -> clp::ffi::ir_stream::IRErrorCode; /** diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp index 8e4c91b7..c684e974 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp @@ -80,7 +80,13 @@ auto UnstructuredIrStreamReader::get_filtered_log_event_map() const -> FilteredL return FilteredLogEventMapTsType{emscripten::val::array(m_filtered_log_event_map.value())}; } -void UnstructuredIrStreamReader::filter_log_events(LogLevelFilterTsType const& log_level_filter) { +void UnstructuredIrStreamReader::filter_log_events( + LogLevelFilterTsType const& 
log_level_filter, + [[maybe_unused]] std::string const& kql_filter +) { + SPDLOG_WARN( + "KQL filters aren't supported for unstructured IR streams, so they're being ignored." + ); generic_filter_log_events(m_filtered_log_event_map, log_level_filter, m_encoded_log_events); } diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp index 72387d2c..860c9db0 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp @@ -58,7 +58,10 @@ class UnstructuredIrStreamReader : public StreamReader { [[nodiscard]] auto get_filtered_log_event_map() const -> FilteredLogEventMapTsType override; - void filter_log_events(LogLevelFilterTsType const& log_level_filter) override; + void filter_log_events( + LogLevelFilterTsType const& log_level_filter, + std::string const& kql_filter + ) override; /** * @see StreamReader::deserialize_stream diff --git a/src/clp_ffi_js/ir/query_methods.cpp b/src/clp_ffi_js/ir/query_methods.cpp new file mode 100644 index 00000000..77e5c0f9 --- /dev/null +++ b/src/clp_ffi_js/ir/query_methods.cpp @@ -0,0 +1,137 @@ +#include "query_methods.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "clp_ffi_js/ClpFfiJsException.hpp" +#include "spdlog/common.h" +#include "spdlog/spdlog.h" + +namespace clp_ffi_js::ir { +using clp::ffi::ir_stream::IRErrorCode; +using clp::ffi::KeyValuePairLogEvent; +using clp::UtcOffset; + +class LogEventIndexIrUnitHandler { +public: + [[nodiscard]] auto + handle_log_event([[maybe_unused]] KeyValuePairLogEvent&& log_event, size_t log_event_ix) + -> IRErrorCode { + m_deserialized_log_event_indexes.push_back(log_event_ix); + return IRErrorCode::IRErrorCode_Success; + } + + [[nodiscard]] static auto handle_utc_offset_change( + [[maybe_unused]] UtcOffset utc_offset_old, + [[maybe_unused]] UtcOffset utc_offset_new + ) -> IRErrorCode { + return IRErrorCode::IRErrorCode_Decode_Error; 
+ } + + [[nodiscard]] static auto handle_schema_tree_node_insertion( + [[maybe_unused]] bool is_auto_generated, + [[maybe_unused]] clp::ffi::SchemaTree::NodeLocator schema_tree_node_locator, + [[maybe_unused]] std::shared_ptr const& schema_tree + ) -> IRErrorCode { + return IRErrorCode::IRErrorCode_Success; + } + + [[nodiscard]] static auto handle_end_of_stream() -> IRErrorCode { + return IRErrorCode::IRErrorCode_Success; + } + + [[nodiscard]] auto get_deserialized_log_event_indexes() const -> std::vector const& { + return m_deserialized_log_event_indexes; + } + +private: + std::vector m_deserialized_log_event_indexes; +}; + +auto trivial_new_projected_schema_tree_node_callback( + [[maybe_unused]] bool is_auto_generated, + [[maybe_unused]] clp::ffi::SchemaTree::Node::id_t node_id, + [[maybe_unused]] std::string_view projected_key_path +) -> ystdlib::error_handling::Result { + return ystdlib::error_handling::success(); +} + +[[nodiscard]] auto query_index(clp::ReaderInterface& reader, std::string const& query_string) + -> std::vector { + std::istringstream query_string_stream{query_string}; + auto query_handler_result{ + clp::ffi::ir_stream::search::QueryHandler< + decltype(&trivial_new_projected_schema_tree_node_callback)>:: + create(&trivial_new_projected_schema_tree_node_callback, + clp_s::search::kql::parse_kql_expression(query_string_stream), + {}, + false) + }; + + if (query_handler_result.has_error()) { + auto const error_code{query_handler_result.error()}; + throw ClpFfiJsException{ + clp::ErrorCode::ErrorCode_Failure, + __FILENAME__, + __LINE__, + std::format( + "Failed to create qury handler: {} {}", + error_code.category().name(), + error_code.message() + ) + }; + } + + auto deserializer_result{clp::ffi::ir_stream::make_deserializer( + reader, + LogEventIndexIrUnitHandler{}, + std::move(query_handler_result.value()) + )}; + + if (deserializer_result.has_error()) { + auto const error_code{deserializer_result.error()}; + throw ClpFfiJsException{ + 
clp::ErrorCode::ErrorCode_Failure, + __FILENAME__, + __LINE__, + std::format( + "Failed to create deserializer: {} {}", + error_code.category().name(), + error_code.message() + ) + }; + } + + auto& deserializer{deserializer_result.value()}; + while (false == deserializer.is_stream_completed()) { + auto result{deserializer.deserialize_next_ir_unit(reader)}; + if (false == result.has_error()) { + continue; + } + auto const error{result.error()}; + if (std::errc::result_out_of_range == error) { + SPDLOG_ERROR("File contains an incomplete IR stream"); + break; + } + throw ClpFfiJsException{ + clp::ErrorCode::ErrorCode_Corrupt, + __FILENAME__, + __LINE__, + std::format( + "Failed to deserialize IR unit: {}:{}", + error.category().name(), + error.message() + ) + }; + } + + return deserializer.get_ir_unit_handler().get_deserialized_log_event_indexes(); +} +} // namespace clp_ffi_js::ir diff --git a/src/clp_ffi_js/ir/query_methods.hpp b/src/clp_ffi_js/ir/query_methods.hpp new file mode 100644 index 00000000..7988fe1d --- /dev/null +++ b/src/clp_ffi_js/ir/query_methods.hpp @@ -0,0 +1,14 @@ +#ifndef CLP_FFI_JS_IR_QUERY_METHODS_HPP +#define CLP_FFI_JS_IR_QUERY_METHODS_HPP + +#include + +#include +#include + +namespace clp_ffi_js::ir { +[[nodiscard]] auto query_index(clp::ReaderInterface& reader, std::string const& query_string) + -> std::vector; +} // namespace clp_ffi_js::ir + +#endif // CLP_FFI_JS_IR_QUERY_METHODS_HPP diff --git a/taskfiles/deps.yaml b/taskfiles/deps.yaml index a14e4dea..615060e7 100644 --- a/taskfiles/deps.yaml +++ b/taskfiles/deps.yaml @@ -16,14 +16,40 @@ tasks: download-all: internal: true deps: + - "download-antlr-runtime" - "download-boost" - "download-clp" + - "download-date" - "download-fmt" - "download-nlohmann-json" + - "download-simdjson" - "download-spdlog" - "download-ystdlib" - "download-zstd" + + download-antlr-runtime: + internal: true + vars: + LIB_NAME: "antlr-runtime" + ANTLR_RUNTIME_OUTPUT_DIR: "{{.G_DEPS_DIR}}/{{.LIB_NAME}}-src" + 
ANTLR_RUNTIME_VERSION: "4.13.2" + INSTALL_PREFIX: "{{.G_DEPS_CORE_DIR}}/{{.LIB_NAME}}-install" + run: "once" + cmds: + - task: ":utils:remote:download-and-extract-tar" + vars: + FILE_SHA256: "9f18272a9b32b622835a3365f850dd1063d60f5045fb1e12ce475ae6e18a35bb" + INCLUDE_PATTERNS: ["*/runtime/Cpp"] + NUM_COMPONENTS_TO_STRIP: 3 + OUTPUT_DIR: "{{.ANTLR_RUNTIME_OUTPUT_DIR}}" + URL: "https://github.com/antlr/antlr4/archive/refs/tags/{{.ANTLR_RUNTIME_VERSION}}.tar.gz" + - >- + echo "set( + CLP_FFI_JS_ANTLR_RUNTIME_SOURCE_DIRECTORY \"{{.ANTLR_RUNTIME_OUTPUT_DIR}}\" + )" > "{{.G_DEPS_CMAKE_SETTINGS_DIR}}/{{.LIB_NAME}}.cmake" + + download-boost: internal: true vars: @@ -52,14 +78,31 @@ tasks: cmds: - task: ":utils:remote:download-and-extract-tar" vars: - FILE_SHA256: "9315fbd3cbb46487e36a0156fae95f6edfc2de483bbde82df541d478e044c98b" + FILE_SHA256: "00fa3880ac74c00c453832d51f9a533533e59f89b94159f05ad4c767225178f0" OUTPUT_DIR: "{{.CLP_OUTPUT_DIR}}" - URL: "https://github.com/y-scope/clp/archive/88b8b46.tar.gz" + URL: "https://github.com/y-scope/clp/archive/6a00a87.tar.gz" - >- echo "set( CLP_FFI_JS_CLP_SOURCE_DIRECTORY \"{{.CLP_OUTPUT_DIR}}\" )" > "{{.G_DEPS_CMAKE_SETTINGS_DIR}}/{{.LIB_NAME}}.cmake" + download-date: + internal: true + vars: + LIB_NAME: "date" + DATE_OUTPUT_DIR: "{{.G_DEPS_DIR}}/{{.LIB_NAME}}-src" + run: "once" + cmds: + - task: ":utils:remote:download-and-extract-tar" + vars: + FILE_SHA256: "7a390f200f0ccd207e8cff6757e04817c1a0aec3e327b006b7eb451c57ee3538" + OUTPUT_DIR: "{{.DATE_OUTPUT_DIR}}" + URL: "https://github.com/HowardHinnant/date/archive/refs/tags/v3.0.1.tar.gz" + - >- + echo "set( + CLP_FFI_JS_DATE_SOURCE_DIRECTORY \"{{.DATE_OUTPUT_DIR}}\" + )" > "{{.G_DEPS_CMAKE_SETTINGS_DIR}}/{{.LIB_NAME}}.cmake" + download-fmt: internal: true vars: @@ -99,6 +142,23 @@ tasks: CLP_FFI_JS_NLOHMANN_JSON_SOURCE_DIRECTORY \"{{.NLOHMANN_JSON_OUTPUT_DIR}}\" )" > "{{.G_DEPS_CMAKE_SETTINGS_DIR}}/{{.LIB_NAME}}.cmake" + download-simdjson: + internal: true + vars: + LIB_NAME: 
"simdjson" + SIMDJSON_OUTPUT_DIR: "{{.G_DEPS_DIR}}/{{.LIB_NAME}}-src" + run: "once" + cmds: + - task: ":utils:remote:download-and-extract-tar" + vars: + FILE_SHA256: "07a1bb3587aac18fd6a10a83fe4ab09f1100ab39f0cb73baea1317826b9f9e0d" + OUTPUT_DIR: "{{.SIMDJSON_OUTPUT_DIR}}" + URL: "https://github.com/simdjson/simdjson/archive/refs/tags/v3.13.0.tar.gz" + - >- + echo "set( + CLP_FFI_JS_SIMDJSON_SOURCE_DIRECTORY \"{{.SIMDJSON_OUTPUT_DIR}}\" + )" > "{{.G_DEPS_CMAKE_SETTINGS_DIR}}/{{.LIB_NAME}}.cmake" + download-spdlog: internal: true vars: From 002e055db1d882314ce08a59d87bb0c0e7332f74 Mon Sep 17 00:00:00 2001 From: hoophalab <200652805+hoophalab@users.noreply.github.com> Date: Wed, 20 Aug 2025 17:26:13 -0400 Subject: [PATCH 2/7] fix comments --- CMakeLists.txt | 8 +++--- src/clp_ffi_js/ir/StreamReader.hpp | 2 ++ .../ir/StructuredIrStreamReader.cpp | 18 ++++++++---- src/clp_ffi_js/ir/StructuredIrUnitHandler.cpp | 2 +- src/clp_ffi_js/ir/StructuredIrUnitHandler.hpp | 3 +- .../ir/UnstructuredIrStreamReader.hpp | 2 +- src/clp_ffi_js/ir/query_methods.cpp | 28 ++++++++++++++----- src/clp_ffi_js/ir/query_methods.hpp | 14 +++++++++- taskfiles/deps.yaml | 1 - 9 files changed, 56 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fd688985..3d51e775 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -163,10 +163,10 @@ set(CLP_BUILD_CLP_S_SEARCH_KQL ON) set(CLP_BUILD_CLP_S_TIMESTAMPPATTERN ON) add_subdirectory(${CLP_FFI_JS_CLP_SOURCE_DIRECTORY}/components/core/src/clp_s) -add_subdirectory(${CMAKE_SOURCE_DIR}/build/deps/antlr-runtime-src) -add_subdirectory(${CMAKE_SOURCE_DIR}/build/deps/date-src) -add_subdirectory(${CMAKE_SOURCE_DIR}/build/deps/simdjson-src) -add_subdirectory(${CMAKE_SOURCE_DIR}/build/deps/spdlog-src) +add_subdirectory(${CLP_FFI_JS_ANTLR_RUNTIME_SOURCE_DIRECTORY}) +add_subdirectory(${CLP_FFI_JS_DATE_SOURCE_DIRECTORY}) +add_subdirectory(${CLP_FFI_JS_SIMDJSON_SOURCE_DIRECTORY}) +add_subdirectory(${CLP_FFI_JS_SPDLOG_SOURCE_DIRECTORY}) 
foreach(env ${CLP_FFI_JS_SUPPORTED_ENVIRONMENTS}) set(CLP_FFI_JS_BIN_NAME "ClpFfiJs-${env}") diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index a6269a91..6267dc29 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -103,6 +103,8 @@ class StreamReader { * Generates a filtered collection from all log events. * * @param log_level_filter Array of selected log levels + * @param kql_filter KQL expression to filter structured IR. Use an empty string to disable. For + * unstructured IR, the filter is ignored (a warning is logged). */ virtual void filter_log_events(LogLevelFilterTsType const& log_level_filter, std::string const& kql_filter) diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp index bd58a91c..c41ebcc2 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp @@ -1,5 +1,6 @@ #include "StructuredIrStreamReader.hpp" +#include #include #include #include @@ -7,29 +8,32 @@ #include #include #include +#include +#include #include +#include #include #include #include #include -#include +#include #include #include -#include +#include #include #include #include +#include #include #include +#include #include #include #include #include -#include "clp_ffi_js/ir/query_methods.hpp" - namespace clp_ffi_js::ir { namespace { constexpr std::string_view cEmptyJsonStr{"{}"}; @@ -147,10 +151,10 @@ void StructuredIrStreamReader::filter_log_events( ) { m_filtered_log_event_map = std::nullopt; - if (0 != kql_filter.size()) { + if (false == kql_filter.empty()) { auto& reader{m_stream_reader_data_context->get_reader()}; reader.seek_from_begin(0); - auto indexes{query_index(reader, kql_filter)}; + auto indexes{query_log_event_indices(reader, kql_filter)}; m_filtered_log_event_map = std::make_optional(std::move(indexes)); } @@ -235,11 +239,13 @@ auto StructuredIrStreamReader::decode_range(size_t 
begin_idx, size_t end_idx, bo auto json_pair_result{log_event.serialize_to_json()}; if (json_pair_result.has_error()) { auto const error_code{json_pair_result.error()}; + // NOLINTBEGIN(bugprone-lambda-function-name) SPDLOG_ERROR( "Failed to deserialize log event to JSON: {}:{}", error_code.category().name(), error_code.message() ); + // NOLINTEND(bugprone-lambda-function-name) return std::string{cEmptyJsonStr}; } diff --git a/src/clp_ffi_js/ir/StructuredIrUnitHandler.cpp b/src/clp_ffi_js/ir/StructuredIrUnitHandler.cpp index c05904f1..c8833168 100644 --- a/src/clp_ffi_js/ir/StructuredIrUnitHandler.cpp +++ b/src/clp_ffi_js/ir/StructuredIrUnitHandler.cpp @@ -131,7 +131,7 @@ auto StructuredIrUnitHandler::SchemaTreeFullBranch::match( auto StructuredIrUnitHandler::handle_log_event( StructuredLogEvent&& log_event, - [[maybe_unused]] size_t log_event_ix + [[maybe_unused]] size_t log_event_idx ) -> clp::ffi::ir_stream::IRErrorCode { auto const timestamp = get_timestamp(log_event); auto const log_level = get_log_level(log_event); diff --git a/src/clp_ffi_js/ir/StructuredIrUnitHandler.hpp b/src/clp_ffi_js/ir/StructuredIrUnitHandler.hpp index e243d05e..53483276 100644 --- a/src/clp_ffi_js/ir/StructuredIrUnitHandler.hpp +++ b/src/clp_ffi_js/ir/StructuredIrUnitHandler.hpp @@ -106,7 +106,8 @@ class StructuredIrUnitHandler { * @param log_event * @return IRErrorCode::IRErrorCode_Success */ - [[nodiscard]] auto handle_log_event(StructuredLogEvent&& log_event, size_t log_event_ix) + [[nodiscard]] auto + handle_log_event(StructuredLogEvent&& log_event, [[maybe_unused]] size_t log_event_idx) -> clp::ffi::ir_stream::IRErrorCode; /** diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp index 860c9db0..45e7520c 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp @@ -60,7 +60,7 @@ class UnstructuredIrStreamReader : public StreamReader { void filter_log_events( 
LogLevelFilterTsType const& log_level_filter, - std::string const& kql_filter + [[maybe_unused]] std::string const& kql_filter ) override; /** diff --git a/src/clp_ffi_js/ir/query_methods.cpp b/src/clp_ffi_js/ir/query_methods.cpp index 77e5c0f9..949893e6 100644 --- a/src/clp_ffi_js/ir/query_methods.cpp +++ b/src/clp_ffi_js/ir/query_methods.cpp @@ -1,18 +1,29 @@ #include "query_methods.hpp" +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include #include #include +#include #include -#include +#include #include #include -#include +#include +#include +#include -#include "clp_ffi_js/ClpFfiJsException.hpp" -#include "spdlog/common.h" -#include "spdlog/spdlog.h" +#include namespace clp_ffi_js::ir { using clp::ffi::ir_stream::IRErrorCode; @@ -55,6 +66,7 @@ class LogEventIndexIrUnitHandler { std::vector m_deserialized_log_event_indexes; }; +namespace { auto trivial_new_projected_schema_tree_node_callback( [[maybe_unused]] bool is_auto_generated, [[maybe_unused]] clp::ffi::SchemaTree::Node::id_t node_id, @@ -62,8 +74,10 @@ auto trivial_new_projected_schema_tree_node_callback( ) -> ystdlib::error_handling::Result { return ystdlib::error_handling::success(); } +} // namespace -[[nodiscard]] auto query_index(clp::ReaderInterface& reader, std::string const& query_string) +[[nodiscard]] auto +query_log_event_indices(clp::ReaderInterface& reader, std::string const& query_string) -> std::vector { std::istringstream query_string_stream{query_string}; auto query_handler_result{ @@ -82,7 +96,7 @@ auto trivial_new_projected_schema_tree_node_callback( __FILENAME__, __LINE__, std::format( - "Failed to create qury handler: {} {}", + "Failed to create query handler: {} {}", error_code.category().name(), error_code.message() ) diff --git a/src/clp_ffi_js/ir/query_methods.hpp b/src/clp_ffi_js/ir/query_methods.hpp index 7988fe1d..4e854ecd 100644 --- a/src/clp_ffi_js/ir/query_methods.hpp +++ b/src/clp_ffi_js/ir/query_methods.hpp @@ -7,7 
+7,19 @@ #include namespace clp_ffi_js::ir { -[[nodiscard]] auto query_index(clp::ReaderInterface& reader, std::string const& query_string) +/** + * This function searches through the log events in the IR stream provided by the `reader` for + * events that match the given `query_string`. It returns a vector of `size_t` representing the + * indices of the log events that satisfy the query. + * + * @param reader An interface for reading log events. + * @param query_string The query string to match against log events. + * @return A vector of indices of the log events that matched the query. + * @throws ClpFfiJsException if the preamble couldn't be deserialized or the Query counln't be + * executed. + */ +[[nodiscard]] auto +query_log_event_indices(clp::ReaderInterface& reader, std::string const& query_string) -> std::vector; } // namespace clp_ffi_js::ir diff --git a/taskfiles/deps.yaml b/taskfiles/deps.yaml index 615060e7..e8c9e25c 100644 --- a/taskfiles/deps.yaml +++ b/taskfiles/deps.yaml @@ -34,7 +34,6 @@ tasks: LIB_NAME: "antlr-runtime" ANTLR_RUNTIME_OUTPUT_DIR: "{{.G_DEPS_DIR}}/{{.LIB_NAME}}-src" ANTLR_RUNTIME_VERSION: "4.13.2" - INSTALL_PREFIX: "{{.G_DEPS_CORE_DIR}}/{{.LIB_NAME}}-install" run: "once" cmds: - task: ":utils:remote:download-and-extract-tar" From 927dabde0189a39564f678a13a9659c68a086ba3 Mon Sep 17 00:00:00 2001 From: hoophalab <200652805+hoophalab@users.noreply.github.com> Date: Wed, 20 Aug 2025 18:11:55 -0400 Subject: [PATCH 3/7] fix coderabbit comments --- src/clp_ffi_js/ir/query_methods.cpp | 5 ++--- src/clp_ffi_js/ir/query_methods.hpp | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/clp_ffi_js/ir/query_methods.cpp b/src/clp_ffi_js/ir/query_methods.cpp index 949893e6..7cace83e 100644 --- a/src/clp_ffi_js/ir/query_methods.cpp +++ b/src/clp_ffi_js/ir/query_methods.cpp @@ -1,10 +1,8 @@ #include "query_methods.hpp" #include -#include #include #include -#include #include #include #include @@ -12,14 +10,15 @@ #include #include 
+#include #include #include #include #include #include +#include #include #include -#include #include #include diff --git a/src/clp_ffi_js/ir/query_methods.hpp b/src/clp_ffi_js/ir/query_methods.hpp index 4e854ecd..a210e6ee 100644 --- a/src/clp_ffi_js/ir/query_methods.hpp +++ b/src/clp_ffi_js/ir/query_methods.hpp @@ -15,7 +15,7 @@ namespace clp_ffi_js::ir { * @param reader An interface for reading log events. * @param query_string The query string to match against log events. * @return A vector of indices of the log events that matched the query. - * @throws ClpFfiJsException if the preamble couldn't be deserialized or the Query counln't be + * @throws ClpFfiJsException if the preamble couldn't be deserialized or the Query couldn't be * executed. */ [[nodiscard]] auto From 56903d6c1380874bc1c43976701a268e36f42e61 Mon Sep 17 00:00:00 2001 From: hoophalab <200652805+hoophalab@users.noreply.github.com> Date: Wed, 20 Aug 2025 18:19:42 -0400 Subject: [PATCH 4/7] fix coderabbit comments --- src/clp_ffi_js/ir/query_methods.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/clp_ffi_js/ir/query_methods.cpp b/src/clp_ffi_js/ir/query_methods.cpp index 7cace83e..bea28dd0 100644 --- a/src/clp_ffi_js/ir/query_methods.cpp +++ b/src/clp_ffi_js/ir/query_methods.cpp @@ -32,9 +32,9 @@ using clp::UtcOffset; class LogEventIndexIrUnitHandler { public: [[nodiscard]] auto - handle_log_event([[maybe_unused]] KeyValuePairLogEvent&& log_event, size_t log_event_ix) + handle_log_event([[maybe_unused]] KeyValuePairLogEvent&& log_event, size_t log_event_idx) -> IRErrorCode { - m_deserialized_log_event_indexes.push_back(log_event_ix); + m_deserialized_log_event_indexes.push_back(log_event_idx); return IRErrorCode::IRErrorCode_Success; } From d2eef8d8ceb7fda778013e98e4d60248c234b9a0 Mon Sep 17 00:00:00 2001 From: hoophalab <200652805+hoophalab@users.noreply.github.com> Date: Thu, 21 Aug 2025 14:38:44 -0400 Subject: [PATCH 5/7] address comments --- CMakeLists.txt | 12 
+++++++++--- src/clp_ffi_js/ir/StreamReader.cpp | 14 +++++++++++++- src/clp_ffi_js/ir/StreamReader.hpp | 14 +++++++++++++- src/clp_ffi_js/ir/StructuredIrStreamReader.cpp | 18 +++++++++--------- .../ir/UnstructuredIrStreamReader.cpp | 9 ++++++--- src/clp_ffi_js/ir/query_methods.cpp | 12 ++++++------ 6 files changed, 56 insertions(+), 23 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d51e775..1daf88a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -150,6 +150,12 @@ set(CLP_FFI_JS_SUPPORTED_ENVIRONMENTS set(CLP_BUILD_CLP_STRING_UTILS ON) add_subdirectory(${CLP_FFI_JS_CLP_SOURCE_DIRECTORY}/components/core/src/clp/string_utils) +# The `clp_s` component is linked because the IR search feature uses the same KQL syntax as `clp_s` +# by reusing `clp_s`'s AST and KQL libraries. +# +# However, 'clp-ffi-js' doesn't currently require other 'clp_s' build targets (e.g., executables, +# archive readers, etc.). Disabling them prevents an increase in the binary size and reduces the +# number of dependencies needed during the build process. 
set(CLP_BUILD_EXECUTABLES OFF) set(CLP_BUILD_CLP_S_ARCHIVEREADER OFF) set(CLP_BUILD_CLP_S_ARCHIVEWRITER OFF) @@ -163,7 +169,10 @@ set(CLP_BUILD_CLP_S_SEARCH_KQL ON) set(CLP_BUILD_CLP_S_TIMESTAMPPATTERN ON) add_subdirectory(${CLP_FFI_JS_CLP_SOURCE_DIRECTORY}/components/core/src/clp_s) +set(ANTLR_BUILD_SHARED OFF) +set(ANTLR_BUILD_STATIC ON) add_subdirectory(${CLP_FFI_JS_ANTLR_RUNTIME_SOURCE_DIRECTORY}) + add_subdirectory(${CLP_FFI_JS_DATE_SOURCE_DIRECTORY}) add_subdirectory(${CLP_FFI_JS_SIMDJSON_SOURCE_DIRECTORY}) add_subdirectory(${CLP_FFI_JS_SPDLOG_SOURCE_DIRECTORY}) @@ -180,13 +189,10 @@ foreach(env ${CLP_FFI_JS_SUPPORTED_ENVIRONMENTS}) # Set up link options target_link_libraries(${CLP_FFI_JS_BIN_NAME} PRIVATE - antlr4_static clp_s::search::ast clp_s::search::kql embind - simdjson::simdjson spdlog::spdlog - string_utils ) set(CLP_FFI_JS_LINK_OPTIONS ${CLP_FFI_JS_COMMON_LINK_OPTIONS} diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/StreamReader.cpp index 15606e5b..cc7e417f 100644 --- a/src/clp_ffi_js/ir/StreamReader.cpp +++ b/src/clp_ffi_js/ir/StreamReader.cpp @@ -92,7 +92,19 @@ EMSCRIPTEN_BINDINGS(ClpStreamReader) { "getFilteredLogEventMap", &clp_ffi_js::ir::StreamReader::get_filtered_log_event_map ) - .function("filterLogEvents", &clp_ffi_js::ir::StreamReader::filter_log_events) + .function( + "filterLogEvents", + emscripten::select_overload( + &clp_ffi_js::ir::StreamReader::filter_log_events + ) + ) + .function( + "filterLogEvents", + emscripten::select_overload< + void(clp_ffi_js::ir::LogLevelFilterTsType const&, std::string const&)>( + &clp_ffi_js::ir::StreamReader::filter_log_events + ) + ) .function("deserializeStream", &clp_ffi_js::ir::StreamReader::deserialize_stream) .function("decodeRange", &clp_ffi_js::ir::StreamReader::decode_range) .function( diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 6267dc29..bbe96fdc 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ 
b/src/clp_ffi_js/ir/StreamReader.hpp @@ -96,6 +96,8 @@ class StreamReader { /** * @return The filtered log events map. + * This is a sorted list of log event indices that match the filter. + * If all log events match the filter, it returns `null` or a vector of all log events. */ [[nodiscard]] virtual auto get_filtered_log_event_map() const -> FilteredLogEventMapTsType = 0; @@ -104,12 +106,22 @@ class StreamReader { * * @param log_level_filter Array of selected log levels * @param kql_filter KQL expression to filter structured IR. Use an empty string to disable. For - * unstructured IR, the filter is ignored (a warning is logged). + * unstructured IR, the filter is ignored and a warning is logged if `kql_filter` is not an + * empty string. */ virtual void filter_log_events(LogLevelFilterTsType const& log_level_filter, std::string const& kql_filter) = 0; + /** + * Generates a filtered collection from all log events. + * + * @param log_level_filter Array of selected log levels + */ + void filter_log_events(LogLevelFilterTsType const& log_level_filter) { + filter_log_events(log_level_filter, ""); + } + /** * Deserializes all log events in the stream. 
* diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp index c41ebcc2..8046cf87 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include @@ -155,17 +155,17 @@ void StructuredIrStreamReader::filter_log_events( auto& reader{m_stream_reader_data_context->get_reader()}; reader.seek_from_begin(0); auto indexes{query_log_event_indices(reader, kql_filter)}; - m_filtered_log_event_map = std::make_optional(std::move(indexes)); + m_filtered_log_event_map = std::move(indexes); } if (false == log_level_filter.isNull()) { std::vector filtered_log_event_map; - auto filter_levels{ + auto const filter_levels{ emscripten::vecFromJSArray>(log_level_filter) }; - auto fn = [&](size_t log_event_idx) { + auto filter_and_collect_idx = [&](size_t const log_event_idx) { auto const& log_event{m_deserialized_log_events->at(log_event_idx)}; if (std::ranges::find( filter_levels, @@ -178,18 +178,18 @@ void StructuredIrStreamReader::filter_log_events( }; if (m_filtered_log_event_map.has_value()) { - for (auto log_event_idx : m_filtered_log_event_map.value()) { - fn(log_event_idx); + for (auto const log_event_idx : m_filtered_log_event_map.value()) { + filter_and_collect_idx(log_event_idx); } } else { for (size_t log_event_idx = 0; log_event_idx < m_deserialized_log_events->size(); ++log_event_idx) { - fn(log_event_idx); + filter_and_collect_idx(log_event_idx); } } - m_filtered_log_event_map = std::make_optional(std::move(filtered_log_event_map)); + m_filtered_log_event_map = std::move(filtered_log_event_map); } if (m_filtered_log_event_map.has_value() @@ -216,7 +216,7 @@ auto StructuredIrStreamReader::deserialize_stream() -> size_t { } auto const error{result.error()}; if (std::errc::result_out_of_range == error) { - SPDLOG_ERROR("File contains an incomplete IR stream"); + SPDLOG_WARN("File contains an 
incomplete IR stream"); break; } throw ClpFfiJsException{ diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp index c684e974..59831b8a 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp @@ -84,9 +84,12 @@ void UnstructuredIrStreamReader::filter_log_events( LogLevelFilterTsType const& log_level_filter, [[maybe_unused]] std::string const& kql_filter ) { - SPDLOG_WARN( - "KQL filters aren't supported for unstructured IR streams, so they're being ignored." - ); + if (false == kql_filter.empty()) { + SPDLOG_WARN( + "KQL filters aren't supported for unstructured IR streams, so they're being " + "ignored." + ); + } generic_filter_log_events(m_filtered_log_event_map, log_level_filter, m_encoded_log_events); } diff --git a/src/clp_ffi_js/ir/query_methods.cpp b/src/clp_ffi_js/ir/query_methods.cpp index bea28dd0..0c13cd75 100644 --- a/src/clp_ffi_js/ir/query_methods.cpp +++ b/src/clp_ffi_js/ir/query_methods.cpp @@ -34,7 +34,7 @@ class LogEventIndexIrUnitHandler { [[nodiscard]] auto handle_log_event([[maybe_unused]] KeyValuePairLogEvent&& log_event, size_t log_event_idx) -> IRErrorCode { - m_deserialized_log_event_indexes.push_back(log_event_idx); + m_deserialized_log_event_indices.push_back(log_event_idx); return IRErrorCode::IRErrorCode_Success; } @@ -57,12 +57,12 @@ class LogEventIndexIrUnitHandler { return IRErrorCode::IRErrorCode_Success; } - [[nodiscard]] auto get_deserialized_log_event_indexes() const -> std::vector const& { - return m_deserialized_log_event_indexes; + [[nodiscard]] auto get_deserialized_log_event_indices() const -> std::vector const& { + return m_deserialized_log_event_indices; } private: - std::vector m_deserialized_log_event_indexes; + std::vector m_deserialized_log_event_indices; }; namespace { @@ -130,7 +130,7 @@ query_log_event_indices(clp::ReaderInterface& reader, std::string const& query_s } auto const 
error{result.error()}; if (std::errc::result_out_of_range == error) { - SPDLOG_ERROR("File contains an incomplete IR stream"); + SPDLOG_WARN("File contains an incomplete IR stream"); break; } throw ClpFfiJsException{ @@ -145,6 +145,6 @@ query_log_event_indices(clp::ReaderInterface& reader, std::string const& query_s }; } - return deserializer.get_ir_unit_handler().get_deserialized_log_event_indexes(); + return deserializer.get_ir_unit_handler().get_deserialized_log_event_indices(); } } // namespace clp_ffi_js::ir From 587c25f5e14d6df5f2e778c91fbf9e59ce9ae2f6 Mon Sep 17 00:00:00 2001 From: hoophalab <200652805+hoophalab@users.noreply.github.com> Date: Thu, 21 Aug 2025 15:09:05 -0400 Subject: [PATCH 6/7] rename indices --- CMakeLists.txt | 2 +- src/clp_ffi_js/ir/StructuredIrStreamReader.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1daf88a6..287d8fd0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -150,7 +150,7 @@ set(CLP_FFI_JS_SUPPORTED_ENVIRONMENTS set(CLP_BUILD_CLP_STRING_UTILS ON) add_subdirectory(${CLP_FFI_JS_CLP_SOURCE_DIRECTORY}/components/core/src/clp/string_utils) -# The `clp_s` component is linked because the IR search feature uses the same KQL syntax as `clp_s`. +# The `clp_s` component is linked because the IR search feature uses the same KQL syntax as `clp_s`, # by reusing `clp_s`'s AST and KQL libraries. 
# # However, 'clp-ffi-js' doesn't currently require other 'clp_s' build targets (e.g., executables, diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp index 8046cf87..7a56beb8 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp @@ -154,8 +154,8 @@ void StructuredIrStreamReader::filter_log_events( if (false == kql_filter.empty()) { auto& reader{m_stream_reader_data_context->get_reader()}; reader.seek_from_begin(0); - auto indexes{query_log_event_indices(reader, kql_filter)}; - m_filtered_log_event_map = std::move(indexes); + auto indices{query_log_event_indices(reader, kql_filter)}; + m_filtered_log_event_map = std::move(indices); } if (false == log_level_filter.isNull()) { From 994a77e9ba4e9beb0baf3b8ba1b63975e9915cec Mon Sep 17 00:00:00 2001 From: hoophalab <200652805+hoophalab@users.noreply.github.com> Date: Thu, 21 Aug 2025 17:33:35 -0400 Subject: [PATCH 7/7] address comments --- src/clp_ffi_js/ir/StreamReader.hpp | 10 +++--- .../ir/StructuredIrStreamReader.cpp | 30 +++------------- src/clp_ffi_js/ir/decoding_methods.hpp | 35 +++++++++++++++++++ src/clp_ffi_js/ir/query_methods.cpp | 28 +++------------ src/clp_ffi_js/ir/query_methods.hpp | 4 +-- 5 files changed, 50 insertions(+), 57 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index bbe96fdc..58d68988 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -104,10 +104,10 @@ class StreamReader { /** * Generates a filtered collection from all log events. * - * @param log_level_filter Array of selected log levels - * @param kql_filter KQL expression to filter structured IR. Use an empty string to disable. For - * unstructured IR, the filter is ignored and a warning is logged if `kql_filter` is not an - * empty string. + * @param log_level_filter Array of selected log levels. 
+ * @param kql_filter: A KQL expression used to filter kv-pairs. + * - For structured IR: the filter is applied when non-empty. + * - For unstructured IR: the filter is always ignored, and a warning is logged when non-empty. */ virtual void filter_log_events(LogLevelFilterTsType const& log_level_filter, std::string const& kql_filter) @@ -116,7 +116,7 @@ class StreamReader { /** * Generates a filtered collection from all log events. * - * @param log_level_filter Array of selected log levels + * @param log_level_filter Array of selected log levels. */ void filter_log_events(LogLevelFilterTsType const& log_level_filter) { filter_log_events(log_level_filter, ""); diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp index 7a56beb8..d35862f5 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -149,13 +148,12 @@ void StructuredIrStreamReader::filter_log_events( LogLevelFilterTsType const& log_level_filter, std::string const& kql_filter ) { - m_filtered_log_event_map = std::nullopt; + m_filtered_log_event_map.reset(); if (false == kql_filter.empty()) { auto& reader{m_stream_reader_data_context->get_reader()}; reader.seek_from_begin(0); - auto indices{query_log_event_indices(reader, kql_filter)}; - m_filtered_log_event_map = std::move(indices); + m_filtered_log_event_map.emplace(collect_matched_log_event_indices(reader, kql_filter)); } if (false == log_level_filter.isNull()) { @@ -182,7 +180,7 @@ void StructuredIrStreamReader::filter_log_events( filter_and_collect_idx(log_event_idx); } } else { - for (size_t log_event_idx = 0; log_event_idx < m_deserialized_log_events->size(); + for (size_t log_event_idx{0}; log_event_idx < m_deserialized_log_events->size(); ++log_event_idx) { filter_and_collect_idx(log_event_idx); @@ -209,27 +207,7 @@ auto StructuredIrStreamReader::deserialize_stream() 
-> size_t { auto& reader{m_stream_reader_data_context->get_reader()}; auto& deserializer = m_stream_reader_data_context->get_deserializer(); - while (false == deserializer.is_stream_completed()) { - auto result{deserializer.deserialize_next_ir_unit(reader)}; - if (false == result.has_error()) { - continue; - } - auto const error{result.error()}; - if (std::errc::result_out_of_range == error) { - SPDLOG_WARN("File contains an incomplete IR stream"); - break; - } - throw ClpFfiJsException{ - clp::ErrorCode::ErrorCode_Corrupt, - __FILENAME__, - __LINE__, - std::format( - "Failed to deserialize IR unit: {}:{}", - error.category().name(), - error.message() - ) - }; - } + deserialize_log_events(deserializer, reader); return m_deserialized_log_events->size(); } diff --git a/src/clp_ffi_js/ir/decoding_methods.hpp b/src/clp_ffi_js/ir/decoding_methods.hpp index 26d25800..e08e0206 100644 --- a/src/clp_ffi_js/ir/decoding_methods.hpp +++ b/src/clp_ffi_js/ir/decoding_methods.hpp @@ -1,10 +1,15 @@ #ifndef CLP_FFI_JS_IR_DECODING_METHODS_HPP #define CLP_FFI_JS_IR_DECODING_METHODS_HPP +#include +#include +#include #include #include +#include #include +#include namespace clp_ffi_js::ir { /** @@ -32,6 +37,36 @@ auto rewind_reader_and_validate_encoding_type(clp::ReaderInterface& reader) -> v * @return The converted JavaScript object. 
*/ [[nodiscard]] auto convert_metadata_to_js_object(nlohmann::json const& metadata) -> MetadataTsType; + +template < + clp::ffi::ir_stream::IrUnitHandlerReq IrUnitHandlerType, + clp::ffi::ir_stream::search::QueryHandlerReq QueryHandlerType> +auto deserialize_log_events( + clp::ffi::ir_stream::Deserializer& deserializer, + clp::ReaderInterface& reader +) -> void { + while (false == deserializer.is_stream_completed()) { + auto const result{deserializer.deserialize_next_ir_unit(reader)}; + if (false == result.has_error()) { + continue; + } + auto const error{result.error()}; + if (std::errc::result_out_of_range == error) { + SPDLOG_WARN("File contains an incomplete IR stream"); + break; + } + throw ClpFfiJsException{ + clp::ErrorCode::ErrorCode_Corrupt, + __FILENAME__, + __LINE__, + std::format( + "Failed to deserialize IR unit: {}:{}", + error.category().name(), + error.message() + ) + }; + } +} } // namespace clp_ffi_js::ir #endif // CLP_FFI_JS_IR_DECODING_METHODS_HPP diff --git a/src/clp_ffi_js/ir/query_methods.cpp b/src/clp_ffi_js/ir/query_methods.cpp index 0c13cd75..1e2b0c40 100644 --- a/src/clp_ffi_js/ir/query_methods.cpp +++ b/src/clp_ffi_js/ir/query_methods.cpp @@ -23,8 +23,10 @@ #include #include +#include namespace clp_ffi_js::ir { +namespace { using clp::ffi::ir_stream::IRErrorCode; using clp::ffi::KeyValuePairLogEvent; using clp::UtcOffset; @@ -65,7 +67,6 @@ class LogEventIndexIrUnitHandler { std::vector m_deserialized_log_event_indices; }; -namespace { auto trivial_new_projected_schema_tree_node_callback( [[maybe_unused]] bool is_auto_generated, [[maybe_unused]] clp::ffi::SchemaTree::Node::id_t node_id, @@ -76,7 +77,7 @@ auto trivial_new_projected_schema_tree_node_callback( } // namespace [[nodiscard]] auto -query_log_event_indices(clp::ReaderInterface& reader, std::string const& query_string) +collect_matched_log_event_indices(clp::ReaderInterface& reader, std::string const& query_string) -> std::vector { std::istringstream 
query_string_stream{query_string}; auto query_handler_result{ @@ -123,28 +124,7 @@ query_log_event_indices(clp::ReaderInterface& reader, std::string const& query_s } auto& deserializer{deserializer_result.value()}; - while (false == deserializer.is_stream_completed()) { - auto result{deserializer.deserialize_next_ir_unit(reader)}; - if (false == result.has_error()) { - continue; - } - auto const error{result.error()}; - if (std::errc::result_out_of_range == error) { - SPDLOG_WARN("File contains an incomplete IR stream"); - break; - } - throw ClpFfiJsException{ - clp::ErrorCode::ErrorCode_Corrupt, - __FILENAME__, - __LINE__, - std::format( - "Failed to deserialize IR unit: {}:{}", - error.category().name(), - error.message() - ) - }; - } - + deserialize_log_events(deserializer, reader); return deserializer.get_ir_unit_handler().get_deserialized_log_event_indices(); } } // namespace clp_ffi_js::ir diff --git a/src/clp_ffi_js/ir/query_methods.hpp b/src/clp_ffi_js/ir/query_methods.hpp index a210e6ee..40801806 100644 --- a/src/clp_ffi_js/ir/query_methods.hpp +++ b/src/clp_ffi_js/ir/query_methods.hpp @@ -18,8 +18,8 @@ namespace clp_ffi_js::ir { * @throws ClpFfiJsException if the preamble couldn't be deserialized or the Query couldn't be * executed. */ -[[nodiscard]] auto -query_log_event_indices(clp::ReaderInterface& reader, std::string const& query_string) +auto +collect_matched_log_event_indices(clp::ReaderInterface& reader, std::string const& query_string) -> std::vector; } // namespace clp_ffi_js::ir