From 88d83bf419e5dddb54475e9b6720d751b0d4f461 Mon Sep 17 00:00:00 2001 From: Abigail Matthews Date: Mon, 2 Dec 2024 00:48:07 -0500 Subject: [PATCH] test(clp-s): Add end-to-end test case for compression and extraction. (#595) --- components/core/CMakeLists.txt | 54 +++++- .../core/tests/test-clp_s-end_to_end.cpp | 158 ++++++++++++++++++ .../test_no_floats_sorted.jsonl | 4 + .../install-prebuilt-packages.sh | 2 + .../ubuntu-focal/install-prebuilt-packages.sh | 1 + .../ubuntu-jammy/install-prebuilt-packages.sh | 1 + 6 files changed, 219 insertions(+), 1 deletion(-) create mode 100644 components/core/tests/test-clp_s-end_to_end.cpp create mode 100644 components/core/tests/test_log_files/test_no_floats_sorted.jsonl diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index e5c9b06c8..1b4fdb1be 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -241,6 +241,42 @@ add_subdirectory(src/clp_s) add_subdirectory(src/reducer) set(SOURCE_FILES_clp_s_unitTest + src/clp_s/ArchiveReader.cpp + src/clp_s/ArchiveReader.hpp + src/clp_s/ArchiveWriter.cpp + src/clp_s/ArchiveWriter.hpp + src/clp_s/ColumnReader.cpp + src/clp_s/ColumnReader.hpp + src/clp_s/ColumnWriter.cpp + src/clp_s/ColumnWriter.hpp + src/clp_s/DictionaryEntry.cpp + src/clp_s/DictionaryEntry.hpp + src/clp_s/DictionaryWriter.cpp + src/clp_s/DictionaryWriter.hpp + src/clp_s/FileReader.cpp + src/clp_s/FileReader.hpp + src/clp_s/FileWriter.cpp + src/clp_s/FileWriter.hpp + src/clp_s/JsonConstructor.cpp + src/clp_s/JsonConstructor.hpp + src/clp_s/JsonFileIterator.cpp + src/clp_s/JsonFileIterator.hpp + src/clp_s/JsonParser.cpp + src/clp_s/JsonParser.hpp + src/clp_s/PackedStreamReader.cpp + src/clp_s/PackedStreamReader.hpp + src/clp_s/ReaderUtils.cpp + src/clp_s/ReaderUtils.hpp + src/clp_s/Schema.cpp + src/clp_s/Schema.hpp + src/clp_s/SchemaMap.cpp + src/clp_s/SchemaMap.hpp + src/clp_s/SchemaReader.cpp + src/clp_s/SchemaReader.hpp + src/clp_s/SchemaTree.cpp + src/clp_s/SchemaTree.hpp + src/clp_s/SchemaWriter.cpp + src/clp_s/SchemaWriter.hpp src/clp_s/search/AndExpr.cpp src/clp_s/search/AndExpr.hpp src/clp_s/search/BooleanLiteral.cpp @@ -273,11 +309,24 @@ set(SOURCE_FILES_clp_s_unitTest src/clp_s/search/StringLiteral.hpp src/clp_s/search/Transformation.hpp src/clp_s/search/Value.hpp - src/clp_s/SchemaTree.hpp + src/clp_s/TimestampDictionaryReader.cpp + src/clp_s/TimestampDictionaryReader.hpp + src/clp_s/TimestampDictionaryWriter.cpp + src/clp_s/TimestampDictionaryWriter.hpp + src/clp_s/TimestampEntry.cpp + src/clp_s/TimestampEntry.hpp src/clp_s/TimestampPattern.cpp src/clp_s/TimestampPattern.hpp src/clp_s/Utils.cpp src/clp_s/Utils.hpp + src/clp_s/VariableDecoder.cpp + src/clp_s/VariableDecoder.hpp + src/clp_s/VariableEncoder.cpp + src/clp_s/VariableEncoder.hpp + src/clp_s/ZstdCompressor.cpp + src/clp_s/ZstdCompressor.hpp + src/clp_s/ZstdDecompressor.cpp + src/clp_s/ZstdDecompressor.hpp ) set(SOURCE_FILES_unitTest @@ -499,6 +548,7 @@ set(SOURCE_FILES_unitTest tests/LogSuppressor.hpp tests/test-Array.cpp tests/test-BufferedFileReader.cpp + tests/test-clp_s-end_to_end.cpp tests/test-EncodedVariableInterpreter.cpp tests/test-encoding_methods.cpp tests/test-ffi_IrUnitHandlerInterface.cpp @@ -542,6 +592,8 @@ target_link_libraries(unitTest log_surgeon::log_surgeon LibArchive::LibArchive MariaDBClient::MariaDBClient + ${MONGOCXX_TARGET} + simdjson spdlog::spdlog OpenSSL::Crypto ${sqlite_LIBRARY_DEPENDENCIES} diff --git a/components/core/tests/test-clp_s-end_to_end.cpp b/components/core/tests/test-clp_s-end_to_end.cpp new file mode 100644 index 000000000..3f138b472 --- /dev/null +++ b/components/core/tests/test-clp_s-end_to_end.cpp @@ -0,0 +1,158 @@ +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include "../src/clp_s/JsonConstructor.hpp" +#include "../src/clp_s/JsonParser.hpp" + +constexpr std::string_view cTestEndToEndArchiveDirectory{"test-end-to-end-archive"}; +constexpr std::string_view cTestEndToEndOutputDirectory{"test-end-to-end-out"}; +constexpr std::string_view cTestEndToEndOutputSortedJson{"test-end-to-end_sorted.jsonl"}; +constexpr std::string_view cTestEndToEndInputFileDirectory{"test_log_files"}; +constexpr std::string_view cTestEndToEndInputFile{"test_no_floats_sorted.jsonl"}; + +namespace { +/** + * A class that deletes the directories and files created by test cases, both before and after each + * test case where the class is instantiated. + */ +class TestOutputCleaner { +public: + TestOutputCleaner() { delete_files(); } + + ~TestOutputCleaner() { delete_files(); } + + // Delete copy & move constructors and assignment operators + TestOutputCleaner(TestOutputCleaner const&) = delete; + TestOutputCleaner(TestOutputCleaner&&) = delete; + auto operator=(TestOutputCleaner const&) -> TestOutputCleaner& = delete; + auto operator=(TestOutputCleaner&&) -> TestOutputCleaner& = delete; + +private: + static void delete_files() { + std::filesystem::remove_all(cTestEndToEndArchiveDirectory); + std::filesystem::remove_all(cTestEndToEndOutputDirectory); + std::filesystem::remove(cTestEndToEndOutputSortedJson); + } +}; + +auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path; +auto get_test_input_local_path() -> std::string; +void compress(bool structurize_arrays); +auto extract() -> std::filesystem::path; +void compare(std::filesystem::path const& extracted_json_path); + +auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path { + return std::filesystem::path{cTestEndToEndInputFileDirectory} / cTestEndToEndInputFile; +} + +auto get_test_input_local_path() -> std::string { + std::filesystem::path const current_file_path{__FILE__}; + auto const tests_dir{current_file_path.parent_path()}; + return (tests_dir / get_test_input_path_relative_to_tests_dir()).string(); +} + +void compress(bool structurize_arrays) { + constexpr auto cDefaultTargetEncodedSize = 8ULL * 1024 * 1024 * 1024; // 8 GiB + constexpr auto cDefaultMaxDocumentSize = 512ULL * 1024 * 1024; // 512 MiB + constexpr auto cDefaultMinTableSize = 1ULL * 1024 * 1024; // 1 MiB + constexpr auto cDefaultCompressionLevel = 3; + constexpr auto cDefaultPrintArchiveStats = false; + + std::filesystem::create_directory(cTestEndToEndArchiveDirectory); + REQUIRE((std::filesystem::is_directory(cTestEndToEndArchiveDirectory))); + + clp_s::JsonParserOption parser_option{}; + parser_option.file_paths.push_back(get_test_input_local_path()); + parser_option.archives_dir = cTestEndToEndArchiveDirectory; + parser_option.target_encoded_size = cDefaultTargetEncodedSize; + parser_option.max_document_size = cDefaultMaxDocumentSize; + parser_option.min_table_size = cDefaultMinTableSize; + parser_option.compression_level = cDefaultCompressionLevel; + parser_option.print_archive_stats = cDefaultPrintArchiveStats; + parser_option.structurize_arrays = structurize_arrays; + + clp_s::JsonParser parser{parser_option}; + REQUIRE(parser.parse()); + parser.store(); + + REQUIRE((false == std::filesystem::is_empty(cTestEndToEndArchiveDirectory))); +} + +auto extract() -> std::filesystem::path { + constexpr auto cDefaultOrdered = false; + constexpr auto cDefaultTargetOrderedChunkSize = 0; + + std::filesystem::create_directory(cTestEndToEndOutputDirectory); + REQUIRE(std::filesystem::is_directory(cTestEndToEndOutputDirectory)); + + clp_s::JsonConstructorOption constructor_option{}; + constructor_option.archives_dir = cTestEndToEndArchiveDirectory; + constructor_option.output_dir = cTestEndToEndOutputDirectory; + constructor_option.ordered = cDefaultOrdered; + constructor_option.target_ordered_chunk_size = cDefaultTargetOrderedChunkSize; + for (auto const& entry : std::filesystem::directory_iterator(constructor_option.archives_dir)) { + if (false == entry.is_directory()) { + // Skip non-directories + continue; + } + + constructor_option.archive_id = entry.path().filename(); + clp_s::JsonConstructor constructor{constructor_option}; + constructor.store(); + } + std::filesystem::path extracted_json_path{cTestEndToEndOutputDirectory}; + extracted_json_path /= "original"; + REQUIRE(std::filesystem::exists(extracted_json_path)); + + return extracted_json_path; +} + +// Silence the checks below since our use of `std::system` is safe in the context of testing. +// NOLINTBEGIN(cert-env33-c,concurrency-mt-unsafe) +void compare(std::filesystem::path const& extracted_json_path) { + int result{std::system("command -v jq >/dev/null 2>&1")}; + REQUIRE((0 == result)); + auto command = fmt::format( + "jq --sort-keys --compact-output '.' {} | sort > {}", + extracted_json_path.string(), + cTestEndToEndOutputSortedJson + ); + result = std::system(command.c_str()); + REQUIRE((0 == result)); + + REQUIRE((false == std::filesystem::is_empty(cTestEndToEndOutputSortedJson))); + + result = std::system("command -v diff >/dev/null 2>&1"); + REQUIRE((0 == result)); + command = fmt::format( + "diff --unified {} {} > /dev/null", + cTestEndToEndOutputSortedJson, + get_test_input_local_path() + ); + result = std::system(command.c_str()); + REQUIRE((true == WIFEXITED(result))); + REQUIRE((0 == WEXITSTATUS(result))); +} + +// NOLINTEND(cert-env33-c,concurrency-mt-unsafe) +} // namespace + +TEST_CASE("clp-s-compress-extract-no-floats", "[clp-s][end-to-end]") { + auto structurize_arrays = GENERATE(true, false); + + TestOutputCleaner const test_cleanup; + + compress(structurize_arrays); + + auto extracted_json_path = extract(); + + compare(extracted_json_path); +} diff --git a/components/core/tests/test_log_files/test_no_floats_sorted.jsonl b/components/core/tests/test_log_files/test_no_floats_sorted.jsonl new file mode 100644 index 000000000..8dfcd85f6 --- /dev/null +++ b/components/core/tests/test_log_files/test_no_floats_sorted.jsonl @@ -0,0 +1,4 @@ +{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true} +{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true} +{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"non_empty_object2":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true} +{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"nonempty_array":[1,2,3,4,5],"null":null,"string":"short_string","true":true} diff --git a/components/core/tools/scripts/lib_install/centos-stream-9/install-prebuilt-packages.sh b/components/core/tools/scripts/lib_install/centos-stream-9/install-prebuilt-packages.sh index e90f54733..66ea4ac4f 100755 --- a/components/core/tools/scripts/lib_install/centos-stream-9/install-prebuilt-packages.sh +++ b/components/core/tools/scripts/lib_install/centos-stream-9/install-prebuilt-packages.sh @@ -8,9 +8,11 @@ set -u dnf install -y \ cmake \ + diffutils \ gcc-c++ \ git \ java-11-openjdk \ + jq \ libarchive-devel \ libcurl-devel \ libzstd-devel \ diff --git a/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh b/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh index 706674764..8997ffe01 100755 --- a/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh +++ b/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh @@ -17,6 +17,7 @@ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ gcc \ gcc-10 \ git \ + jq \ libcurl4 \ libcurl4-openssl-dev \ libmariadb-dev \ diff --git a/components/core/tools/scripts/lib_install/ubuntu-jammy/install-prebuilt-packages.sh b/components/core/tools/scripts/lib_install/ubuntu-jammy/install-prebuilt-packages.sh index 92d965b9b..9ed6b9b10 100755 --- a/components/core/tools/scripts/lib_install/ubuntu-jammy/install-prebuilt-packages.sh +++ b/components/core/tools/scripts/lib_install/ubuntu-jammy/install-prebuilt-packages.sh @@ -14,6 +14,7 @@ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ curl \ build-essential \ git \ + jq \ libboost-filesystem-dev \ libboost-iostreams-dev \ libboost-program-options-dev \