From dceb5648e8a2df0ecc65cbe81a07f538f5538359 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Mon, 25 Nov 2024 09:53:02 -0500 Subject: [PATCH 01/35] Add lzma download and port lzma scripts --- components/core/.clang-format | 2 +- components/core/CMakeLists.txt | 24 +- .../clp/streaming_compression/Constants.hpp | 1 + .../streaming_compression/lzma/Compressor.cpp | 303 +++++++++++++++ .../streaming_compression/lzma/Compressor.hpp | 133 +++++++ .../streaming_compression/lzma/Constants.hpp | 15 + .../lzma/Decompressor.cpp | 362 ++++++++++++++++++ .../lzma/Decompressor.hpp | 162 ++++++++ .../core/tests/test-StreamingCompression.cpp | 1 + .../core/tools/scripts/lib_install/liblzma.sh | 66 ++++ .../install-packages-from-source.sh | 1 + .../ubuntu-focal/install-prebuilt-packages.sh | 1 + .../install-packages-from-source.sh | 1 + .../ubuntu-jammy/install-prebuilt-packages.sh | 1 + 14 files changed, 1071 insertions(+), 2 deletions(-) create mode 100644 components/core/src/clp/streaming_compression/lzma/Compressor.cpp create mode 100644 components/core/src/clp/streaming_compression/lzma/Compressor.hpp create mode 100644 components/core/src/clp/streaming_compression/lzma/Constants.hpp create mode 100644 components/core/src/clp/streaming_compression/lzma/Decompressor.cpp create mode 100644 components/core/src/clp/streaming_compression/lzma/Decompressor.hpp create mode 100755 components/core/tools/scripts/lib_install/liblzma.sh diff --git a/components/core/.clang-format b/components/core/.clang-format index ff65adbae..4d0d3a87c 100644 --- a/components/core/.clang-format +++ b/components/core/.clang-format @@ -4,7 +4,7 @@ IncludeCategories: # NOTE: A header is grouped by first matching regex # Library headers. Update when adding new libraries. # NOTE: clang-format retains leading white-space on a line in violation of the YAML spec. - - Regex: "<(absl|antlr4|archive|boost|bsoncxx|catch2|curl|date|fmt|json|log_surgeon|mongocxx\ + - Regex: "<(absl|antlr4|archive|boost|bsoncxx|catch2|curl|date|fmt|json|log_surgeon|lzma|mongocxx\ |msgpack|mysql|openssl|outcome|regex_utils|simdjson|spdlog|sqlite3|string_utils|yaml-cpp|zstd)" Priority: 3 # C system headers diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index e5c9b06c8..92bb6af19 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -11,13 +11,16 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # Set general compressor set(GENERAL_COMPRESSOR "zstd" CACHE STRING "The general-purpose compressor used as the 2nd-stage compressor") -set_property(CACHE GENERAL_COMPRESSOR PROPERTY STRINGS passthrough zstd) +set_property(CACHE GENERAL_COMPRESSOR PROPERTY STRINGS passthrough zstd lzma) if ("${GENERAL_COMPRESSOR}" STREQUAL "passthrough") add_definitions(-DUSE_PASSTHROUGH_COMPRESSION=1) message(STATUS "Using passthrough compression") elseif ("${GENERAL_COMPRESSOR}" STREQUAL "zstd") add_definitions(-DUSE_ZSTD_COMPRESSION=1) message(STATUS "Using Zstandard compression") +elseif ("${GENERAL_COMPRESSOR}" STREQUAL "lzma") + add_definitions(-DUSE_LZMA_COMPRESSION=1) + message(STATUS "Using Lempel–Ziv–Markov chain Algorithm compression") else() message(SEND_ERROR "GENERAL_COMPRESSOR=${GENERAL_COMPRESSOR} is unimplemented.") endif() @@ -224,6 +227,19 @@ else() message(FATAL_ERROR "Could not find ${CLP_LIBS_STRING} libraries for ZStd") endif() +# Find and setup LZMA Library +# Notice that we don't have support to switch between static and shared libraries. +# TODO: add a script in ./cmake/Modules to resolve .a vs. .so +find_package(LibLZMA REQUIRED) +if(LIBLZMA_FOUND) + message(STATUS "Found LIBLZMA_FOUND ${LIBLZMA_VERSION_STRING}") + message(STATUS "Lzma library location: ${LIBLZMA_LIBRARIES}") +else() + message(FATAL_ERROR "Could not find ${CLP_LIBS_STRING} libraries for LIBLZMA_FOUND") +endif() +include_directories(${LIBLZMA_INCLUDE_DIRS}) +message("LZMA Include Dir: ${LIBLZMA_INCLUDE_DIRS}") + # sqlite dependencies set(sqlite_DYNAMIC_LIBS "dl;m;pthread") include(cmake/Modules/FindLibraryDependencies.cmake) @@ -462,6 +478,11 @@ set(SOURCE_FILES_unitTest src/clp/streaming_compression/Compressor.hpp src/clp/streaming_compression/Constants.hpp src/clp/streaming_compression/Decompressor.hpp + src/clp/streaming_compression/lzma/Compressor.cpp + src/clp/streaming_compression/lzma/Compressor.hpp + src/clp/streaming_compression/lzma/Decompressor.cpp + src/clp/streaming_compression/lzma/Decompressor.hpp + src/clp/streaming_compression/lzma/Constants.hpp src/clp/streaming_compression/passthrough/Compressor.cpp src/clp/streaming_compression/passthrough/Compressor.hpp src/clp/streaming_compression/passthrough/Decompressor.cpp @@ -549,6 +570,7 @@ target_link_libraries(unitTest clp::regex_utils clp::string_utils yaml-cpp::yaml-cpp + ${LIBLZMA_LIBRARIES} ZStd::ZStd ) target_compile_features(unitTest diff --git a/components/core/src/clp/streaming_compression/Constants.hpp b/components/core/src/clp/streaming_compression/Constants.hpp index 4649c2e98..080f3a20b 100644 --- a/components/core/src/clp/streaming_compression/Constants.hpp +++ b/components/core/src/clp/streaming_compression/Constants.hpp @@ -7,6 +7,7 @@ namespace clp::streaming_compression { enum class CompressorType : uint8_t { ZSTD = 0x10, + LZMA = 0x20, Passthrough = 0xFF, }; } // namespace clp::streaming_compression diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp new file mode 100644 index 000000000..f10ec915b --- /dev/null +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -0,0 +1,303 @@ +#include "Compressor.hpp" + +// spdlog +#include + +// Project headers +#include "../../Defs.h" + +// File-scope constants +static constexpr size_t cCompressedStreamBlockBufferSize = 4096; // 4KiB + +namespace streaming_compression::lzma { +Compressor::LzmaOption Compressor::m_option; + +Compressor::Compressor() + : ::streaming_compression::Compressor(CompressorType::LZMA), + m_compression_stream_contains_data(false), + m_compressed_stream_file_writer(nullptr), + m_compression_stream(nullptr) { + m_compressed_stream_block_buffer = std::make_unique(cCompressedStreamBlockBufferSize); + m_compression_stream = new lzma_stream; + memset(m_compression_stream, 0, sizeof(lzma_stream)); +} + +Compressor::~Compressor() { + if (nullptr != m_compression_stream) { + delete m_compression_stream; + } +} + +void Compressor::init_lzma_encoder(lzma_stream* strm) { + lzma_options_lzma options; + if (lzma_lzma_preset(&options, m_option.get_compression_level())) { + SPDLOG_ERROR("Failed to initialize LZMA options."); + throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); + } + options.dict_size = m_option.get_dict_size(); + lzma_filter filters[2]{ + {LZMA_FILTER_LZMA2, &options}, + {LZMA_VLI_UNKNOWN, nullptr}, + }; + + // Initialize the encoder using a preset. Set the integrity to check + // to CRC64, which is the default in the xz command line tool. If + // the .xz file needs to be decompressed with XZ Embedded, use + // LZMA_CHECK_CRC32 instead. + lzma_ret ret = lzma_stream_encoder(strm, filters, LZMA_CHECK_CRC64); + + // Return successfully if the initialization went fine. + if (ret == LZMA_OK) { + return; + } + + // Something went wrong. The possible errors are documented in + // lzma/container.h (src/liblzma/api/lzma/container.h in the source + // package or e.g. /usr/include/lzma/container.h depending on the + // install prefix). + char const* msg; + switch (ret) { + case LZMA_MEM_ERROR: + msg = "Memory allocation failed"; + break; + + case LZMA_OPTIONS_ERROR: + msg = "Specified preset is not supported"; + break; + + case LZMA_UNSUPPORTED_CHECK: + msg = "Specified integrity check is not supported"; + break; + + default: + // This is most likely LZMA_PROG_ERROR indicating a bug in + // this program or in liblzma. It is inconvenient to have a + // separate error message for errors that should be impossible + // to occur, but knowing the error code is important for + // debugging. That's why it is good to print the error code + // at least when there is no good error message to show. + msg = "Unknown error, possibly a bug"; + break; + } + + SPDLOG_ERROR("Error initializing the encoder: {} (error code {})", msg, int(ret)); + throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); +} + +void Compressor::open(FileWriter& file_writer, int compression_level) { + if (nullptr != m_compressed_stream_file_writer) { + throw OperationFailed(ErrorCode_NotReady, __FILENAME__, __LINE__); + } + + if (false == (0 <= compression_level && compression_level <= 9)) { + throw OperationFailed(ErrorCode_Unsupported, __FILENAME__, __LINE__); + } + if (compression_level != m_option.get_compression_level()) { + m_option.set_compression_level(compression_level); + } + + init_lzma_encoder(m_compression_stream); + // Setup compressed stream parameters + m_compression_stream->next_in = nullptr; + m_compression_stream->avail_in = 0; + m_compression_stream->next_out = m_compressed_stream_block_buffer.get(); + m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; + + m_compressed_stream_file_writer = &file_writer; + + m_uncompressed_stream_pos = 0; +} + +void Compressor::close() { + if (nullptr == m_compressed_stream_file_writer) { + throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); + } + + flush_and_close_compression_stream(); + m_compressed_stream_file_writer = nullptr; +} + +void Compressor::write(char const* data, size_t data_length) { + if (nullptr == m_compressed_stream_file_writer) { + throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); + } + + if (0 == data_length) { + // Nothing needs to be done because we do not need to compress anything + return; + } + if (nullptr == data) { + throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); + } + lzma_action action = LZMA_RUN; + m_compression_stream->next_in = reinterpret_cast(const_cast(data)); + m_compression_stream->avail_in = data_length; + + // Compress all data + bool hit_input_eof = false; + while (!hit_input_eof) { + lzma_ret return_value = lzma_code(m_compression_stream, action); + switch (return_value) { + case LZMA_OK: + case LZMA_BUF_ERROR: + break; + case LZMA_STREAM_END: + hit_input_eof = true; + break; + default: + SPDLOG_ERROR("lzma() returned an unexpected value - {}.", int(return_value)); + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); + } + + if (0 == m_compression_stream->avail_in) { + // No more data to compress + break; + } + + // Write output buffer to file if it's full + if (0 == m_compression_stream->avail_out) { + m_compressed_stream_file_writer->write( + reinterpret_cast(m_compressed_stream_block_buffer.get()), + cCompressedStreamBlockBufferSize + ); + m_compression_stream->next_out = m_compressed_stream_block_buffer.get(); + m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; + } + } + + // Write any compressed data + if (m_compression_stream->avail_out < cCompressedStreamBlockBufferSize) { + m_compressed_stream_file_writer->write( + reinterpret_cast(m_compressed_stream_block_buffer.get()), + cCompressedStreamBlockBufferSize - m_compression_stream->avail_out + ); + m_compression_stream->next_out = m_compressed_stream_block_buffer.get(); + m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; + } + + m_compression_stream->next_in = nullptr; + + m_compression_stream_contains_data = true; + m_uncompressed_stream_pos += data_length; +} + +void Compressor::flush() { + if (false == m_compression_stream_contains_data) { + return; + } + // Z_NO_FLUSH - deflate decides how much data to accumulate before producing output + // Z_SYNC_FLUSH - All pending output flushed to output buf and output aligned to byte + // boundary (completes current block and follows it with empty block that is 3 bits plus + // filler to next byte, followed by 4 bytes Z_PARTIAL_FLUSH - Same as Z_SYNC_FLUSH but + // output not aligned to byte boundary (completes current block and follows it with empty + // fixed codes block that is 10 bits long) Z_BLOCK - Same as Z_SYNC_FLUSH but output not + // aligned on a byte boundary and up to 7 bits of current block held to be written + // Z_FULL_FLUSH - Same as Z_SYNC_FLUSH but compression state reset so that decompression can + // restart from this point if the previous compressed data has been damaged Z_FINISH - + // Pending output flushed and deflate returns Z_STREAM_END if there was enough output space, + // or Z_OK or Z_BUF_ERROR if it needs to be called again with more space + // + + bool flush_complete = false; + while (true) { + lzma_ret return_value = lzma_code(m_compression_stream, LZMA_SYNC_FLUSH); + switch (return_value) { + case LZMA_STREAM_END: + flush_complete = true; + break; + case LZMA_OK: + case LZMA_BUF_ERROR: + break; + default: + SPDLOG_ERROR("lzma() returned an unexpected value - {}.", int(return_value)); + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); + } + if (flush_complete) { + break; + } + + // Write output buffer to file if it's full + if (0 == m_compression_stream->avail_out) { + m_compressed_stream_file_writer->write( + reinterpret_cast(m_compressed_stream_block_buffer.get()), + cCompressedStreamBlockBufferSize + ); + m_compression_stream->next_out = m_compressed_stream_block_buffer.get(); + m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; + } + } + + // Write any compressed data + if (m_compression_stream->avail_out < cCompressedStreamBlockBufferSize) { + m_compressed_stream_file_writer->write( + reinterpret_cast(m_compressed_stream_block_buffer.get()), + cCompressedStreamBlockBufferSize - m_compression_stream->avail_out + ); + m_compression_stream->next_out = m_compressed_stream_block_buffer.get(); + m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; + } + + m_compression_stream_contains_data = false; +} + +ErrorCode Compressor::try_get_pos(size_t& pos) const { + if (nullptr == m_compressed_stream_file_writer) { + return ErrorCode_NotInit; + } + + pos = m_uncompressed_stream_pos; + return ErrorCode_Success; +} + +void Compressor::flush_and_close_compression_stream() { + if (nullptr == m_compressed_stream_file_writer) { + throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); + } + + bool flush_complete = false; + while (true) { + lzma_ret return_value = lzma_code(m_compression_stream, LZMA_FINISH); + switch (return_value) { + case LZMA_OK: + case LZMA_BUF_ERROR: + break; + case LZMA_STREAM_END: + flush_complete = true; + break; + default: + // SPDLOG_ERROR("deflate() returned an unexpected value - + // {}.", return_value); + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); + } + if (flush_complete) { + break; + } + + // Write output buffer to file if it's full + if (0 == m_compression_stream->avail_out) { + m_compressed_stream_file_writer->write( + reinterpret_cast(m_compressed_stream_block_buffer.get()), + cCompressedStreamBlockBufferSize + ); + m_compression_stream->next_out = m_compressed_stream_block_buffer.get(); + m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; + } + } + + // Write any compressed data + if (m_compression_stream->avail_out < cCompressedStreamBlockBufferSize) { + m_compressed_stream_file_writer->write( + reinterpret_cast(m_compressed_stream_block_buffer.get()), + cCompressedStreamBlockBufferSize - m_compression_stream->avail_out + ); + m_compression_stream->next_out = m_compressed_stream_block_buffer.get(); + m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; + } + + m_compression_stream_contains_data = false; + + lzma_end(m_compression_stream); + m_compression_stream->avail_out = 0; + m_compression_stream->next_out = nullptr; +} +} // namespace streaming_compression::lzma diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp new file mode 100644 index 000000000..d31c7687e --- /dev/null +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp @@ -0,0 +1,133 @@ +#ifndef STREAMING_COMPRESSION_LZMA_COMPRESSOR_HPP +#define STREAMING_COMPRESSION_LZMA_COMPRESSOR_HPP + +// C++ standard libraries +#include +#include + +// ZLIB library +#include +#include + +// Project headers +#include "../../FileWriter.hpp" +#include "../../TraceableException.hpp" +#include "../Compressor.hpp" +#include "Constants.hpp" + +namespace streaming_compression::lzma { +class Compressor : public ::streaming_compression::Compressor { +public: + // Types + class OperationFailed : public TraceableException { + public: + // Constructors + OperationFailed(ErrorCode error_code, char const* const filename, int line_number) + : TraceableException(error_code, filename, line_number) {} + + // Methods + char const* what() const noexcept override { + return "streaming_compression::gzip::Compressor operation failed"; + } + }; + + class LzmaOption { + public: + LzmaOption() + : m_compression_level{cDefaultCompressionLevel}, + m_dict_size{cDefaultDictionarySize} {} + + auto set_compression_level(int compression_level) -> void { + if (0 > compression_level) { + m_compression_level = 0; + } else if (9 < compression_level) { + m_compression_level = 9; + } else { + m_compression_level = compression_level; + } + } + + auto set_dict_size(uint32_t dict_size) -> void { m_dict_size = dict_size; } + + [[nodiscard]] auto get_compression_level() const -> int { return m_compression_level; } + + [[nodiscard]] auto get_dict_size() const -> uint32_t { return m_dict_size; } + + private: + int m_compression_level; + uint32_t m_dict_size; + }; + + // Constructor + Compressor(); + + // Destructor + ~Compressor(); + + // Explicitly disable copy and move constructor/assignment + Compressor(Compressor const&) = delete; + Compressor& operator=(Compressor const&) = delete; + + // Methods implementing the WriterInterface + /** + * Writes the given data to the compressor + * @param data + * @param data_length + */ + void write(char const* data, size_t data_length) override; + /** + * Writes any internally buffered data to file and ends the current frame + */ + void flush() override; + + /** + * Tries to get the current position of the write head + * @param pos Position of the write head + * @return ErrorCode_NotInit if the compressor is not open + * @return ErrorCode_Success on success + */ + ErrorCode try_get_pos(size_t& pos) const override; + + // Methods implementing the Compressor interface + /** + * Initialize streaming compressor + * @param file_writer + * @param compression_level + */ + void open(FileWriter& file_writer, int compression_level) override; + + /** + * Closes the compressor + */ + void close() override; + + // Methods + static auto set_compression_level(int compression_level) -> void { + m_option.set_compression_level(compression_level); + } + + static auto set_dict_size(uint32_t dict_size) -> void { m_option.set_dict_size(dict_size); } + +private: + /** + * Flushes the stream and closes it + */ + void flush_and_close_compression_stream(); + + static void init_lzma_encoder(lzma_stream* strm); + static LzmaOption m_option; + + // Variables + FileWriter* m_compressed_stream_file_writer; + + // Compressed stream variables + lzma_stream* m_compression_stream; + bool m_compression_stream_contains_data; + + std::unique_ptr m_compressed_stream_block_buffer; + + size_t m_uncompressed_stream_pos; +}; +} // namespace streaming_compression::lzma + +#endif // STREAMING_COMPRESSION_LZMA_COMPRESSOR_HPP diff --git a/components/core/src/clp/streaming_compression/lzma/Constants.hpp b/components/core/src/clp/streaming_compression/lzma/Constants.hpp new file mode 100644 index 000000000..959c09f47 --- /dev/null +++ b/components/core/src/clp/streaming_compression/lzma/Constants.hpp @@ -0,0 +1,15 @@ +#ifndef STREAMING_COMPRESSION_LZMA_CONSTANTS_HPP +#define STREAMING_COMPRESSION_LZMA_CONSTANTS_HPP + +#include + +// C++ libraries +#include +#include + +namespace streaming_compression::lzma { +constexpr int cDefaultCompressionLevel{3}; +constexpr uint32_t cDefaultDictionarySize{LZMA_DICT_SIZE_DEFAULT}; +} // namespace streaming_compression::lzma + +#endif // STREAMING_COMPRESSION_LZMA_CONSTANTS_HPP diff --git a/components/core/src/clp/streaming_compression/lzma/Decompressor.cpp b/components/core/src/clp/streaming_compression/lzma/Decompressor.cpp new file mode 100644 index 000000000..a2ed4d466 --- /dev/null +++ b/components/core/src/clp/streaming_compression/lzma/Decompressor.cpp @@ -0,0 +1,362 @@ +#include "Decompressor.hpp" + +// C++ Standard Libraries +#include + +// Boost libraries +#include + +// spdlog +#include + +// Project headers +#include "../../Defs.h" + +namespace streaming_compression::lzma { +Decompressor::Decompressor() + : ::streaming_compression::Decompressor(CompressorType::LZMA), + m_input_type(InputType::NotInitialized), + m_decompression_stream(nullptr), + m_file_reader(nullptr), + m_file_reader_initial_pos(0), + m_file_read_buffer_length(0), + m_file_read_buffer_capacity(0), + m_decompressed_stream_pos(0), + m_unused_decompressed_stream_block_size(0) { + // Create block to hold unused decompressed data + m_unused_decompressed_stream_block_buffer + = std::make_unique(m_unused_decompressed_stream_block_size); + m_decompression_stream = new lzma_stream; + memset(m_decompression_stream, 0, sizeof(lzma_stream)); +} + +Decompressor::~Decompressor() { + delete m_decompression_stream; +} + +void Decompressor::exact_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) { + auto errorcode = try_read(buf, num_bytes_to_read, num_bytes_read); + if (num_bytes_read != num_bytes_to_read) { + SPDLOG_ERROR("FAILED TO READ EXACTLY {} bytes", num_bytes_to_read); + throw; + } + if (errorcode != ErrorCode_Success) { + SPDLOG_ERROR("FAILED TO READ EXACTLY {} bytes", num_bytes_to_read); + throw; + } +} + +ErrorCode Decompressor::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) { + if (InputType::NotInitialized == m_input_type) { + return ErrorCode_NotInit; + } + if (nullptr == buf) { + return ErrorCode_BadParam; + } + if (0 == num_bytes_to_read) { + return ErrorCode_Success; + } + + num_bytes_read = 0; + + m_decompression_stream->next_out = reinterpret_cast(buf); + m_decompression_stream->avail_out = num_bytes_to_read; + while (true) { + // Check if there's data that can be decompressed + if (0 == m_decompression_stream->avail_in) { + if (InputType::File != m_input_type) { + // if we hit here, there must be something wrong + // we have consumed all data buffer but for some reason it still requires more. + return ErrorCode_EndOfFile; + } else { + auto error_code = m_file_reader->try_read( + m_file_read_buffer.get(), + m_file_read_buffer_capacity, + m_file_read_buffer_length + ); + m_decompression_stream->avail_in = m_file_read_buffer_length; + m_decompression_stream->next_in + = reinterpret_cast(m_file_read_buffer.get()); + if (ErrorCode_Success != error_code) { + if (ErrorCode_EndOfFile == error_code) { + num_bytes_read = num_bytes_to_read - m_decompression_stream->avail_out; + m_decompressed_stream_pos += num_bytes_read; + return ErrorCode_EndOfFile; + } + } + } + } + + lzma_ret return_value = lzma_code(m_decompression_stream, LZMA_RUN); + switch (return_value) { + case LZMA_OK: + case LZMA_BUF_ERROR: + if (0 == m_decompression_stream->avail_out) { + m_decompression_stream->next_out = nullptr; + num_bytes_read = num_bytes_to_read; + m_decompressed_stream_pos += num_bytes_read; + return ErrorCode_Success; + } + // by breaking here, enter the next iteration of decompressing + break; + case LZMA_STREAM_END: + if (0 == m_decompression_stream->avail_out) { + m_decompression_stream->next_out = nullptr; + num_bytes_read = num_bytes_to_read; + m_decompressed_stream_pos += num_bytes_read; + return ErrorCode_Success; + } + SPDLOG_ERROR("streaming_compression::lzma::Decompressor wants to read more but " + "reached end of file"); + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); + case LZMA_MEM_ERROR: + SPDLOG_ERROR("streaming_compression::lzma::Decompressor inflate() ran out of memory" + ); + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); + default: + SPDLOG_ERROR("inflate() returned an unexpected value - {}.", int(return_value)); + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); + } + } +} + +ErrorCode Decompressor::try_seek_from_begin(size_t pos) { + if (InputType::NotInitialized == m_input_type) { + throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); + } + + // Check if we've already decompressed passed the desired position + if (m_decompressed_stream_pos > pos) { + // ZStd has no way for us to seek back to the desired position, so just reset the stream + // to the beginning + reset_stream(); + } + + // We need to fast-forward the decompression stream to decompressed_stream_pos + ErrorCode error; + while (m_decompressed_stream_pos < pos) { + size_t num_bytes_to_decompress = std::min( + m_unused_decompressed_stream_block_size, + pos - m_decompressed_stream_pos + ); + error = try_read_exact_length( + m_unused_decompressed_stream_block_buffer.get(), + num_bytes_to_decompress + ); + if (ErrorCode_Success != error) { + return error; + } + } + + return ErrorCode_Success; +} + +ErrorCode Decompressor::try_get_pos(size_t& pos) { + if (InputType::NotInitialized == m_input_type) { + return ErrorCode_NotInit; + } + + pos = m_decompressed_stream_pos; + return ErrorCode_Success; +} + +void Decompressor::close() { + if (InputType::NotInitialized == m_input_type) { + return; + } + lzma_end(m_decompression_stream); + m_decompression_stream->avail_out = 0; + m_decompression_stream->next_out = nullptr; + if (InputType::MemoryMappedCompressedFile == m_input_type) { + if (m_memory_mapped_compressed_file.is_open()) { + // An existing file is memory mapped by the decompressor + m_memory_mapped_compressed_file.close(); + } + } else if (InputType::File == m_input_type) { + m_file_read_buffer.reset(); + m_file_read_buffer_capacity = 0; + m_file_read_buffer_length = 0; + m_file_reader = nullptr; + } + m_input_type = InputType::NotInitialized; +} + +void Decompressor::init_decoder(lzma_stream* strm) { + // Initialize a .xz decoder. The decoder supports a memory usage limit + // and a set of flags. + // + // The memory usage of the decompressor depends on the settings used + // to compress a .xz file. It can vary from less than a megabyte to + // a few gigabytes, but in practice (at least for now) it rarely + // exceeds 65 MiB because that's how much memory is required to + // decompress files created with "xz -9". Settings requiring more + // memory take extra effort to use and don't (at least for now) + // provide significantly better compression in most cases. + // + // Memory usage limit is useful if it is important that the + // decompressor won't consume gigabytes of memory. The need + // for limiting depends on the application. In this example, + // no memory usage limiting is used. This is done by setting + // the limit to UINT64_MAX. + // + // The .xz format allows concatenating compressed files as is: + // + // echo foo | xz > foobar.xz + // echo bar | xz >> foobar.xz + // + // When decompressing normal standalone .xz files, LZMA_CONCATENATED + // should always be used to support decompression of concatenated + // .xz files. If LZMA_CONCATENATED isn't used, the decoder will stop + // after the first .xz stream. This can be useful when .xz data has + // been embedded inside another file format. + // + // Flags other than LZMA_CONCATENATED are supported too, and can + // be combined with bitwise-or. See lzma/container.h + // (src/liblzma/api/lzma/container.h in the source package or e.g. + // /usr/include/lzma/container.h depending on the install prefix) + // for details. + lzma_ret ret = lzma_stream_decoder(strm, UINT64_MAX, LZMA_CONCATENATED); + + // Return successfully if the initialization went fine. + if (ret == LZMA_OK) { + return; + } + + // Something went wrong. The possible errors are documented in + // lzma/container.h (src/liblzma/api/lzma/container.h in the source + // package or e.g. /usr/include/lzma/container.h depending on the + // install prefix). + // + // Note that LZMA_MEMLIMIT_ERROR is never possible here. If you + // specify a very tiny limit, the error will be delayed until + // the first headers have been parsed by a call to lzma_code(). + char const* msg; + switch (ret) { + case LZMA_MEM_ERROR: + msg = "Memory allocation failed"; + break; + + case LZMA_OPTIONS_ERROR: + msg = "Unsupported decompressor flags"; + break; + + default: + // This is most likely LZMA_PROG_ERROR indicating a bug in + // this program or in liblzma. It is inconvenient to have a + // separate error message for errors that should be impossible + // to occur, but knowing the error code is important for + // debugging. That's why it is good to print the error code + // at least when there is no good error message to show. + msg = "Unknown error, possibly a bug"; + break; + } + + SPDLOG_ERROR("Error initializing the decoder: {} (error code {})", msg, int(ret)); +} + +void Decompressor::open(char const* compressed_data_buf, size_t compressed_data_buf_size) { + if (InputType::NotInitialized != m_input_type) { + throw OperationFailed(ErrorCode_NotReady, __FILENAME__, __LINE__); + } + m_input_type = InputType::CompressedDataBuf; + + // Configure input stream + reset_stream(); + m_decompression_stream->next_in + = reinterpret_cast(const_cast(compressed_data_buf)); + m_decompression_stream->avail_in = compressed_data_buf_size; + m_decompression_stream->next_out = nullptr; + m_decompression_stream->avail_out = 0; +} + +ErrorCode Decompressor::open(std::string const& compressed_file_path) { + if (InputType::NotInitialized != m_input_type) { + throw OperationFailed(ErrorCode_NotReady, __FILENAME__, __LINE__); + } + m_input_type = InputType::MemoryMappedCompressedFile; + + // Create memory mapping for compressed_file_path, use boost read only memory mapped file + boost::system::error_code boost_error_code; + size_t compressed_file_size + = boost::filesystem::file_size(compressed_file_path, boost_error_code); + if (boost_error_code) { + SPDLOG_ERROR( + "streaming_compression::zstd::Decompressor: Unable to obtain file size for " + "'{}' - {}.", + compressed_file_path.c_str(), + boost_error_code.message().c_str() + ); + return ErrorCode_Failure; + } + + boost::iostreams::mapped_file_params memory_map_params; + memory_map_params.path = compressed_file_path; + memory_map_params.flags = boost::iostreams::mapped_file::readonly; + memory_map_params.length = compressed_file_size; + memory_map_params.hint = m_memory_mapped_compressed_file.data( + ); // Try to map it to the same memory location as previous memory mapped file + m_memory_mapped_compressed_file.open(memory_map_params); + if (!m_memory_mapped_compressed_file.is_open()) { + SPDLOG_ERROR( + "streaming_compression::lzma::Decompressor: Unable to memory map the " + "compressed file with path: {}", + compressed_file_path.c_str() + ); + return ErrorCode_Failure; + } + + // Configure input stream + reset_stream(); + m_decompression_stream->next_in + = reinterpret_cast(const_cast(m_memory_mapped_compressed_file.data())); + m_decompression_stream->avail_in = compressed_file_size; + m_decompression_stream->next_out = nullptr; + m_decompression_stream->avail_out = 0; + + return ErrorCode_Success; +} + +void Decompressor::open(FileReader& file_reader, size_t file_read_buffer_capacity) { + if (InputType::NotInitialized != m_input_type) { + throw OperationFailed(ErrorCode_NotReady, __FILENAME__, __LINE__); + } + m_input_type = InputType::File; + + m_file_reader = &file_reader; + m_file_reader_initial_pos = m_file_reader->get_pos(); + + m_file_read_buffer_capacity = file_read_buffer_capacity; + m_file_read_buffer = std::make_unique(m_file_read_buffer_capacity); + m_file_read_buffer_length = 0; + + // Configure input stream + reset_stream(); + m_decompression_stream->next_in = reinterpret_cast(m_file_read_buffer.get()); + m_decompression_stream->avail_in = m_file_read_buffer_length; + m_decompression_stream->next_out = nullptr; + m_decompression_stream->avail_out = 0; +} + +ErrorCode Decompressor::get_decompressed_stream_region( + size_t decompressed_stream_pos, + char* extraction_buf, + size_t extraction_len +) { + auto error_code = try_seek_from_begin(decompressed_stream_pos); + if (ErrorCode_Success != error_code) { + return error_code; + } + + error_code = try_read_exact_length(extraction_buf, extraction_len); + return error_code; +} + +void Decompressor::reset_stream() { + if (InputType::File == m_input_type) { + m_file_reader->seek_from_begin(m_file_reader_initial_pos); + m_file_read_buffer_length = 0; + } + m_decompressed_stream_pos = 0; + init_decoder(m_decompression_stream); +} +} // namespace streaming_compression::lzma diff --git a/components/core/src/clp/streaming_compression/lzma/Decompressor.hpp b/components/core/src/clp/streaming_compression/lzma/Decompressor.hpp new file mode 100644 index 000000000..996663e44 --- /dev/null +++ b/components/core/src/clp/streaming_compression/lzma/Decompressor.hpp @@ -0,0 +1,162 @@ +#ifndef STREAMING_COMPRESSION_LZMA_DECOMPRESSOR_HPP +#define STREAMING_COMPRESSION_LZMA_DECOMPRESSOR_HPP + +// C++ standard libraries +#include +#include + +// ZLIB library +#include +#include +// Boost libraries +#include + +// Project headers +#include "../../FileReader.hpp" +#include "../../TraceableException.hpp" +#include "../Decompressor.hpp" + +namespace streaming_compression::lzma { +class Decompressor : public ::streaming_compression::Decompressor { +public: + // Types + class OperationFailed : public TraceableException { + public: + // Constructors + OperationFailed(ErrorCode error_code, char const* const filename, int line_number) + : TraceableException(error_code, filename, line_number) {} + + // Methods + char const* what() const noexcept override { + return "streaming_compression::lzma::Decompressor operation failed"; + } + }; + + // Constructor + Decompressor(); + + // Destructor + ~Decompressor(); + + // Explicitly disable copy and move constructor/assignment + Decompressor(Decompressor const&) = delete; + Decompressor& operator=(Decompressor const&) = delete; + + // Methods implementing the ReaderInterface + /** + * Tries to read up to a given number of bytes from the decompressor + * @param buf + * @param num_bytes_to_read The number of bytes to try and read + * @param num_bytes_read The actual number of bytes read + * @return Same as FileReader::try_read if the decompressor is attached to a file + * @return ErrorCode_NotInit if the decompressor is not open + * @return ErrorCode_BadParam if buf is invalid + * @return ErrorCode_EndOfFile on EOF + * @return ErrorCode_Failure on decompression failure + * @return ErrorCode_Success on success + */ + ErrorCode try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) override; + + /** + */ + void exact_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read); + + /** + * Tries to seek from the beginning to the given position + * @param pos + * @return ErrorCode_NotInit if the decompressor is not open + * @return Same as ReaderInterface::try_read_exact_length + * @return ErrorCode_Success on success + */ + ErrorCode try_seek_from_begin(size_t pos) override; + /** + * Tries to get the current position of the read head + * @param pos Position of the read head in the file + * @return ErrorCode_NotInit if the decompressor is not open + * @return ErrorCode_Success on success + */ + ErrorCode try_get_pos(size_t& pos) override; + + // Methods implementing the Decompressor interface + void close() override; + /** + * Decompresses and copies the range of uncompressed data described by + * decompressed_stream_pos and extraction_len into extraction_buf + * @param decompressed_stream_pos + * @param extraction_buf + * @param extraction_len + * @return Same as streaming_compression::zstd::Decompressor::try_seek_from_begin + * @return Same as ReaderInterface::try_read_exact_length + */ + ErrorCode get_decompressed_stream_region( + size_t decompressed_stream_pos, + char* extraction_buf, + size_t extraction_len + ) override; + + // Methods + /*** + * Initialize streaming decompressor to decompress from the specified compressed data buffer + * @param compressed_data_buf + * @param compressed_data_buf_size + */ + void open(char const* compressed_data_buf, size_t compressed_data_buf_size) override; + + /*** + * Initialize streaming decompressor to decompress from a compressed file specified by the + * given path + * @param compressed_file_path + * @param decompressed_stream_block_size + * @return ErrorCode_Failure if the provided path cannot be memory mapped + * @return ErrorCode_Success on success + */ + ErrorCode open(std::string const& compressed_file_path); + + /** + * Initializes the decompressor to decompress from an open file + * @param file_reader + * @param file_read_buffer_capacity The maximum amount of data to read from a file at a time + */ + void open(FileReader& file_reader, size_t file_read_buffer_capacity) override; + +private: + // Enum class + enum class InputType { + NotInitialized, // Note: do nothing but generate an error to prevent this required + // parameter is not initialized properly + CompressedDataBuf, + MemoryMappedCompressedFile, + File + }; + + // Methods + /** + * Reset streaming decompression state so it will start decompressing from the beginning of + * the stream afterwards + */ + void reset_stream(); + + void init_decoder(lzma_stream* strm); + + // Variables + InputType m_input_type; + + // Compressed stream variables + lzma_stream* m_decompression_stream{nullptr}; + + boost::iostreams::mapped_file_source m_memory_mapped_compressed_file; + FileReader* m_file_reader; + size_t m_file_reader_initial_pos; + std::unique_ptr m_file_read_buffer; + size_t m_file_read_buffer_length; + size_t m_file_read_buffer_capacity; + + size_t m_decompressed_stream_pos; + size_t m_unused_decompressed_stream_block_size; + std::unique_ptr m_unused_decompressed_stream_block_buffer; + + char const* m_compressed_stream_block; + size_t m_compressed_stream_block_size; +}; +} // namespace streaming_compression::lzma +#endif // STREAMING_COMPRESSION_LZMA_DECOMPRESSOR_HPP diff --git a/components/core/tests/test-StreamingCompression.cpp b/components/core/tests/test-StreamingCompression.cpp index 0fbae9e3a..d632510fc 100644 --- a/components/core/tests/test-StreamingCompression.cpp +++ b/components/core/tests/test-StreamingCompression.cpp @@ -15,6 +15,7 @@ #include "../src/clp/ReadOnlyMemoryMappedFile.hpp" #include "../src/clp/streaming_compression/Compressor.hpp" #include "../src/clp/streaming_compression/Decompressor.hpp" +#include "../src/clp/streaming_compression/lzma/Compressor.hpp" #include "../src/clp/streaming_compression/passthrough/Compressor.hpp" #include "../src/clp/streaming_compression/passthrough/Decompressor.hpp" #include "../src/clp/streaming_compression/zstd/Compressor.hpp" diff --git a/components/core/tools/scripts/lib_install/liblzma.sh b/components/core/tools/scripts/lib_install/liblzma.sh new file mode 100755 index 000000000..1145b2646 --- /dev/null +++ b/components/core/tools/scripts/lib_install/liblzma.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +# Dependencies: +# - curl +# - make +# - gcc +# NOTE: Dependencies should be installed outside the script to allow the script to be largely distro-agnostic + +# Exit on any error +set -e + +# Error on undefined variable +set -u + +cUsage="Usage: ${BASH_SOURCE[0]} [ <.deb output directory>]" +if [ "$#" -lt 1 ] ; then + echo $cUsage + exit +fi +version=$1 + +package_name=liblzma +temp_dir=/tmp/${package_name}-installation +deb_output_dir=${temp_dir} +if [[ "$#" -gt 1 ]] ; then + deb_output_dir="$(readlink -f "$2")" + if [ ! -d ${deb_output_dir} ] ; then + echo "${deb_output_dir} does not exist or is not a directory" + exit + fi +fi + +# Note: we won't check if the package already exists + +echo "Checking for elevated privileges..." +privileged_command_prefix="" +if [ ${EUID:-$(id -u)} -ne 0 ] ; then + sudo echo "Script can elevate privileges." + privileged_command_prefix="${privileged_command_prefix} sudo" +fi + +# Get number of cpu cores +num_cpus=$(grep -c ^processor /proc/cpuinfo) + +# Download +mkdir -p $temp_dir +cd $temp_dir +extracted_dir=${temp_dir}/xz-${version} +if [ ! -e ${extracted_dir} ] ; then + tar_filename=xz-${version}.tar.gz + if [ ! -e ${tar_filename} ] ; then + curl -fsSL https://github.com/tukaani-project/xz/releases/download/v${version}/${tar_filename} -o ${tar_filename} + fi + tar -xf ${tar_filename} +fi + +# Build +cd ${extracted_dir} +mkdir build +cd build +cmake -DCMAKE_POSITION_INDEPENDENT_CODE=TRUE ../ +make -j${num_cpus} +make install liblzma + +# Clean up +rm -rf $temp_dir diff --git a/components/core/tools/scripts/lib_install/ubuntu-focal/install-packages-from-source.sh b/components/core/tools/scripts/lib_install/ubuntu-focal/install-packages-from-source.sh index 1e21314cc..10a2b0482 100755 --- a/components/core/tools/scripts/lib_install/ubuntu-focal/install-packages-from-source.sh +++ b/components/core/tools/scripts/lib_install/ubuntu-focal/install-packages-from-source.sh @@ -14,6 +14,7 @@ lib_install_scripts_dir=$script_dir/.. "$lib_install_scripts_dir"/fmtlib.sh 8.0.1 "$lib_install_scripts_dir"/libarchive.sh 3.5.1 +"$lib_install_scripts_dir"/liblzma.sh 5.4.6 "$lib_install_scripts_dir"/lz4.sh 1.8.2 "$lib_install_scripts_dir"/mongocxx.sh 3.10.2 "$lib_install_scripts_dir"/msgpack.sh 7.0.0 diff --git a/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh b/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh index 706674764..f1e2ee4ff 100755 --- a/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh +++ b/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh @@ -20,6 +20,7 @@ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ libcurl4 \ libcurl4-openssl-dev \ libmariadb-dev \ + liblzma-dev \ libssl-dev \ make \ openjdk-11-jdk \ diff --git a/components/core/tools/scripts/lib_install/ubuntu-jammy/install-packages-from-source.sh b/components/core/tools/scripts/lib_install/ubuntu-jammy/install-packages-from-source.sh index 7799c9ba5..97aaf7093 100755 --- a/components/core/tools/scripts/lib_install/ubuntu-jammy/install-packages-from-source.sh +++ b/components/core/tools/scripts/lib_install/ubuntu-jammy/install-packages-from-source.sh @@ -11,6 +11,7 @@ lib_install_scripts_dir=$script_dir/.. "$lib_install_scripts_dir"/fmtlib.sh 8.0.1 "$lib_install_scripts_dir"/libarchive.sh 3.5.1 +"$lib_install_scripts_dir"/liblzma.sh 5.4.6 "$lib_install_scripts_dir"/lz4.sh 1.8.2 "$lib_install_scripts_dir"/mongocxx.sh 3.10.2 "$lib_install_scripts_dir"/msgpack.sh 7.0.0 diff --git a/components/core/tools/scripts/lib_install/ubuntu-jammy/install-prebuilt-packages.sh b/components/core/tools/scripts/lib_install/ubuntu-jammy/install-prebuilt-packages.sh index 92d965b9b..4911a6a98 100755 --- a/components/core/tools/scripts/lib_install/ubuntu-jammy/install-prebuilt-packages.sh +++ b/components/core/tools/scripts/lib_install/ubuntu-jammy/install-prebuilt-packages.sh @@ -20,6 +20,7 @@ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ libcurl4 \ libcurl4-openssl-dev \ libmariadb-dev \ + liblzma-dev \ libssl-dev \ openjdk-11-jdk \ pkg-config \ From d5af274f119ad8bb2d5b3bbc9e1e97bca282be7a Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Mon, 25 Nov 2024 12:49:13 -0500 Subject: [PATCH 02/35] Make unit test pass --- .../streaming_compression/lzma/Compressor.cpp | 72 +++++++--------- .../streaming_compression/lzma/Compressor.hpp | 84 +++++++++++-------- .../streaming_compression/lzma/Constants.hpp | 12 +-- .../lzma/Decompressor.cpp | 6 +- .../lzma/Decompressor.hpp | 15 ++-- .../core/tests/test-StreamingCompression.cpp | 6 ++ 6 files changed, 103 insertions(+), 92 deletions(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index f10ec915b..7bb13e5d3 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -1,34 +1,22 @@ #include "Compressor.hpp" -// spdlog #include +// Compression libraries +#include +#include + // Project headers #include "../../Defs.h" -// File-scope constants -static constexpr size_t cCompressedStreamBlockBufferSize = 4096; // 4KiB - -namespace streaming_compression::lzma { +namespace clp::streaming_compression::lzma { Compressor::LzmaOption Compressor::m_option; -Compressor::Compressor() - : ::streaming_compression::Compressor(CompressorType::LZMA), - m_compression_stream_contains_data(false), - m_compressed_stream_file_writer(nullptr), - m_compression_stream(nullptr) { - m_compressed_stream_block_buffer = std::make_unique(cCompressedStreamBlockBufferSize); - m_compression_stream = new lzma_stream; - memset(m_compression_stream, 0, sizeof(lzma_stream)); -} - -Compressor::~Compressor() { - if (nullptr != m_compression_stream) { - delete m_compression_stream; - } +Compressor::Compressor() { + memset(m_compression_stream.get(), 0, sizeof(LzmaStream)); } -void Compressor::init_lzma_encoder(lzma_stream* strm) { +void Compressor::init_lzma_encoder(LzmaStream* strm) { lzma_options_lzma options; if (lzma_lzma_preset(&options, m_option.get_compression_level())) { SPDLOG_ERROR("Failed to initialize LZMA options."); @@ -44,10 +32,10 @@ void Compressor::init_lzma_encoder(lzma_stream* strm) { // to CRC64, which is the default in the xz command line tool. If // the .xz file needs to be decompressed with XZ Embedded, use // LZMA_CHECK_CRC32 instead. - lzma_ret ret = lzma_stream_encoder(strm, filters, LZMA_CHECK_CRC64); + auto const ret = lzma_stream_encoder(strm, filters, LZMA_CHECK_CRC64); // Return successfully if the initialization went fine. - if (ret == LZMA_OK) { + if (LZMA_OK == ret) { return; } @@ -96,12 +84,12 @@ void Compressor::open(FileWriter& file_writer, int compression_level) { m_option.set_compression_level(compression_level); } - init_lzma_encoder(m_compression_stream); + init_lzma_encoder(m_compression_stream.get()); // Setup compressed stream parameters m_compression_stream->next_in = nullptr; m_compression_stream->avail_in = 0; - m_compression_stream->next_out = m_compressed_stream_block_buffer.get(); - m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; + m_compression_stream->next_out = m_compressed_stream_block_buffer.data(); + m_compression_stream->avail_out = m_compressed_stream_block_buffer.size(); m_compressed_stream_file_writer = &file_writer; @@ -136,7 +124,7 @@ void Compressor::write(char const* data, size_t data_length) { // Compress all data bool hit_input_eof = false; while (!hit_input_eof) { - lzma_ret return_value = lzma_code(m_compression_stream, action); + auto const return_value = lzma_code(m_compression_stream.get(), action); switch (return_value) { case LZMA_OK: case LZMA_BUF_ERROR: @@ -157,10 +145,10 @@ void Compressor::write(char const* data, size_t data_length) { // Write output buffer to file if it's full if (0 == m_compression_stream->avail_out) { m_compressed_stream_file_writer->write( - reinterpret_cast(m_compressed_stream_block_buffer.get()), + reinterpret_cast(m_compressed_stream_block_buffer.data()), cCompressedStreamBlockBufferSize ); - m_compression_stream->next_out = m_compressed_stream_block_buffer.get(); + m_compression_stream->next_out = m_compressed_stream_block_buffer.data(); m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; } } @@ -168,10 +156,10 @@ void Compressor::write(char const* data, size_t data_length) { // Write any compressed data if (m_compression_stream->avail_out < cCompressedStreamBlockBufferSize) { m_compressed_stream_file_writer->write( - reinterpret_cast(m_compressed_stream_block_buffer.get()), + reinterpret_cast(m_compressed_stream_block_buffer.data()), cCompressedStreamBlockBufferSize - m_compression_stream->avail_out ); - m_compression_stream->next_out = m_compressed_stream_block_buffer.get(); + m_compression_stream->next_out = m_compressed_stream_block_buffer.data(); m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; } @@ -200,7 +188,7 @@ void Compressor::flush() { bool flush_complete = false; while (true) { - lzma_ret return_value = lzma_code(m_compression_stream, LZMA_SYNC_FLUSH); + auto const return_value = lzma_code(m_compression_stream.get(), LZMA_SYNC_FLUSH); switch (return_value) { case LZMA_STREAM_END: flush_complete = true; @@ -219,10 +207,10 @@ void Compressor::flush() { // Write output buffer to file if it's full if (0 == m_compression_stream->avail_out) { m_compressed_stream_file_writer->write( - reinterpret_cast(m_compressed_stream_block_buffer.get()), + reinterpret_cast(m_compressed_stream_block_buffer.data()), cCompressedStreamBlockBufferSize ); - m_compression_stream->next_out = m_compressed_stream_block_buffer.get(); + m_compression_stream->next_out = m_compressed_stream_block_buffer.data(); m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; } } @@ -230,10 +218,10 @@ void Compressor::flush() { // Write any compressed data if (m_compression_stream->avail_out < cCompressedStreamBlockBufferSize) { m_compressed_stream_file_writer->write( - reinterpret_cast(m_compressed_stream_block_buffer.get()), + reinterpret_cast(m_compressed_stream_block_buffer.data()), cCompressedStreamBlockBufferSize - m_compression_stream->avail_out ); - m_compression_stream->next_out = m_compressed_stream_block_buffer.get(); + m_compression_stream->next_out = m_compressed_stream_block_buffer.data(); m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; } @@ -256,7 +244,7 @@ void Compressor::flush_and_close_compression_stream() { bool flush_complete = false; while (true) { - lzma_ret return_value = lzma_code(m_compression_stream, LZMA_FINISH); + lzma_ret return_value = lzma_code(m_compression_stream.get(), LZMA_FINISH); switch (return_value) { case LZMA_OK: case LZMA_BUF_ERROR: @@ -276,10 +264,10 @@ void Compressor::flush_and_close_compression_stream() { // Write output buffer to file if it's full if (0 == m_compression_stream->avail_out) { m_compressed_stream_file_writer->write( - reinterpret_cast(m_compressed_stream_block_buffer.get()), + reinterpret_cast(m_compressed_stream_block_buffer.data()), cCompressedStreamBlockBufferSize ); - m_compression_stream->next_out = m_compressed_stream_block_buffer.get(); + m_compression_stream->next_out = m_compressed_stream_block_buffer.data(); m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; } } @@ -287,17 +275,17 @@ void Compressor::flush_and_close_compression_stream() { // Write any compressed data if (m_compression_stream->avail_out < cCompressedStreamBlockBufferSize) { m_compressed_stream_file_writer->write( - reinterpret_cast(m_compressed_stream_block_buffer.get()), + reinterpret_cast(m_compressed_stream_block_buffer.data()), cCompressedStreamBlockBufferSize - m_compression_stream->avail_out ); - m_compression_stream->next_out = m_compressed_stream_block_buffer.get(); + m_compression_stream->next_out = m_compressed_stream_block_buffer.data(); m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; } m_compression_stream_contains_data = false; - lzma_end(m_compression_stream); + lzma_end(m_compression_stream.get()); m_compression_stream->avail_out = 0; m_compression_stream->next_out = nullptr; } -} // namespace streaming_compression::lzma +} // namespace clp::streaming_compression::lzma diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp index d31c7687e..53f82b139 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp @@ -1,22 +1,22 @@ -#ifndef STREAMING_COMPRESSION_LZMA_COMPRESSOR_HPP -#define STREAMING_COMPRESSION_LZMA_COMPRESSOR_HPP +#ifndef CLP_STREAMING_COMPRESSION_LZMA_COMPRESSOR_HPP +#define CLP_STREAMING_COMPRESSION_LZMA_COMPRESSOR_HPP -// C++ standard libraries +#include +#include #include -#include -// ZLIB library #include -#include +#include -// Project headers +#include "../../Array.hpp" +#include "../../ErrorCode.hpp" #include "../../FileWriter.hpp" #include "../../TraceableException.hpp" #include "../Compressor.hpp" #include "Constants.hpp" -namespace streaming_compression::lzma { -class Compressor : public ::streaming_compression::Compressor { +namespace clp::streaming_compression::lzma { +class Compressor : public ::clp::streaming_compression::Compressor { public: // Types class OperationFailed : public TraceableException { @@ -26,8 +26,8 @@ class Compressor : public ::streaming_compression::Compressor { : TraceableException(error_code, filename, line_number) {} // Methods - char const* what() const noexcept override { - return "streaming_compression::gzip::Compressor operation failed"; + [[nodiscard]] auto what() const noexcept -> char const* override { + return "streaming_compression::lzma::Compressor operation failed"; } }; @@ -38,10 +38,10 @@ class Compressor : public ::streaming_compression::Compressor { m_dict_size{cDefaultDictionarySize} {} auto set_compression_level(int compression_level) -> void { - if (0 > compression_level) { - m_compression_level = 0; - } else if (9 < compression_level) { - m_compression_level = 9; + if (compression_level < cMinCompressionLevel) { + m_compression_level = cMinCompressionLevel; + } else if (compression_level > cMaxCompressionLevel) { + m_compression_level = cMaxCompressionLevel; } else { m_compression_level = compression_level; } @@ -62,11 +62,15 @@ class Compressor : public ::streaming_compression::Compressor { Compressor(); // Destructor - ~Compressor(); + ~Compressor() override = default; - // Explicitly disable copy and move constructor/assignment + // Delete copy constructor and assignment operator Compressor(Compressor const&) = delete; - Compressor& operator=(Compressor const&) = delete; + auto operator=(Compressor const&) -> Compressor& = delete; + + // Default move constructor and assignment operator + Compressor(Compressor&&) noexcept = default; + auto operator=(Compressor&&) noexcept -> Compressor& = default; // Methods implementing the WriterInterface /** @@ -74,11 +78,12 @@ class Compressor : public ::streaming_compression::Compressor { * @param data * @param data_length */ - void write(char const* data, size_t data_length) override; + auto write(char const* data, size_t data_length) -> void override; + /** * Writes any internally buffered data to file and ends the current frame */ - void flush() override; + auto flush() -> void override; /** * Tries to get the current position of the write head @@ -86,20 +91,28 @@ class Compressor : public ::streaming_compression::Compressor { * @return ErrorCode_NotInit if the compressor is not open * @return ErrorCode_Success on success */ - ErrorCode try_get_pos(size_t& pos) const override; + auto try_get_pos(size_t& pos) const -> ErrorCode override; + + /** + * Closes the compressor + */ + auto close() -> void override; // Methods implementing the Compressor interface /** - * Initialize streaming compressor + * Initializes the compression stream with the default compression level * @param file_writer - * @param compression_level */ - void open(FileWriter& file_writer, int compression_level) override; + auto open(FileWriter& file_writer) -> void override { + this->open(file_writer, cDefaultCompressionLevel); + } /** - * Closes the compressor + * Initializes the compression stream with the given compression level + * @param file_writer + * @param compression_level */ - void close() override; + auto open(FileWriter& file_writer, int compression_level) -> void; // Methods static auto set_compression_level(int compression_level) -> void { @@ -109,25 +122,28 @@ class Compressor : public ::streaming_compression::Compressor { static auto set_dict_size(uint32_t dict_size) -> void { m_option.set_dict_size(dict_size); } private: + using LzmaStream = lzma_stream; + /** * Flushes the stream and closes it */ void flush_and_close_compression_stream(); - static void init_lzma_encoder(lzma_stream* strm); + static void init_lzma_encoder(LzmaStream* strm); static LzmaOption m_option; + static constexpr size_t cCompressedStreamBlockBufferSize{4096}; // 4KiB // Variables - FileWriter* m_compressed_stream_file_writer; + FileWriter* m_compressed_stream_file_writer{nullptr}; // Compressed stream variables - lzma_stream* m_compression_stream; - bool m_compression_stream_contains_data; + std::unique_ptr m_compression_stream{std::make_unique()}; + bool m_compression_stream_contains_data{false}; - std::unique_ptr m_compressed_stream_block_buffer; + Array m_compressed_stream_block_buffer{cCompressedStreamBlockBufferSize}; - size_t m_uncompressed_stream_pos; + size_t m_uncompressed_stream_pos{0}; }; -} // namespace streaming_compression::lzma +} // namespace clp::streaming_compression::lzma -#endif // STREAMING_COMPRESSION_LZMA_COMPRESSOR_HPP +#endif // CLP_STREAMING_COMPRESSION_LZMA_COMPRESSOR_HPP diff --git a/components/core/src/clp/streaming_compression/lzma/Constants.hpp b/components/core/src/clp/streaming_compression/lzma/Constants.hpp index 959c09f47..4e261187a 100644 --- a/components/core/src/clp/streaming_compression/lzma/Constants.hpp +++ b/components/core/src/clp/streaming_compression/lzma/Constants.hpp @@ -1,15 +1,15 @@ #ifndef STREAMING_COMPRESSION_LZMA_CONSTANTS_HPP #define STREAMING_COMPRESSION_LZMA_CONSTANTS_HPP -#include - -// C++ libraries -#include #include -namespace streaming_compression::lzma { +#include + +namespace clp::streaming_compression::lzma { constexpr int cDefaultCompressionLevel{3}; +constexpr int cMinCompressionLevel{0}; +constexpr int cMaxCompressionLevel{9}; constexpr uint32_t cDefaultDictionarySize{LZMA_DICT_SIZE_DEFAULT}; -} // namespace streaming_compression::lzma +} // namespace clp::streaming_compression::lzma #endif // STREAMING_COMPRESSION_LZMA_CONSTANTS_HPP diff --git a/components/core/src/clp/streaming_compression/lzma/Decompressor.cpp b/components/core/src/clp/streaming_compression/lzma/Decompressor.cpp index a2ed4d466..b6a10b418 100644 --- a/components/core/src/clp/streaming_compression/lzma/Decompressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Decompressor.cpp @@ -12,9 +12,9 @@ // Project headers #include "../../Defs.h" -namespace streaming_compression::lzma { +namespace clp::streaming_compression::lzma { Decompressor::Decompressor() - : ::streaming_compression::Decompressor(CompressorType::LZMA), + : ::clp::streaming_compression::Decompressor(CompressorType::LZMA), m_input_type(InputType::NotInitialized), m_decompression_stream(nullptr), m_file_reader(nullptr), @@ -359,4 +359,4 @@ void Decompressor::reset_stream() { m_decompressed_stream_pos = 0; init_decoder(m_decompression_stream); } -} // namespace streaming_compression::lzma +} // namespace clp::streaming_compression::lzma diff --git a/components/core/src/clp/streaming_compression/lzma/Decompressor.hpp b/components/core/src/clp/streaming_compression/lzma/Decompressor.hpp index 996663e44..5e90f5942 100644 --- a/components/core/src/clp/streaming_compression/lzma/Decompressor.hpp +++ b/components/core/src/clp/streaming_compression/lzma/Decompressor.hpp @@ -1,13 +1,14 @@ -#ifndef STREAMING_COMPRESSION_LZMA_DECOMPRESSOR_HPP -#define STREAMING_COMPRESSION_LZMA_DECOMPRESSOR_HPP +#ifndef CLP_STREAMING_COMPRESSION_LZMA_DECOMPRESSOR_HPP +#define CLP_STREAMING_COMPRESSION_LZMA_DECOMPRESSOR_HPP // C++ standard libraries #include #include // ZLIB library -#include #include + +#include // Boost libraries #include @@ -16,8 +17,8 @@ #include "../../TraceableException.hpp" #include "../Decompressor.hpp" -namespace streaming_compression::lzma { -class Decompressor : public ::streaming_compression::Decompressor { +namespace clp::streaming_compression::lzma { +class Decompressor : public ::clp::streaming_compression::Decompressor { public: // Types class OperationFailed : public TraceableException { @@ -158,5 +159,5 @@ class Decompressor : public ::streaming_compression::Decompressor { char const* m_compressed_stream_block; size_t m_compressed_stream_block_size; }; -} // namespace streaming_compression::lzma -#endif // STREAMING_COMPRESSION_LZMA_DECOMPRESSOR_HPP +} // namespace clp::streaming_compression::lzma +#endif // CLP_STREAMING_COMPRESSION_LZMA_DECOMPRESSOR_HPP diff --git a/components/core/tests/test-StreamingCompression.cpp b/components/core/tests/test-StreamingCompression.cpp index d632510fc..d58d4c1ce 100644 --- a/components/core/tests/test-StreamingCompression.cpp +++ b/components/core/tests/test-StreamingCompression.cpp @@ -16,6 +16,7 @@ #include "../src/clp/streaming_compression/Compressor.hpp" #include "../src/clp/streaming_compression/Decompressor.hpp" #include "../src/clp/streaming_compression/lzma/Compressor.hpp" +#include "../src/clp/streaming_compression/lzma/Decompressor.hpp" #include "../src/clp/streaming_compression/passthrough/Compressor.hpp" #include "../src/clp/streaming_compression/passthrough/Decompressor.hpp" #include "../src/clp/streaming_compression/zstd/Compressor.hpp" @@ -56,6 +57,11 @@ TEST_CASE("StreamingCompression", "[StreamingCompression]") { decompressor = std::make_unique(); } + SECTION("LZMA compression") { + compressor = std::make_unique(); + decompressor = std::make_unique(); + } + // Initialize buffers Array uncompressed_buffer{cBufferSize}; for (size_t i{0}; i < cBufferSize; ++i) { From b94ca2695d4ebaf7c79a8ac3e31b94eae1e52e16 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Tue, 26 Nov 2024 03:21:38 -0500 Subject: [PATCH 03/35] Refactor lzma compressor to group common functionalities into helplers --- .../streaming_compression/lzma/Compressor.cpp | 210 ++++++------------ .../streaming_compression/lzma/Compressor.hpp | 56 ++--- 2 files changed, 84 insertions(+), 182 deletions(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index 7bb13e5d3..74a59ebca 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -1,38 +1,40 @@ #include "Compressor.hpp" -#include +#include +#include +#include +#include -// Compression libraries #include -#include +#include -// Project headers -#include "../../Defs.h" +#include "../../ErrorCode.hpp" +#include "../../FileWriter.hpp" +#include "../../TraceableException.hpp" +#include "../../type_utils.hpp" +#include "Constants.hpp" namespace clp::streaming_compression::lzma { -Compressor::LzmaOption Compressor::m_option; - -Compressor::Compressor() { - memset(m_compression_stream.get(), 0, sizeof(LzmaStream)); -} +using clp::size_checked_pointer_cast; -void Compressor::init_lzma_encoder(LzmaStream* strm) { - lzma_options_lzma options; - if (lzma_lzma_preset(&options, m_option.get_compression_level())) { +auto Compressor::init_lzma_encoder(LzmaStream* strm, int compression_level, size_t dict_size) + -> void { + LzmaOptionsLzma options; + if (0 != lzma_lzma_preset(&options, compression_level)) { SPDLOG_ERROR("Failed to initialize LZMA options."); throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } - options.dict_size = m_option.get_dict_size(); - lzma_filter filters[2]{ - {LZMA_FILTER_LZMA2, &options}, - {LZMA_VLI_UNKNOWN, nullptr}, - }; + options.dict_size = dict_size; + std::array filters{{ + {.id = LZMA_FILTER_LZMA2, .options = &options}, + {.id = LZMA_VLI_UNKNOWN, .options = nullptr}, + }}; // Initialize the encoder using a preset. Set the integrity to check // to CRC64, which is the default in the xz command line tool. If // the .xz file needs to be decompressed with XZ Embedded, use // LZMA_CHECK_CRC32 instead. - auto const ret = lzma_stream_encoder(strm, filters, LZMA_CHECK_CRC64); + auto const ret{lzma_stream_encoder(strm, filters.data(), LZMA_CHECK_CRC64)}; // Return successfully if the initialization went fine. if (LZMA_OK == ret) { @@ -43,7 +45,7 @@ void Compressor::init_lzma_encoder(LzmaStream* strm) { // lzma/container.h (src/liblzma/api/lzma/container.h in the source // package or e.g. /usr/include/lzma/container.h depending on the // install prefix). - char const* msg; + char const* msg{nullptr}; switch (ret) { case LZMA_MEM_ERROR: msg = "Memory allocation failed"; @@ -68,23 +70,21 @@ void Compressor::init_lzma_encoder(LzmaStream* strm) { break; } - SPDLOG_ERROR("Error initializing the encoder: {} (error code {})", msg, int(ret)); + SPDLOG_ERROR("Error initializing the encoder: {} (error code {})", msg, static_cast(ret)); throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } -void Compressor::open(FileWriter& file_writer, int compression_level) { +auto Compressor::open(FileWriter& file_writer, int compression_level) -> void { if (nullptr != m_compressed_stream_file_writer) { throw OperationFailed(ErrorCode_NotReady, __FILENAME__, __LINE__); } - if (false == (0 <= compression_level && compression_level <= 9)) { + if (compression_level < cMinCompressionLevel || compression_level > cMaxCompressionLevel) { throw OperationFailed(ErrorCode_Unsupported, __FILENAME__, __LINE__); } - if (compression_level != m_option.get_compression_level()) { - m_option.set_compression_level(compression_level); - } - init_lzma_encoder(m_compression_stream.get()); + memset(m_compression_stream.get(), 0, sizeof(LzmaStream)); + init_lzma_encoder(m_compression_stream.get(), compression_level, m_dict_size); // Setup compressed stream parameters m_compression_stream->next_in = nullptr; m_compression_stream->avail_in = 0; @@ -96,7 +96,7 @@ void Compressor::open(FileWriter& file_writer, int compression_level) { m_uncompressed_stream_pos = 0; } -void Compressor::close() { +auto Compressor::close() -> void { if (nullptr == m_compressed_stream_file_writer) { throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); } @@ -105,7 +105,7 @@ void Compressor::close() { m_compressed_stream_file_writer = nullptr; } -void Compressor::write(char const* data, size_t data_length) { +auto Compressor::write(char const* data, size_t data_length) -> void { if (nullptr == m_compressed_stream_file_writer) { throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); } @@ -114,54 +114,15 @@ void Compressor::write(char const* data, size_t data_length) { // Nothing needs to be done because we do not need to compress anything return; } + if (nullptr == data) { throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } - lzma_action action = LZMA_RUN; - m_compression_stream->next_in = reinterpret_cast(const_cast(data)); - m_compression_stream->avail_in = data_length; - - // Compress all data - bool hit_input_eof = false; - while (!hit_input_eof) { - auto const return_value = lzma_code(m_compression_stream.get(), action); - switch (return_value) { - case LZMA_OK: - case LZMA_BUF_ERROR: - break; - case LZMA_STREAM_END: - hit_input_eof = true; - break; - default: - SPDLOG_ERROR("lzma() returned an unexpected value - {}.", int(return_value)); - throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); - } - if (0 == m_compression_stream->avail_in) { - // No more data to compress - break; - } + m_compression_stream->next_in = size_checked_pointer_cast(data); + m_compression_stream->avail_in = data_length; - // Write output buffer to file if it's full - if (0 == m_compression_stream->avail_out) { - m_compressed_stream_file_writer->write( - reinterpret_cast(m_compressed_stream_block_buffer.data()), - cCompressedStreamBlockBufferSize - ); - m_compression_stream->next_out = m_compressed_stream_block_buffer.data(); - m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; - } - } - - // Write any compressed data - if (m_compression_stream->avail_out < cCompressedStreamBlockBufferSize) { - m_compressed_stream_file_writer->write( - reinterpret_cast(m_compressed_stream_block_buffer.data()), - cCompressedStreamBlockBufferSize - m_compression_stream->avail_out - ); - m_compression_stream->next_out = m_compressed_stream_block_buffer.data(); - m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; - } + run_lzma(LZMA_RUN); m_compression_stream->next_in = nullptr; @@ -169,7 +130,7 @@ void Compressor::write(char const* data, size_t data_length) { m_uncompressed_stream_pos += data_length; } -void Compressor::flush() { +auto Compressor::flush() -> void { if (false == m_compression_stream_contains_data) { return; } @@ -184,51 +145,11 @@ void Compressor::flush() { // restart from this point if the previous compressed data has been damaged Z_FINISH - // Pending output flushed and deflate returns Z_STREAM_END if there was enough output space, // or Z_OK or Z_BUF_ERROR if it needs to be called again with more space - // - - bool flush_complete = false; - while (true) { - auto const return_value = lzma_code(m_compression_stream.get(), LZMA_SYNC_FLUSH); - switch (return_value) { - case LZMA_STREAM_END: - flush_complete = true; - break; - case LZMA_OK: - case LZMA_BUF_ERROR: - break; - default: - SPDLOG_ERROR("lzma() returned an unexpected value - {}.", int(return_value)); - throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); - } - if (flush_complete) { - break; - } - - // Write output buffer to file if it's full - if (0 == m_compression_stream->avail_out) { - m_compressed_stream_file_writer->write( - reinterpret_cast(m_compressed_stream_block_buffer.data()), - cCompressedStreamBlockBufferSize - ); - m_compression_stream->next_out = m_compressed_stream_block_buffer.data(); - m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; - } - } - - // Write any compressed data - if (m_compression_stream->avail_out < cCompressedStreamBlockBufferSize) { - m_compressed_stream_file_writer->write( - reinterpret_cast(m_compressed_stream_block_buffer.data()), - cCompressedStreamBlockBufferSize - m_compression_stream->avail_out - ); - m_compression_stream->next_out = m_compressed_stream_block_buffer.data(); - m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; - } - + run_lzma(LZMA_SYNC_FLUSH); m_compression_stream_contains_data = false; } -ErrorCode Compressor::try_get_pos(size_t& pos) const { +auto Compressor::try_get_pos(size_t& pos) const -> ErrorCode { if (nullptr == m_compressed_stream_file_writer) { return ErrorCode_NotInit; } @@ -237,55 +158,64 @@ ErrorCode Compressor::try_get_pos(size_t& pos) const { return ErrorCode_Success; } -void Compressor::flush_and_close_compression_stream() { +auto Compressor::flush_and_close_compression_stream() -> void { if (nullptr == m_compressed_stream_file_writer) { throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); } - bool flush_complete = false; + run_lzma(LZMA_FINISH); + + m_compression_stream_contains_data = false; + + lzma_end(m_compression_stream.get()); + m_compression_stream->avail_out = 0; + m_compression_stream->next_out = nullptr; +} + +auto Compressor::run_lzma(LzmaAction action) -> void { + // Compress all data + bool hit_input_eof{false}; while (true) { - lzma_ret return_value = lzma_code(m_compression_stream.get(), LZMA_FINISH); - switch (return_value) { + auto const rc = lzma_code(m_compression_stream.get(), action); + switch (rc) { case LZMA_OK: case LZMA_BUF_ERROR: break; case LZMA_STREAM_END: - flush_complete = true; + hit_input_eof = true; break; default: - // SPDLOG_ERROR("deflate() returned an unexpected value - - // {}.", return_value); + SPDLOG_ERROR("lzma() returned an unexpected value - {}.", static_cast(rc)); throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); } - if (flush_complete) { + + if (LZMA_RUN == action && 0 == m_compression_stream->avail_in) { + // No more data to compress + break; + } + + if (hit_input_eof) { break; } // Write output buffer to file if it's full if (0 == m_compression_stream->avail_out) { - m_compressed_stream_file_writer->write( - reinterpret_cast(m_compressed_stream_block_buffer.data()), - cCompressedStreamBlockBufferSize - ); - m_compression_stream->next_out = m_compressed_stream_block_buffer.data(); - m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; + write_data(); } } // Write any compressed data if (m_compression_stream->avail_out < cCompressedStreamBlockBufferSize) { - m_compressed_stream_file_writer->write( - reinterpret_cast(m_compressed_stream_block_buffer.data()), - cCompressedStreamBlockBufferSize - m_compression_stream->avail_out - ); - m_compression_stream->next_out = m_compressed_stream_block_buffer.data(); - m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; + write_data(); } +} - m_compression_stream_contains_data = false; - - lzma_end(m_compression_stream.get()); - m_compression_stream->avail_out = 0; - m_compression_stream->next_out = nullptr; +auto Compressor::write_data() -> void { + m_compressed_stream_file_writer->write( + size_checked_pointer_cast(m_compressed_stream_block_buffer.data()), + cCompressedStreamBlockBufferSize - m_compression_stream->avail_out + ); + m_compression_stream->next_out = m_compressed_stream_block_buffer.data(); + m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; } } // namespace clp::streaming_compression::lzma diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp index 53f82b139..f6c6b4963 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp @@ -1,12 +1,12 @@ #ifndef CLP_STREAMING_COMPRESSION_LZMA_COMPRESSOR_HPP #define CLP_STREAMING_COMPRESSION_LZMA_COMPRESSOR_HPP -#include +#include + #include #include #include -#include #include "../../Array.hpp" #include "../../ErrorCode.hpp" @@ -31,35 +31,8 @@ class Compressor : public ::clp::streaming_compression::Compressor { } }; - class LzmaOption { - public: - LzmaOption() - : m_compression_level{cDefaultCompressionLevel}, - m_dict_size{cDefaultDictionarySize} {} - - auto set_compression_level(int compression_level) -> void { - if (compression_level < cMinCompressionLevel) { - m_compression_level = cMinCompressionLevel; - } else if (compression_level > cMaxCompressionLevel) { - m_compression_level = cMaxCompressionLevel; - } else { - m_compression_level = compression_level; - } - } - - auto set_dict_size(uint32_t dict_size) -> void { m_dict_size = dict_size; } - - [[nodiscard]] auto get_compression_level() const -> int { return m_compression_level; } - - [[nodiscard]] auto get_dict_size() const -> uint32_t { return m_dict_size; } - - private: - int m_compression_level; - uint32_t m_dict_size; - }; - // Constructor - Compressor(); + Compressor() = default; // Destructor ~Compressor() override = default; @@ -114,24 +87,22 @@ class Compressor : public ::clp::streaming_compression::Compressor { */ auto open(FileWriter& file_writer, int compression_level) -> void; - // Methods - static auto set_compression_level(int compression_level) -> void { - m_option.set_compression_level(compression_level); - } - - static auto set_dict_size(uint32_t dict_size) -> void { m_option.set_dict_size(dict_size); } - private: + using LzmaAction = lzma_action; + using LzmaFilter = lzma_filter; + using LzmaOptionsLzma = lzma_options_lzma; using LzmaStream = lzma_stream; + static auto + init_lzma_encoder(LzmaStream* strm, int compression_level, size_t dict_size) -> void; + static constexpr size_t cCompressedStreamBlockBufferSize{4096}; // 4KiB + /** * Flushes the stream and closes it */ - void flush_and_close_compression_stream(); - - static void init_lzma_encoder(LzmaStream* strm); - static LzmaOption m_option; - static constexpr size_t cCompressedStreamBlockBufferSize{4096}; // 4KiB + auto flush_and_close_compression_stream() -> void; + auto write_data() -> void; + auto run_lzma(lzma_action action) -> void; // Variables FileWriter* m_compressed_stream_file_writer{nullptr}; @@ -139,6 +110,7 @@ class Compressor : public ::clp::streaming_compression::Compressor { // Compressed stream variables std::unique_ptr m_compression_stream{std::make_unique()}; bool m_compression_stream_contains_data{false}; + size_t m_dict_size{cDefaultDictionarySize}; Array m_compressed_stream_block_buffer{cCompressedStreamBlockBufferSize}; From 707c41219a5e2ad91ccbf01b91df973e9856ef6d Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Wed, 27 Nov 2024 01:14:54 -0500 Subject: [PATCH 04/35] Improve comments --- .../streaming_compression/lzma/Compressor.cpp | 35 ++++++++----------- .../streaming_compression/lzma/Compressor.hpp | 25 +++++++++++-- 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index 74a59ebca..6f6b5b4cf 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -21,7 +21,7 @@ auto Compressor::init_lzma_encoder(LzmaStream* strm, int compression_level, size -> void { LzmaOptionsLzma options; if (0 != lzma_lzma_preset(&options, compression_level)) { - SPDLOG_ERROR("Failed to initialize LZMA options."); + SPDLOG_ERROR("Failed to initialize LZMA options' compression level."); throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } options.dict_size = dict_size; @@ -122,7 +122,9 @@ auto Compressor::write(char const* data, size_t data_length) -> void { m_compression_stream->next_in = size_checked_pointer_cast(data); m_compression_stream->avail_in = data_length; - run_lzma(LZMA_RUN); + // Normal compression encoding workflow. Continue until the input buffer is + // exhausted. + compress(LZMA_RUN); m_compression_stream->next_in = nullptr; @@ -134,18 +136,9 @@ auto Compressor::flush() -> void { if (false == m_compression_stream_contains_data) { return; } - // Z_NO_FLUSH - deflate decides how much data to accumulate before producing output - // Z_SYNC_FLUSH - All pending output flushed to output buf and output aligned to byte - // boundary (completes current block and follows it with empty block that is 3 bits plus - // filler to next byte, followed by 4 bytes Z_PARTIAL_FLUSH - Same as Z_SYNC_FLUSH but - // output not aligned to byte boundary (completes current block and follows it with empty - // fixed codes block that is 10 bits long) Z_BLOCK - Same as Z_SYNC_FLUSH but output not - // aligned on a byte boundary and up to 7 bits of current block held to be written - // Z_FULL_FLUSH - Same as Z_SYNC_FLUSH but compression state reset so that decompression can - // restart from this point if the previous compressed data has been damaged Z_FINISH - - // Pending output flushed and deflate returns Z_STREAM_END if there was enough output space, - // or Z_OK or Z_BUF_ERROR if it needs to be called again with more space - run_lzma(LZMA_SYNC_FLUSH); + + // Forces all the buffered data to be available at output + compress(LZMA_SYNC_FLUSH); m_compression_stream_contains_data = false; } @@ -163,7 +156,8 @@ auto Compressor::flush_and_close_compression_stream() -> void { throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); } - run_lzma(LZMA_FINISH); + // Same as flush but all the input data must have been given to the encoder + compress(LZMA_FINISH); m_compression_stream_contains_data = false; @@ -172,8 +166,7 @@ auto Compressor::flush_and_close_compression_stream() -> void { m_compression_stream->next_out = nullptr; } -auto Compressor::run_lzma(LzmaAction action) -> void { - // Compress all data +auto Compressor::compress(LzmaAction action) -> void { bool hit_input_eof{false}; while (true) { auto const rc = lzma_code(m_compression_stream.get(), action); @@ -200,17 +193,17 @@ auto Compressor::run_lzma(LzmaAction action) -> void { // Write output buffer to file if it's full if (0 == m_compression_stream->avail_out) { - write_data(); + pipe_data(); } } - // Write any compressed data + // Write remaining compressed data if (m_compression_stream->avail_out < cCompressedStreamBlockBufferSize) { - write_data(); + pipe_data(); } } -auto Compressor::write_data() -> void { +auto Compressor::pipe_data() -> void { m_compressed_stream_file_writer->write( size_checked_pointer_cast(m_compressed_stream_block_buffer.data()), cCompressedStreamBlockBufferSize - m_compression_stream->avail_out diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp index f6c6b4963..03f32a186 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp @@ -93,6 +93,13 @@ class Compressor : public ::clp::streaming_compression::Compressor { using LzmaOptionsLzma = lzma_options_lzma; using LzmaStream = lzma_stream; + /** + * Initialize the Lzma compression stream + * @param strm A pre-allocated `lzma_stream` object + * @param compression_level + * @param dict_size Dictionary size that indicates how many bytes of the + * recently processed uncompressed data is kept in memory + */ static auto init_lzma_encoder(LzmaStream* strm, int compression_level, size_t dict_size) -> void; static constexpr size_t cCompressedStreamBlockBufferSize{4096}; // 4KiB @@ -101,8 +108,22 @@ class Compressor : public ::clp::streaming_compression::Compressor { * Flushes the stream and closes it */ auto flush_and_close_compression_stream() -> void; - auto write_data() -> void; - auto run_lzma(lzma_action action) -> void; + + /** + * Repeatedly invoke lzma_code() compression workflow until LZMA_STREAM_END + * is reached. + * The workflow action needs to be kept the same throughout this process. + * See also: https://github.com/frida/xz/blob/main/src/liblzma/api/lzma/base.h#L246 + * + * @param action + */ + auto compress(lzma_action action) -> void; + + /** + * Pipes the current compressed data in the lzma buffer to the output file + * and reset the compression buffer to receive new data. + */ + auto pipe_data() -> void; // Variables FileWriter* m_compressed_stream_file_writer{nullptr}; From 6d1ab8fa907632a9af6001f9075404fc09708633 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Wed, 27 Nov 2024 11:08:09 -0500 Subject: [PATCH 05/35] Fix reference link --- .../core/src/clp/streaming_compression/lzma/Compressor.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp index 03f32a186..80052e50c 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp @@ -113,7 +113,7 @@ class Compressor : public ::clp::streaming_compression::Compressor { * Repeatedly invoke lzma_code() compression workflow until LZMA_STREAM_END * is reached. * The workflow action needs to be kept the same throughout this process. - * See also: https://github.com/frida/xz/blob/main/src/liblzma/api/lzma/base.h#L246 + * See also: https://github.com/tukaani-project/xz/blob/master/src/liblzma/api/lzma/base.h#L274 * * @param action */ From 89b57074a7851d66310bb32b3031da566a3902f4 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Wed, 27 Nov 2024 11:43:56 -0500 Subject: [PATCH 06/35] Add install for CentOS --- .../lib_install/centos-stream-9/install-prebuilt-packages.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/components/core/tools/scripts/lib_install/centos-stream-9/install-prebuilt-packages.sh b/components/core/tools/scripts/lib_install/centos-stream-9/install-prebuilt-packages.sh index e90f54733..eede5e004 100755 --- a/components/core/tools/scripts/lib_install/centos-stream-9/install-prebuilt-packages.sh +++ b/components/core/tools/scripts/lib_install/centos-stream-9/install-prebuilt-packages.sh @@ -16,4 +16,5 @@ dnf install -y \ libzstd-devel \ make \ mariadb-connector-c-devel \ - openssl-devel + openssl-devel \ + xz-devel From c646cea6325763dbebc23c790d87445a7c0c8ecd Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Wed, 27 Nov 2024 14:04:31 -0500 Subject: [PATCH 07/35] Apply coderabbit suggestions --- components/core/CMakeLists.txt | 6 +-- .../streaming_compression/lzma/Compressor.cpp | 44 +++++++++---------- .../streaming_compression/lzma/Compressor.hpp | 6 +-- .../core/tools/scripts/lib_install/liblzma.sh | 24 +++++----- 4 files changed, 39 insertions(+), 41 deletions(-) diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index 92bb6af19..56156c131 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -232,13 +232,13 @@ endif() # TODO: add a script in ./cmake/Modules to resolve .a vs. .so find_package(LibLZMA REQUIRED) if(LIBLZMA_FOUND) - message(STATUS "Found LIBLZMA_FOUND ${LIBLZMA_VERSION_STRING}") + message(STATUS "Found Lzma ${LIBLZMA_VERSION_STRING}") message(STATUS "Lzma library location: ${LIBLZMA_LIBRARIES}") else() - message(FATAL_ERROR "Could not find ${CLP_LIBS_STRING} libraries for LIBLZMA_FOUND") + message(FATAL_ERROR "Could not find ${CLP_LIBS_STRING} libraries for Lzma") endif() include_directories(${LIBLZMA_INCLUDE_DIRS}) -message("LZMA Include Dir: ${LIBLZMA_INCLUDE_DIRS}") +message("Lzma Include Dir: ${LIBLZMA_INCLUDE_DIRS}") # sqlite dependencies set(sqlite_DYNAMIC_LIBS "dl;m;pthread") diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index 6f6b5b4cf..c7b46cd6c 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -83,13 +83,13 @@ auto Compressor::open(FileWriter& file_writer, int compression_level) -> void { throw OperationFailed(ErrorCode_Unsupported, __FILENAME__, __LINE__); } - memset(m_compression_stream.get(), 0, sizeof(LzmaStream)); - init_lzma_encoder(m_compression_stream.get(), compression_level, m_dict_size); + m_compression_stream = LZMA_STREAM_INIT; + init_lzma_encoder(&m_compression_stream, compression_level, m_dict_size); // Setup compressed stream parameters - m_compression_stream->next_in = nullptr; - m_compression_stream->avail_in = 0; - m_compression_stream->next_out = m_compressed_stream_block_buffer.data(); - m_compression_stream->avail_out = m_compressed_stream_block_buffer.size(); + m_compression_stream.next_in = nullptr; + m_compression_stream.avail_in = 0; + m_compression_stream.next_out = m_compressed_stream_block_buffer.data(); + m_compression_stream.avail_out = m_compressed_stream_block_buffer.size(); m_compressed_stream_file_writer = &file_writer; @@ -119,14 +119,14 @@ auto Compressor::write(char const* data, size_t data_length) -> void { throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } - m_compression_stream->next_in = size_checked_pointer_cast(data); - m_compression_stream->avail_in = data_length; + m_compression_stream.next_in = size_checked_pointer_cast(data); + m_compression_stream.avail_in = data_length; // Normal compression encoding workflow. Continue until the input buffer is // exhausted. compress(LZMA_RUN); - m_compression_stream->next_in = nullptr; + m_compression_stream.next_in = nullptr; m_compression_stream_contains_data = true; m_uncompressed_stream_pos += data_length; @@ -161,44 +161,44 @@ auto Compressor::flush_and_close_compression_stream() -> void { m_compression_stream_contains_data = false; - lzma_end(m_compression_stream.get()); - m_compression_stream->avail_out = 0; - m_compression_stream->next_out = nullptr; + lzma_end(&m_compression_stream); + m_compression_stream.avail_out = 0; + m_compression_stream.next_out = nullptr; } auto Compressor::compress(LzmaAction action) -> void { - bool hit_input_eof{false}; + bool hit_stream_end{false}; while (true) { - auto const rc = lzma_code(m_compression_stream.get(), action); + auto const rc = lzma_code(&m_compression_stream, action); switch (rc) { case LZMA_OK: case LZMA_BUF_ERROR: break; case LZMA_STREAM_END: - hit_input_eof = true; + hit_stream_end = true; break; default: SPDLOG_ERROR("lzma() returned an unexpected value - {}.", static_cast(rc)); throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); } - if (LZMA_RUN == action && 0 == m_compression_stream->avail_in) { + if (LZMA_RUN == action && 0 == m_compression_stream.avail_in) { // No more data to compress break; } - if (hit_input_eof) { + if (hit_stream_end) { break; } // Write output buffer to file if it's full - if (0 == m_compression_stream->avail_out) { + if (0 == m_compression_stream.avail_out) { pipe_data(); } } // Write remaining compressed data - if (m_compression_stream->avail_out < cCompressedStreamBlockBufferSize) { + if (m_compression_stream.avail_out < cCompressedStreamBlockBufferSize) { pipe_data(); } } @@ -206,9 +206,9 @@ auto Compressor::compress(LzmaAction action) -> void { auto Compressor::pipe_data() -> void { m_compressed_stream_file_writer->write( size_checked_pointer_cast(m_compressed_stream_block_buffer.data()), - cCompressedStreamBlockBufferSize - m_compression_stream->avail_out + cCompressedStreamBlockBufferSize - m_compression_stream.avail_out ); - m_compression_stream->next_out = m_compressed_stream_block_buffer.data(); - m_compression_stream->avail_out = cCompressedStreamBlockBufferSize; + m_compression_stream.next_out = m_compressed_stream_block_buffer.data(); + m_compression_stream.avail_out = cCompressedStreamBlockBufferSize; } } // namespace clp::streaming_compression::lzma diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp index 80052e50c..d10810e88 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp @@ -1,8 +1,6 @@ #ifndef CLP_STREAMING_COMPRESSION_LZMA_COMPRESSOR_HPP #define CLP_STREAMING_COMPRESSION_LZMA_COMPRESSOR_HPP -#include - #include #include @@ -129,11 +127,11 @@ class Compressor : public ::clp::streaming_compression::Compressor { FileWriter* m_compressed_stream_file_writer{nullptr}; // Compressed stream variables - std::unique_ptr m_compression_stream{std::make_unique()}; + LzmaStream m_compression_stream; bool m_compression_stream_contains_data{false}; size_t m_dict_size{cDefaultDictionarySize}; - Array m_compressed_stream_block_buffer{cCompressedStreamBlockBufferSize}; + Array m_compressed_stream_block_buffer{cCompressedStreamBlockBufferSize}; size_t m_uncompressed_stream_pos{0}; }; diff --git a/components/core/tools/scripts/lib_install/liblzma.sh b/components/core/tools/scripts/lib_install/liblzma.sh index 1145b2646..28766eced 100755 --- a/components/core/tools/scripts/lib_install/liblzma.sh +++ b/components/core/tools/scripts/lib_install/liblzma.sh @@ -1,16 +1,23 @@ #!/bin/bash +# Exit on any error +set -e + +# Error on undefined variable +set -u + # Dependencies: # - curl # - make # - gcc # NOTE: Dependencies should be installed outside the script to allow the script to be largely distro-agnostic -# Exit on any error -set -e - -# Error on undefined variable -set -u +for cmd in curl make gcc; do + if ! $cmd --version >/dev/null 2>&1; then + echo "Error: Required dependency '$cmd' not found" + exit 1 + fi +done cUsage="Usage: ${BASH_SOURCE[0]} [ <.deb output directory>]" if [ "$#" -lt 1 ] ; then @@ -32,13 +39,6 @@ fi # Note: we won't check if the package already exists -echo "Checking for elevated privileges..." -privileged_command_prefix="" -if [ ${EUID:-$(id -u)} -ne 0 ] ; then - sudo echo "Script can elevate privileges." - privileged_command_prefix="${privileged_command_prefix} sudo" -fi - # Get number of cpu cores num_cpus=$(grep -c ^processor /proc/cpuinfo) From c91e5fb90752c0d89190b88ce45cafeab4e163a6 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Wed, 27 Nov 2024 14:21:10 -0500 Subject: [PATCH 08/35] Remove decompressor related files --- components/core/CMakeLists.txt | 2 - .../lzma/Decompressor.cpp | 362 ------------------ .../lzma/Decompressor.hpp | 163 -------- .../core/tests/test-StreamingCompression.cpp | 7 +- 4 files changed, 5 insertions(+), 529 deletions(-) delete mode 100644 components/core/src/clp/streaming_compression/lzma/Decompressor.cpp delete mode 100644 components/core/src/clp/streaming_compression/lzma/Decompressor.hpp diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index 56156c131..312c6e2ef 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -480,8 +480,6 @@ set(SOURCE_FILES_unitTest src/clp/streaming_compression/Decompressor.hpp src/clp/streaming_compression/lzma/Compressor.cpp src/clp/streaming_compression/lzma/Compressor.hpp - src/clp/streaming_compression/lzma/Decompressor.cpp - src/clp/streaming_compression/lzma/Decompressor.hpp src/clp/streaming_compression/lzma/Constants.hpp src/clp/streaming_compression/passthrough/Compressor.cpp src/clp/streaming_compression/passthrough/Compressor.hpp diff --git a/components/core/src/clp/streaming_compression/lzma/Decompressor.cpp b/components/core/src/clp/streaming_compression/lzma/Decompressor.cpp deleted file mode 100644 index b6a10b418..000000000 --- a/components/core/src/clp/streaming_compression/lzma/Decompressor.cpp +++ /dev/null @@ -1,362 +0,0 @@ -#include "Decompressor.hpp" - -// C++ Standard Libraries -#include - -// Boost libraries -#include - -// spdlog -#include - -// Project headers -#include "../../Defs.h" - -namespace clp::streaming_compression::lzma { -Decompressor::Decompressor() - : ::clp::streaming_compression::Decompressor(CompressorType::LZMA), - m_input_type(InputType::NotInitialized), - m_decompression_stream(nullptr), - m_file_reader(nullptr), - m_file_reader_initial_pos(0), - m_file_read_buffer_length(0), - m_file_read_buffer_capacity(0), - m_decompressed_stream_pos(0), - m_unused_decompressed_stream_block_size(0) { - // Create block to hold unused decompressed data - m_unused_decompressed_stream_block_buffer - = std::make_unique(m_unused_decompressed_stream_block_size); - m_decompression_stream = new lzma_stream; - memset(m_decompression_stream, 0, sizeof(lzma_stream)); -} - -Decompressor::~Decompressor() { - delete m_decompression_stream; -} - -void Decompressor::exact_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) { - auto errorcode = try_read(buf, num_bytes_to_read, num_bytes_read); - if (num_bytes_read != num_bytes_to_read) { - SPDLOG_ERROR("FAILED TO READ EXACTLY {} bytes", num_bytes_to_read); - throw; - } - if (errorcode != ErrorCode_Success) { - SPDLOG_ERROR("FAILED TO READ EXACTLY {} bytes", num_bytes_to_read); - throw; - } -} - -ErrorCode Decompressor::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) { - if (InputType::NotInitialized == m_input_type) { - return ErrorCode_NotInit; - } - if (nullptr == buf) { - return ErrorCode_BadParam; - } - if (0 == num_bytes_to_read) { - return ErrorCode_Success; - } - - num_bytes_read = 0; - - m_decompression_stream->next_out = reinterpret_cast(buf); - m_decompression_stream->avail_out = num_bytes_to_read; - while (true) { - // Check if there's data that can be decompressed - if (0 == m_decompression_stream->avail_in) { - if (InputType::File != m_input_type) { - // if we hit here, there must be something wrong - // we have consumed all data buffer but for some reason it still requires more. - return ErrorCode_EndOfFile; - } else { - auto error_code = m_file_reader->try_read( - m_file_read_buffer.get(), - m_file_read_buffer_capacity, - m_file_read_buffer_length - ); - m_decompression_stream->avail_in = m_file_read_buffer_length; - m_decompression_stream->next_in - = reinterpret_cast(m_file_read_buffer.get()); - if (ErrorCode_Success != error_code) { - if (ErrorCode_EndOfFile == error_code) { - num_bytes_read = num_bytes_to_read - m_decompression_stream->avail_out; - m_decompressed_stream_pos += num_bytes_read; - return ErrorCode_EndOfFile; - } - } - } - } - - lzma_ret return_value = lzma_code(m_decompression_stream, LZMA_RUN); - switch (return_value) { - case LZMA_OK: - case LZMA_BUF_ERROR: - if (0 == m_decompression_stream->avail_out) { - m_decompression_stream->next_out = nullptr; - num_bytes_read = num_bytes_to_read; - m_decompressed_stream_pos += num_bytes_read; - return ErrorCode_Success; - } - // by breaking here, enter the next iteration of decompressing - break; - case LZMA_STREAM_END: - if (0 == m_decompression_stream->avail_out) { - m_decompression_stream->next_out = nullptr; - num_bytes_read = num_bytes_to_read; - m_decompressed_stream_pos += num_bytes_read; - return ErrorCode_Success; - } - SPDLOG_ERROR("streaming_compression::lzma::Decompressor wants to read more but " - "reached end of file"); - throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); - case LZMA_MEM_ERROR: - SPDLOG_ERROR("streaming_compression::lzma::Decompressor inflate() ran out of memory" - ); - throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); - default: - SPDLOG_ERROR("inflate() returned an unexpected value - {}.", int(return_value)); - throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); - } - } -} - -ErrorCode Decompressor::try_seek_from_begin(size_t pos) { - if (InputType::NotInitialized == m_input_type) { - throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); - } - - // Check if we've already decompressed passed the desired position - if (m_decompressed_stream_pos > pos) { - // ZStd has no way for us to seek back to the desired position, so just reset the stream - // to the beginning - reset_stream(); - } - - // We need to fast-forward the decompression stream to decompressed_stream_pos - ErrorCode error; - while (m_decompressed_stream_pos < pos) { - size_t num_bytes_to_decompress = std::min( - m_unused_decompressed_stream_block_size, - pos - m_decompressed_stream_pos - ); - error = try_read_exact_length( - m_unused_decompressed_stream_block_buffer.get(), - num_bytes_to_decompress - ); - if (ErrorCode_Success != error) { - return error; - } - } - - return ErrorCode_Success; -} - -ErrorCode Decompressor::try_get_pos(size_t& pos) { - if (InputType::NotInitialized == m_input_type) { - return ErrorCode_NotInit; - } - - pos = m_decompressed_stream_pos; - return ErrorCode_Success; -} - -void Decompressor::close() { - if (InputType::NotInitialized == m_input_type) { - return; - } - lzma_end(m_decompression_stream); - m_decompression_stream->avail_out = 0; - m_decompression_stream->next_out = nullptr; - if (InputType::MemoryMappedCompressedFile == m_input_type) { - if (m_memory_mapped_compressed_file.is_open()) { - // An existing file is memory mapped by the decompressor - m_memory_mapped_compressed_file.close(); - } - } else if (InputType::File == m_input_type) { - m_file_read_buffer.reset(); - m_file_read_buffer_capacity = 0; - m_file_read_buffer_length = 0; - m_file_reader = nullptr; - } - m_input_type = InputType::NotInitialized; -} - -void Decompressor::init_decoder(lzma_stream* strm) { - // Initialize a .xz decoder. The decoder supports a memory usage limit - // and a set of flags. - // - // The memory usage of the decompressor depends on the settings used - // to compress a .xz file. It can vary from less than a megabyte to - // a few gigabytes, but in practice (at least for now) it rarely - // exceeds 65 MiB because that's how much memory is required to - // decompress files created with "xz -9". Settings requiring more - // memory take extra effort to use and don't (at least for now) - // provide significantly better compression in most cases. - // - // Memory usage limit is useful if it is important that the - // decompressor won't consume gigabytes of memory. The need - // for limiting depends on the application. In this example, - // no memory usage limiting is used. This is done by setting - // the limit to UINT64_MAX. - // - // The .xz format allows concatenating compressed files as is: - // - // echo foo | xz > foobar.xz - // echo bar | xz >> foobar.xz - // - // When decompressing normal standalone .xz files, LZMA_CONCATENATED - // should always be used to support decompression of concatenated - // .xz files. If LZMA_CONCATENATED isn't used, the decoder will stop - // after the first .xz stream. This can be useful when .xz data has - // been embedded inside another file format. - // - // Flags other than LZMA_CONCATENATED are supported too, and can - // be combined with bitwise-or. See lzma/container.h - // (src/liblzma/api/lzma/container.h in the source package or e.g. - // /usr/include/lzma/container.h depending on the install prefix) - // for details. - lzma_ret ret = lzma_stream_decoder(strm, UINT64_MAX, LZMA_CONCATENATED); - - // Return successfully if the initialization went fine. - if (ret == LZMA_OK) { - return; - } - - // Something went wrong. The possible errors are documented in - // lzma/container.h (src/liblzma/api/lzma/container.h in the source - // package or e.g. /usr/include/lzma/container.h depending on the - // install prefix). - // - // Note that LZMA_MEMLIMIT_ERROR is never possible here. If you - // specify a very tiny limit, the error will be delayed until - // the first headers have been parsed by a call to lzma_code(). - char const* msg; - switch (ret) { - case LZMA_MEM_ERROR: - msg = "Memory allocation failed"; - break; - - case LZMA_OPTIONS_ERROR: - msg = "Unsupported decompressor flags"; - break; - - default: - // This is most likely LZMA_PROG_ERROR indicating a bug in - // this program or in liblzma. It is inconvenient to have a - // separate error message for errors that should be impossible - // to occur, but knowing the error code is important for - // debugging. That's why it is good to print the error code - // at least when there is no good error message to show. - msg = "Unknown error, possibly a bug"; - break; - } - - SPDLOG_ERROR("Error initializing the decoder: {} (error code {})", msg, int(ret)); -} - -void Decompressor::open(char const* compressed_data_buf, size_t compressed_data_buf_size) { - if (InputType::NotInitialized != m_input_type) { - throw OperationFailed(ErrorCode_NotReady, __FILENAME__, __LINE__); - } - m_input_type = InputType::CompressedDataBuf; - - // Configure input stream - reset_stream(); - m_decompression_stream->next_in - = reinterpret_cast(const_cast(compressed_data_buf)); - m_decompression_stream->avail_in = compressed_data_buf_size; - m_decompression_stream->next_out = nullptr; - m_decompression_stream->avail_out = 0; -} - -ErrorCode Decompressor::open(std::string const& compressed_file_path) { - if (InputType::NotInitialized != m_input_type) { - throw OperationFailed(ErrorCode_NotReady, __FILENAME__, __LINE__); - } - m_input_type = InputType::MemoryMappedCompressedFile; - - // Create memory mapping for compressed_file_path, use boost read only memory mapped file - boost::system::error_code boost_error_code; - size_t compressed_file_size - = boost::filesystem::file_size(compressed_file_path, boost_error_code); - if (boost_error_code) { - SPDLOG_ERROR( - "streaming_compression::zstd::Decompressor: Unable to obtain file size for " - "'{}' - {}.", - compressed_file_path.c_str(), - boost_error_code.message().c_str() - ); - return ErrorCode_Failure; - } - - boost::iostreams::mapped_file_params memory_map_params; - memory_map_params.path = compressed_file_path; - memory_map_params.flags = boost::iostreams::mapped_file::readonly; - memory_map_params.length = compressed_file_size; - memory_map_params.hint = m_memory_mapped_compressed_file.data( - ); // Try to map it to the same memory location as previous memory mapped file - m_memory_mapped_compressed_file.open(memory_map_params); - if (!m_memory_mapped_compressed_file.is_open()) { - SPDLOG_ERROR( - "streaming_compression::lzma::Decompressor: Unable to memory map the " - "compressed file with path: {}", - compressed_file_path.c_str() - ); - return ErrorCode_Failure; - } - - // Configure input stream - reset_stream(); - m_decompression_stream->next_in - = reinterpret_cast(const_cast(m_memory_mapped_compressed_file.data())); - m_decompression_stream->avail_in = compressed_file_size; - m_decompression_stream->next_out = nullptr; - m_decompression_stream->avail_out = 0; - - return ErrorCode_Success; -} - -void Decompressor::open(FileReader& file_reader, size_t file_read_buffer_capacity) { - if (InputType::NotInitialized != m_input_type) { - throw OperationFailed(ErrorCode_NotReady, __FILENAME__, __LINE__); - } - m_input_type = InputType::File; - - m_file_reader = &file_reader; - m_file_reader_initial_pos = m_file_reader->get_pos(); - - m_file_read_buffer_capacity = file_read_buffer_capacity; - m_file_read_buffer = std::make_unique(m_file_read_buffer_capacity); - m_file_read_buffer_length = 0; - - // Configure input stream - reset_stream(); - m_decompression_stream->next_in = reinterpret_cast(m_file_read_buffer.get()); - m_decompression_stream->avail_in = m_file_read_buffer_length; - m_decompression_stream->next_out = nullptr; - m_decompression_stream->avail_out = 0; -} - -ErrorCode Decompressor::get_decompressed_stream_region( - size_t decompressed_stream_pos, - char* extraction_buf, - size_t extraction_len -) { - auto error_code = try_seek_from_begin(decompressed_stream_pos); - if (ErrorCode_Success != error_code) { - return error_code; - } - - error_code = try_read_exact_length(extraction_buf, extraction_len); - return error_code; -} - -void Decompressor::reset_stream() { - if (InputType::File == m_input_type) { - m_file_reader->seek_from_begin(m_file_reader_initial_pos); - m_file_read_buffer_length = 0; - } - m_decompressed_stream_pos = 0; - init_decoder(m_decompression_stream); -} -} // namespace clp::streaming_compression::lzma diff --git a/components/core/src/clp/streaming_compression/lzma/Decompressor.hpp b/components/core/src/clp/streaming_compression/lzma/Decompressor.hpp deleted file mode 100644 index 5e90f5942..000000000 --- a/components/core/src/clp/streaming_compression/lzma/Decompressor.hpp +++ /dev/null @@ -1,163 +0,0 @@ -#ifndef CLP_STREAMING_COMPRESSION_LZMA_DECOMPRESSOR_HPP -#define CLP_STREAMING_COMPRESSION_LZMA_DECOMPRESSOR_HPP - -// C++ standard libraries -#include -#include - -// ZLIB library -#include - -#include -// Boost libraries -#include - -// Project headers -#include "../../FileReader.hpp" -#include "../../TraceableException.hpp" -#include "../Decompressor.hpp" - -namespace clp::streaming_compression::lzma { -class Decompressor : public ::clp::streaming_compression::Decompressor { -public: - // Types - class OperationFailed : public TraceableException { - public: - // Constructors - OperationFailed(ErrorCode error_code, char const* const filename, int line_number) - : TraceableException(error_code, filename, line_number) {} - - // Methods - char const* what() const noexcept override { - return "streaming_compression::lzma::Decompressor operation failed"; - } - }; - - // Constructor - Decompressor(); - - // Destructor - ~Decompressor(); - - // Explicitly disable copy and move constructor/assignment - Decompressor(Decompressor const&) = delete; - Decompressor& operator=(Decompressor const&) = delete; - - // Methods implementing the ReaderInterface - /** - * Tries to read up to a given number of bytes from the decompressor - * @param buf - * @param num_bytes_to_read The number of bytes to try and read - * @param num_bytes_read The actual number of bytes read - * @return Same as FileReader::try_read if the decompressor is attached to a file - * @return ErrorCode_NotInit if the decompressor is not open - * @return ErrorCode_BadParam if buf is invalid - * @return ErrorCode_EndOfFile on EOF - * @return ErrorCode_Failure on decompression failure - * @return ErrorCode_Success on success - */ - ErrorCode try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) override; - - /** - */ - void exact_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read); - - /** - * Tries to seek from the beginning to the given position - * @param pos - * @return ErrorCode_NotInit if the decompressor is not open - * @return Same as ReaderInterface::try_read_exact_length - * @return ErrorCode_Success on success - */ - ErrorCode try_seek_from_begin(size_t pos) override; - /** - * Tries to get the current position of the read head - * @param pos Position of the read head in the file - * @return ErrorCode_NotInit if the decompressor is not open - * @return ErrorCode_Success on success - */ - ErrorCode try_get_pos(size_t& pos) override; - - // Methods implementing the Decompressor interface - void close() override; - /** - * Decompresses and copies the range of uncompressed data described by - * decompressed_stream_pos and extraction_len into extraction_buf - * @param decompressed_stream_pos - * @param extraction_buf - * @param extraction_len - * @return Same as streaming_compression::zstd::Decompressor::try_seek_from_begin - * @return Same as ReaderInterface::try_read_exact_length - */ - ErrorCode get_decompressed_stream_region( - size_t decompressed_stream_pos, - char* extraction_buf, - size_t extraction_len - ) override; - - // Methods - /*** - * Initialize streaming decompressor to decompress from the specified compressed data buffer - * @param compressed_data_buf - * @param compressed_data_buf_size - */ - void open(char const* compressed_data_buf, size_t compressed_data_buf_size) override; - - /*** - * Initialize streaming decompressor to decompress from a compressed file specified by the - * given path - * @param compressed_file_path - * @param decompressed_stream_block_size - * @return ErrorCode_Failure if the provided path cannot be memory mapped - * @return ErrorCode_Success on success - */ - ErrorCode open(std::string const& compressed_file_path); - - /** - * Initializes the decompressor to decompress from an open file - * @param file_reader - * @param file_read_buffer_capacity The maximum amount of data to read from a file at a time - */ - void open(FileReader& file_reader, size_t file_read_buffer_capacity) override; - -private: - // Enum class - enum class InputType { - NotInitialized, // Note: do nothing but generate an error to prevent this required - // parameter is not initialized properly - CompressedDataBuf, - MemoryMappedCompressedFile, - File - }; - - // Methods - /** - * Reset streaming decompression state so it will start decompressing from the beginning of - * the stream afterwards - */ - void reset_stream(); - - void init_decoder(lzma_stream* strm); - - // Variables - InputType m_input_type; - - // Compressed stream variables - lzma_stream* m_decompression_stream{nullptr}; - - boost::iostreams::mapped_file_source m_memory_mapped_compressed_file; - FileReader* m_file_reader; - size_t m_file_reader_initial_pos; - std::unique_ptr m_file_read_buffer; - size_t m_file_read_buffer_length; - size_t m_file_read_buffer_capacity; - - size_t m_decompressed_stream_pos; - size_t m_unused_decompressed_stream_block_size; - std::unique_ptr m_unused_decompressed_stream_block_buffer; - - char const* m_compressed_stream_block; - size_t m_compressed_stream_block_size; -}; -} // namespace clp::streaming_compression::lzma -#endif // CLP_STREAMING_COMPRESSION_LZMA_DECOMPRESSOR_HPP diff --git a/components/core/tests/test-StreamingCompression.cpp b/components/core/tests/test-StreamingCompression.cpp index d58d4c1ce..6dac8ba52 100644 --- a/components/core/tests/test-StreamingCompression.cpp +++ b/components/core/tests/test-StreamingCompression.cpp @@ -16,7 +16,6 @@ #include "../src/clp/streaming_compression/Compressor.hpp" #include "../src/clp/streaming_compression/Decompressor.hpp" #include "../src/clp/streaming_compression/lzma/Compressor.hpp" -#include "../src/clp/streaming_compression/lzma/Decompressor.hpp" #include "../src/clp/streaming_compression/passthrough/Compressor.hpp" #include "../src/clp/streaming_compression/passthrough/Decompressor.hpp" #include "../src/clp/streaming_compression/zstd/Compressor.hpp" @@ -59,7 +58,6 @@ TEST_CASE("StreamingCompression", "[StreamingCompression]") { SECTION("LZMA compression") { compressor = std::make_unique(); - decompressor = std::make_unique(); } // Initialize buffers @@ -81,6 +79,11 @@ TEST_CASE("StreamingCompression", "[StreamingCompression]") { file_writer.close(); // Decompress and compare + if (nullptr == decompressor) { + boost::filesystem::remove(compressed_file_path); + return; + } + clp::ReadOnlyMemoryMappedFile const memory_mapped_compressed_file{compressed_file_path}; auto const compressed_file_view{memory_mapped_compressed_file.get_view()}; decompressor->open(compressed_file_view.data(), compressed_file_view.size()); From 26b06638740d15c5657de301138d46977da25203 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Sat, 30 Nov 2024 02:11:46 -0500 Subject: [PATCH 09/35] Address review concerns --- .../streaming_compression/lzma/Compressor.cpp | 99 +++++++++---------- .../streaming_compression/lzma/Compressor.hpp | 18 +--- 2 files changed, 50 insertions(+), 67 deletions(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index c7b46cd6c..6092207d6 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -15,17 +15,15 @@ #include "Constants.hpp" namespace clp::streaming_compression::lzma { -using clp::size_checked_pointer_cast; - -auto Compressor::init_lzma_encoder(LzmaStream* strm, int compression_level, size_t dict_size) +auto Compressor::init_lzma_encoder(lzma_stream* strm, int compression_level, size_t dict_size) -> void { - LzmaOptionsLzma options; + lzma_options_lzma options; if (0 != lzma_lzma_preset(&options, compression_level)) { SPDLOG_ERROR("Failed to initialize LZMA options' compression level."); throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } options.dict_size = dict_size; - std::array filters{{ + std::array filters{{ {.id = LZMA_FILTER_LZMA2, .options = &options}, {.id = LZMA_VLI_UNKNOWN, .options = nullptr}, }}; @@ -34,10 +32,10 @@ auto Compressor::init_lzma_encoder(LzmaStream* strm, int compression_level, size // to CRC64, which is the default in the xz command line tool. If // the .xz file needs to be decompressed with XZ Embedded, use // LZMA_CHECK_CRC32 instead. - auto const ret{lzma_stream_encoder(strm, filters.data(), LZMA_CHECK_CRC64)}; + auto const rc{lzma_stream_encoder(strm, filters.data(), LZMA_CHECK_CRC64)}; // Return successfully if the initialization went fine. - if (LZMA_OK == ret) { + if (LZMA_OK == rc) { return; } @@ -46,7 +44,7 @@ auto Compressor::init_lzma_encoder(LzmaStream* strm, int compression_level, size // package or e.g. /usr/include/lzma/container.h depending on the // install prefix). char const* msg{nullptr}; - switch (ret) { + switch (rc) { case LZMA_MEM_ERROR: msg = "Memory allocation failed"; break; @@ -60,17 +58,12 @@ auto Compressor::init_lzma_encoder(LzmaStream* strm, int compression_level, size break; default: - // This is most likely LZMA_PROG_ERROR indicating a bug in - // this program or in liblzma. It is inconvenient to have a - // separate error message for errors that should be impossible - // to occur, but knowing the error code is important for - // debugging. That's why it is good to print the error code - // at least when there is no good error message to show. - msg = "Unknown error, possibly a bug"; + // This is most likely LZMA_PROG_ERROR indicating a bug in liblzma + msg = "Unknown error"; break; } - SPDLOG_ERROR("Error initializing the encoder: {} (error code {})", msg, static_cast(ret)); + SPDLOG_ERROR("Error initializing the encoder: {} (error code {})", msg, static_cast(rc)); throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } @@ -85,9 +78,12 @@ auto Compressor::open(FileWriter& file_writer, int compression_level) -> void { m_compression_stream = LZMA_STREAM_INIT; init_lzma_encoder(&m_compression_stream, compression_level, m_dict_size); - // Setup compressed stream parameters + + // No input upon initialization m_compression_stream.next_in = nullptr; m_compression_stream.avail_in = 0; + + // Attach output buffer to LZMA stream m_compression_stream.next_out = m_compressed_stream_block_buffer.data(); m_compression_stream.avail_out = m_compressed_stream_block_buffer.size(); @@ -101,7 +97,13 @@ auto Compressor::close() -> void { throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); } - flush_and_close_compression_stream(); + run_lzma(LZMA_FINISH); + lzma_end(&m_compression_stream); + + // Detach output buffer from LZMA stream + m_compression_stream.next_out = nullptr; + m_compression_stream.avail_out = 0; + m_compressed_stream_file_writer = nullptr; } @@ -119,27 +121,22 @@ auto Compressor::write(char const* data, size_t data_length) -> void { throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } - m_compression_stream.next_in = size_checked_pointer_cast(data); + // Attach input data to LZMA stream + m_compression_stream.next_in = clp::size_checked_pointer_cast(data); m_compression_stream.avail_in = data_length; - // Normal compression encoding workflow. Continue until the input buffer is - // exhausted. - compress(LZMA_RUN); - - m_compression_stream.next_in = nullptr; + run_lzma(LZMA_RUN); - m_compression_stream_contains_data = true; m_uncompressed_stream_pos += data_length; } auto Compressor::flush() -> void { - if (false == m_compression_stream_contains_data) { + if (m_compression_stream_is_flushed) { return; } // Forces all the buffered data to be available at output - compress(LZMA_SYNC_FLUSH); - m_compression_stream_contains_data = false; + run_lzma(LZMA_SYNC_FLUSH); } auto Compressor::try_get_pos(size_t& pos) const -> ErrorCode { @@ -151,43 +148,39 @@ auto Compressor::try_get_pos(size_t& pos) const -> ErrorCode { return ErrorCode_Success; } -auto Compressor::flush_and_close_compression_stream() -> void { - if (nullptr == m_compressed_stream_file_writer) { - throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); - } - - // Same as flush but all the input data must have been given to the encoder - compress(LZMA_FINISH); - - m_compression_stream_contains_data = false; - - lzma_end(&m_compression_stream); - m_compression_stream.avail_out = 0; - m_compression_stream.next_out = nullptr; -} - -auto Compressor::compress(LzmaAction action) -> void { - bool hit_stream_end{false}; +auto Compressor::run_lzma(lzma_action action) -> void { + m_compression_stream_is_flushed = false; + bool end_of_stream{false}; while (true) { + if (0 == m_compression_stream.avail_in) { // No more input data + if (LZMA_RUN == action) { + // All input data have been processed, so we can safely detach + // input data from LZMA stream. + m_compression_stream.next_in = nullptr; + break; + } + } else { + if (LZMA_FINISH == action) { + SPDLOG_ERROR("Tried to close LZMA compressor with unprocessed input data."); + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); + } + } + auto const rc = lzma_code(&m_compression_stream, action); switch (rc) { case LZMA_OK: case LZMA_BUF_ERROR: break; case LZMA_STREAM_END: - hit_stream_end = true; + end_of_stream = true; break; default: SPDLOG_ERROR("lzma() returned an unexpected value - {}.", static_cast(rc)); throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); } - if (LZMA_RUN == action && 0 == m_compression_stream.avail_in) { - // No more data to compress - break; - } - - if (hit_stream_end) { + if (end_of_stream) { + m_compression_stream_is_flushed = true; break; } @@ -205,7 +198,7 @@ auto Compressor::compress(LzmaAction action) -> void { auto Compressor::pipe_data() -> void { m_compressed_stream_file_writer->write( - size_checked_pointer_cast(m_compressed_stream_block_buffer.data()), + clp::size_checked_pointer_cast(m_compressed_stream_block_buffer.data()), cCompressedStreamBlockBufferSize - m_compression_stream.avail_out ); m_compression_stream.next_out = m_compressed_stream_block_buffer.data(); diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp index d10810e88..5b1adb404 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp @@ -86,11 +86,6 @@ class Compressor : public ::clp::streaming_compression::Compressor { auto open(FileWriter& file_writer, int compression_level) -> void; private: - using LzmaAction = lzma_action; - using LzmaFilter = lzma_filter; - using LzmaOptionsLzma = lzma_options_lzma; - using LzmaStream = lzma_stream; - /** * Initialize the Lzma compression stream * @param strm A pre-allocated `lzma_stream` object @@ -99,14 +94,9 @@ class Compressor : public ::clp::streaming_compression::Compressor { * recently processed uncompressed data is kept in memory */ static auto - init_lzma_encoder(LzmaStream* strm, int compression_level, size_t dict_size) -> void; + init_lzma_encoder(lzma_stream* strm, int compression_level, size_t dict_size) -> void; static constexpr size_t cCompressedStreamBlockBufferSize{4096}; // 4KiB - /** - * Flushes the stream and closes it - */ - auto flush_and_close_compression_stream() -> void; - /** * Repeatedly invoke lzma_code() compression workflow until LZMA_STREAM_END * is reached. @@ -115,7 +105,7 @@ class Compressor : public ::clp::streaming_compression::Compressor { * * @param action */ - auto compress(lzma_action action) -> void; + auto run_lzma(lzma_action action) -> void; /** * Pipes the current compressed data in the lzma buffer to the output file @@ -127,8 +117,8 @@ class Compressor : public ::clp::streaming_compression::Compressor { FileWriter* m_compressed_stream_file_writer{nullptr}; // Compressed stream variables - LzmaStream m_compression_stream; - bool m_compression_stream_contains_data{false}; + lzma_stream m_compression_stream; + bool m_compression_stream_is_flushed{true}; size_t m_dict_size{cDefaultDictionarySize}; Array m_compressed_stream_block_buffer{cCompressedStreamBlockBufferSize}; From 740bc1c1216f999c881dffd49564eeabcf1d4bbd Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Mon, 2 Dec 2024 10:20:12 -0500 Subject: [PATCH 10/35] Address review concern --- .../streaming_compression/lzma/Compressor.cpp | 26 +++++++++++++------ .../streaming_compression/lzma/Compressor.hpp | 15 +++-------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index 6092207d6..a1d5dfaa2 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -14,13 +14,21 @@ #include "../../type_utils.hpp" #include "Constants.hpp" -namespace clp::streaming_compression::lzma { -auto Compressor::init_lzma_encoder(lzma_stream* strm, int compression_level, size_t dict_size) - -> void { +namespace { +using clp::streaming_compression::lzma::Compressor; + +/** + * Initialize the Lzma compression stream + * @param strm A pre-allocated `lzma_stream` object + * @param compression_level + * @param dict_size Dictionary size that indicates how many bytes of the + * recently processed uncompressed data is kept in memory + */ +auto init_lzma_encoder(lzma_stream* strm, int compression_level, size_t dict_size) -> void { lzma_options_lzma options; if (0 != lzma_lzma_preset(&options, compression_level)) { SPDLOG_ERROR("Failed to initialize LZMA options' compression level."); - throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); + throw Compressor::OperationFailed(clp::ErrorCode_BadParam, __FILENAME__, __LINE__); } options.dict_size = dict_size; std::array filters{{ @@ -64,9 +72,11 @@ auto Compressor::init_lzma_encoder(lzma_stream* strm, int compression_level, siz } SPDLOG_ERROR("Error initializing the encoder: {} (error code {})", msg, static_cast(rc)); - throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); + throw Compressor::OperationFailed(clp::ErrorCode_BadParam, __FILENAME__, __LINE__); } +} // namespace +namespace clp::streaming_compression::lzma { auto Compressor::open(FileWriter& file_writer, int compression_level) -> void { if (nullptr != m_compressed_stream_file_writer) { throw OperationFailed(ErrorCode_NotReady, __FILENAME__, __LINE__); @@ -186,17 +196,17 @@ auto Compressor::run_lzma(lzma_action action) -> void { // Write output buffer to file if it's full if (0 == m_compression_stream.avail_out) { - pipe_data(); + flush_stream_output_block_buffer(); } } // Write remaining compressed data if (m_compression_stream.avail_out < cCompressedStreamBlockBufferSize) { - pipe_data(); + flush_stream_output_block_buffer(); } } -auto Compressor::pipe_data() -> void { +auto Compressor::flush_stream_output_block_buffer() -> void { m_compressed_stream_file_writer->write( clp::size_checked_pointer_cast(m_compressed_stream_block_buffer.data()), cCompressedStreamBlockBufferSize - m_compression_stream.avail_out diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp index 5b1adb404..4afdce36a 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp @@ -86,15 +86,6 @@ class Compressor : public ::clp::streaming_compression::Compressor { auto open(FileWriter& file_writer, int compression_level) -> void; private: - /** - * Initialize the Lzma compression stream - * @param strm A pre-allocated `lzma_stream` object - * @param compression_level - * @param dict_size Dictionary size that indicates how many bytes of the - * recently processed uncompressed data is kept in memory - */ - static auto - init_lzma_encoder(lzma_stream* strm, int compression_level, size_t dict_size) -> void; static constexpr size_t cCompressedStreamBlockBufferSize{4096}; // 4KiB /** @@ -108,10 +99,10 @@ class Compressor : public ::clp::streaming_compression::Compressor { auto run_lzma(lzma_action action) -> void; /** - * Pipes the current compressed data in the lzma buffer to the output file - * and reset the compression buffer to receive new data. + * Flushes the current compressed data in the lzma output buffer to the + * output file handler. Reset the compression buffer to receive new data. */ - auto pipe_data() -> void; + auto flush_stream_output_block_buffer() -> void; // Variables FileWriter* m_compressed_stream_file_writer{nullptr}; From e2be8833595b3281cdeaaccfcd1255849ce33b29 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Mon, 2 Dec 2024 10:22:18 -0500 Subject: [PATCH 11/35] Simplify else-if --- .../core/src/clp/streaming_compression/lzma/Compressor.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index a1d5dfaa2..c40ca7652 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -169,8 +169,7 @@ auto Compressor::run_lzma(lzma_action action) -> void { m_compression_stream.next_in = nullptr; break; } - } else { - if (LZMA_FINISH == action) { + } else if (LZMA_FINISH == action) { SPDLOG_ERROR("Tried to close LZMA compressor with unprocessed input data."); throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); } From 905367d6e4e08174fb30b7da67d00e5455ad14de Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Mon, 2 Dec 2024 10:23:19 -0500 Subject: [PATCH 12/35] Fix else-if --- .../core/src/clp/streaming_compression/lzma/Compressor.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index c40ca7652..610c7cc17 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -170,9 +170,8 @@ auto Compressor::run_lzma(lzma_action action) -> void { break; } } else if (LZMA_FINISH == action) { - SPDLOG_ERROR("Tried to close LZMA compressor with unprocessed input data."); - throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); - } + SPDLOG_ERROR("Tried to close LZMA compressor with unprocessed input data."); + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); } auto const rc = lzma_code(&m_compression_stream, action); From 8ae88b2a86f880a02133ac9ee3cb3a1ed5921a9d Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Mon, 2 Dec 2024 10:44:30 -0500 Subject: [PATCH 13/35] Add lzma (xz) dep to MacOS --- components/core/tools/scripts/lib_install/macos/install-all.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/components/core/tools/scripts/lib_install/macos/install-all.sh b/components/core/tools/scripts/lib_install/macos/install-all.sh index 97e41903d..cb24dd054 100755 --- a/components/core/tools/scripts/lib_install/macos/install-all.sh +++ b/components/core/tools/scripts/lib_install/macos/install-all.sh @@ -21,6 +21,7 @@ brew install \ mongo-cxx-driver \ msgpack-cxx \ spdlog \ + xz \ zstd # Install pkg-config if it isn't already installed From 0d0c20eaf35271572b068fe03887695d0f62f69d Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Mon, 2 Dec 2024 12:24:45 -0500 Subject: [PATCH 14/35] Refactor helper run_lzma() --- .../streaming_compression/lzma/Compressor.cpp | 79 +++++++++---------- .../streaming_compression/lzma/Compressor.hpp | 2 + 2 files changed, 38 insertions(+), 43 deletions(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index 610c7cc17..11260c6e9 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -107,7 +107,14 @@ auto Compressor::close() -> void { throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); } - run_lzma(LZMA_FINISH); + if (m_compression_stream.avail_in > 0) { + SPDLOG_ERROR("Tried to close LZMA compressor with unprocessed input data."); + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); + } + + while (false == m_compression_stream_is_flushed) { + run_lzma(LZMA_FINISH); + } lzma_end(&m_compression_stream); // Detach output buffer from LZMA stream @@ -134,19 +141,22 @@ auto Compressor::write(char const* data, size_t data_length) -> void { // Attach input data to LZMA stream m_compression_stream.next_in = clp::size_checked_pointer_cast(data); m_compression_stream.avail_in = data_length; + m_compression_stream_is_flushed = false; - run_lzma(LZMA_RUN); + while (m_compression_stream.avail_in > 0) { + run_lzma(LZMA_RUN); + } + + // All input data have been encoded so detach input data + m_compression_stream.next_in = nullptr; m_uncompressed_stream_pos += data_length; } auto Compressor::flush() -> void { - if (m_compression_stream_is_flushed) { - return; + while (false == m_compression_stream_is_flushed) { + run_lzma(LZMA_SYNC_FLUSH); } - - // Forces all the buffered data to be available at output - run_lzma(LZMA_SYNC_FLUSH); } auto Compressor::try_get_pos(size_t& pos) const -> ErrorCode { @@ -159,52 +169,35 @@ auto Compressor::try_get_pos(size_t& pos) const -> ErrorCode { } auto Compressor::run_lzma(lzma_action action) -> void { - m_compression_stream_is_flushed = false; - bool end_of_stream{false}; - while (true) { - if (0 == m_compression_stream.avail_in) { // No more input data - if (LZMA_RUN == action) { - // All input data have been processed, so we can safely detach - // input data from LZMA stream. - m_compression_stream.next_in = nullptr; - break; - } - } else if (LZMA_FINISH == action) { - SPDLOG_ERROR("Tried to close LZMA compressor with unprocessed input data."); - throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); - } - - auto const rc = lzma_code(&m_compression_stream, action); - switch (rc) { - case LZMA_OK: - case LZMA_BUF_ERROR: - break; - case LZMA_STREAM_END: - end_of_stream = true; - break; - default: - SPDLOG_ERROR("lzma() returned an unexpected value - {}.", static_cast(rc)); + auto const rc = lzma_code(&m_compression_stream, action); + switch (rc) { + case LZMA_OK: + break; + case LZMA_BUF_ERROR: // No encoding progress can be made + if (m_compression_stream.avail_in > 0) { + SPDLOG_ERROR("LZMA compressor input stream is corrupt."); throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); - } - - if (end_of_stream) { + } + break; + case LZMA_STREAM_END: m_compression_stream_is_flushed = true; break; - } - - // Write output buffer to file if it's full - if (0 == m_compression_stream.avail_out) { - flush_stream_output_block_buffer(); - } + default: + SPDLOG_ERROR("lzma() returned an unexpected value - {}.", static_cast(rc)); + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); } - // Write remaining compressed data - if (m_compression_stream.avail_out < cCompressedStreamBlockBufferSize) { + // Write output buffer to file if it's full or flushed + if (0 == m_compression_stream.avail_out || m_compression_stream_is_flushed) { flush_stream_output_block_buffer(); } } auto Compressor::flush_stream_output_block_buffer() -> void { + if (cCompressedStreamBlockBufferSize == m_compression_stream.avail_out) { + // Nothing to flush + return; + } m_compressed_stream_file_writer->write( clp::size_checked_pointer_cast(m_compressed_stream_block_buffer.data()), cCompressedStreamBlockBufferSize - m_compression_stream.avail_out diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp index 4afdce36a..1953001f2 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp @@ -53,6 +53,8 @@ class Compressor : public ::clp::streaming_compression::Compressor { /** * Writes any internally buffered data to file and ends the current frame + * + * Forces all the encoded data buffered by LZMA to be available at output */ auto flush() -> void override; From 559485d18c64eb32e7a72169ff303b3baae07d52 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Mon, 2 Dec 2024 12:36:07 -0500 Subject: [PATCH 15/35] Update function doc --- .../core/src/clp/streaming_compression/lzma/Compressor.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp index 1953001f2..3eb062223 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp @@ -91,9 +91,10 @@ class Compressor : public ::clp::streaming_compression::Compressor { static constexpr size_t cCompressedStreamBlockBufferSize{4096}; // 4KiB /** - * Repeatedly invoke lzma_code() compression workflow until LZMA_STREAM_END - * is reached. - * The workflow action needs to be kept the same throughout this process. + * Invoke lzma_code() encoding workflow for one time with the given action. + * + * Once flushing starts, the workflow action needs to stay the same until + * flushing is complete (aka LZMA_STREAM_END is reached). * See also: https://github.com/tukaani-project/xz/blob/master/src/liblzma/api/lzma/base.h#L274 * * @param action From 7c69c6919f6fd41a83d0f5e5865bb565014e9723 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Mon, 2 Dec 2024 12:48:02 -0500 Subject: [PATCH 16/35] Clarify unit test early termination --- components/core/tests/test-StreamingCompression.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/components/core/tests/test-StreamingCompression.cpp b/components/core/tests/test-StreamingCompression.cpp index 6dac8ba52..a47012ca3 100644 --- a/components/core/tests/test-StreamingCompression.cpp +++ b/components/core/tests/test-StreamingCompression.cpp @@ -4,8 +4,10 @@ #include #include #include +#include #include +#include #include #include @@ -78,12 +80,16 @@ TEST_CASE("StreamingCompression", "[StreamingCompression]") { compressor->close(); file_writer.close(); - // Decompress and compare - if (nullptr == decompressor) { + if (boost::dynamic_pointer_cast( + std::move(compressor) + )) + { + // TODO: remove this LZMA testing early termination boost::filesystem::remove(compressed_file_path); return; } + // Decompress and compare clp::ReadOnlyMemoryMappedFile const memory_mapped_compressed_file{compressed_file_path}; auto const compressed_file_view{memory_mapped_compressed_file.get_view()}; decompressor->open(compressed_file_view.data(), compressed_file_view.size()); From a6d68b8f66fa2e9978cad8486d2c6b9220b78c10 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Mon, 2 Dec 2024 12:52:29 -0500 Subject: [PATCH 17/35] Update components/core/tests/test-StreamingCompression.cpp Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- components/core/tests/test-StreamingCompression.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/components/core/tests/test-StreamingCompression.cpp b/components/core/tests/test-StreamingCompression.cpp index a47012ca3..a7f2ee78c 100644 --- a/components/core/tests/test-StreamingCompression.cpp +++ b/components/core/tests/test-StreamingCompression.cpp @@ -58,7 +58,9 @@ TEST_CASE("StreamingCompression", "[StreamingCompression]") { decompressor = std::make_unique(); } - SECTION("LZMA compression") { + SECTION("LZMA compression (compression-only test)") { + // Note: Decompressor initialization is intentionally omitted as this is a + // compression-only test. See early termination logic below. compressor = std::make_unique(); } From 1519c21c7d88d4860c00e243abae6ca8443d5fa1 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Tue, 3 Dec 2024 02:34:23 -0500 Subject: [PATCH 18/35] Split LZMA_RUN from flush actions --- .../streaming_compression/lzma/Compressor.cpp | 86 ++++++++++++++----- .../streaming_compression/lzma/Compressor.hpp | 18 ++-- 2 files changed, 76 insertions(+), 28 deletions(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index 11260c6e9..e6e95e7c8 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -17,6 +17,11 @@ namespace { using clp::streaming_compression::lzma::Compressor; +auto is_flush_action(lzma_action action) { + return LZMA_SYNC_FLUSH == action || LZMA_FULL_FLUSH == action || LZMA_FULL_BARRIER == action + || LZMA_FINISH == action; +} + /** * Initialize the Lzma compression stream * @param strm A pre-allocated `lzma_stream` object @@ -42,7 +47,6 @@ auto init_lzma_encoder(lzma_stream* strm, int compression_level, size_t dict_siz // LZMA_CHECK_CRC32 instead. auto const rc{lzma_stream_encoder(strm, filters.data(), LZMA_CHECK_CRC64)}; - // Return successfully if the initialization went fine. if (LZMA_OK == rc) { return; } @@ -112,9 +116,7 @@ auto Compressor::close() -> void { throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); } - while (false == m_compression_stream_is_flushed) { - run_lzma(LZMA_FINISH); - } + flush_lzma(LZMA_FINISH); lzma_end(&m_compression_stream); // Detach output buffer from LZMA stream @@ -141,10 +143,9 @@ auto Compressor::write(char const* data, size_t data_length) -> void { // Attach input data to LZMA stream m_compression_stream.next_in = clp::size_checked_pointer_cast(data); m_compression_stream.avail_in = data_length; - m_compression_stream_is_flushed = false; while (m_compression_stream.avail_in > 0) { - run_lzma(LZMA_RUN); + encode_lzma_once(); } // All input data have been encoded so detach input data @@ -154,9 +155,7 @@ auto Compressor::write(char const* data, size_t data_length) -> void { } auto Compressor::flush() -> void { - while (false == m_compression_stream_is_flushed) { - run_lzma(LZMA_SYNC_FLUSH); - } + flush_lzma(LZMA_SYNC_FLUSH); } auto Compressor::try_get_pos(size_t& pos) const -> ErrorCode { @@ -168,29 +167,70 @@ auto Compressor::try_get_pos(size_t& pos) const -> ErrorCode { return ErrorCode_Success; } -auto Compressor::run_lzma(lzma_action action) -> void { - auto const rc = lzma_code(&m_compression_stream, action); +auto Compressor::encode_lzma_once() -> void { + if (0 == m_compression_stream.avail_in) { + return; + } + + if (0 == m_compression_stream.avail_out) { + flush_stream_output_block_buffer(); + } + + auto const rc = lzma_code(&m_compression_stream, LZMA_RUN); switch (rc) { case LZMA_OK: break; case LZMA_BUF_ERROR: // No encoding progress can be made - if (m_compression_stream.avail_in > 0) { - SPDLOG_ERROR("LZMA compressor input stream is corrupt."); - throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); - } - break; - case LZMA_STREAM_END: - m_compression_stream_is_flushed = true; - break; + SPDLOG_ERROR("LZMA compressor input stream is corrupt."); + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); default: - SPDLOG_ERROR("lzma() returned an unexpected value - {}.", static_cast(rc)); + SPDLOG_ERROR("lzma_code() returned an unexpected value - {}.", static_cast(rc)); throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); } +} - // Write output buffer to file if it's full or flushed - if (0 == m_compression_stream.avail_out || m_compression_stream_is_flushed) { - flush_stream_output_block_buffer(); +auto Compressor::flush_lzma(lzma_action flush_action) -> void { + if (false == is_flush_action(flush_action)) { + SPDLOG_ERROR( + "lzma_code() supplied with invalid flush action - {}.", + static_cast(flush_action) + ); + throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); + } + + bool flushed{false}; + while (false == flushed) { + auto const rc = lzma_code(&m_compression_stream, flush_action); + switch (rc) { + case LZMA_OK: + break; + case LZMA_STREAM_END: + // NOTE: this might not be true when multithreaded encoder is + // used with LZMA_FULL_BARRIER. For now, we skip this check. + flushed = true; + break; + case LZMA_BUF_ERROR: // No encoding progress can be made + // NOTE: this can happen if we are using LZMA_FULL_FLUSH or + // LZMA_FULL_BARRIER. These two actions keeps encoding input + // data alongside flushing already encoded but buffered data. + SPDLOG_ERROR("LZMA compressor input stream is corrupt."); + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); + default: + SPDLOG_ERROR( + "lzma_code() returned an unexpected value - {}.", + static_cast(rc) + ); + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); + } + + // Write output buffer to file if it's full + if (0 == m_compression_stream.avail_out) { + flush_stream_output_block_buffer(); + } } + + // Write the last chunk of output + flush_stream_output_block_buffer(); } auto Compressor::flush_stream_output_block_buffer() -> void { diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp index 3eb062223..045345829 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp @@ -91,15 +91,24 @@ class Compressor : public ::clp::streaming_compression::Compressor { static constexpr size_t cCompressedStreamBlockBufferSize{4096}; // 4KiB /** - * Invoke lzma_code() encoding workflow for one time with the given action. + * Invoke lzma_code() encoding workflow once with LZMA_RUN + * + * The encoded data may be buffered and thus not immediately available at + * the output block. + */ + auto encode_lzma_once() -> void; + + /** + * Invoke lzma_code() repeatedly with the given flushing action until all + * encoded data is made available at the output block * * Once flushing starts, the workflow action needs to stay the same until - * flushing is complete (aka LZMA_STREAM_END is reached). + * flushing is signaled completed by LZMA (aka LZMA_STREAM_END is reached). * See also: https://github.com/tukaani-project/xz/blob/master/src/liblzma/api/lzma/base.h#L274 * - * @param action + * @param flush_action */ - auto run_lzma(lzma_action action) -> void; + auto flush_lzma(lzma_action flush_action) -> void; /** * Flushes the current compressed data in the lzma output buffer to the @@ -112,7 +121,6 @@ class Compressor : public ::clp::streaming_compression::Compressor { // Compressed stream variables lzma_stream m_compression_stream; - bool m_compression_stream_is_flushed{true}; size_t m_dict_size{cDefaultDictionarySize}; Array m_compressed_stream_block_buffer{cCompressedStreamBlockBufferSize}; From 655bb46dcf853e41bf790444d550506c66ff6163 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Tue, 3 Dec 2024 03:26:10 -0500 Subject: [PATCH 19/35] Refactor unit test --- .../core/tests/test-StreamingCompression.cpp | 122 +++++++++--------- 1 file changed, 64 insertions(+), 58 deletions(-) diff --git a/components/core/tests/test-StreamingCompression.cpp b/components/core/tests/test-StreamingCompression.cpp index a47012ca3..a52a42ef7 100644 --- a/components/core/tests/test-StreamingCompression.cpp +++ b/components/core/tests/test-StreamingCompression.cpp @@ -4,10 +4,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -28,69 +28,39 @@ using clp::ErrorCode_Success; using clp::FileWriter; using clp::streaming_compression::Compressor; using clp::streaming_compression::Decompressor; - -TEST_CASE("StreamingCompression", "[StreamingCompression]") { - // Initialize constants - constexpr size_t cBufferSize{128L * 1024 * 1024}; // 128MB - constexpr auto cCompressionChunkSizes = std::to_array( - {cBufferSize / 100, - cBufferSize / 50, - cBufferSize / 25, - cBufferSize / 10, - cBufferSize / 5, - cBufferSize / 2, - cBufferSize} - ); - constexpr size_t cAlphabetLength{26}; - std::string const compressed_file_path{"test_streaming_compressed_file.bin"}; - - // Initialize compression devices - std::unique_ptr compressor; - std::unique_ptr decompressor; - - SECTION("ZStd single phase compression") { - compressor = std::make_unique(); - decompressor = std::make_unique(); - } - - SECTION("Passthrough compression") { - compressor = std::make_unique(); - decompressor = std::make_unique(); - } - - SECTION("LZMA compression") { - compressor = std::make_unique(); - } - - // Initialize buffers - Array uncompressed_buffer{cBufferSize}; - for (size_t i{0}; i < cBufferSize; ++i) { - uncompressed_buffer.at(i) = static_cast(('a' + (i % cAlphabetLength))); - } - - Array decompressed_buffer{cBufferSize}; - - // Compress +using std::string; +using std::string_view; + +namespace { +constexpr string_view cCompressedFilePath{"test_streaming_compressed_file.bin"}; +constexpr size_t cBufferSize{128L * 1024 * 1024}; // 128MB +constexpr auto cCompressionChunkSizes = std::to_array( + {cBufferSize / 100, + cBufferSize / 50, + cBufferSize / 25, + cBufferSize / 10, + cBufferSize / 5, + cBufferSize / 2, + cBufferSize} +); + +auto compress(std::unique_ptr compressor, char const* const src) -> void { FileWriter file_writer; - file_writer.open(compressed_file_path, FileWriter::OpenMode::CREATE_FOR_WRITING); + file_writer.open(string(cCompressedFilePath), FileWriter::OpenMode::CREATE_FOR_WRITING); compressor->open(file_writer); for (auto const chunk_size : cCompressionChunkSizes) { - compressor->write(uncompressed_buffer.data(), chunk_size); + compressor->write(src, chunk_size); } compressor->close(); file_writer.close(); +} - if (boost::dynamic_pointer_cast( - std::move(compressor) - )) - { - // TODO: remove this LZMA testing early termination - boost::filesystem::remove(compressed_file_path); - return; - } - - // Decompress and compare - clp::ReadOnlyMemoryMappedFile const memory_mapped_compressed_file{compressed_file_path}; +auto decompress_and_compare( + std::unique_ptr decompressor, + Array const& uncompressed_buffer, + Array& decompressed_buffer +) -> void { + clp::ReadOnlyMemoryMappedFile const memory_mapped_compressed_file{string(cCompressedFilePath)}; auto const compressed_file_view{memory_mapped_compressed_file.get_view()}; decompressor->open(compressed_file_view.data(), compressed_file_view.size()); @@ -123,7 +93,43 @@ TEST_CASE("StreamingCompression", "[StreamingCompression]") { ) == num_uncompressed_bytes) ); +} +} // namespace + +TEST_CASE("StreamingCompression", "[StreamingCompression]") { + // Initialize constants + constexpr size_t cAlphabetLength{26}; + + // Initialize compression devices + std::unique_ptr compressor; + std::unique_ptr decompressor; + + // Initialize buffers + Array decompressed_buffer{cBufferSize}; + Array uncompressed_buffer{cBufferSize}; + for (size_t i{0}; i < cBufferSize; ++i) { + uncompressed_buffer.at(i) = static_cast(('a' + (i % cAlphabetLength))); + } + + SECTION("ZStd single phase compression") { + compressor = std::make_unique(); + compress(std::move(compressor), uncompressed_buffer.data()); + decompressor = std::make_unique(); + decompress_and_compare(std::move(decompressor), uncompressed_buffer, decompressed_buffer); + } + + SECTION("Passthrough compression") { + compressor = std::make_unique(); + compress(std::move(compressor), uncompressed_buffer.data()); + decompressor = std::make_unique(); + decompress_and_compare(std::move(decompressor), uncompressed_buffer, decompressed_buffer); + } + + SECTION("LZMA compression") { + compressor = std::make_unique(); + compress(std::move(compressor), uncompressed_buffer.data()); + } // Cleanup - boost::filesystem::remove(compressed_file_path); + boost::filesystem::remove(string(cCompressedFilePath)); } From 4fb6c0147a054fdf7970c10ccf64d2435eb13bce Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Tue, 3 Dec 2024 03:29:06 -0500 Subject: [PATCH 20/35] Update components/core/src/clp/streaming_compression/lzma/Compressor.cpp Co-authored-by: haiqi96 <14502009+haiqi96@users.noreply.github.com> --- .../core/src/clp/streaming_compression/lzma/Compressor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index 11260c6e9..11bfdc5b5 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -21,8 +21,8 @@ using clp::streaming_compression::lzma::Compressor; * Initialize the Lzma compression stream * @param strm A pre-allocated `lzma_stream` object * @param compression_level - * @param dict_size Dictionary size that indicates how many bytes of the - * recently processed uncompressed data is kept in memory + * @param dict_size Dictionary size that specifies how many bytes of the + * recently processed uncompressed data to keep in the memory */ auto init_lzma_encoder(lzma_stream* strm, int compression_level, size_t dict_size) -> void { lzma_options_lzma options; From 2b85f01d7d6934a19df1203baa2beba94fc395f6 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Tue, 3 Dec 2024 03:35:42 -0500 Subject: [PATCH 21/35] Fix import --- components/core/tests/test-StreamingCompression.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/components/core/tests/test-StreamingCompression.cpp b/components/core/tests/test-StreamingCompression.cpp index 9d28a5ec3..2b2dfe85f 100644 --- a/components/core/tests/test-StreamingCompression.cpp +++ b/components/core/tests/test-StreamingCompression.cpp @@ -26,7 +26,6 @@ using clp::Array; using clp::ErrorCode_Success; using clp::FileWriter; using clp::streaming_compression::Compressor; -using clp::streaming_compression::Decompressor; using std::string; using std::string_view; From eda7d6c97a4da5884a5439adfc582e0dab1aabe8 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Wed, 4 Dec 2024 10:19:35 -0500 Subject: [PATCH 22/35] Apply suggestions from code review Co-authored-by: haiqi96 <14502009+haiqi96@users.noreply.github.com> --- .../core/src/clp/streaming_compression/lzma/Compressor.cpp | 4 ++-- .../core/src/clp/streaming_compression/lzma/Compressor.hpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index 071ad77b8..f5c0fedd4 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -17,13 +17,13 @@ namespace { using clp::streaming_compression::lzma::Compressor; -auto is_flush_action(lzma_action action) { +auto is_flush_action(lzma_action action) -> bool { return LZMA_SYNC_FLUSH == action || LZMA_FULL_FLUSH == action || LZMA_FULL_BARRIER == action || LZMA_FINISH == action; } /** - * Initialize the Lzma compression stream + * Initialize the LZMA compression stream * @param strm A pre-allocated `lzma_stream` object * @param compression_level * @param dict_size Dictionary size that specifies how many bytes of the diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp index 045345829..593c26835 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp @@ -100,7 +100,7 @@ class Compressor : public ::clp::streaming_compression::Compressor { /** * Invoke lzma_code() repeatedly with the given flushing action until all - * encoded data is made available at the output block + * encoded data is made available at the output block buffer * * Once flushing starts, the workflow action needs to stay the same until * flushing is signaled completed by LZMA (aka LZMA_STREAM_END is reached). @@ -111,7 +111,7 @@ class Compressor : public ::clp::streaming_compression::Compressor { auto flush_lzma(lzma_action flush_action) -> void; /** - * Flushes the current compressed data in the lzma output buffer to the + * Flushes the current compressed data in the LZMA output buffer to the * output file handler. Reset the compression buffer to receive new data. */ auto flush_stream_output_block_buffer() -> void; From 4164a9d43731dcae330d87338e19882bbc437e62 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Wed, 4 Dec 2024 11:01:41 -0500 Subject: [PATCH 23/35] Address review concern --- .../streaming_compression/lzma/Compressor.cpp | 59 +++++++++++-------- .../streaming_compression/lzma/Compressor.hpp | 18 +++--- .../core/tests/test-StreamingCompression.cpp | 2 + 3 files changed, 46 insertions(+), 33 deletions(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index f5c0fedd4..50a813ea4 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -144,12 +144,11 @@ auto Compressor::write(char const* data, size_t data_length) -> void { m_compression_stream.next_in = clp::size_checked_pointer_cast(data); m_compression_stream.avail_in = data_length; - while (m_compression_stream.avail_in > 0) { - encode_lzma_once(); - } + encode_lzma(); // All input data have been encoded so detach input data m_compression_stream.next_in = nullptr; + m_compression_stream.avail_in = 0; m_uncompressed_stream_pos += data_length; } @@ -167,26 +166,31 @@ auto Compressor::try_get_pos(size_t& pos) const -> ErrorCode { return ErrorCode_Success; } -auto Compressor::encode_lzma_once() -> void { - if (0 == m_compression_stream.avail_in) { - return; - } +auto Compressor::encode_lzma() -> void { + while (m_compression_stream.avail_in > 0) { + // Write output buffer to file if it's full + if (0 == m_compression_stream.avail_out) { + flush_stream_output_block_buffer(); + } - if (0 == m_compression_stream.avail_out) { - flush_stream_output_block_buffer(); + auto const rc = lzma_code(&m_compression_stream, LZMA_RUN); + switch (rc) { + case LZMA_OK: + break; + case LZMA_BUF_ERROR: // No encoding progress can be made + SPDLOG_ERROR("LZMA compressor input stream is corrupt."); + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); + default: + SPDLOG_ERROR( + "lzma_code() returned an unexpected value - {}.", + static_cast(rc) + ); + throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); + } } - auto const rc = lzma_code(&m_compression_stream, LZMA_RUN); - switch (rc) { - case LZMA_OK: - break; - case LZMA_BUF_ERROR: // No encoding progress can be made - SPDLOG_ERROR("LZMA compressor input stream is corrupt."); - throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); - default: - SPDLOG_ERROR("lzma_code() returned an unexpected value - {}.", static_cast(rc)); - throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); - } + // Write the last chunk of output + flush_stream_output_block_buffer(); } auto Compressor::flush_lzma(lzma_action flush_action) -> void { @@ -198,8 +202,18 @@ auto Compressor::flush_lzma(lzma_action flush_action) -> void { throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } + /** + * Once flushing starts, the workflow action needs to stay the same until + * flushing is signaled completed by LZMA (aka LZMA_STREAM_END is reached). + * See also: https://github.com/tukaani-project/xz/blob/master/src/liblzma/api/lzma/base.h#L274 + */ bool flushed{false}; while (false == flushed) { + // Write output buffer to file if it's full + if (0 == m_compression_stream.avail_out) { + flush_stream_output_block_buffer(); + } + auto const rc = lzma_code(&m_compression_stream, flush_action); switch (rc) { case LZMA_OK: @@ -222,11 +236,6 @@ auto Compressor::flush_lzma(lzma_action flush_action) -> void { ); throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); } - - // Write output buffer to file if it's full - if (0 == m_compression_stream.avail_out) { - flush_stream_output_block_buffer(); - } } // Write the last chunk of output diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp index 593c26835..c8c12b9cb 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp @@ -91,22 +91,24 @@ class Compressor : public ::clp::streaming_compression::Compressor { static constexpr size_t cCompressedStreamBlockBufferSize{4096}; // 4KiB /** - * Invoke lzma_code() encoding workflow once with LZMA_RUN + * Invoke lzma_code() repeatedly with LZMA_RUN until the input is exhausted * - * The encoded data may be buffered and thus not immediately available at - * the output block. + * At the end of the workflow, the last bytes of encoded data may still be + * buffered and thus not immediately available at the output block buffer. + * + * Assumes input stream and output block buffer are both in valid states. + * @throw `OperationFailed` if LZMA returns an unexpected error value */ - auto encode_lzma_once() -> void; + auto encode_lzma() -> void; /** * Invoke lzma_code() repeatedly with the given flushing action until all * encoded data is made available at the output block buffer * - * Once flushing starts, the workflow action needs to stay the same until - * flushing is signaled completed by LZMA (aka LZMA_STREAM_END is reached). - * See also: https://github.com/tukaani-project/xz/blob/master/src/liblzma/api/lzma/base.h#L274 - * + * Assumes input stream and output block buffer are both in valid states. * @param flush_action + * @throw `OperationFailed` if the provided action is not an LZMA flush + * action, or if LZMA returns an unexpected error value */ auto flush_lzma(lzma_action flush_action) -> void; diff --git a/components/core/tests/test-StreamingCompression.cpp b/components/core/tests/test-StreamingCompression.cpp index 2b2dfe85f..a52a42ef7 100644 --- a/components/core/tests/test-StreamingCompression.cpp +++ b/components/core/tests/test-StreamingCompression.cpp @@ -16,6 +16,7 @@ #include "../src/clp/FileWriter.hpp" #include "../src/clp/ReadOnlyMemoryMappedFile.hpp" #include "../src/clp/streaming_compression/Compressor.hpp" +#include "../src/clp/streaming_compression/Decompressor.hpp" #include "../src/clp/streaming_compression/lzma/Compressor.hpp" #include "../src/clp/streaming_compression/passthrough/Compressor.hpp" #include "../src/clp/streaming_compression/passthrough/Decompressor.hpp" @@ -26,6 +27,7 @@ using clp::Array; using clp::ErrorCode_Success; using clp::FileWriter; using clp::streaming_compression::Compressor; +using clp::streaming_compression::Decompressor; using std::string; using std::string_view; From 8ab0653c8e555e3e1d62d9631c7077410d3f475b Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Wed, 4 Dec 2024 20:24:52 -0500 Subject: [PATCH 24/35] Add a comment --- .../core/src/clp/streaming_compression/lzma/Compressor.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index 50a813ea4..65445061a 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -117,6 +117,8 @@ auto Compressor::close() -> void { } flush_lzma(LZMA_FINISH); + + // Deallocates LZMA stream's internal data structures lzma_end(&m_compression_stream); // Detach output buffer from LZMA stream From c436f214669b5895f64cf429be383cf48f3e0f6a Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Fri, 6 Dec 2024 00:44:25 -0500 Subject: [PATCH 25/35] Apply suggestions from code review Co-authored-by: haiqi96 <14502009+haiqi96@users.noreply.github.com> --- .../core/src/clp/streaming_compression/lzma/Compressor.cpp | 5 ++++- .../core/src/clp/streaming_compression/lzma/Compressor.hpp | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index 65445061a..8d518249c 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -156,6 +156,9 @@ auto Compressor::write(char const* data, size_t data_length) -> void { } auto Compressor::flush() -> void { + if (nullptr == m_compressed_stream_file_writer) { + throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); + } flush_lzma(LZMA_SYNC_FLUSH); } @@ -228,7 +231,7 @@ auto Compressor::flush_lzma(lzma_action flush_action) -> void { case LZMA_BUF_ERROR: // No encoding progress can be made // NOTE: this can happen if we are using LZMA_FULL_FLUSH or // LZMA_FULL_BARRIER. These two actions keeps encoding input - // data alongside flushing already encoded but buffered data. + // data alongside flushing buffered encoded data. SPDLOG_ERROR("LZMA compressor input stream is corrupt."); throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); default: diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp index c8c12b9cb..323464545 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp @@ -94,7 +94,7 @@ class Compressor : public ::clp::streaming_compression::Compressor { * Invoke lzma_code() repeatedly with LZMA_RUN until the input is exhausted * * At the end of the workflow, the last bytes of encoded data may still be - * buffered and thus not immediately available at the output block buffer. + * buffered in the LZMA stream and thus not immediately available at the output block buffer. * * Assumes input stream and output block buffer are both in valid states. * @throw `OperationFailed` if LZMA returns an unexpected error value @@ -113,8 +113,8 @@ class Compressor : public ::clp::streaming_compression::Compressor { auto flush_lzma(lzma_action flush_action) -> void; /** - * Flushes the current compressed data in the LZMA output buffer to the - * output file handler. Reset the compression buffer to receive new data. + * Flushes the current compressed data in the output block buffer to the + * output file handler. Reset the output block buffer to receive new data. */ auto flush_stream_output_block_buffer() -> void; From 7bd34d256797514c0de5e19570f8bdf8d02cc6b1 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Fri, 6 Dec 2024 01:06:36 -0500 Subject: [PATCH 26/35] Update comment to 100-char length --- .../streaming_compression/lzma/Compressor.cpp | 30 ++++++++----------- .../streaming_compression/lzma/Compressor.hpp | 12 ++++---- 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index 8d518249c..6c4a29206 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -41,20 +41,18 @@ auto init_lzma_encoder(lzma_stream* strm, int compression_level, size_t dict_siz {.id = LZMA_VLI_UNKNOWN, .options = nullptr}, }}; - // Initialize the encoder using a preset. Set the integrity to check - // to CRC64, which is the default in the xz command line tool. If - // the .xz file needs to be decompressed with XZ Embedded, use - // LZMA_CHECK_CRC32 instead. + // Initialize the encoder using a preset. Set the integrity to check to CRC64, which is the + // default in the xz command line tool. If the .xz file needs to be decompressed with + // XZ-Embedded, use LZMA_CHECK_CRC32 instead. auto const rc{lzma_stream_encoder(strm, filters.data(), LZMA_CHECK_CRC64)}; if (LZMA_OK == rc) { return; } - // Something went wrong. The possible errors are documented in - // lzma/container.h (src/liblzma/api/lzma/container.h in the source - // package or e.g. /usr/include/lzma/container.h depending on the - // install prefix). + // Something went wrong. The possible errors are documented in lzma/container.h + // (src/liblzma/api/lzma/container.h in the source package or e.g. /usr/include/lzma/container.h + // depending on the install prefix). char const* msg{nullptr}; switch (rc) { case LZMA_MEM_ERROR: @@ -193,9 +191,6 @@ auto Compressor::encode_lzma() -> void { throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); } } - - // Write the last chunk of output - flush_stream_output_block_buffer(); } auto Compressor::flush_lzma(lzma_action flush_action) -> void { @@ -208,8 +203,8 @@ auto Compressor::flush_lzma(lzma_action flush_action) -> void { } /** - * Once flushing starts, the workflow action needs to stay the same until - * flushing is signaled completed by LZMA (aka LZMA_STREAM_END is reached). + * Once flushing starts, the workflow action needs to stay the same until flushing is signaled + * complete by LZMA (aka LZMA_STREAM_END is reached). * See also: https://github.com/tukaani-project/xz/blob/master/src/liblzma/api/lzma/base.h#L274 */ bool flushed{false}; @@ -224,14 +219,13 @@ auto Compressor::flush_lzma(lzma_action flush_action) -> void { case LZMA_OK: break; case LZMA_STREAM_END: - // NOTE: this might not be true when multithreaded encoder is - // used with LZMA_FULL_BARRIER. For now, we skip this check. + // NOTE: this might not be true when multithreaded encoder is used with + // LZMA_FULL_BARRIER. For now, we skip this check. flushed = true; break; case LZMA_BUF_ERROR: // No encoding progress can be made - // NOTE: this can happen if we are using LZMA_FULL_FLUSH or - // LZMA_FULL_BARRIER. These two actions keeps encoding input - // data alongside flushing buffered encoded data. + // NOTE: this can happen if we are using LZMA_FULL_FLUSH or LZMA_FULL_BARRIER. These + // two actions keeps encoding input data alongside flushing buffered encoded data. SPDLOG_ERROR("LZMA compressor input stream is corrupt."); throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); default: diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp index 323464545..286819893 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp @@ -93,8 +93,8 @@ class Compressor : public ::clp::streaming_compression::Compressor { /** * Invoke lzma_code() repeatedly with LZMA_RUN until the input is exhausted * - * At the end of the workflow, the last bytes of encoded data may still be - * buffered in the LZMA stream and thus not immediately available at the output block buffer. + * At the end of the workflow, the last bytes of encoded data may still be buffered in the LZMA + * stream and thus not immediately available at the output block buffer. * * Assumes input stream and output block buffer are both in valid states. * @throw `OperationFailed` if LZMA returns an unexpected error value @@ -102,8 +102,8 @@ class Compressor : public ::clp::streaming_compression::Compressor { auto encode_lzma() -> void; /** - * Invoke lzma_code() repeatedly with the given flushing action until all - * encoded data is made available at the output block buffer + * Invoke lzma_code() repeatedly with the given flushing action until all encoded data is made + * available at the output block buffer * * Assumes input stream and output block buffer are both in valid states. * @param flush_action @@ -113,8 +113,8 @@ class Compressor : public ::clp::streaming_compression::Compressor { auto flush_lzma(lzma_action flush_action) -> void; /** - * Flushes the current compressed data in the output block buffer to the - * output file handler. Reset the output block buffer to receive new data. + * Flushes the current compressed data in the output block buffer to the output file handler. + * Reset the output block buffer to receive new data. */ auto flush_stream_output_block_buffer() -> void; From efd2b2759088c874c2d0a1191b8e4e1d1d16105f Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Tue, 10 Dec 2024 23:05:31 -0500 Subject: [PATCH 27/35] Fix according to coding style guidelines --- .../streaming_compression/lzma/Compressor.cpp | 16 ++++++++++------ .../streaming_compression/lzma/Compressor.hpp | 18 +++++++++--------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index 6c4a29206..dc2ca222f 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -17,18 +17,22 @@ namespace { using clp::streaming_compression::lzma::Compressor; -auto is_flush_action(lzma_action action) -> bool { - return LZMA_SYNC_FLUSH == action || LZMA_FULL_FLUSH == action || LZMA_FULL_BARRIER == action - || LZMA_FINISH == action; -} +auto is_flush_action(lzma_action action) -> bool; /** - * Initialize the LZMA compression stream + * Initializes the LZMA compression stream * @param strm A pre-allocated `lzma_stream` object * @param compression_level * @param dict_size Dictionary size that specifies how many bytes of the * recently processed uncompressed data to keep in the memory */ +auto init_lzma_encoder(lzma_stream* strm, int compression_level, size_t dict_size) -> void; + +auto is_flush_action(lzma_action action) -> bool { + return LZMA_SYNC_FLUSH == action || LZMA_FULL_FLUSH == action || LZMA_FULL_BARRIER == action + || LZMA_FINISH == action; +} + auto init_lzma_encoder(lzma_stream* strm, int compression_level, size_t dict_size) -> void { lzma_options_lzma options; if (0 != lzma_lzma_preset(&options, compression_level)) { @@ -41,7 +45,7 @@ auto init_lzma_encoder(lzma_stream* strm, int compression_level, size_t dict_siz {.id = LZMA_VLI_UNKNOWN, .options = nullptr}, }}; - // Initialize the encoder using a preset. Set the integrity to check to CRC64, which is the + // Initializes the encoder using a preset. Set the integrity to check to CRC64, which is the // default in the xz command line tool. If the .xz file needs to be decompressed with // XZ-Embedded, use LZMA_CHECK_CRC32 instead. auto const rc{lzma_stream_encoder(strm, filters.data(), LZMA_CHECK_CRC64)}; diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp index 286819893..b4255cc1c 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp @@ -43,6 +43,13 @@ class Compressor : public ::clp::streaming_compression::Compressor { Compressor(Compressor&&) noexcept = default; auto operator=(Compressor&&) noexcept -> Compressor& = default; + /** + * Initializes the compression stream with the given compression level + * @param file_writer + * @param compression_level + */ + auto open(FileWriter& file_writer, int compression_level) -> void; + // Methods implementing the WriterInterface /** * Writes the given data to the compressor @@ -80,18 +87,11 @@ class Compressor : public ::clp::streaming_compression::Compressor { this->open(file_writer, cDefaultCompressionLevel); } - /** - * Initializes the compression stream with the given compression level - * @param file_writer - * @param compression_level - */ - auto open(FileWriter& file_writer, int compression_level) -> void; - private: static constexpr size_t cCompressedStreamBlockBufferSize{4096}; // 4KiB /** - * Invoke lzma_code() repeatedly with LZMA_RUN until the input is exhausted + * Invokes lzma_code() repeatedly with LZMA_RUN until the input is exhausted * * At the end of the workflow, the last bytes of encoded data may still be buffered in the LZMA * stream and thus not immediately available at the output block buffer. @@ -102,7 +102,7 @@ class Compressor : public ::clp::streaming_compression::Compressor { auto encode_lzma() -> void; /** - * Invoke lzma_code() repeatedly with the given flushing action until all encoded data is made + * Invokes lzma_code() repeatedly with the given flushing action until all encoded data is made * available at the output block buffer * * Assumes input stream and output block buffer are both in valid states. From c530f9287ecf51350220bfca501347f3f79b1d5b Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Wed, 11 Dec 2024 21:10:58 -0500 Subject: [PATCH 28/35] Apply suggestions from code review Co-authored-by: davidlion --- components/core/tools/scripts/lib_install/liblzma.sh | 2 +- .../lib_install/ubuntu-focal/install-prebuilt-packages.sh | 2 +- .../lib_install/ubuntu-jammy/install-prebuilt-packages.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/components/core/tools/scripts/lib_install/liblzma.sh b/components/core/tools/scripts/lib_install/liblzma.sh index 28766eced..a73ff79b9 100755 --- a/components/core/tools/scripts/lib_install/liblzma.sh +++ b/components/core/tools/scripts/lib_install/liblzma.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Exit on any error set -e diff --git a/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh b/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh index f1e2ee4ff..b373cbe4d 100755 --- a/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh +++ b/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh @@ -19,8 +19,8 @@ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ git \ libcurl4 \ libcurl4-openssl-dev \ - libmariadb-dev \ liblzma-dev \ + libmariadb-dev \ libssl-dev \ make \ openjdk-11-jdk \ diff --git a/components/core/tools/scripts/lib_install/ubuntu-jammy/install-prebuilt-packages.sh b/components/core/tools/scripts/lib_install/ubuntu-jammy/install-prebuilt-packages.sh index 4911a6a98..e2e17283b 100755 --- a/components/core/tools/scripts/lib_install/ubuntu-jammy/install-prebuilt-packages.sh +++ b/components/core/tools/scripts/lib_install/ubuntu-jammy/install-prebuilt-packages.sh @@ -19,8 +19,8 @@ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ libboost-program-options-dev \ libcurl4 \ libcurl4-openssl-dev \ - libmariadb-dev \ liblzma-dev \ + libmariadb-dev \ libssl-dev \ openjdk-11-jdk \ pkg-config \ From e751ee6f5fe3d757713520b494a2e23edc1a6453 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Thu, 12 Dec 2024 01:27:10 -0500 Subject: [PATCH 29/35] Update CMakeLists.txt --- components/core/CMakeLists.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index 312c6e2ef..9d0c51c9f 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -11,16 +11,16 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # Set general compressor set(GENERAL_COMPRESSOR "zstd" CACHE STRING "The general-purpose compressor used as the 2nd-stage compressor") -set_property(CACHE GENERAL_COMPRESSOR PROPERTY STRINGS passthrough zstd lzma) -if ("${GENERAL_COMPRESSOR}" STREQUAL "passthrough") +set_property(CACHE GENERAL_COMPRESSOR PROPERTY STRINGS lzma passthrough zstd) +if ("${GENERAL_COMPRESSOR}" STREQUAL "lzma") + add_definitions(-DUSE_LZMA_COMPRESSION=1) + message(STATUS "Using Lempel–Ziv–Markov chain Algorithm compression") +elseif ("${GENERAL_COMPRESSOR}" STREQUAL "passthrough") add_definitions(-DUSE_PASSTHROUGH_COMPRESSION=1) message(STATUS "Using passthrough compression") elseif ("${GENERAL_COMPRESSOR}" STREQUAL "zstd") add_definitions(-DUSE_ZSTD_COMPRESSION=1) message(STATUS "Using Zstandard compression") -elseif ("${GENERAL_COMPRESSOR}" STREQUAL "lzma") - add_definitions(-DUSE_LZMA_COMPRESSION=1) - message(STATUS "Using Lempel–Ziv–Markov chain Algorithm compression") else() message(SEND_ERROR "GENERAL_COMPRESSOR=${GENERAL_COMPRESSOR} is unimplemented.") endif() @@ -228,17 +228,17 @@ else() endif() # Find and setup LZMA Library -# Notice that we don't have support to switch between static and shared libraries. -# TODO: add a script in ./cmake/Modules to resolve .a vs. .so +# TODO: Add support to enforce static linking against LZMA when desired. For a hack, we can set +# `CMAKE_FIND_LIBRARY_SUFFIXES` to ask CMake to prefer the static lib over the shared one. find_package(LibLZMA REQUIRED) if(LIBLZMA_FOUND) message(STATUS "Found Lzma ${LIBLZMA_VERSION_STRING}") message(STATUS "Lzma library location: ${LIBLZMA_LIBRARIES}") + message(STATUS "Lzma Include Dir: ${LIBLZMA_INCLUDE_DIRS}") else() message(FATAL_ERROR "Could not find ${CLP_LIBS_STRING} libraries for Lzma") endif() include_directories(${LIBLZMA_INCLUDE_DIRS}) -message("Lzma Include Dir: ${LIBLZMA_INCLUDE_DIRS}") # sqlite dependencies set(sqlite_DYNAMIC_LIBS "dl;m;pthread") From 1c5efcdbb3567c16d9ed14a02eab50525f8ea426 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Thu, 12 Dec 2024 01:30:35 -0500 Subject: [PATCH 30/35] Address review concern --- .../clp/streaming_compression/lzma/Compressor.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index dc2ca222f..1330da53f 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -21,19 +21,19 @@ auto is_flush_action(lzma_action action) -> bool; /** * Initializes the LZMA compression stream - * @param strm A pre-allocated `lzma_stream` object + * @param stream A pre-allocated `lzma_stream` object * @param compression_level * @param dict_size Dictionary size that specifies how many bytes of the * recently processed uncompressed data to keep in the memory */ -auto init_lzma_encoder(lzma_stream* strm, int compression_level, size_t dict_size) -> void; +auto init_lzma_encoder(lzma_stream* stream, int compression_level, size_t dict_size) -> void; auto is_flush_action(lzma_action action) -> bool { return LZMA_SYNC_FLUSH == action || LZMA_FULL_FLUSH == action || LZMA_FULL_BARRIER == action || LZMA_FINISH == action; } -auto init_lzma_encoder(lzma_stream* strm, int compression_level, size_t dict_size) -> void { +auto init_lzma_encoder(lzma_stream* stream, int compression_level, size_t dict_size) -> void { lzma_options_lzma options; if (0 != lzma_lzma_preset(&options, compression_level)) { SPDLOG_ERROR("Failed to initialize LZMA options' compression level."); @@ -48,7 +48,7 @@ auto init_lzma_encoder(lzma_stream* strm, int compression_level, size_t dict_siz // Initializes the encoder using a preset. Set the integrity to check to CRC64, which is the // default in the xz command line tool. If the .xz file needs to be decompressed with // XZ-Embedded, use LZMA_CHECK_CRC32 instead. - auto const rc{lzma_stream_encoder(strm, filters.data(), LZMA_CHECK_CRC64)}; + auto const rc = lzma_stream_encoder(stream, filters.data(), LZMA_CHECK_CRC64); if (LZMA_OK == rc) { return; @@ -71,8 +71,11 @@ auto init_lzma_encoder(lzma_stream* strm, int compression_level, size_t dict_siz msg = "Specified integrity check is not supported"; break; + case LZMA_PROG_ERROR: + msg = "Input arguments are not sane"; + break; + default: - // This is most likely LZMA_PROG_ERROR indicating a bug in liblzma msg = "Unknown error"; break; } From 856c7cb544a8122b8c9e7e9063d077a587da9913 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Thu, 12 Dec 2024 01:57:32 -0500 Subject: [PATCH 31/35] Update TODO --- components/core/CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index 9d0c51c9f..3b5f9aff4 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -228,8 +228,10 @@ else() endif() # Find and setup LZMA Library -# TODO: Add support to enforce static linking against LZMA when desired. For a hack, we can set -# `CMAKE_FIND_LIBRARY_SUFFIXES` to ask CMake to prefer the static lib over the shared one. +# TODO: Add a script in ./cmake/Modules to properly import LZMA in find_package()'s module mode +if(CLP_USE_STATIC_LIBS) + set(LibLZMA_USE_STATIC_LIBS ON) +endif() find_package(LibLZMA REQUIRED) if(LIBLZMA_FOUND) message(STATUS "Found Lzma ${LIBLZMA_VERSION_STRING}") From 43e22d2ec5a4480b6f02a0be31eec6f8efc5406c Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Thu, 12 Dec 2024 01:59:24 -0500 Subject: [PATCH 32/35] Case fix --- components/core/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index 3b5f9aff4..160f6766d 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -230,7 +230,7 @@ endif() # Find and setup LZMA Library # TODO: Add a script in ./cmake/Modules to properly import LZMA in find_package()'s module mode if(CLP_USE_STATIC_LIBS) - set(LibLZMA_USE_STATIC_LIBS ON) + set(LIBLZMA_USE_STATIC_LIBS ON) endif() find_package(LibLZMA REQUIRED) if(LIBLZMA_FOUND) From 829a6b2d7c8bde7011c451022db7926593335ebd Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Thu, 12 Dec 2024 12:02:42 -0500 Subject: [PATCH 33/35] Remove unnecessary function inline comments --- .../streaming_compression/lzma/Compressor.cpp | 113 ++++++++++-------- .../streaming_compression/lzma/Compressor.hpp | 5 + .../core/tests/test-StreamingCompression.cpp | 15 ++- 3 files changed, 76 insertions(+), 57 deletions(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index 1330da53f..7edd61ae9 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -8,6 +8,7 @@ #include #include +#include "../../Array.hpp" #include "../../ErrorCode.hpp" #include "../../FileWriter.hpp" #include "../../TraceableException.hpp" @@ -15,25 +16,68 @@ #include "Constants.hpp" namespace { +using clp::Array; using clp::streaming_compression::lzma::Compressor; +/** + * Attaches a pre-allocated block buffer to encoder's output stream + * + * Subsequent calls to this function resets the output buffer to its initial state. + * @param stream + * @param out_buffer + */ +auto attach_stream_output_buffer(lzma_stream* stream, Array& out_buffer) -> void; + +auto detach_stream_input_src(lzma_stream* stream) -> void; + +auto detach_stream_output_buffer(lzma_stream* stream) -> void; + auto is_flush_action(lzma_action action) -> bool; /** - * Initializes the LZMA compression stream - * @param stream A pre-allocated `lzma_stream` object + * Initializes an LZMA compression encoder and its streams + * + * @param stream A pre-allocated `lzma_stream` object that is to be initialized * @param compression_level * @param dict_size Dictionary size that specifies how many bytes of the * recently processed uncompressed data to keep in the memory + * @param check Type of integrity check calculated from the uncompressed data. LZMA_CHECK_CRC64 is + * the default in the xz command line tool. If the .xz file needs to be decompressed + * with XZ-Embedded, use LZMA_CHECK_CRC32 instead. */ -auto init_lzma_encoder(lzma_stream* stream, int compression_level, size_t dict_size) -> void; +auto init_lzma_encoder( + lzma_stream* stream, + int compression_level, + size_t dict_size, + lzma_check check = LZMA_CHECK_CRC64 +) -> void; + +auto attach_stream_output_buffer(lzma_stream* stream, Array& out_buffer) -> void { + stream->next_out = out_buffer.data(); + stream->avail_out = out_buffer.size(); +} + +auto detach_stream_input_src(lzma_stream* stream) -> void { + stream->next_in = nullptr; + stream->avail_in = 0; +} + +auto detach_stream_output_buffer(lzma_stream* stream) -> void { + stream->next_out = nullptr; + stream->avail_out = 0; +} auto is_flush_action(lzma_action action) -> bool { return LZMA_SYNC_FLUSH == action || LZMA_FULL_FLUSH == action || LZMA_FULL_BARRIER == action || LZMA_FINISH == action; } -auto init_lzma_encoder(lzma_stream* stream, int compression_level, size_t dict_size) -> void { +auto init_lzma_encoder( + lzma_stream* stream, + int compression_level, + size_t dict_size, + lzma_check check +) -> void { lzma_options_lzma options; if (0 != lzma_lzma_preset(&options, compression_level)) { SPDLOG_ERROR("Failed to initialize LZMA options' compression level."); @@ -45,18 +89,11 @@ auto init_lzma_encoder(lzma_stream* stream, int compression_level, size_t dict_s {.id = LZMA_VLI_UNKNOWN, .options = nullptr}, }}; - // Initializes the encoder using a preset. Set the integrity to check to CRC64, which is the - // default in the xz command line tool. If the .xz file needs to be decompressed with - // XZ-Embedded, use LZMA_CHECK_CRC32 instead. - auto const rc = lzma_stream_encoder(stream, filters.data(), LZMA_CHECK_CRC64); - + auto const rc = lzma_stream_encoder(stream, filters.data(), check); if (LZMA_OK == rc) { return; } - // Something went wrong. The possible errors are documented in lzma/container.h - // (src/liblzma/api/lzma/container.h in the source package or e.g. /usr/include/lzma/container.h - // depending on the install prefix). char const* msg{nullptr}; switch (rc) { case LZMA_MEM_ERROR: @@ -97,17 +134,9 @@ auto Compressor::open(FileWriter& file_writer, int compression_level) -> void { m_compression_stream = LZMA_STREAM_INIT; init_lzma_encoder(&m_compression_stream, compression_level, m_dict_size); - - // No input upon initialization - m_compression_stream.next_in = nullptr; - m_compression_stream.avail_in = 0; - - // Attach output buffer to LZMA stream - m_compression_stream.next_out = m_compressed_stream_block_buffer.data(); - m_compression_stream.avail_out = m_compressed_stream_block_buffer.size(); - + detach_stream_input_src(&m_compression_stream); + attach_stream_output_buffer(&m_compression_stream, m_compressed_stream_block_buffer); m_compressed_stream_file_writer = &file_writer; - m_uncompressed_stream_pos = 0; } @@ -122,14 +151,9 @@ auto Compressor::close() -> void { } flush_lzma(LZMA_FINISH); - // Deallocates LZMA stream's internal data structures lzma_end(&m_compression_stream); - - // Detach output buffer from LZMA stream - m_compression_stream.next_out = nullptr; - m_compression_stream.avail_out = 0; - + detach_stream_output_buffer(&m_compression_stream); m_compressed_stream_file_writer = nullptr; } @@ -139,7 +163,6 @@ auto Compressor::write(char const* data, size_t data_length) -> void { } if (0 == data_length) { - // Nothing needs to be done because we do not need to compress anything return; } @@ -147,16 +170,10 @@ auto Compressor::write(char const* data, size_t data_length) -> void { throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } - // Attach input data to LZMA stream m_compression_stream.next_in = clp::size_checked_pointer_cast(data); m_compression_stream.avail_in = data_length; - encode_lzma(); - - // All input data have been encoded so detach input data - m_compression_stream.next_in = nullptr; - m_compression_stream.avail_in = 0; - + detach_stream_input_src(&m_compression_stream); m_uncompressed_stream_pos += data_length; } @@ -178,7 +195,6 @@ auto Compressor::try_get_pos(size_t& pos) const -> ErrorCode { auto Compressor::encode_lzma() -> void { while (m_compression_stream.avail_in > 0) { - // Write output buffer to file if it's full if (0 == m_compression_stream.avail_out) { flush_stream_output_block_buffer(); } @@ -187,8 +203,10 @@ auto Compressor::encode_lzma() -> void { switch (rc) { case LZMA_OK: break; - case LZMA_BUF_ERROR: // No encoding progress can be made - SPDLOG_ERROR("LZMA compressor input stream is corrupt."); + case LZMA_BUF_ERROR: + SPDLOG_ERROR( + "LZMA compressor input stream is corrupt. No encoding progress can be made." + ); throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); default: SPDLOG_ERROR( @@ -209,14 +227,8 @@ auto Compressor::flush_lzma(lzma_action flush_action) -> void { throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } - /** - * Once flushing starts, the workflow action needs to stay the same until flushing is signaled - * complete by LZMA (aka LZMA_STREAM_END is reached). - * See also: https://github.com/tukaani-project/xz/blob/master/src/liblzma/api/lzma/base.h#L274 - */ bool flushed{false}; while (false == flushed) { - // Write output buffer to file if it's full if (0 == m_compression_stream.avail_out) { flush_stream_output_block_buffer(); } @@ -230,10 +242,12 @@ auto Compressor::flush_lzma(lzma_action flush_action) -> void { // LZMA_FULL_BARRIER. For now, we skip this check. flushed = true; break; - case LZMA_BUF_ERROR: // No encoding progress can be made + case LZMA_BUF_ERROR: // NOTE: this can happen if we are using LZMA_FULL_FLUSH or LZMA_FULL_BARRIER. These // two actions keeps encoding input data alongside flushing buffered encoded data. - SPDLOG_ERROR("LZMA compressor input stream is corrupt."); + SPDLOG_ERROR( + "LZMA compressor input stream is corrupt. No encoding progress can be made." + ); throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); default: SPDLOG_ERROR( @@ -244,20 +258,17 @@ auto Compressor::flush_lzma(lzma_action flush_action) -> void { } } - // Write the last chunk of output flush_stream_output_block_buffer(); } auto Compressor::flush_stream_output_block_buffer() -> void { if (cCompressedStreamBlockBufferSize == m_compression_stream.avail_out) { - // Nothing to flush return; } m_compressed_stream_file_writer->write( clp::size_checked_pointer_cast(m_compressed_stream_block_buffer.data()), cCompressedStreamBlockBufferSize - m_compression_stream.avail_out ); - m_compression_stream.next_out = m_compressed_stream_block_buffer.data(); - m_compression_stream.avail_out = cCompressedStreamBlockBufferSize; + attach_stream_output_buffer(&m_compression_stream, m_compressed_stream_block_buffer); } } // namespace clp::streaming_compression::lzma diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp index b4255cc1c..986137aa2 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp @@ -45,6 +45,7 @@ class Compressor : public ::clp::streaming_compression::Compressor { /** * Initializes the compression stream with the given compression level + * * @param file_writer * @param compression_level */ @@ -105,6 +106,10 @@ class Compressor : public ::clp::streaming_compression::Compressor { * Invokes lzma_code() repeatedly with the given flushing action until all encoded data is made * available at the output block buffer * + * Once flushing starts, the workflow action needs to stay the same until flushing is signaled + * complete by LZMA (aka LZMA_STREAM_END is reached). + * See also: https://github.com/tukaani-project/xz/blob/master/src/liblzma/api/lzma/base.h#L274 + * * Assumes input stream and output block buffer are both in valid states. * @param flush_action * @throw `OperationFailed` if the provided action is not an LZMA flush diff --git a/components/core/tests/test-StreamingCompression.cpp b/components/core/tests/test-StreamingCompression.cpp index a52a42ef7..4076eb88f 100644 --- a/components/core/tests/test-StreamingCompression.cpp +++ b/components/core/tests/test-StreamingCompression.cpp @@ -44,7 +44,15 @@ constexpr auto cCompressionChunkSizes = std::to_array( cBufferSize} ); -auto compress(std::unique_ptr compressor, char const* const src) -> void { +auto compress(std::unique_ptr compressor, char const* src) -> void; + +auto decompress_and_compare( + std::unique_ptr decompressor, + Array const& uncompressed_buffer, + Array& decompressed_buffer +) -> void; + +auto compress(std::unique_ptr compressor, char const* src) -> void { FileWriter file_writer; file_writer.open(string(cCompressedFilePath), FileWriter::OpenMode::CREATE_FOR_WRITING); compressor->open(file_writer); @@ -84,7 +92,6 @@ auto decompress_and_compare( num_uncompressed_bytes += chunk_size; } - // Sanity check REQUIRE( (std::accumulate( cCompressionChunkSizes.cbegin(), @@ -97,14 +104,11 @@ auto decompress_and_compare( } // namespace TEST_CASE("StreamingCompression", "[StreamingCompression]") { - // Initialize constants constexpr size_t cAlphabetLength{26}; - // Initialize compression devices std::unique_ptr compressor; std::unique_ptr decompressor; - // Initialize buffers Array decompressed_buffer{cBufferSize}; Array uncompressed_buffer{cBufferSize}; for (size_t i{0}; i < cBufferSize; ++i) { @@ -130,6 +134,5 @@ TEST_CASE("StreamingCompression", "[StreamingCompression]") { compress(std::move(compressor), uncompressed_buffer.data()); } - // Cleanup boost::filesystem::remove(string(cCompressedFilePath)); } From 81e180795cccd1b1d7380f989f0068e669d19b6b Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Thu, 12 Dec 2024 12:07:56 -0500 Subject: [PATCH 34/35] Improve comment --- .../core/src/clp/streaming_compression/lzma/Compressor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index 7edd61ae9..36a5038b4 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -238,7 +238,7 @@ auto Compressor::flush_lzma(lzma_action flush_action) -> void { case LZMA_OK: break; case LZMA_STREAM_END: - // NOTE: this might not be true when multithreaded encoder is used with + // NOTE: flush may not have completed if a multithreaded encoder is using action // LZMA_FULL_BARRIER. For now, we skip this check. flushed = true; break; From 09b73c7ff6413066aa3d98a5694a04c130b50a4f Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Tue, 17 Dec 2024 00:56:19 -0500 Subject: [PATCH 35/35] Refactor lzma stream related functions into a nested helper class --- .../streaming_compression/lzma/Compressor.cpp | 194 +++++++----------- .../streaming_compression/lzma/Compressor.hpp | 56 ++++- 2 files changed, 123 insertions(+), 127 deletions(-) diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp index 36a5038b4..4a43e93e8 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.cpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.cpp @@ -8,134 +8,25 @@ #include #include -#include "../../Array.hpp" #include "../../ErrorCode.hpp" #include "../../FileWriter.hpp" #include "../../TraceableException.hpp" #include "../../type_utils.hpp" #include "Constants.hpp" -namespace { -using clp::Array; -using clp::streaming_compression::lzma::Compressor; - -/** - * Attaches a pre-allocated block buffer to encoder's output stream - * - * Subsequent calls to this function resets the output buffer to its initial state. - * @param stream - * @param out_buffer - */ -auto attach_stream_output_buffer(lzma_stream* stream, Array& out_buffer) -> void; - -auto detach_stream_input_src(lzma_stream* stream) -> void; - -auto detach_stream_output_buffer(lzma_stream* stream) -> void; - -auto is_flush_action(lzma_action action) -> bool; - -/** - * Initializes an LZMA compression encoder and its streams - * - * @param stream A pre-allocated `lzma_stream` object that is to be initialized - * @param compression_level - * @param dict_size Dictionary size that specifies how many bytes of the - * recently processed uncompressed data to keep in the memory - * @param check Type of integrity check calculated from the uncompressed data. LZMA_CHECK_CRC64 is - * the default in the xz command line tool. If the .xz file needs to be decompressed - * with XZ-Embedded, use LZMA_CHECK_CRC32 instead. - */ -auto init_lzma_encoder( - lzma_stream* stream, - int compression_level, - size_t dict_size, - lzma_check check = LZMA_CHECK_CRC64 -) -> void; - -auto attach_stream_output_buffer(lzma_stream* stream, Array& out_buffer) -> void { - stream->next_out = out_buffer.data(); - stream->avail_out = out_buffer.size(); -} - -auto detach_stream_input_src(lzma_stream* stream) -> void { - stream->next_in = nullptr; - stream->avail_in = 0; -} - -auto detach_stream_output_buffer(lzma_stream* stream) -> void { - stream->next_out = nullptr; - stream->avail_out = 0; -} - -auto is_flush_action(lzma_action action) -> bool { - return LZMA_SYNC_FLUSH == action || LZMA_FULL_FLUSH == action || LZMA_FULL_BARRIER == action - || LZMA_FINISH == action; -} - -auto init_lzma_encoder( - lzma_stream* stream, - int compression_level, - size_t dict_size, - lzma_check check -) -> void { - lzma_options_lzma options; - if (0 != lzma_lzma_preset(&options, compression_level)) { - SPDLOG_ERROR("Failed to initialize LZMA options' compression level."); - throw Compressor::OperationFailed(clp::ErrorCode_BadParam, __FILENAME__, __LINE__); - } - options.dict_size = dict_size; - std::array filters{{ - {.id = LZMA_FILTER_LZMA2, .options = &options}, - {.id = LZMA_VLI_UNKNOWN, .options = nullptr}, - }}; - - auto const rc = lzma_stream_encoder(stream, filters.data(), check); - if (LZMA_OK == rc) { - return; - } - - char const* msg{nullptr}; - switch (rc) { - case LZMA_MEM_ERROR: - msg = "Memory allocation failed"; - break; - - case LZMA_OPTIONS_ERROR: - msg = "Specified preset is not supported"; - break; - - case LZMA_UNSUPPORTED_CHECK: - msg = "Specified integrity check is not supported"; - break; - - case LZMA_PROG_ERROR: - msg = "Input arguments are not sane"; - break; - - default: - msg = "Unknown error"; - break; - } - - SPDLOG_ERROR("Error initializing the encoder: {} (error code {})", msg, static_cast(rc)); - throw Compressor::OperationFailed(clp::ErrorCode_BadParam, __FILENAME__, __LINE__); -} -} // namespace - namespace clp::streaming_compression::lzma { auto Compressor::open(FileWriter& file_writer, int compression_level) -> void { if (nullptr != m_compressed_stream_file_writer) { throw OperationFailed(ErrorCode_NotReady, __FILENAME__, __LINE__); } - if (compression_level < cMinCompressionLevel || compression_level > cMaxCompressionLevel) { throw OperationFailed(ErrorCode_Unsupported, __FILENAME__, __LINE__); } + m_compression_level = compression_level; - m_compression_stream = LZMA_STREAM_INIT; - init_lzma_encoder(&m_compression_stream, compression_level, m_dict_size); - detach_stream_input_src(&m_compression_stream); - attach_stream_output_buffer(&m_compression_stream, m_compressed_stream_block_buffer); + m_lzma_ops.init_lzma_encoder(); + m_lzma_ops.detach_input_src(); + m_lzma_ops.attach_output_buffer(); m_compressed_stream_file_writer = &file_writer; m_uncompressed_stream_pos = 0; } @@ -153,7 +44,7 @@ auto Compressor::close() -> void { flush_lzma(LZMA_FINISH); // Deallocates LZMA stream's internal data structures lzma_end(&m_compression_stream); - detach_stream_output_buffer(&m_compression_stream); + m_lzma_ops.detach_output_buffer(); m_compressed_stream_file_writer = nullptr; } @@ -173,7 +64,7 @@ auto Compressor::write(char const* data, size_t data_length) -> void { m_compression_stream.next_in = clp::size_checked_pointer_cast(data); m_compression_stream.avail_in = data_length; encode_lzma(); - detach_stream_input_src(&m_compression_stream); + m_lzma_ops.detach_input_src(); m_uncompressed_stream_pos += data_length; } @@ -188,7 +79,6 @@ auto Compressor::try_get_pos(size_t& pos) const -> ErrorCode { if (nullptr == m_compressed_stream_file_writer) { return ErrorCode_NotInit; } - pos = m_uncompressed_stream_pos; return ErrorCode_Success; } @@ -198,7 +88,6 @@ auto Compressor::encode_lzma() -> void { if (0 == m_compression_stream.avail_out) { flush_stream_output_block_buffer(); } - auto const rc = lzma_code(&m_compression_stream, LZMA_RUN); switch (rc) { case LZMA_OK: @@ -219,7 +108,7 @@ auto Compressor::encode_lzma() -> void { } auto Compressor::flush_lzma(lzma_action flush_action) -> void { - if (false == is_flush_action(flush_action)) { + if (false == LzmaStreamOperations::is_flush_action(flush_action)) { SPDLOG_ERROR( "lzma_code() supplied with invalid flush action - {}.", static_cast(flush_action) @@ -232,7 +121,6 @@ auto Compressor::flush_lzma(lzma_action flush_action) -> void { if (0 == m_compression_stream.avail_out) { flush_stream_output_block_buffer(); } - auto const rc = lzma_code(&m_compression_stream, flush_action); switch (rc) { case LZMA_OK: @@ -257,7 +145,6 @@ auto Compressor::flush_lzma(lzma_action flush_action) -> void { throw OperationFailed(ErrorCode_Failure, __FILENAME__, __LINE__); } } - flush_stream_output_block_buffer(); } @@ -269,6 +156,71 @@ auto Compressor::flush_stream_output_block_buffer() -> void { clp::size_checked_pointer_cast(m_compressed_stream_block_buffer.data()), cCompressedStreamBlockBufferSize - m_compression_stream.avail_out ); - attach_stream_output_buffer(&m_compression_stream, m_compressed_stream_block_buffer); + m_lzma_ops.attach_output_buffer(); +} + +auto Compressor::LzmaStreamOperations::is_flush_action(lzma_action action) -> bool { + return LZMA_SYNC_FLUSH == action || LZMA_FULL_FLUSH == action || LZMA_FULL_BARRIER == action + || LZMA_FINISH == action; +} + +auto Compressor::LzmaStreamOperations::attach_output_buffer() -> void { + m_p->m_compression_stream.next_out = m_p->m_compressed_stream_block_buffer.data(); + m_p->m_compression_stream.avail_out = m_p->m_compressed_stream_block_buffer.size(); +} + +auto Compressor::LzmaStreamOperations::detach_input_src() -> void { + m_p->m_compression_stream.next_in = nullptr; + m_p->m_compression_stream.avail_in = 0; +} + +auto Compressor::LzmaStreamOperations::detach_output_buffer() -> void { + m_p->m_compression_stream.next_out = nullptr; + m_p->m_compression_stream.avail_out = 0; +} + +auto Compressor::LzmaStreamOperations::init_lzma_encoder(lzma_check check) -> void { + lzma_options_lzma options; + if (0 != lzma_lzma_preset(&options, m_p->m_compression_level)) { + SPDLOG_ERROR("Failed to initialize LZMA options' compression level."); + throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); + } + options.dict_size = m_p->m_dict_size; + std::array filters{{ + {.id = LZMA_FILTER_LZMA2, .options = &options}, + {.id = LZMA_VLI_UNKNOWN, .options = nullptr}, + }}; + + m_p->m_compression_stream = LZMA_STREAM_INIT; + auto const rc = lzma_stream_encoder(&m_p->m_compression_stream, filters.data(), check); + if (LZMA_OK == rc) { + return; + } + + char const* msg{nullptr}; + switch (rc) { + case LZMA_MEM_ERROR: + msg = "Memory allocation failed"; + break; + + case LZMA_OPTIONS_ERROR: + msg = "Specified preset is not supported"; + break; + + case LZMA_UNSUPPORTED_CHECK: + msg = "Specified integrity check is not supported"; + break; + + case LZMA_PROG_ERROR: + msg = "Input arguments are not sane"; + break; + + default: + msg = "Unknown error"; + break; + } + + SPDLOG_ERROR("Error initializing the encoder: {} (error code {})", msg, static_cast(rc)); + throw OperationFailed(ErrorCode_BadParam, __FILENAME__, __LINE__); } } // namespace clp::streaming_compression::lzma diff --git a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp index 986137aa2..3e7af18ff 100644 --- a/components/core/src/clp/streaming_compression/lzma/Compressor.hpp +++ b/components/core/src/clp/streaming_compression/lzma/Compressor.hpp @@ -2,7 +2,7 @@ #define CLP_STREAMING_COMPRESSION_LZMA_COMPRESSOR_HPP #include -#include +#include #include @@ -89,6 +89,48 @@ class Compressor : public ::clp::streaming_compression::Compressor { } private: + class LzmaStreamOperations { + public: + // Constructor + LzmaStreamOperations(Compressor* parent) : m_p(parent) {} + + // Destructor + ~LzmaStreamOperations() = default; + + // Delete copy constructor and assignment operator + LzmaStreamOperations(LzmaStreamOperations const&) = delete; + auto operator=(LzmaStreamOperations const&) -> LzmaStreamOperations& = delete; + + // Default move constructor and assignment operator + LzmaStreamOperations(LzmaStreamOperations&&) noexcept = default; + auto operator=(LzmaStreamOperations&&) noexcept -> LzmaStreamOperations& = default; + + [[nodiscard]] static auto is_flush_action(lzma_action action) -> bool; + + /** + * Attaches a pre-allocated block buffer to the encoder's output stream + * + * Subsequent calls to this function resets the output buffer to its initial state. + */ + auto attach_output_buffer() -> void; + + auto detach_input_src() -> void; + + auto detach_output_buffer() -> void; + + /** + * Initializes an LZMA compression encoder and its streams + * + * @param check Type of integrity check calculated from the uncompressed data. + * LZMA_CHECK_CRC64 is the default in the xz command line tool. If the .xz file needs to be + * decompressed with XZ-Embedded, use LZMA_CHECK_CRC32 instead. + */ + auto init_lzma_encoder(lzma_check check = LZMA_CHECK_CRC64) -> void; + + private: + Compressor* m_p; + }; + static constexpr size_t cCompressedStreamBlockBufferSize{4096}; // 4KiB /** @@ -119,7 +161,8 @@ class Compressor : public ::clp::streaming_compression::Compressor { /** * Flushes the current compressed data in the output block buffer to the output file handler. - * Reset the output block buffer to receive new data. + * + * Also resets the output block buffer to receive new data. */ auto flush_stream_output_block_buffer() -> void; @@ -127,11 +170,12 @@ class Compressor : public ::clp::streaming_compression::Compressor { FileWriter* m_compressed_stream_file_writer{nullptr}; // Compressed stream variables - lzma_stream m_compression_stream; - size_t m_dict_size{cDefaultDictionarySize}; - + LzmaStreamOperations m_lzma_ops{this}; Array m_compressed_stream_block_buffer{cCompressedStreamBlockBufferSize}; - + int m_compression_level{cDefaultCompressionLevel}; + lzma_stream m_compression_stream = LZMA_STREAM_INIT; + // Specifies how many bytes of the recently processed uncompressed data to keep in the memory + size_t m_dict_size{cDefaultDictionarySize}; size_t m_uncompressed_stream_pos{0}; }; } // namespace clp::streaming_compression::lzma