-
Couldn't load subscription status.
- Fork 4
Handle lz4 compression #30
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
23 commits
Select commit
Hold shift + click to select a range
16068f5
First attempt for compression in serialization (not tested and to be …
Hind-M 4efa40c
Clean up and fetch
Hind-M 5ecfe01
More cleanup
Hind-M 9be57ad
Fix fetching lz4
Hind-M da7b9f6
Add lz4 targets to be exported
Hind-M e52ccf2
Add lz4 compression in deserialization
Hind-M 4d52af9
Add owning_arrow_array_private_data
Hind-M 7c59a7d
Factorize
Hind-M 8bc880c
Remove macro from template
Hind-M 10fcf73
Add fct and remove cout
Hind-M c667acd
Move implementation details to cpp and add check boundaries
Hind-M 568a8e7
Factorize more
Hind-M d410d15
Fix conflicts and rework serialization with compression
Hind-M 78d0150
Add compression test
Hind-M bed6421
Minor changes
Hind-M 4b171cc
Rework compression and add tests
Hind-M 7b59436
Add missing header in test
Hind-M 5c1b6ba
Simplify
Hind-M b5cc681
Simplify get_record_batch_message_builder
Hind-M 35ebb09
Simplify deserialization
Hind-M 177dbfa
Add get_buffers docstring
Hind-M bc5415c
Handle empty validity buffers differently
Hind-M a594e91
Code review: add TODOs
Hind-M File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| # Find LZ4 library and headers | ||
|
|
||
| # This module defines: | ||
| # LZ4_FOUND - True if lz4 is found | ||
| # LZ4_INCLUDE_DIRS - LZ4 include directories | ||
| # LZ4_LIBRARIES - Libraries needed to use LZ4 | ||
| # LZ4_VERSION - LZ4 version number | ||
| # | ||
|
|
||
| find_package(PkgConfig) | ||
| if(PKG_CONFIG_FOUND) | ||
| pkg_check_modules(LZ4 QUIET liblz4) | ||
| if(NOT LZ4_FOUND) | ||
| message(STATUS "Did not find 'liblz4.pc', trying 'lz4.pc'") | ||
| pkg_check_modules(LZ4 QUIET lz4) | ||
| endif() | ||
| endif() | ||
|
|
||
| find_path(LZ4_INCLUDE_DIR lz4.h) | ||
| # HINTS ${LZ4_INCLUDEDIR} ${LZ4_INCLUDE_DIRS}) | ||
| find_library(LZ4_LIBRARY NAMES lz4 liblz4) | ||
| # HINTS ${LZ4_LIBDIR} ${LZ4_LIBRARY_DIRS}) | ||
|
|
||
| include(FindPackageHandleStandardArgs) | ||
| find_package_handle_standard_args(lz4 DEFAULT_MSG | ||
| LZ4_LIBRARY LZ4_INCLUDE_DIR) | ||
| mark_as_advanced(LZ4_INCLUDE_DIR LZ4_LIBRARY) | ||
|
|
||
| set(LZ4_LIBRARIES ${LZ4_LIBRARY}) | ||
| set(LZ4_INCLUDE_DIRS ${LZ4_INCLUDE_DIR}) | ||
|
|
||
| if(LZ4_FOUND AND NOT TARGET lz4::lz4) | ||
| add_library(lz4::lz4 UNKNOWN IMPORTED) | ||
| set_target_properties(lz4::lz4 PROPERTIES | ||
| IMPORTED_LOCATION "${LZ4_LIBRARIES}" | ||
| INTERFACE_INCLUDE_DIRECTORIES "${LZ4_INCLUDE_DIRS}") | ||
| if (NOT TARGET LZ4::LZ4 AND TARGET lz4::lz4) | ||
| add_library(LZ4::LZ4 ALIAS lz4::lz4) | ||
| endif () | ||
| endif() | ||
|
|
||
| #TODO add version? |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,34 +1,86 @@ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <vector> | ||
| #include <utility> | ||
|
|
||
| #include <sparrow/c_interface.hpp> | ||
| #include <sparrow/utils/contracts.hpp> | ||
|
|
||
| #include "sparrow_ipc/config/config.hpp" | ||
| #include "sparrow_ipc/arrow_interface/arrow_array/private_data.hpp" | ||
|
|
||
| namespace sparrow_ipc | ||
| { | ||
| [[nodiscard]] SPARROW_IPC_API ArrowArray make_non_owning_arrow_array( | ||
| SPARROW_IPC_API void release_arrow_array_children_and_dictionary(ArrowArray* array); | ||
|
|
||
| template <ArrowPrivateData T> | ||
| void arrow_array_release(ArrowArray* array) | ||
| { | ||
| SPARROW_ASSERT_TRUE(array != nullptr) | ||
| SPARROW_ASSERT_TRUE(array->release == std::addressof(arrow_array_release<T>)) | ||
|
|
||
| SPARROW_ASSERT_TRUE(array->private_data != nullptr); | ||
|
|
||
| delete static_cast<T*>(array->private_data); | ||
| array->private_data = nullptr; | ||
| array->buffers = nullptr; // The buffers were deleted with the private data | ||
|
|
||
| release_arrow_array_children_and_dictionary(array); | ||
| array->release = nullptr; | ||
| } | ||
|
|
||
| template <ArrowPrivateData T, typename Arg> | ||
| void fill_arrow_array( | ||
| ArrowArray& array, | ||
| int64_t length, | ||
| int64_t null_count, | ||
| int64_t offset, | ||
| std::vector<std::uint8_t*>&& buffers, | ||
| size_t children_count, | ||
| ArrowArray** children, | ||
| ArrowArray* dictionary | ||
| ); | ||
| ArrowArray* dictionary, | ||
| Arg&& private_data_arg | ||
| ) | ||
| { | ||
| SPARROW_ASSERT_TRUE(length >= 0); | ||
| SPARROW_ASSERT_TRUE(null_count >= -1); | ||
| SPARROW_ASSERT_TRUE(offset >= 0); | ||
|
|
||
| SPARROW_IPC_API void release_non_owning_arrow_array(ArrowArray* array); | ||
| array.length = length; | ||
| array.null_count = null_count; | ||
| array.offset = offset; | ||
| array.n_children = static_cast<int64_t>(children_count); | ||
| array.children = children; | ||
| array.dictionary = dictionary; | ||
|
|
||
| SPARROW_IPC_API void fill_non_owning_arrow_array( | ||
| ArrowArray& array, | ||
| auto private_data = new T(std::forward<Arg>(private_data_arg)); | ||
| array.private_data = private_data; | ||
| array.n_buffers = private_data->n_buffers(); | ||
| array.buffers = private_data->buffers_ptrs(); | ||
|
|
||
| array.release = &arrow_array_release<T>; | ||
| } | ||
|
|
||
| template <ArrowPrivateData T, typename Arg> | ||
| [[nodiscard]] ArrowArray make_arrow_array( | ||
| int64_t length, | ||
| int64_t null_count, | ||
| int64_t offset, | ||
| std::vector<std::uint8_t*>&& buffers, | ||
| size_t children_count, | ||
| ArrowArray** children, | ||
| ArrowArray* dictionary | ||
| ); | ||
| } | ||
| ArrowArray* dictionary, | ||
| Arg&& private_data_arg | ||
| ) | ||
| { | ||
| ArrowArray array{}; | ||
| fill_arrow_array<T>( | ||
| array, | ||
| length, | ||
| null_count, | ||
| offset, | ||
| children_count, | ||
| children, | ||
| dictionary, | ||
| std::forward<Arg>(private_data_arg) | ||
| ); | ||
| return array; | ||
| } | ||
| } |
24 changes: 16 additions & 8 deletions
24
include/sparrow_ipc/arrow_interface/arrow_array/private_data.hpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,25 +1,33 @@ | ||
| #pragma once | ||
|
|
||
| #include <concepts> | ||
| #include <cstdint> | ||
| #include <span> | ||
| #include <variant> | ||
| #include <vector> | ||
|
|
||
| #include "sparrow_ipc/config/config.hpp" | ||
|
|
||
| namespace sparrow_ipc | ||
| { | ||
| class non_owning_arrow_array_private_data | ||
| template <typename T> | ||
| concept ArrowPrivateData = requires(T& t) | ||
| { | ||
| public: | ||
| { t.buffers_ptrs() } -> std::same_as<const void**>; | ||
| { t.n_buffers() } -> std::convertible_to<std::size_t>; | ||
| }; | ||
|
|
||
| explicit constexpr non_owning_arrow_array_private_data(std::vector<std::uint8_t*>&& buffers_pointers) | ||
| : m_buffers_pointers(std::move(buffers_pointers)) | ||
| { | ||
| } | ||
| class arrow_array_private_data | ||
| { | ||
| public: | ||
| using optionally_owned_buffer = std::variant<std::vector<uint8_t>, std::span<const uint8_t>>; | ||
| explicit arrow_array_private_data(std::vector<optionally_owned_buffer>&& buffers); | ||
|
|
||
| [[nodiscard]] SPARROW_IPC_API const void** buffers_ptrs() noexcept; | ||
| [[nodiscard]] SPARROW_IPC_API std::size_t n_buffers() const noexcept; | ||
|
|
||
| private: | ||
|
|
||
| std::vector<std::uint8_t*> m_buffers_pointers; | ||
| std::vector<optionally_owned_buffer> m_buffers; | ||
| std::vector<const void*> m_buffer_pointers; | ||
| }; | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we mix owned and non-owned buffers? Otherwise a variant of vectors might be more suitable here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As I previously commented elsewhere, since some buffers can be compressed and others not, you need this flexibility otherwise you'll end up copying non-compressed buffer into an owned version.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(and I think at some sparrow may want a more versatile buffer facility such that ownership needn't be under the form of a
std::vector?).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah indeed, I totally forgot it when writing my comment.