-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Add unified compression API and lz4_frame/lz4_raw/lz4_hadoop codec #7589
Open
marin-ma
wants to merge
16
commits into
facebookincubator:main
Choose a base branch
from
marin-ma:unify-compression-api-lz4
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
8c24ec5
add compression v2 API and lz4_frame/lz4_raw/lz4_hadoop codec
marin-ma 82557c0
use folly::Expected as return code
marin-ma e6c47ba
fix build
marin-ma 09f5a23
update Expected usage
marin-ma 512be5f
address comments
marin-ma b9a996e
fix format
marin-ma 1c13b33
address comments
marin-ma 7f13443
address comments
marin-ma 462bfc8
rebase
marin-ma 349a4a3
format
marin-ma 05d1a96
refine error message
marin-ma 1690bc9
address comments
marin-ma 48546e0
fix
marin-ma 56f9540
fix up
marin-ma 472cfe9
add throws
marin-ma b76c7c9
nit
marin-ma File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,5 +19,13 @@ endif() | |
velox_add_library(velox_common_compression Compression.cpp LzoDecompressor.cpp) | ||
velox_link_libraries( | ||
velox_common_compression | ||
PUBLIC Folly::folly | ||
PUBLIC velox_status Folly::folly | ||
PRIVATE velox_exception) | ||
|
||
if(VELOX_ENABLE_COMPRESSION_LZ4) | ||
velox_sources(velox_common_compression PRIVATE Lz4Compression.cpp | ||
HadoopCompressionFormat.cpp) | ||
velox_link_libraries(velox_common_compression PUBLIC lz4::lz4) | ||
velox_compile_definitions(velox_common_compression | ||
PRIVATE VELOX_ENABLE_COMPRESSION_LZ4) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, it's not used in a header, so PRIVATE is better. Good catch! |
||
endif() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,9 @@ | |
|
||
#include "velox/common/compression/Compression.h" | ||
#include "velox/common/base/Exceptions.h" | ||
#ifdef VELOX_ENABLE_COMPRESSION_LZ4 | ||
#include "velox/common/compression/Lz4Compression.h" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Like here, we would protect this with a VELOX_ENABLE_COMPRESSION_LZ4 (or similar) |
||
#endif | ||
|
||
#include <folly/Conv.h> | ||
|
||
|
@@ -98,4 +101,132 @@ CompressionKind stringToCompressionKind(const std::string& kind) { | |
VELOX_UNSUPPORTED("Not support compression kind {}", kind); | ||
} | ||
} | ||
|
||
/// Default initialization hook: performs no work and reports success.
/// Codec::create() invokes this on every newly constructed codec.
Status Codec::init() {
  auto ok = Status::OK();
  return ok;
}
|
||
/// Reports whether codecs of the given `kind` can determine the uncompressed
/// length of a compressed buffer. Currently no kind is reported as supported.
bool Codec::supportsGetUncompressedLength(CompressionKind kind) {
  // TODO: Return true if it's supported by compression kind.
  constexpr bool kSupported = false;
  return kSupported;
}
|
||
/// Reports whether codecs of the given `kind` offer streaming
/// compression/decompression. Only LZ4 is reported as supported, and only
/// when VELOX_ENABLE_COMPRESSION_LZ4 is defined at build time.
bool Codec::supportsStreamingCompression(CompressionKind kind) {
#ifdef VELOX_ENABLE_COMPRESSION_LZ4
  if (kind == CompressionKind::CompressionKind_LZ4) {
    return true;
  }
#endif
  return false;
}
|
||
/// Reports whether codecs of the given `kind` support compressing into a
/// fixed-length output. Currently no kind is reported as supported.
bool Codec::supportsCompressFixedLength(CompressionKind kind) {
  // TODO: Return true if it's supported by compression kind.
  constexpr bool kSupported = false;
  return kSupported;
}
|
||
/// Creates and initializes a codec for the given compression `kind`.
///
/// @param kind The compression kind to create a codec for.
/// @param codecOptions Options for the codec. For LZ4, passing an
/// Lz4CodecOptions selects between the frame, raw and hadoop variants; any
/// other CodecOptions yields the LZ4 frame codec.
/// @return The initialized codec, or an unexpected Status::Invalid when the
/// kind is unrecognized, when support for it is not built or not implemented,
/// or the non-OK status returned by Codec::init().
Expected<std::unique_ptr<Codec>> Codec::create(
    CompressionKind kind,
    const CodecOptions& codecOptions) {
  if (!isAvailable(kind)) {
    auto name = compressionKindToString(kind);
    // Distinguish a kind with no known name from a known kind that is simply
    // not compiled in or not implemented.
    VELOX_RETURN_UNEXPECTED_IF(
        folly::StringPiece({name}).startsWith("unknown"),
        Status::Invalid("Unrecognized codec: {}", name));
    return folly::makeUnexpected(Status::Invalid(
        "Support for codec '{}' is either not built or not implemented.",
        name));
  }

  auto compressionLevel = codecOptions.compressionLevel;
  std::unique_ptr<Codec> codec;
  switch (kind) {
#ifdef VELOX_ENABLE_COMPRESSION_LZ4
    case CompressionKind::CompressionKind_LZ4:
      if (auto options = dynamic_cast<const Lz4CodecOptions*>(&codecOptions)) {
        // The breaks below only leave this inner switch, so the default
        // frame codec must live in the else branch; otherwise it would
        // overwrite the codec selected here.
        switch (options->type) {
          case Lz4CodecOptions::kLz4Frame:
            codec = makeLz4FrameCodec(compressionLevel);
            break;
          case Lz4CodecOptions::kLz4Raw:
            codec = makeLz4RawCodec(compressionLevel);
            break;
          case Lz4CodecOptions::kLz4Hadoop:
            codec = makeLz4HadoopCodec();
            break;
        }
      } else {
        // By default, create LZ4 Frame codec.
        codec = makeLz4FrameCodec(compressionLevel);
      }
      break;
#endif
    default:
      break;
  }
  // Kinds that pass isAvailable() but have no factory above (e.g. NONE) end
  // up here with a null codec.
  VELOX_RETURN_UNEXPECTED_IF(
      codec == nullptr,
      Status::Invalid(
          "Support for codec '{}' is either not built or not implemented.",
          compressionKindToString(kind)));

  VELOX_RETURN_UNEXPECTED_NOT_OK(codec->init());

  return codec;
}
|
||
/// Convenience overload: wraps `compressionLevel` in a generic CodecOptions
/// and forwards to the options-based Codec::create().
Expected<std::unique_ptr<Codec>> Codec::create(
    CompressionKind kind,
    int32_t compressionLevel) {
  const CodecOptions levelOnlyOptions{compressionLevel};
  return create(kind, levelOnlyOptions);
}
|
||
/// Reports whether the given compression `kind` is available in this build.
/// NONE is always available; LZ4 is available only when
/// VELOX_ENABLE_COMPRESSION_LZ4 is defined. Every other kind is unavailable.
bool Codec::isAvailable(CompressionKind kind) {
  if (kind == CompressionKind::CompressionKind_NONE) {
    return true;
  }
#ifdef VELOX_ENABLE_COMPRESSION_LZ4
  if (kind == CompressionKind::CompressionKind_LZ4) {
    return true;
  }
#endif
  return false;
}
|
||
std::optional<uint64_t> Codec::getUncompressedLength( | ||
const uint8_t* input, | ||
uint64_t inputLength) const { | ||
return std::nullopt; | ||
} | ||
|
||
/// Base implementation: fixed-length compression is not supported, so this
/// always returns an unexpected Status::Invalid naming the codec.
Expected<uint64_t> Codec::compressFixedLength(
    const uint8_t* input,
    uint64_t inputLength,
    uint8_t* output,
    uint64_t outputLength) {
  auto unsupported =
      Status::Invalid("'{}' doesn't support fixed-length compression", name());
  return folly::makeUnexpected(unsupported);
}
|
||
/// Base implementation: streaming compression is not supported, so this
/// always returns an unexpected Status::Invalid naming the codec.
Expected<std::shared_ptr<StreamingCompressor>>
Codec::makeStreamingCompressor() {
  auto unsupported = Status::Invalid(
      "Streaming compression is unsupported with {} format.", name());
  return folly::makeUnexpected(unsupported);
}
|
||
/// Base implementation: streaming decompression is not supported, so this
/// always returns an unexpected Status::Invalid naming the codec.
Expected<std::shared_ptr<StreamingDecompressor>>
Codec::makeStreamingDecompressor() {
  auto unsupported = Status::Invalid(
      "Streaming decompression is unsupported with {} format.", name());
  return folly::makeUnexpected(unsupported);
}
|
||
int32_t Codec::compressionLevel() const { | ||
return kUseDefaultCompressionLevel; | ||
} | ||
|
||
std::string Codec::name() const { | ||
return compressionKindToString(compressionKind()); | ||
} | ||
} // namespace facebook::velox::common |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We probably do not need this option.