Skip to content

Commit

Permalink
Move LZ4 codec from arrow to velox parquet writer (#6909)
Browse files: browse the repository at this point in the history
  • Loading branch information
rui-mo committed Oct 17, 2023
1 parent 1f809ee commit 03dacf7
Show file tree
Hide file tree
Showing 6 changed files with 637 additions and 20 deletions.
2 changes: 0 additions & 2 deletions velox/dwio/parquet/writer/arrow/tests/ColumnWriterTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,6 @@ TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithStatsAndGzipCompression) {
Encoding::PLAIN, Compression::GZIP, false, true, LARGE_SIZE);
}

#ifdef ARROW_WITH_LZ4
TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithLz4Compression) {
this->TestRequiredWithSettings(
Encoding::PLAIN, Compression::LZ4, false, false, LARGE_SIZE);
Expand All @@ -623,7 +622,6 @@ TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithStatsAndLz4Compression) {
this->TestRequiredWithSettings(
Encoding::PLAIN, Compression::LZ4, false, true, LARGE_SIZE);
}
#endif

TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithZstdCompression) {
this->TestRequiredWithSettings(
Expand Down
2 changes: 0 additions & 2 deletions velox/dwio/parquet/writer/arrow/tests/FileDeserializeTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,8 @@ static std::vector<Compression::type> GetSupportedCodecTypes() {

codec_types.push_back(Compression::GZIP);

#ifdef ARROW_WITH_LZ4
codec_types.push_back(Compression::LZ4);
codec_types.push_back(Compression::LZ4_HADOOP);
#endif

codec_types.push_back(Compression::ZSTD);
return codec_types;
Expand Down
2 changes: 0 additions & 2 deletions velox/dwio/parquet/writer/arrow/tests/FileSerializeTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -381,15 +381,13 @@ TYPED_TEST(TestSerialize, SmallFileGzip) {
ASSERT_NO_FATAL_FAILURE(this->FileSerializeTest(Compression::GZIP));
}

#ifdef ARROW_WITH_LZ4
TYPED_TEST(TestSerialize, SmallFileLz4) {
ASSERT_NO_FATAL_FAILURE(this->FileSerializeTest(Compression::LZ4));
}

TYPED_TEST(TestSerialize, SmallFileLz4Hadoop) {
ASSERT_NO_FATAL_FAILURE(this->FileSerializeTest(Compression::LZ4_HADOOP));
}
#endif

TYPED_TEST(TestSerialize, SmallFileZstd) {
ASSERT_NO_FATAL_FAILURE(this->FileSerializeTest(Compression::ZSTD));
Expand Down
19 changes: 15 additions & 4 deletions velox/dwio/parquet/writer/arrow/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,19 @@

add_library(
velox_dwio_arrow_parquet_writer_util_lib
Compression.cpp CompressionSnappy.cpp CompressionZstd.cpp CompressionZlib.cpp
Hashing.cpp Crc32.cpp)
Compression.cpp
CompressionSnappy.cpp
CompressionZstd.cpp
CompressionZlib.cpp
CompressionLZ4.cpp
Hashing.cpp
Crc32.cpp)

target_link_libraries(velox_dwio_arrow_parquet_writer_util_lib parquet arrow
Snappy::snappy zstd::zstd ZLIB::ZLIB)
target_link_libraries(
velox_dwio_arrow_parquet_writer_util_lib
parquet
arrow
Snappy::snappy
zstd::zstd
ZLIB::ZLIB
lz4::lz4)
10 changes: 0 additions & 10 deletions velox/dwio/parquet/writer/arrow/util/Compression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,19 +190,13 @@ Result<std::unique_ptr<Codec>> Codec::Create(
break;
}
case Compression::LZ4:
#ifdef ARROW_WITH_LZ4
codec = internal::MakeLz4RawCodec(compression_level);
#endif
break;
case Compression::LZ4_FRAME:
#ifdef ARROW_WITH_LZ4
codec = internal::MakeLz4FrameCodec(compression_level);
#endif
break;
case Compression::LZ4_HADOOP:
#ifdef ARROW_WITH_LZ4
codec = internal::MakeLz4HadoopRawCodec();
#endif
break;
case Compression::ZSTD:
codec = internal::MakeZSTDCodec(compression_level);
Expand Down Expand Up @@ -248,11 +242,7 @@ bool Codec::IsAvailable(Compression::type codec_type) {
case Compression::LZ4:
case Compression::LZ4_FRAME:
case Compression::LZ4_HADOOP:
#ifdef ARROW_WITH_LZ4
return true;
#else
return false;
#endif
case Compression::ZSTD:
return true;
case Compression::BZ2:
Expand Down
Loading

0 comments on commit 03dacf7

Please sign in to comment.