Skip to content

Commit

Permalink
refactor(parquet): Use velox parquet reader in FileSerializeTest
Browse files Browse the repository at this point in the history
  • Loading branch information
jkhaliqi committed Dec 17, 2024
1 parent f1622ab commit 4abc922
Show file tree
Hide file tree
Showing 7 changed files with 274 additions and 94 deletions.
5 changes: 5 additions & 0 deletions velox/dwio/parquet/reader/Metadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,11 @@ bool ColumnChunkMetaDataPtr::hasDictionaryPageOffset() const {
thriftColumnChunkPtr(ptr_)->meta_data.__isset.dictionary_page_offset;
}

/// Check the presence of the index page offset in ColumnChunk metadata.
bool ColumnChunkMetaDataPtr::hasIndexPage() const {
  // Short-circuit when hasMetadata() is false so meta_data is not consulted.
  if (!hasMetadata()) {
    return false;
  }
  return thriftColumnChunkPtr(ptr_)->meta_data.__isset.index_page_offset;
}

std::unique_ptr<dwio::common::ColumnStatistics>
ColumnChunkMetaDataPtr::getColumnStatistics(
const TypePtr type,
Expand Down
2 changes: 2 additions & 0 deletions velox/dwio/parquet/reader/Metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ class ColumnChunkMetaDataPtr {
/// Check the presence of the dictionary page offset in ColumnChunk metadata.
bool hasDictionaryPageOffset() const;

/// Check the presence of the index page offset in ColumnChunk metadata.
bool hasIndexPage() const;

/// Return the ColumnChunk statistics.
std::unique_ptr<dwio::common::ColumnStatistics> getColumnStatistics(
const TypePtr type,
Expand Down
6 changes: 6 additions & 0 deletions velox/dwio/parquet/reader/ParquetReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <thrift/protocol/TCompactProtocol.h> //@manual

#include <arrow/io/api.h>
#include "velox/dwio/parquet/reader/ParquetColumnReader.h"
#include "velox/dwio/parquet/reader/StructColumnReader.h"
#include "velox/dwio/parquet/thrift/ThriftTransport.h"
Expand Down Expand Up @@ -843,10 +844,12 @@ std::shared_ptr<const RowType> ReaderBase::createRowType(
std::vector<TypePtr> childTypes;
for (auto& child : children) {
  // Take a copy of the name so it can be lower-cased without modifying the
  // child type itself.
  auto childName = static_cast<const ParquetTypeWithId&>(*child).name_;
  if (fileColumnNamesReadAsLowerCase) {
    folly::toLowerAscii(childName);
  }
  childNames.push_back(std::move(childName));
  childTypes.push_back(child->type());
}
return TypeFactory<TypeKind::ROW>::create(
Expand Down Expand Up @@ -1156,4 +1159,7 @@ FileMetaDataPtr ParquetReader::fileMetaData() const {
return readerBase_->fileMetaData();
}

/// Returns the thrift FileMetaData exposed by the underlying ReaderBase.
const thrift::FileMetaData& ParquetReader::thriftFileMetaData() const {
  const auto& metaData = readerBase_->thriftFileMetaData();
  return metaData;
}
} // namespace facebook::velox::parquet
7 changes: 7 additions & 0 deletions velox/dwio/parquet/reader/ParquetReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#pragma once

#include <arrow/io/api.h>
#include "velox/dwio/common/Reader.h"
#include "velox/dwio/common/ReaderFactory.h"
#include "velox/dwio/parquet/reader/Metadata.h"
Expand Down Expand Up @@ -106,6 +107,8 @@ class ParquetReader : public dwio::common::Reader {

FileMetaDataPtr fileMetaData() const;

/// Returns the thrift FileMetaData obtained from the underlying ReaderBase.
/// NOTE(review): the returned reference presumably stays valid only while
/// this reader is alive — confirm against ReaderBase.
const thrift::FileMetaData& thriftFileMetaData() const;

private:
std::shared_ptr<ReaderBase> readerBase_;
};
Expand All @@ -125,4 +128,8 @@ void registerParquetReaderFactory();

void unregisterParquetReaderFactory();

/// Start position and length of a column.
/// NOTE(review): presumably byte offsets within a Parquet file — confirm
/// against the users of this struct.
struct ColumnInfo {
  // Default member initializers keep a default-constructed ColumnInfo from
  // carrying indeterminate values; aggregate initialization still works.
  int64_t col_start{0};
  int64_t col_length{0};
};
} // namespace facebook::velox::parquet
18 changes: 18 additions & 0 deletions velox/dwio/parquet/tests/writer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,21 @@ target_link_libraries(
${TEST_LINK_LIBS}
GTest::gtest
fmt::fmt)

# Build the test executable from FileSerializeTest.cpp.
add_executable(velox_dwio_parquet_writer_test FileSerializeTest.cpp)

# Register the executable with CTest; it runs from this source directory.
add_test(
NAME velox_dwio_parquet_writer_test
COMMAND velox_dwio_parquet_writer_test
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})

# Libraries the test links against. Keep the order as-is: link order can
# matter for static libraries.
target_link_libraries(
velox_dwio_parquet_writer_test
velox_dwio_arrow_parquet_writer_test_lib
GTest::gmock
GTest::gtest
GTest::gtest_main
arrow
arrow_testing
velox_dwio_native_parquet_reader
velox_temp_path)
Loading

0 comments on commit 4abc922

Please sign in to comment.