Skip to content

Commit

Permalink
Add plain test
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-mo committed Sep 13, 2024
1 parent 4a67523 commit 49e7a0b
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 96 deletions.
2 changes: 1 addition & 1 deletion velox/dwio/common/DirectDecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ class DirectDecoder : public IntDecoder<isSigned> {
} else if constexpr (std::is_same_v<
typename Visitor::DataType,
int128_t>) {
if (super::numBytes != 12) {
if (super::numBytes_ != 12) {
toSkip = visitor.process(super::template readInt<int128_t>(), atEnd);
} else {
// Reads INT96 timestamp as int128_t type and extracts the days and
Expand Down
5 changes: 2 additions & 3 deletions velox/dwio/common/IntDecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -441,12 +441,11 @@ inline T IntDecoder<isSigned>::readInt() {
return readLittleEndianFromBigEndian<T>();
} else {
if constexpr (std::is_same_v<T, int128_t>) {
if (numBytes == 12) {
if (numBytes_ == 12) {
VELOX_DCHECK(!useVInts, "Int96 should not be VInt encoded.");
return readInt96();
} else {
VELOX_NYI();
}
VELOX_NYI();
}
return readLongLE();
}
Expand Down
2 changes: 1 addition & 1 deletion velox/dwio/parquet/reader/TimestampColumnReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class TimestampColumnReader : public IntegerColumnReader {
const RowSet& rows,
const uint64_t* /*incomingNulls*/) override {
auto& data = formatData_->as<ParquetData>();
// Use int128_t as a workaroud. Timestamp in Velox is of 16-byte length.
// Use int128_t as a workaround. Timestamp in Velox is of 16-byte length.
prepareRead<int128_t>(offset, rows, nullptr);
readCommon<IntegerColumnReader, true>(rows);
readOffset_ += rows.back() + 1;
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
162 changes: 71 additions & 91 deletions velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,72 @@ class ParquetTableScanTest : public HiveConnectorTestBase {
writer->close();
}

void testInt96TimestampRead(const std::string& fileName) {
// Timestamp-int96.parquet holds one column (t: TIMESTAMP) and
// 10 rows in one row group. Data is in SNAPPY compressed format.
// The values are:
// |t |
// +-------------------+
// |2015-06-01 19:34:56|
// |2015-06-02 19:34:56|
// |2001-02-03 03:34:06|
// |1998-03-01 08:01:06|
// |2022-12-23 03:56:01|
// |1980-01-24 00:23:07|
// |1999-12-08 13:39:26|
// |2023-04-21 09:09:34|
// |2000-09-12 22:36:29|
// |2007-12-12 04:27:56|
// +-------------------+
auto vector = makeFlatVector<Timestamp>(
{Timestamp(1433187296, 0),
Timestamp(1433273696, 0),
Timestamp(981171246, 0),
Timestamp(888739266, 0),
Timestamp(1671767761, 0),
Timestamp(317521387, 0),
Timestamp(944660366, 0),
Timestamp(1682068174, 0),
Timestamp(968798189, 0),
Timestamp(1197433676, 0)});

loadData(
getExampleFilePath(fileName),
ROW({"t"}, {TIMESTAMP()}),
makeRowVector(
{"t"},
{
vector,
}));

assertSelectWithFilter({"t"}, {}, "", "SELECT t from tmp");
assertSelectWithFilter(
{"t"},
{},
"t < TIMESTAMP '2000-09-12 22:36:29'",
"SELECT t from tmp where t < TIMESTAMP '2000-09-12 22:36:29'");
assertSelectWithFilter(
{"t"},
{},
"t <= TIMESTAMP '2000-09-12 22:36:29'",
"SELECT t from tmp where t <= TIMESTAMP '2000-09-12 22:36:29'");
assertSelectWithFilter(
{"t"},
{},
"t > TIMESTAMP '1980-01-24 00:23:07'",
"SELECT t from tmp where t > TIMESTAMP '1980-01-24 00:23:07'");
assertSelectWithFilter(
{"t"},
{},
"t >= TIMESTAMP '1980-01-24 00:23:07'",
"SELECT t from tmp where t >= TIMESTAMP '1980-01-24 00:23:07'");
assertSelectWithFilter(
{"t"},
{},
"t == TIMESTAMP '2022-12-23 03:56:01'",
"SELECT t from tmp where t == TIMESTAMP '2022-12-23 03:56:01'");
}

private:
RowTypePtr getRowType(std::vector<std::string>&& outputColumnNames) const {
std::vector<TypePtr> types;
Expand Down Expand Up @@ -719,70 +785,12 @@ TEST_F(ParquetTableScanTest, sessionTimezone) {
assertSelectWithTimezone({"a"}, "SELECT a FROM tmp", "Asia/Shanghai");
}

TEST_F(ParquetTableScanTest, timestampFilter) {
// Timestamp-int96.parquet holds one column (t: TIMESTAMP) and
// 10 rows in one row group. Data is in SNAPPY compressed format.
// The values are:
// |t |
// +-------------------+
// |2015-06-01 19:34:56|
// |2015-06-02 19:34:56|
// |2001-02-03 03:34:06|
// |1998-03-01 08:01:06|
// |2022-12-23 03:56:01|
// |1980-01-24 00:23:07|
// |1999-12-08 13:39:26|
// |2023-04-21 09:09:34|
// |2000-09-12 22:36:29|
// |2007-12-12 04:27:56|
// +-------------------+
auto vector = makeFlatVector<Timestamp>(
{Timestamp(1433187296, 0),
Timestamp(1433273696, 0),
Timestamp(981171246, 0),
Timestamp(888739266, 0),
Timestamp(1671767761, 0),
Timestamp(317521387, 0),
Timestamp(944660366, 0),
Timestamp(1682068174, 0),
Timestamp(968798189, 0),
Timestamp(1197433676, 0)});

loadData(
getExampleFilePath("timestamp_int96.parquet"),
ROW({"t"}, {TIMESTAMP()}),
makeRowVector(
{"t"},
{
vector,
}));
// Runs the shared checks in testInt96TimestampRead against the example file
// timestamp_int96_dictionary.parquet (presumably dictionary-encoded INT96
// timestamps, judging by the file name -- confirm against the test data).
TEST_F(ParquetTableScanTest, timestampInt96Dictionary) {
testInt96TimestampRead("timestamp_int96_dictionary.parquet");
}

assertSelectWithFilter({"t"}, {}, "", "SELECT t from tmp");
assertSelectWithFilter(
{"t"},
{},
"t < TIMESTAMP '2000-09-12 22:36:29'",
"SELECT t from tmp where t < TIMESTAMP '2000-09-12 22:36:29'");
assertSelectWithFilter(
{"t"},
{},
"t <= TIMESTAMP '2000-09-12 22:36:29'",
"SELECT t from tmp where t <= TIMESTAMP '2000-09-12 22:36:29'");
assertSelectWithFilter(
{"t"},
{},
"t > TIMESTAMP '1980-01-24 00:23:07'",
"SELECT t from tmp where t > TIMESTAMP '1980-01-24 00:23:07'");
assertSelectWithFilter(
{"t"},
{},
"t >= TIMESTAMP '1980-01-24 00:23:07'",
"SELECT t from tmp where t >= TIMESTAMP '1980-01-24 00:23:07'");
assertSelectWithFilter(
{"t"},
{},
"t == TIMESTAMP '2022-12-23 03:56:01'",
"SELECT t from tmp where t == TIMESTAMP '2022-12-23 03:56:01'");
// Runs the shared checks in testInt96TimestampRead against the example file
// timestamp_int96_plain.parquet (presumably plain-encoded INT96 timestamps,
// judging by the file name -- confirm against the test data).
TEST_F(ParquetTableScanTest, timestampInt96Plain) {
testInt96TimestampRead("timestamp_int96_plain.parquet");
}

TEST_F(ParquetTableScanTest, timestampPrecisionMicrosecond) {
Expand Down Expand Up @@ -836,34 +844,6 @@ TEST_F(ParquetTableScanTest, timestampPrecisionMicrosecond) {
assertEqualResults({expected}, result.second);
}


// Scans two INT96 timestamp example files and checks that each yields the
// rows stored in the DuckDB table "expected" (timestamps of 1 and 2 seconds).
TEST_F(ParquetTableScanTest, timestampINT96) {
  // Reference rows registered under the table name "expected".
  auto expectedTimes =
      makeFlatVector<Timestamp>({Timestamp(1, 0), Timestamp(2, 0)});
  auto expectedRows = makeRowVector({"time"}, {expectedTimes});
  createDuckDbTable("expected", {expectedRows});

  // NOTE(review): an array vector holding one empty array is handed to
  // loadData as the data for a TIMESTAMP column; presumably it only serves as
  // a placeholder -- confirm against loadData.
  auto placeholder = makeArrayVector<Timestamp>({{}});

  // Loads one example file and compares the scan of "time" with "expected".
  const auto verifyFile = [&](const std::string& exampleFile) {
    loadData(
        getExampleFilePath(exampleFile),
        ROW({"time"}, {TIMESTAMP()}),
        makeRowVector({"time"}, {placeholder}));
    assertSelect({"time"}, "SELECT time from expected");
  };

  verifyFile("timestamp_dict_int96.parquet");
  verifyFile("timestamp_plain_int96.parquet");
}

int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
folly::Init init{&argc, &argv, false};
Expand Down

0 comments on commit 49e7a0b

Please sign in to comment.