From 5b10d0953b8616fb4ef938040cd8121b5184442a Mon Sep 17 00:00:00 2001 From: rui-mo Date: Mon, 26 Aug 2024 16:37:49 +0800 Subject: [PATCH] Add plain test --- velox/dwio/common/IntDecoder.h | 3 +- .../examples/timestamp_dict_int96.parquet | Bin 467 -> 0 bytes ...uet => timestamp_int96_dictionary.parquet} | Bin .../examples/timestamp_int96_plain.parquet | Bin 0 -> 518 bytes .../examples/timestamp_plain_int96.parquet | Bin 429 -> 0 bytes .../tests/reader/ParquetTableScanTest.cpp | 162 ++++++++---------- 6 files changed, 72 insertions(+), 93 deletions(-) delete mode 100644 velox/dwio/parquet/tests/examples/timestamp_dict_int96.parquet rename velox/dwio/parquet/tests/examples/{timestamp_int96.parquet => timestamp_int96_dictionary.parquet} (100%) create mode 100644 velox/dwio/parquet/tests/examples/timestamp_int96_plain.parquet delete mode 100644 velox/dwio/parquet/tests/examples/timestamp_plain_int96.parquet diff --git a/velox/dwio/common/IntDecoder.h b/velox/dwio/common/IntDecoder.h index 8bbc0751b3169..dee7849aa2d91 100644 --- a/velox/dwio/common/IntDecoder.h +++ b/velox/dwio/common/IntDecoder.h @@ -444,9 +444,8 @@ inline T IntDecoder::readInt() { if (numBytes == 12) { VELOX_DCHECK(!useVInts, "Int96 should not be VInt encoded."); return readInt96(); - } else { - VELOX_NYI(); } + VELOX_NYI(); } return readLongLE(); } diff --git a/velox/dwio/parquet/tests/examples/timestamp_dict_int96.parquet b/velox/dwio/parquet/tests/examples/timestamp_dict_int96.parquet deleted file mode 100644 index 661cb7a285227f8ead6c89fc829fc7cc99cbf37c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 467 zcmZXRze~eF6vtnFSSmP4Uq~QBIB2m@kEUsAsX7P_;#6E!?jTn zx`=;_Aa3Gc;_RiilQ+HZd!P5d@9w$>Cobw($0mN>y}oFBQs@BY8N5I3f;{*8Wq=2N zx{48S6Ak?M{`%4E9L53`0oEN=5}KxHs%0A-4-F|%wQ5Hj>4+uQ7in-MVumVUp{$yE z){g}kx@B8-h1*!L{DNMz{_?0Ay78zO&q{??igpXYsx6#kGsr+X8E*=fAcyoyQG%tM zG0kCeN!f@=E4px{HzG;|G8VFonddW--Hrt5 zkTgpsK}Kq%*B4QklFsRj3|O3(@@vS#63kVav3Mj?gGm%I9!W@ciyB$YC7s!Pem)2D zc@2&ofjF38VI8_VZPEr0t>z%rL-As?N||gXc5ydJcPAAI z>0J=ddJzQa&3Y6(dC~ttZ{k7mqJp4@3VxAz@-jT$_vXDfKQ`B|h7{-*t6z~`ijisv8KP6519h`(%? zKUe^#?<4L%`+C6u_;eYu_4E460APIw@q>q3zsmrRj5BRAct5uR(4N}-Y8^c}0q~-z zg6BIXJs=LoQ{n}etYRqhJKIIPqcTYe_Nx-t(BRmf@A);;rzLM0N~VV-C64VJ66b)_ z%=2|XFFX(003b`9>FU@u6X!M#1J@nox49fj^yYtK_@2s(WYBl#1Q$F6ci}%pv72&i zv^!Fo=YzXEQ(AVVmhO!))AxpoHCdtaQLI_OdWlNA1#51NSzl(;(UhI3bdsq;%WQ~L zf0U-OlcEB*s(}47WzF$qa%%$a#w_w&mRDvSD3!2>wex^nm5e{bG<{285JF$8Fc&d}S}`{xcux`It)0PIN(02&SWcs)@AJfF4! zUi`)1Y-7_vxv3!1Z1-ZfDtP&LQ$_b8<498`ZOzhUk_j+%8{4>JVAQnzCK&eb9%-g- zKA3sO7zO|~R>8^&z?`ENC_?WTX5LbaYgyCsj`D&lbNZ7*=c25VX>M(M)T0cn?mtB} zOR1_ftA(WHbU`yAdCVoZ#!Dif3PA>>l4Tu9;*v=s(zqgn>m|u~wx-s_6|oVnQ6+g+ zs8wF4DGyU6Qm=(e{(Q+`xmt}^U|nuf53close(); } + void testInt96TimestampRead(const std::string& fileName) { + // Timestamp-int96.parquet holds one column (t: TIMESTAMP) and + // 10 rows in one row group. Data is in SNAPPY compressed format. + // The values are: + // |t | + // +-------------------+ + // |2015-06-01 19:34:56| + // |2015-06-02 19:34:56| + // |2001-02-03 03:34:06| + // |1998-03-01 08:01:06| + // |2022-12-23 03:56:01| + // |1980-01-24 00:23:07| + // |1999-12-08 13:39:26| + // |2023-04-21 09:09:34| + // |2000-09-12 22:36:29| + // |2007-12-12 04:27:56| + // +-------------------+ + auto vector = makeFlatVector( + {Timestamp(1433187296, 0), + Timestamp(1433273696, 0), + Timestamp(981171246, 0), + Timestamp(888739266, 0), + Timestamp(1671767761, 0), + Timestamp(317521387, 0), + Timestamp(944660366, 0), + Timestamp(1682068174, 0), + Timestamp(968798189, 0), + Timestamp(1197433676, 0)}); + + loadData( + getExampleFilePath(fileName), + ROW({"t"}, {TIMESTAMP()}), + makeRowVector( + {"t"}, + { + vector, + })); + + assertSelectWithFilter({"t"}, {}, "", "SELECT t from tmp"); + assertSelectWithFilter( + {"t"}, + {}, + "t < TIMESTAMP '2000-09-12 22:36:29'", + "SELECT t from tmp where t < TIMESTAMP '2000-09-12 22:36:29'"); + assertSelectWithFilter( + {"t"}, + {}, + "t <= TIMESTAMP '2000-09-12 22:36:29'", + "SELECT t from tmp where t <= TIMESTAMP '2000-09-12 22:36:29'"); + assertSelectWithFilter( + {"t"}, + {}, + "t > TIMESTAMP '1980-01-24 00:23:07'", + "SELECT t from tmp where t > TIMESTAMP '1980-01-24 00:23:07'"); + assertSelectWithFilter( + {"t"}, + {}, + "t >= TIMESTAMP '1980-01-24 00:23:07'", + "SELECT t from tmp where t >= TIMESTAMP '1980-01-24 00:23:07'"); + assertSelectWithFilter( + {"t"}, + {}, + "t == TIMESTAMP '2022-12-23 03:56:01'", + "SELECT t from tmp where t == TIMESTAMP '2022-12-23 03:56:01'"); + } + private: RowTypePtr getRowType(std::vector&& outputColumnNames) const { std::vector types; @@ -719,70 +785,12 @@ TEST_F(ParquetTableScanTest, sessionTimezone) { assertSelectWithTimezone({"a"}, "SELECT a FROM tmp", "Asia/Shanghai"); } -TEST_F(ParquetTableScanTest, timestampFilter) { - // Timestamp-int96.parquet holds one column (t: TIMESTAMP) and - // 10 rows in one row group. Data is in SNAPPY compressed format. - // The values are: - // |t | - // +-------------------+ - // |2015-06-01 19:34:56| - // |2015-06-02 19:34:56| - // |2001-02-03 03:34:06| - // |1998-03-01 08:01:06| - // |2022-12-23 03:56:01| - // |1980-01-24 00:23:07| - // |1999-12-08 13:39:26| - // |2023-04-21 09:09:34| - // |2000-09-12 22:36:29| - // |2007-12-12 04:27:56| - // +-------------------+ - auto vector = makeFlatVector( - {Timestamp(1433187296, 0), - Timestamp(1433273696, 0), - Timestamp(981171246, 0), - Timestamp(888739266, 0), - Timestamp(1671767761, 0), - Timestamp(317521387, 0), - Timestamp(944660366, 0), - Timestamp(1682068174, 0), - Timestamp(968798189, 0), - Timestamp(1197433676, 0)}); - - loadData( - getExampleFilePath("timestamp_int96.parquet"), - ROW({"t"}, {TIMESTAMP()}), - makeRowVector( - {"t"}, - { - vector, - })); +TEST_F(ParquetTableScanTest, timestampInt96Dictionary) { + testInt96TimestampRead("timestamp_int96_dictionary.parquet"); +} - assertSelectWithFilter({"t"}, {}, "", "SELECT t from tmp"); - assertSelectWithFilter( - {"t"}, - {}, - "t < TIMESTAMP '2000-09-12 22:36:29'", - "SELECT t from tmp where t < TIMESTAMP '2000-09-12 22:36:29'"); - assertSelectWithFilter( - {"t"}, - {}, - "t <= TIMESTAMP '2000-09-12 22:36:29'", - "SELECT t from tmp where t <= TIMESTAMP '2000-09-12 22:36:29'"); - assertSelectWithFilter( - {"t"}, - {}, - "t > TIMESTAMP '1980-01-24 00:23:07'", - "SELECT t from tmp where t > TIMESTAMP '1980-01-24 00:23:07'"); - assertSelectWithFilter( - {"t"}, - {}, - "t >= TIMESTAMP '1980-01-24 00:23:07'", - "SELECT t from tmp where t >= TIMESTAMP '1980-01-24 00:23:07'"); - assertSelectWithFilter( - {"t"}, - {}, - "t == TIMESTAMP '2022-12-23 03:56:01'", - "SELECT t from tmp where t == TIMESTAMP '2022-12-23 03:56:01'"); +TEST_F(ParquetTableScanTest, timestampInt96Plain) { + testInt96TimestampRead("timestamp_int96_plain.parquet"); } TEST_F(ParquetTableScanTest, timestampPrecisionMicrosecond) { @@ -836,34 +844,6 @@ TEST_F(ParquetTableScanTest, timestampPrecisionMicrosecond) { assertEqualResults({expected}, result.second); } - -TEST_F(ParquetTableScanTest, timestampINT96) { - auto a = makeFlatVector({Timestamp(1, 0), Timestamp(2, 0)}); - auto expected = makeRowVector({"time"}, {a}); - createDuckDbTable("expected", {expected}); - - auto vector = makeArrayVector({{}}); - loadData( - getExampleFilePath("timestamp_dict_int96.parquet"), - ROW({"time"}, {TIMESTAMP()}), - makeRowVector( - {"time"}, - { - vector, - })); - assertSelect({"time"}, "SELECT time from expected"); - - loadData( - getExampleFilePath("timestamp_plain_int96.parquet"), - ROW({"time"}, {TIMESTAMP()}), - makeRowVector( - {"time"}, - { - vector, - })); - assertSelect({"time"}, "SELECT time from expected"); -} - int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); folly::Init init{&argc, &argv, false};