From c461c901810f2d83a8d862e7639728cecb69145b Mon Sep 17 00:00:00 2001
From: rui-mo
Date: Thu, 12 Dec 2024 17:33:07 +0800
Subject: [PATCH] Add test

---
Notes (reviewer commentary; not part of the commit message):

  Adds ParquetTableScanTest.structMatchByIndex. The test writes a single row
  {id: 2, name: {first: 'Janet', last: 'Jones'}, address: '567 Maple Drive'}
  to a temporary Parquet file and scans it with table schemas whose 'name'
  struct differs from the one stored in the file:

    * ROW(first, middle, last): expects ('Janet', 'Jones', null), i.e. the
      requested subfields are paired with the file's subfields by position
      and the extra trailing position reads as null.
    * ROW(a, b): expects ('Janet', 'Jones'); the subfield names play no role.
    * ROW(full) and ROW(): the scan is expected to throw VeloxRuntimeError.

  NOTE(review): the element types given to std::vector / makeFlatVector
  (int64_t, std::string, and StringView in the trailing context line) are
  assumed from the BIGINT()/VARCHAR() schema used below - confirm against
  the working tree before applying.

 .../tests/reader/ParquetTableScanTest.cpp     | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp b/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp
index 20826137cba1..4f30405c9e14 100644
--- a/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp
+++ b/velox/dwio/parquet/tests/reader/ParquetTableScanTest.cpp
@@ -1123,6 +1123,77 @@ TEST_F(ParquetTableScanTest, schemaMatch) {
   assertEqualVectors(rows->childAt(2), nullVector);
 }
 
+TEST_F(ParquetTableScanTest, structMatchByIndex) {
+  std::vector<int64_t> values = {2};
+  const auto id = makeFlatVector<int64_t>(values);
+  const auto name = makeRowVector(
+      {"first", "last"},
+      {
+          makeFlatVector<std::string>({"Janet"}),
+          makeFlatVector<std::string>({"Jones"}),
+      });
+  const auto address = makeFlatVector<std::string>({"567 Maple Drive"});
+  const auto vector =
+      makeRowVector({"id", "name", "address"}, {id, name, address});
+
+  WriterOptions options;
+  const auto schema = asRowType(vector->type());
+  const auto file = TempFilePath::create();
+  writeToParquetFile(file->getPath(), {vector}, options);
+
+  loadData(file->getPath(), schema, vector);
+  assertSelect({"id", "name", "address"}, "SELECT id, name, address from tmp");
+
+  // Add one nonexisting subfield 'middle' to the 'name' field.
+  auto rowType =
+      ROW({"id", "name", "address"},
+          {BIGINT(),
+           ROW({"first", "middle", "last"}, {VARCHAR(), VARCHAR(), VARCHAR()}),
+           VARCHAR()});
+  loadData(file->getPath(), rowType, vector);
+  assertSelectWithDataColumns(
+      {"id", "name", "address"},
+      rowType,
+      "SELECT 2, ('Janet', 'Jones', null), '567 Maple Drive'");
+
+  // Rename subfields of the 'name' field.
+  rowType =
+      ROW({"id", "name", "address"},
+          {BIGINT(), ROW({"a", "b"}, {VARCHAR(), VARCHAR()}), VARCHAR()});
+  loadData(file->getPath(), rowType, vector);
+  assertSelectWithDataColumns(
+      {"id", "name", "address"},
+      rowType,
+      "SELECT 2, ('Janet', 'Jones'), '567 Maple Drive'");
+
+  // Deletion of one subfield from the 'name' field.
+  rowType =
+      ROW({"id", "name", "address"},
+          {BIGINT(), ROW({"full"}, {VARCHAR()}), VARCHAR()});
+  auto op = PlanBuilder()
+                .startTableScan()
+                .outputType(rowType)
+                .dataColumns(rowType)
+                .endTableScan()
+                .planNode();
+  auto split = makeSplit(file->getPath());
+  EXPECT_THROW(
+      AssertQueryBuilder(op).split(split).copyResults(pool()),
+      VeloxRuntimeError);
+
+  // No subfield in the 'name' field.
+  rowType = ROW({"id", "name", "address"}, {BIGINT(), ROW({}, {}), VARCHAR()});
+  op = PlanBuilder()
+           .startTableScan()
+           .outputType(rowType)
+           .dataColumns(rowType)
+           .endTableScan()
+           .planNode();
+  EXPECT_THROW(
+      AssertQueryBuilder(op).split(split).copyResults(pool()),
+      VeloxRuntimeError);
+}
+
 TEST_F(ParquetTableScanTest, deltaByteArray) {
   auto a = makeFlatVector<StringView>({"axis", "axle", "babble", "babyhood"});
   auto expected = makeRowVector({"a"}, {a});