From f15e096ea784836f8433b9e851925b9a22ab685c Mon Sep 17 00:00:00 2001 From: youxiduo Date: Tue, 9 Jan 2024 20:41:08 -0800 Subject: [PATCH] Use __HIVE_DEFAULT_PARTITION__ for null partition keys (#8291) Summary: Use` __HIVE_DEFAULT_PARTITION__` for NULL partition keys of all data types. Before this change, the default value was used only for null partition keys of type VARCHAR. This happened by accident as these produces empty partition key which was converted to `__HIVE_DEFAULT_PARTITION__`. Pull Request resolved: https://github.com/facebookincubator/velox/pull/8291 Reviewed By: gggrace14 Differential Revision: D52645855 Pulled By: mbasmanova fbshipit-source-id: d9aaeeb386fe3bf6c89ed2cb146e39e6c349ae6c --- velox/connectors/hive/HivePartitionUtil.cpp | 36 ++++++++++--------- .../hive/tests/HivePartitionUtilTest.cpp | 30 ++++++++++++++++ 2 files changed, 49 insertions(+), 17 deletions(-) diff --git a/velox/connectors/hive/HivePartitionUtil.cpp b/velox/connectors/hive/HivePartitionUtil.cpp index 3adf9d38a82b..cbc53c79b5ea 100644 --- a/velox/connectors/hive/HivePartitionUtil.cpp +++ b/velox/connectors/hive/HivePartitionUtil.cpp @@ -51,13 +51,23 @@ template std::pair makePartitionKeyValueString( const BaseVector* partitionVector, vector_size_t row, - const std::string& name) { + const std::string& name, + bool isDate) { using T = typename TypeTraits::NativeType; + if (partitionVector->as>()->isNullAt(row)) { + return std::make_pair(name, ""); + } + if (isDate) { + return std::make_pair( + name, + DATE()->toString( + partitionVector->as>()->valueAt(row))); + } return std::make_pair( name, makePartitionValueString( partitionVector->as>()->valueAt(row))); -}; +} } // namespace @@ -66,21 +76,13 @@ std::vector> extractPartitionKeyValues( vector_size_t row) { std::vector> partitionKeyValues; for (auto i = 0; i < partitionsVector->childrenSize(); i++) { - if (partitionsVector->childAt(i)->type()->isDate()) { - auto partitionVector = partitionsVector->childAt(i)->loadedVector(); - auto partitionName = asRowType(partitionsVector->type())->nameOf(i); - partitionKeyValues.push_back( - {partitionName, - DATE()->toString( - partitionVector->as>()->valueAt(row))}); - } else { - partitionKeyValues.push_back(PARTITION_TYPE_DISPATCH( - makePartitionKeyValueString, - partitionsVector->childAt(i)->typeKind(), - partitionsVector->childAt(i)->loadedVector(), - row, - asRowType(partitionsVector->type())->nameOf(i))); - } + partitionKeyValues.push_back(PARTITION_TYPE_DISPATCH( + makePartitionKeyValueString, + partitionsVector->childAt(i)->typeKind(), + partitionsVector->childAt(i)->loadedVector(), + row, + asRowType(partitionsVector->type())->nameOf(i), + partitionsVector->childAt(i)->type()->isDate())); } return partitionKeyValues; } diff --git a/velox/connectors/hive/tests/HivePartitionUtilTest.cpp b/velox/connectors/hive/tests/HivePartitionUtilTest.cpp index 7bd16286dec5..43945416eb41 100644 --- a/velox/connectors/hive/tests/HivePartitionUtilTest.cpp +++ b/velox/connectors/hive/tests/HivePartitionUtilTest.cpp @@ -117,3 +117,33 @@ TEST_F(HivePartitionUtilTest, partitionName) { "Unsupported partition type: MAP"); } } + +TEST_F(HivePartitionUtilTest, partitionNameForNull) { + std::vector partitionColumnNames{ + "flat_bool_col", + "flat_tinyint_col", + "flat_smallint_col", + "flat_int_col", + "flat_bigint_col", + "flat_string_col", + "const_date_col"}; + + RowVectorPtr input = makeRowVector( + partitionColumnNames, + {makeNullableFlatVector({std::nullopt}), + makeNullableFlatVector({std::nullopt}), + makeNullableFlatVector({std::nullopt}), + makeNullableFlatVector({std::nullopt}), + makeNullableFlatVector({std::nullopt}), + makeNullableFlatVector({std::nullopt}), + makeConstant(std::nullopt, 1, DATE())}); + + for (auto i = 0; i < partitionColumnNames.size(); i++) { + std::vector partitionChannels = {(column_index_t)i}; + auto partitionEntries = extractPartitionKeyValues( + makePartitionsVector(input, partitionChannels), 0); + EXPECT_EQ(1, partitionEntries.size()); + EXPECT_EQ(partitionColumnNames[i], partitionEntries[0].first); + EXPECT_EQ("", partitionEntries[0].second); + } +}