Skip to content

Commit

Permalink
Use __HIVE_DEFAULT_PARTITION__ for null partition keys (#8291)
Browse files Browse the repository at this point in the history
Summary:
Use `__HIVE_DEFAULT_PARTITION__` for NULL partition keys of all data types.

Before this change, the default value was used only for null partition keys of
type VARCHAR. This happened by accident, as these produce an empty partition
key, which was then converted to `__HIVE_DEFAULT_PARTITION__`.

Pull Request resolved: #8291

Reviewed By: gggrace14

Differential Revision: D52645855

Pulled By: mbasmanova

fbshipit-source-id: d9aaeeb386fe3bf6c89ed2cb146e39e6c349ae6c
  • Loading branch information
ulysses-you authored and facebook-github-bot committed Jan 10, 2024
1 parent 62caa6a commit f15e096
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 17 deletions.
36 changes: 19 additions & 17 deletions velox/connectors/hive/HivePartitionUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,23 @@ template <TypeKind Kind>
std::pair<std::string, std::string> makePartitionKeyValueString(
const BaseVector* partitionVector,
vector_size_t row,
const std::string& name) {
const std::string& name,
bool isDate) {
using T = typename TypeTraits<Kind>::NativeType;
if (partitionVector->as<SimpleVector<T>>()->isNullAt(row)) {
return std::make_pair(name, "");
}
if (isDate) {
return std::make_pair(
name,
DATE()->toString(
partitionVector->as<SimpleVector<int32_t>>()->valueAt(row)));
}
return std::make_pair(
name,
makePartitionValueString(
partitionVector->as<SimpleVector<T>>()->valueAt(row)));
};
}

} // namespace

Expand All @@ -66,21 +76,13 @@ std::vector<std::pair<std::string, std::string>> extractPartitionKeyValues(
vector_size_t row) {
std::vector<std::pair<std::string, std::string>> partitionKeyValues;
for (auto i = 0; i < partitionsVector->childrenSize(); i++) {
if (partitionsVector->childAt(i)->type()->isDate()) {
auto partitionVector = partitionsVector->childAt(i)->loadedVector();
auto partitionName = asRowType(partitionsVector->type())->nameOf(i);
partitionKeyValues.push_back(
{partitionName,
DATE()->toString(
partitionVector->as<SimpleVector<int32_t>>()->valueAt(row))});
} else {
partitionKeyValues.push_back(PARTITION_TYPE_DISPATCH(
makePartitionKeyValueString,
partitionsVector->childAt(i)->typeKind(),
partitionsVector->childAt(i)->loadedVector(),
row,
asRowType(partitionsVector->type())->nameOf(i)));
}
partitionKeyValues.push_back(PARTITION_TYPE_DISPATCH(
makePartitionKeyValueString,
partitionsVector->childAt(i)->typeKind(),
partitionsVector->childAt(i)->loadedVector(),
row,
asRowType(partitionsVector->type())->nameOf(i),
partitionsVector->childAt(i)->type()->isDate()));
}
return partitionKeyValues;
}
Expand Down
30 changes: 30 additions & 0 deletions velox/connectors/hive/tests/HivePartitionUtilTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,33 @@ TEST_F(HivePartitionUtilTest, partitionName) {
"Unsupported partition type: MAP");
}
}

// Checks extractPartitionKeyValues() on NULL partition keys. For each of the
// bool, tinyint, smallint, int, bigint and string flat columns, and for a
// constant DATE column, a single-row input holding NULL must produce exactly
// one entry whose name is the column name and whose value is the empty string.
TEST_F(HivePartitionUtilTest, partitionNameForNull) {
  std::vector<std::string> partitionColumnNames{
      "flat_bool_col",
      "flat_tinyint_col",
      "flat_smallint_col",
      "flat_int_col",
      "flat_bigint_col",
      "flat_string_col",
      "const_date_col"};

  // One row per column, every value NULL. The DATE column is built as a
  // constant vector over int32_t with an explicit DATE() type.
  RowVectorPtr input = makeRowVector(
      partitionColumnNames,
      {makeNullableFlatVector<bool>({std::nullopt}),
       makeNullableFlatVector<int8_t>({std::nullopt}),
       makeNullableFlatVector<int16_t>({std::nullopt}),
       makeNullableFlatVector<int32_t>({std::nullopt}),
       makeNullableFlatVector<int64_t>({std::nullopt}),
       makeNullableFlatVector<StringView>({std::nullopt}),
       makeConstant<int32_t>(std::nullopt, 1, DATE())});

  // Exercise each column on its own as the sole partition channel.
  for (size_t i = 0; i < partitionColumnNames.size(); i++) {
    std::vector<column_index_t> partitionChannels = {
        static_cast<column_index_t>(i)};
    auto partitionEntries = extractPartitionKeyValues(
        makePartitionsVector(input, partitionChannels), 0);
    // Fatal assertion: the checks below index element 0, which would be out
    // of bounds if no entry was produced.
    ASSERT_EQ(1, partitionEntries.size());
    EXPECT_EQ(partitionColumnNames[i], partitionEntries[0].first);
    EXPECT_EQ("", partitionEntries[0].second);
  }
}

0 comments on commit f15e096

Please sign in to comment.