Skip to content

Commit

Permalink
fix: Support timestamp type partition write
Browse files Browse the repository at this point in the history
  • Loading branch information
wypb committed Dec 9, 2024
1 parent 4dd0e60 commit 90f78bc
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 0 deletions.
24 changes: 24 additions & 0 deletions velox/connectors/hive/tests/PartitionIdGeneratorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,30 @@ TEST_F(PartitionIdGeneratorTest, multipleBoolKeys) {
numPartitions - 1);
}

// Runs the partition id generator over a single TIMESTAMP partition key and
// checks the number of distinct ids produced as well as their min and max.
TEST_F(PartitionIdGeneratorTest, singleTimestampKeys) {
  // Number of input rows; the same value is passed as the generator's third
  // constructor argument.
  auto numPartitions = 100;
  // Expected results for the 100-row input below. Presumably nullEvery(10)
  // nulls rows 0, 10, ..., 90, leaving 90 distinct non-null timestamps plus
  // one id shared by all null rows - TODO confirm against nullEvery().
  const auto expectedDistinctIds = 91;
  const auto expectedMaxId = 90;

  PartitionIdGenerator idGenerator(
      ROW({TIMESTAMP()}), {0}, numPartitions, pool(), true);

  // Each row gets a different Timestamp: the row number is used for the first
  // constructor argument and also scales the second one.
  auto timestamps = makeFlatVector<Timestamp>(
      numPartitions,
      [](vector_size_t row) {
        return Timestamp(
            row, (row % 1000) * Timestamp::kNanosecondsInMillisecond);
      },
      nullEvery(10));
  auto input = makeRowVector({timestamps});

  raw_vector<uint64_t> ids;
  idGenerator.run(input, ids);

  // Collapse the per-row ids into the set of ids that were actually assigned.
  std::unordered_set<uint64_t> distinctIds(ids.begin(), ids.end());
  EXPECT_EQ(distinctIds.size(), expectedDistinctIds);

  const auto smallestId =
      *std::min_element(distinctIds.begin(), distinctIds.end());
  EXPECT_EQ(smallestId, 0);

  const auto largestId =
      *std::max_element(distinctIds.begin(), distinctIds.end());
  EXPECT_EQ(largestId, expectedMaxId);
}

TEST_F(PartitionIdGeneratorTest, stableIdsSingleKey) {
PartitionIdGenerator idGenerator(ROW({BIGINT()}), {0}, 100, pool(), true);

Expand Down
9 changes: 9 additions & 0 deletions velox/exec/VectorHasher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ namespace facebook::velox::exec {
case TypeKind::BIGINT: { \
return TEMPLATE_FUNC<TypeKind::BIGINT>(__VA_ARGS__); \
} \
case TypeKind::TIMESTAMP: { \
return TEMPLATE_FUNC<TypeKind::TIMESTAMP>(__VA_ARGS__); \
} \
case TypeKind::VARCHAR: \
case TypeKind::VARBINARY: { \
return TEMPLATE_FUNC<TypeKind::VARCHAR>(__VA_ARGS__); \
Expand Down Expand Up @@ -598,6 +601,11 @@ void VectorHasher::analyze(
analyzeTyped, typeKind_, groups, numGroups, offset, nullByte, nullMask);
}

// Timestamp specialization of analyzeValue: takes the result of
// value.toNanos() and hands it to the analyzeValue overload selected for that
// result type, so timestamps share that analysis path.
// NOTE(review): if toNanos() can fail or overflow for timestamps far from the
// epoch, that would surface here - confirm against the Timestamp class.
template <>
void VectorHasher::analyzeValue(Timestamp value) {
  const auto nanos = value.toNanos();
  analyzeValue(nanos);
}

template <>
void VectorHasher::analyzeValue(StringView value) {
int size = value.size();
Expand Down Expand Up @@ -736,6 +744,7 @@ void extendRange(
case TypeKind::BIGINT:
case TypeKind::VARCHAR:
case TypeKind::VARBINARY:
case TypeKind::TIMESTAMP:
extendRange<int64_t>(reserve, min, max);
break;

Expand Down
16 changes: 16 additions & 0 deletions velox/exec/VectorHasher.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ class UniqueValue {
data_ = value;
}

// Builds a UniqueValue from a Timestamp by delegating to the constructor that
// accepts the result of value.toNanos().
explicit UniqueValue(Timestamp value) : UniqueValue(value.toNanos()) {}

explicit UniqueValue(const char* value, uint32_t size) {
size_ = size;
data_ = 0;
Expand Down Expand Up @@ -293,6 +295,7 @@ class VectorHasher {
case TypeKind::BIGINT:
case TypeKind::VARCHAR:
case TypeKind::VARBINARY:
case TypeKind::TIMESTAMP:
return true;
default:
return false;
Expand Down Expand Up @@ -593,6 +596,14 @@ bool VectorHasher::makeValueIdsForRows<TypeKind::VARCHAR>(
uint8_t nullMask,
uint64_t* result);

// Timestamp specialization of toInt64: the 64-bit integer used for a
// timestamp is whatever value.toNanos() returns.
template <>
inline int64_t VectorHasher::toInt64(Timestamp value) const {
  const int64_t nanos = value.toNanos();
  return nanos;
}

// Declares the explicit specialization of analyzeValue for Timestamp. No body
// is given here, so the definition lives elsewhere.
template <>
void VectorHasher::analyzeValue(Timestamp value);

template <>
void VectorHasher::analyzeValue(StringView value);

Expand All @@ -604,6 +615,11 @@ inline bool VectorHasher::tryMapToRange(
return false;
}

// Timestamp specialization of valueId: returns the id that the valueId
// overload for the type of value.toNanos() assigns to that result.
template <>
inline uint64_t VectorHasher::valueId(Timestamp value) {
  const auto nanos = value.toNanos();
  return valueId(nanos);
}

template <>
inline uint64_t VectorHasher::valueId(StringView value) {
auto size = value.size();
Expand Down

0 comments on commit 90f78bc

Please sign in to comment.