Add TIMESTAMP support to VectorHasher value ids and ranges.

A Timestamp is mapped to a 64-bit integer via Timestamp::toNanos() and then
follows the existing int64_t paths (UniqueValue, toInt64, analyzeValue,
valueId, extendRange). TIMESTAMP is added to the value-id dispatch macro and
to the switch in VectorHasher.h that returns true for supported kinds. A
PartitionIdGenerator test covers a single TIMESTAMP partition key.

NOTE(review): every conversion goes through toNanos(); confirm how it behaves
for timestamps whose nanosecond count does not fit in int64_t.
NOTE(review): whitespace in context lines was reconstructed; verify against
the target tree before applying.

diff --git a/velox/connectors/hive/tests/PartitionIdGeneratorTest.cpp b/velox/connectors/hive/tests/PartitionIdGeneratorTest.cpp
index d544d071797f8..0b4e13d4a7104 100644
--- a/velox/connectors/hive/tests/PartitionIdGeneratorTest.cpp
+++ b/velox/connectors/hive/tests/PartitionIdGeneratorTest.cpp
@@ -107,6 +107,34 @@ TEST_F(PartitionIdGeneratorTest, multipleBoolKeys) {
       numPartitions - 1);
 }
 
+// Checks the ids generated for a single TIMESTAMP partition key.
+TEST_F(PartitionIdGeneratorTest, singleTimestampKeys) {
+  auto numPartitions = 100;
+
+  PartitionIdGenerator idGenerator(ROW({TIMESTAMP()}), {0}, 100, pool(), true);
+
+  auto input = makeRowVector({makeFlatVector<Timestamp>(
+      numPartitions,
+      [](vector_size_t row) {
+        return Timestamp(
+            row, (row % 1000) * Timestamp::kNanosecondsInMillisecond);
+      },
+      nullEvery(10))});
+
+  raw_vector<uint64_t> ids;
+  idGenerator.run(input, ids);
+
+  std::unordered_set<uint64_t> distinctIds(ids.begin(), ids.end());
+  // 100 rows with distinct timestamps; nullEvery(10) nulls out every tenth
+  // row (presumably rows 0, 10, ..., 90), so the expectation is 90 non-null
+  // partition values plus one null partition: 91 ids numbered 0 through 90.
+  EXPECT_EQ(distinctIds.size(), 91);
+  EXPECT_EQ(*std::min_element(distinctIds.begin(), distinctIds.end()), 0);
+  EXPECT_EQ(
+      *std::max_element(distinctIds.begin(), distinctIds.end()),
+      90);
+}
+
 TEST_F(PartitionIdGeneratorTest, stableIdsSingleKey) {
   PartitionIdGenerator idGenerator(ROW({BIGINT()}), {0}, 100, pool(), true);
 
diff --git a/velox/exec/VectorHasher.cpp b/velox/exec/VectorHasher.cpp
index a98e3d9b3d466..ad1075d3a274d 100644
--- a/velox/exec/VectorHasher.cpp
+++ b/velox/exec/VectorHasher.cpp
@@ -41,6 +41,9 @@ namespace facebook::velox::exec {
       case TypeKind::BIGINT: {                                           \
         return TEMPLATE_FUNC<TypeKind::BIGINT>(__VA_ARGS__);             \
       }                                                                  \
+      case TypeKind::TIMESTAMP: {                                        \
+        return TEMPLATE_FUNC<TypeKind::TIMESTAMP>(__VA_ARGS__);          \
+      }                                                                  \
       case TypeKind::VARCHAR:                                            \
       case TypeKind::VARBINARY: {                                        \
         return TEMPLATE_FUNC<TypeKind::VARCHAR>(__VA_ARGS__);            \
@@ -598,6 +601,11 @@ void VectorHasher::analyze(
       analyzeTyped, typeKind_, groups, numGroups, offset, nullByte, nullMask);
 }
 
+template <>
+void VectorHasher::analyzeValue(Timestamp value) {
+  analyzeValue<int64_t>(value.toNanos());
+}
+
 template <>
 void VectorHasher::analyzeValue(StringView value) {
   int size = value.size();
@@ -736,6 +744,7 @@ void extendRange(
     case TypeKind::BIGINT:
     case TypeKind::VARCHAR:
     case TypeKind::VARBINARY:
+    case TypeKind::TIMESTAMP:
       extendRange<int64_t>(reserve, min, max);
       break;
 
diff --git a/velox/exec/VectorHasher.h b/velox/exec/VectorHasher.h
index 425fe267bd8a7..55371bf0c04bf 100644
--- a/velox/exec/VectorHasher.h
+++ b/velox/exec/VectorHasher.h
@@ -35,6 +35,8 @@ class UniqueValue {
     data_ = value;
   }
 
+  explicit UniqueValue(Timestamp value) : UniqueValue(value.toNanos()) {}
+
   explicit UniqueValue(const char* value, uint32_t size) {
     size_ = size;
     data_ = 0;
@@ -293,6 +295,7 @@ class VectorHasher {
       case TypeKind::BIGINT:
       case TypeKind::VARCHAR:
       case TypeKind::VARBINARY:
+      case TypeKind::TIMESTAMP:
         return true;
       default:
         return false;
@@ -593,6 +596,14 @@ bool VectorHasher::makeValueIdsForRows(
     uint8_t nullMask,
     uint64_t* result);
 
+template <>
+inline int64_t VectorHasher::toInt64(Timestamp value) const {
+  return value.toNanos();
+}
+
+template <>
+void VectorHasher::analyzeValue(Timestamp value);
+
 template <>
 void VectorHasher::analyzeValue(StringView value);
 
@@ -604,6 +615,11 @@ inline bool VectorHasher::tryMapToRange(
   return false;
 }
 
+template <>
+inline uint64_t VectorHasher::valueId(Timestamp value) {
+  return valueId(value.toNanos());
+}
+
 template <>
 inline uint64_t VectorHasher::valueId(StringView value) {
   auto size = value.size();