From 179b1082b93a29724c89d976925fb1dc3c799a79 Mon Sep 17 00:00:00 2001 From: Masha Basmanova Date: Thu, 6 Jun 2024 08:58:40 -0700 Subject: [PATCH] Add UUID Presto type (#10078) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/10078 UUID is a logical type backed by HUGEINT physical type. Add uuid() function, CAST(uuid AS varchar) and CAST(varchar AS uuid). ``` presto> select uuid(); _col0 -------------------------------------- 9a97a14a-25d7-48c7-842b-38539d056e2f (1 row) ``` Reviewed By: xiaoxmeng, amitkdutta Differential Revision: D58222115 fbshipit-source-id: c8bb54ae391e2f02e6dde5d6879c89df34270757 --- velox/docs/develop/types.rst | 3 +- velox/docs/functions.rst | 3 +- velox/docs/functions/presto/uuid.rst | 7 + velox/duckdb/conversion/DuckConversion.cpp | 6 + velox/expression/tests/CustomTypeTest.cpp | 2 + velox/functions/prestosql/TypeOf.cpp | 4 + velox/functions/prestosql/UuidFunctions.h | 47 +++++++ .../registration/RegistrationFunctions.cpp | 2 + .../functions/prestosql/tests/CMakeLists.txt | 1 + .../prestosql/tests/UuidFunctionsTest.cpp | 92 ++++++++++++ .../functions/prestosql/types/CMakeLists.txt | 2 +- velox/functions/prestosql/types/UuidType.cpp | 131 ++++++++++++++++++ velox/functions/prestosql/types/UuidType.h | 78 +++++++++++ .../prestosql/types/tests/CMakeLists.txt | 6 +- .../prestosql/types/tests/UuidTypeTest.cpp | 41 ++++++ 15 files changed, 420 insertions(+), 5 deletions(-) create mode 100644 velox/docs/functions/presto/uuid.rst create mode 100644 velox/functions/prestosql/UuidFunctions.h create mode 100644 velox/functions/prestosql/tests/UuidFunctionsTest.cpp create mode 100644 velox/functions/prestosql/types/UuidType.cpp create mode 100644 velox/functions/prestosql/types/UuidType.h create mode 100644 velox/functions/prestosql/types/tests/UuidTypeTest.cpp diff --git a/velox/docs/develop/types.rst b/velox/docs/develop/types.rst index a6d256c15515..39166de9373e 100644 --- a/velox/docs/develop/types.rst +++ 
b/velox/docs/develop/types.rst @@ -135,6 +135,7 @@ Presto Type Physical Type HYPERLOGLOG VARBINARY JSON VARCHAR TIMESTAMP WITH TIME ZONE BIGINT +UUID HUGEINT ======================== ===================== TIMESTAMP WITH TIME ZONE represents a time point in milliseconds precision @@ -143,4 +144,4 @@ The high 52 bits of bigint store signed integer for milliseconds in UTC. Supported range of milliseconds is [0xFFF8000000000000L, 0x7FFFFFFFFFFFF] (or [-69387-04-22T03:45:14.752, 73326-09-11T20:14:45.247]). The low 12 bits store timezone ID. Supported range of timezone ID is [1, 1680]. -The definition of timezone IDs can be found in ``TimeZoneDatabase.cpp``. \ No newline at end of file +The definition of timezone IDs can be found in ``TimeZoneDatabase.cpp``. diff --git a/velox/docs/functions.rst b/velox/docs/functions.rst index 8aee16a81cfb..a83c80be2b4d 100644 --- a/velox/docs/functions.rst +++ b/velox/docs/functions.rst @@ -21,6 +21,7 @@ Presto Functions functions/presto/aggregate functions/presto/window functions/presto/hyperloglog + functions/presto/uuid Here is a list of all scalar and aggregate Presto functions available in Velox. Function names link to function descriptions. 
Check out coverage maps @@ -147,4 +148,4 @@ for :doc:`all ` and :doc:`most used :func:`f_cdf` not :func:`zip` :func:`filter` :func:`parse_datetime` :func:`zip_with` :func:`find_first` :func:`pi` - ====================================== ====================================== ====================================== == ====================================== == ====================================== \ No newline at end of file + ====================================== ====================================== ====================================== == ====================================== == ====================================== diff --git a/velox/docs/functions/presto/uuid.rst b/velox/docs/functions/presto/uuid.rst new file mode 100644 index 000000000000..5ac51ab89d89 --- /dev/null +++ b/velox/docs/functions/presto/uuid.rst @@ -0,0 +1,7 @@ +============== +UUID functions +============== + +.. function:: uuid() -> uuid + + Returns a pseudo randomly generated UUID (type 4). diff --git a/velox/duckdb/conversion/DuckConversion.cpp b/velox/duckdb/conversion/DuckConversion.cpp index 450d0ca0fc91..b01da12c6da4 100644 --- a/velox/duckdb/conversion/DuckConversion.cpp +++ b/velox/duckdb/conversion/DuckConversion.cpp @@ -181,6 +181,12 @@ TypePtr toVeloxType(LogicalType type, bool fileColumnNamesReadAsLowerCase) { } return ROW(std::move(names), std::move(types)); } + case LogicalTypeId::UUID: { + if (auto customType = getCustomType("UUID")) { + return customType; + } + [[fallthrough]]; + } case LogicalTypeId::USER: { const auto name = ::duckdb::UserType::GetTypeName(type); if (auto customType = getCustomType(name)) { diff --git a/velox/expression/tests/CustomTypeTest.cpp b/velox/expression/tests/CustomTypeTest.cpp index 5de7b287b476..01db687746d6 100644 --- a/velox/expression/tests/CustomTypeTest.cpp +++ b/velox/expression/tests/CustomTypeTest.cpp @@ -215,6 +215,7 @@ TEST_F(CustomTypeTest, getCustomTypeNames) { "JSON", "HYPERLOGLOG", "TIMESTAMP WITH TIME ZONE", + "UUID", }), names); 
@@ -227,6 +228,7 @@ TEST_F(CustomTypeTest, getCustomTypeNames) {
           "JSON",
           "HYPERLOGLOG",
           "TIMESTAMP WITH TIME ZONE",
+          "UUID",
           "FANCY_INT",
       }),
       names);
diff --git a/velox/functions/prestosql/TypeOf.cpp b/velox/functions/prestosql/TypeOf.cpp
index 3dc3509656ce..048c8d031c6a 100644
--- a/velox/functions/prestosql/TypeOf.cpp
+++ b/velox/functions/prestosql/TypeOf.cpp
@@ -17,6 +17,7 @@
 #include "velox/functions/prestosql/types/HyperLogLogType.h"
 #include "velox/functions/prestosql/types/JsonType.h"
 #include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h"
+#include "velox/functions/prestosql/types/UuidType.h"
 
 namespace facebook::velox::functions {
 namespace {
@@ -51,6 +52,9 @@ std::string typeName(const TypePtr& type) {
       }
       return "bigint";
     case TypeKind::HUGEINT: {
+      if (isUuidType(type)) {
+        return "uuid";
+      }
       VELOX_USER_CHECK(
           type->isDecimal(),
           "Expected decimal type. Got: {}",
diff --git a/velox/functions/prestosql/UuidFunctions.h b/velox/functions/prestosql/UuidFunctions.h
new file mode 100644
index 000000000000..9e4b319a7fff
--- /dev/null
+++ b/velox/functions/prestosql/UuidFunctions.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <boost/uuid/uuid.hpp>
+#include <boost/uuid/uuid_generators.hpp>
+
+#include "velox/functions/Macros.h"
+#include "velox/functions/Registerer.h"
+#include "velox/functions/prestosql/types/UuidType.h"
+
+namespace facebook::velox::functions {
+
+/// Implements uuid(): draws a value from boost::uuids::random_generator and
+/// returns its 16 bytes as an int128_t, the physical representation of the
+/// UUID type.
+template <typename T>
+struct UuidFunction {
+  VELOX_DEFINE_FUNCTION_TYPES(T);
+
+  // Each call draws a new random value, so the function is declared
+  // non-deterministic.
+  static constexpr bool is_deterministic = false;
+
+  /// Generates one UUID and copies its 16 bytes, in the order boost stores
+  /// them, into 'result'.
+  FOLLY_ALWAYS_INLINE void call(int128_t& result) {
+    boost::uuids::uuid uuid = generator_();
+    memcpy(&result, uuid.data, 16);
+  }
+
+ private:
+  boost::uuids::random_generator generator_;
+};
+
+/// Registers the UUID type, then the uuid() function under the name
+/// 'prefix' + "uuid".
+inline void registerUuidFunctions(const std::string& prefix) {
+  registerUuidType();
+  registerFunction<UuidFunction, Uuid>({prefix + "uuid"});
+}
+
+} // namespace facebook::velox::functions
diff --git a/velox/functions/prestosql/registration/RegistrationFunctions.cpp b/velox/functions/prestosql/registration/RegistrationFunctions.cpp
index 96f92c96db7f..3e1b8bb647e1 100644
--- a/velox/functions/prestosql/registration/RegistrationFunctions.cpp
+++ b/velox/functions/prestosql/registration/RegistrationFunctions.cpp
@@ -14,6 +14,7 @@
  * limitations under the License.
*/ #include +#include "velox/functions/prestosql/UuidFunctions.h" namespace facebook::velox::functions { @@ -106,6 +107,7 @@ void registerAllScalarFunctions(const std::string& prefix) { registerStringFunctions(prefix); registerBinaryFunctions(prefix); registerBitwiseFunctions(prefix); + registerUuidFunctions(prefix); } void registerMapAllowingDuplicates( diff --git a/velox/functions/prestosql/tests/CMakeLists.txt b/velox/functions/prestosql/tests/CMakeLists.txt index 7b6dd6694400..bb86f825b75e 100644 --- a/velox/functions/prestosql/tests/CMakeLists.txt +++ b/velox/functions/prestosql/tests/CMakeLists.txt @@ -97,6 +97,7 @@ add_executable( TypeOfTest.cpp URLFunctionsTest.cpp Utf8Test.cpp + UuidFunctionsTest.cpp WidthBucketArrayTest.cpp WordStemTest.cpp ZipTest.cpp diff --git a/velox/functions/prestosql/tests/UuidFunctionsTest.cpp b/velox/functions/prestosql/tests/UuidFunctionsTest.cpp new file mode 100644 index 000000000000..6290443a71eb --- /dev/null +++ b/velox/functions/prestosql/tests/UuidFunctionsTest.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/common/base/tests/GTestUtils.h" +#include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h" + +namespace facebook::velox::functions::prestosql { + +namespace { + +class UuidFunctionsTest : public functions::test::FunctionBaseTest {}; + +TEST_F(UuidFunctionsTest, uuid) { + auto result = + evaluate>("uuid()", makeRowVector(ROW({}), 10)); + + // Sanity check results. All values are unique. + std::unordered_set uuids; + for (auto i = 0; i < 10; ++i) { + const auto uuid = result->valueAt(i); + ASSERT_TRUE(uuids.insert(uuid).second); + } + ASSERT_EQ(10, uuids.size()); +} + +TEST_F(UuidFunctionsTest, typeof) { + auto result = evaluate("typeof(uuid())", makeRowVector(ROW({}), 10)); + + auto expected = makeConstant("uuid", 10); + velox::test::assertEqualVectors(expected, result); + + result = evaluate( + "typeof(array_constructor(uuid(), uuid()))", makeRowVector(ROW({}), 10)); + + expected = makeConstant("array(uuid)", 10); + velox::test::assertEqualVectors(expected, result); +} + +TEST_F(UuidFunctionsTest, castAsVarchar) { + auto result = evaluate>( + "cast(uuid() as varchar)", makeRowVector(ROW({}), 10)); + + // Sanity check results. All strings are unique. Each string is 36 bytes + // long. 
+ std::unordered_set uuids; + for (auto i = 0; i < 10; ++i) { + const auto uuid = result->valueAt(i).str(); + ASSERT_EQ(36, uuid.size()); + ASSERT_TRUE(uuids.insert(uuid).second); + } + ASSERT_EQ(10, uuids.size()); +} + +TEST_F(UuidFunctionsTest, castRoundTrip) { + auto strings = makeFlatVector({ + "33355449-2c7d-43d7-967a-f53cd23215ad", + "eed9f812-4b0c-472f-8a10-4ae7bff79a47", + "f768f36d-4f09-4da7-a298-3564d8f3c986", + }); + + auto uuids = evaluate("cast(c0 as uuid)", makeRowVector({strings})); + auto stringsCopy = evaluate("cast(c0 as varchar)", makeRowVector({uuids})); + auto uuidsCopy = evaluate("cast(c0 as uuid)", makeRowVector({stringsCopy})); + + velox::test::assertEqualVectors(strings, stringsCopy); + velox::test::assertEqualVectors(uuids, uuidsCopy); +} + +TEST_F(UuidFunctionsTest, unsupportedCast) { + auto input = makeRowVector(ROW({}), 10); + VELOX_ASSERT_THROW( + evaluate("cast(uuid() as integer)", input), + "Cannot cast UUID to INTEGER"); + VELOX_ASSERT_THROW( + evaluate("cast(123 as uuid())", input), "Cannot cast BIGINT to UUID."); +} + +} // namespace +} // namespace facebook::velox::functions::prestosql diff --git a/velox/functions/prestosql/types/CMakeLists.txt b/velox/functions/prestosql/types/CMakeLists.txt index 4444b80b120b..b3481493a3c0 100644 --- a/velox/functions/prestosql/types/CMakeLists.txt +++ b/velox/functions/prestosql/types/CMakeLists.txt @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
add_library(velox_presto_types HyperLogLogType.cpp JsonType.cpp - TimestampWithTimeZoneType.cpp) + TimestampWithTimeZoneType.cpp UuidType.cpp) target_link_libraries( velox_presto_types velox_memory velox_expression velox_functions_util diff --git a/velox/functions/prestosql/types/UuidType.cpp b/velox/functions/prestosql/types/UuidType.cpp new file mode 100644 index 000000000000..8d11e9416647 --- /dev/null +++ b/velox/functions/prestosql/types/UuidType.cpp @@ -0,0 +1,131 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/functions/prestosql/types/UuidType.h" +#include +#include +#include + +namespace facebook::velox { + +namespace { + +class UuidCastOperator : public exec::CastOperator { + public: + bool isSupportedFromType(const TypePtr& other) const override { + return VARCHAR()->equivalent(*other); + } + + bool isSupportedToType(const TypePtr& other) const override { + return VARCHAR()->equivalent(*other); + } + + void castTo( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + const TypePtr& resultType, + VectorPtr& result) const override { + context.ensureWritable(rows, resultType, result); + + if (input.typeKind() == TypeKind::VARCHAR) { + castFromString(input, context, rows, *result); + } else { + VELOX_UNSUPPORTED( + "Cast from {} to UUID not yet supported", resultType->toString()); + } + } + + void castFrom( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + const TypePtr& resultType, + VectorPtr& result) const override { + context.ensureWritable(rows, resultType, result); + + if (resultType->kind() == TypeKind::VARCHAR) { + castToString(input, context, rows, *result); + } else { + VELOX_UNSUPPORTED( + "Cast from UUID to {} not yet supported", resultType->toString()); + } + } + + private: + static void castToString( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + BaseVector& result) { + auto* flatResult = result.as>(); + const auto* uuids = input.as>(); + + context.applyToSelectedNoThrow(rows, [&](auto row) { + const auto uuid = uuids->valueAt(row); + + boost::uuids::uuid u; + memcpy(&u, &uuid, 16); + + std::string s = boost::lexical_cast(u); + + exec::StringWriter result(flatResult, row); + result.append(s); + result.finalize(); + }); + } + + static void castFromString( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + BaseVector& result) { + auto* flatResult = result.as>(); + const auto* uuidStrings 
= input.as>(); + + context.applyToSelectedNoThrow(rows, [&](auto row) { + const auto uuidString = uuidStrings->valueAt(row); + + auto uuid = boost::lexical_cast(uuidString); + + int128_t u; + memcpy(&u, &uuid, 16); + + flatResult->set(row, u); + }); + } +}; + +class UuidTypeFactories : public CustomTypeFactories { + public: + UuidTypeFactories() = default; + + TypePtr getType() const override { + return UUID(); + } + + exec::CastOperatorPtr getCastOperator() const override { + return std::make_shared(); + } +}; + +} // namespace + +void registerUuidType() { + registerCustomType("uuid", std::make_unique()); +} + +} // namespace facebook::velox diff --git a/velox/functions/prestosql/types/UuidType.h b/velox/functions/prestosql/types/UuidType.h new file mode 100644 index 000000000000..7f37dd80b4c9 --- /dev/null +++ b/velox/functions/prestosql/types/UuidType.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/expression/CastExpr.h" +#include "velox/type/SimpleFunctionApi.h" +#include "velox/type/Type.h" + +namespace facebook::velox { + +/// Represents a UUID (Universally Unique IDentifier), also known as a +/// GUID (Globally Unique IDentifier), using the format defined in :rfc:`4122`. 
+/// +/// Example: UUID '12151fd2-7586-11e9-8f9e-2a86e4085a59' +class UuidType : public HugeintType { + UuidType() = default; + + public: + static const std::shared_ptr& get() { + static const std::shared_ptr instance{new UuidType()}; + + return instance; + } + + bool equivalent(const Type& other) const override { + // Pointer comparison works since this type is a singleton. + return this == &other; + } + + const char* name() const override { + return "UUID"; + } + + std::string toString() const override { + return name(); + } + + folly::dynamic serialize() const override { + folly::dynamic obj = folly::dynamic::object; + obj["name"] = "Type"; + obj["type"] = name(); + return obj; + } +}; + +FOLLY_ALWAYS_INLINE bool isUuidType(const TypePtr& type) { + // Pointer comparison works since this type is a singleton. + return UuidType::get() == type; +} + +FOLLY_ALWAYS_INLINE std::shared_ptr UUID() { + return UuidType::get(); +} + +// Type used for function registration. +struct UuidT { + using type = int128_t; + static constexpr const char* typeName = "uuid"; +}; + +using Uuid = CustomType; + +void registerUuidType(); + +} // namespace facebook::velox diff --git a/velox/functions/prestosql/types/tests/CMakeLists.txt b/velox/functions/prestosql/types/tests/CMakeLists.txt index 6eda6a99e73e..b7cbaa4f4336 100644 --- a/velox/functions/prestosql/types/tests/CMakeLists.txt +++ b/velox/functions/prestosql/types/tests/CMakeLists.txt @@ -11,9 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+
 add_executable(
-  velox_presto_types_test HyperLogLogTypeTest.cpp JsonTypeTest.cpp
-  TimestampWithTimeZoneTypeTest.cpp TypeTestBase.cpp)
+  velox_presto_types_test
+  HyperLogLogTypeTest.cpp JsonTypeTest.cpp TimestampWithTimeZoneTypeTest.cpp
+  TypeTestBase.cpp UuidTypeTest.cpp)
 
 add_test(velox_presto_types_test velox_presto_types_test)
 
diff --git a/velox/functions/prestosql/types/tests/UuidTypeTest.cpp b/velox/functions/prestosql/types/tests/UuidTypeTest.cpp
new file mode 100644
index 000000000000..cb92bffa22cb
--- /dev/null
+++ b/velox/functions/prestosql/types/tests/UuidTypeTest.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "velox/functions/prestosql/types/UuidType.h"
+#include "velox/functions/prestosql/types/tests/TypeTestBase.h"
+
+namespace facebook::velox::test {
+
+// Fixture that registers the UUID custom type when it is constructed.
+class UuidTypeTest : public testing::Test, public TypeTestBase {
+ public:
+  UuidTypeTest() {
+    registerUuidType();
+  }
+};
+
+TEST_F(UuidTypeTest, basic) {
+  // These accessors return C strings; compare contents, not pointers.
+  ASSERT_STREQ(UUID()->name(), "UUID");
+  ASSERT_STREQ(UUID()->kindName(), "HUGEINT");
+  ASSERT_TRUE(UUID()->parameters().empty());
+  ASSERT_EQ(UUID()->toString(), "UUID");
+
+  ASSERT_TRUE(hasType("UUID"));
+  ASSERT_EQ(*getType("UUID", {}), *UUID());
+}
+
+// Runs the shared serialization check from TypeTestBase on the UUID type.
+TEST_F(UuidTypeTest, serde) {
+  testTypeSerde(UUID());
+}
+} // namespace facebook::velox::test