Skip to content

Commit

Permalink
Add UUID Presto type (facebookincubator#10078)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: facebookincubator#10078

UUID is a logical type backed by the HUGEINT physical type.

Add uuid() function, CAST(uuid AS varchar) and CAST(varchar AS uuid).

```
presto> select uuid();
                _col0
--------------------------------------
 9a97a14a-25d7-48c7-842b-38539d056e2f
(1 row)
```

Reviewed By: xiaoxmeng, amitkdutta

Differential Revision: D58222115

fbshipit-source-id: c8bb54ae391e2f02e6dde5d6879c89df34270757
  • Loading branch information
mbasmanova authored and facebook-github-bot committed Jun 6, 2024
1 parent 7fe89b4 commit 179b108
Show file tree
Hide file tree
Showing 15 changed files with 420 additions and 5 deletions.
3 changes: 2 additions & 1 deletion velox/docs/develop/types.rst
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ Presto Type Physical Type
HYPERLOGLOG VARBINARY
JSON VARCHAR
TIMESTAMP WITH TIME ZONE BIGINT
UUID HUGEINT
======================== =====================

TIMESTAMP WITH TIME ZONE represents a time point in milliseconds precision
Expand All @@ -143,4 +144,4 @@ The high 52 bits of bigint store signed integer for milliseconds in UTC.
Supported range of milliseconds is [0xFFF8000000000000L, 0x7FFFFFFFFFFFF]
(or [-69387-04-22T03:45:14.752, 73326-09-11T20:14:45.247]). The low 12 bits
store timezone ID. Supported range of timezone ID is [1, 1680].
The definition of timezone IDs can be found in ``TimeZoneDatabase.cpp``.
The definition of timezone IDs can be found in ``TimeZoneDatabase.cpp``.
3 changes: 2 additions & 1 deletion velox/docs/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Presto Functions
functions/presto/aggregate
functions/presto/window
functions/presto/hyperloglog
functions/presto/uuid

Here is a list of all scalar and aggregate Presto functions available in Velox.
Function names link to function descriptions. Check out coverage maps
Expand Down Expand Up @@ -147,4 +148,4 @@ for :doc:`all <functions/presto/coverage>` and :doc:`most used
:func:`f_cdf` not :func:`zip`
:func:`filter` :func:`parse_datetime` :func:`zip_with`
:func:`find_first` :func:`pi`
====================================== ====================================== ====================================== == ====================================== == ======================================
====================================== ====================================== ====================================== == ====================================== == ======================================
7 changes: 7 additions & 0 deletions velox/docs/functions/presto/uuid.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
==============
UUID functions
==============

.. function:: uuid() -> uuid

Returns a pseudo-randomly generated UUID (version 4).
6 changes: 6 additions & 0 deletions velox/duckdb/conversion/DuckConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,12 @@ TypePtr toVeloxType(LogicalType type, bool fileColumnNamesReadAsLowerCase) {
}
return ROW(std::move(names), std::move(types));
}
case LogicalTypeId::UUID: {
if (auto customType = getCustomType("UUID")) {
return customType;
}
[[fallthrough]];
}
case LogicalTypeId::USER: {
const auto name = ::duckdb::UserType::GetTypeName(type);
if (auto customType = getCustomType(name)) {
Expand Down
2 changes: 2 additions & 0 deletions velox/expression/tests/CustomTypeTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ TEST_F(CustomTypeTest, getCustomTypeNames) {
"JSON",
"HYPERLOGLOG",
"TIMESTAMP WITH TIME ZONE",
"UUID",
}),
names);

Expand All @@ -227,6 +228,7 @@ TEST_F(CustomTypeTest, getCustomTypeNames) {
"JSON",
"HYPERLOGLOG",
"TIMESTAMP WITH TIME ZONE",
"UUID",
"FANCY_INT",
}),
names);
Expand Down
4 changes: 4 additions & 0 deletions velox/functions/prestosql/TypeOf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "velox/functions/prestosql/types/HyperLogLogType.h"
#include "velox/functions/prestosql/types/JsonType.h"
#include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h"
#include "velox/functions/prestosql/types/UuidType.h"

namespace facebook::velox::functions {
namespace {
Expand Down Expand Up @@ -51,6 +52,9 @@ std::string typeName(const TypePtr& type) {
}
return "bigint";
case TypeKind::HUGEINT: {
if (isUuidType(type)) {
return "uuid";
}
VELOX_USER_CHECK(
type->isDecimal(),
"Expected decimal type. Got: {}",
Expand Down
47 changes: 47 additions & 0 deletions velox/functions/prestosql/UuidFunctions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_generators.hpp>

#include "velox/functions/Macros.h"
#include "velox/functions/Registerer.h"
#include "velox/functions/prestosql/types/UuidType.h"

namespace facebook::velox::functions {

/// Implementation of the zero-argument uuid() scalar function. Each call
/// obtains a new UUID from the Boost random generator and returns its 16 raw
/// bytes packed into the 128-bit integer result.
template <typename TExec>
struct UuidFunction {
  VELOX_DEFINE_FUNCTION_TYPES(TExec);

  // Flagged as non-deterministic: every call draws a new value from
  // 'generator_'.
  static constexpr bool is_deterministic = false;

  /// Writes a newly generated UUID into 'result'.
  FOLLY_ALWAYS_INLINE void call(int128_t& result) {
    boost::uuids::uuid generated = generator_();
    // Byte-for-byte copy of the UUID payload; no reordering is applied.
    memcpy(&result, generated.data, sizeof(result));
  }

 private:
  // Generator owned by this function instance and reused across calls.
  boost::uuids::random_generator generator_;
};

/// Registers the UUID custom type together with the uuid() scalar function.
/// The function is registered under the name 'prefix' + "uuid".
inline void registerUuidFunctions(const std::string& prefix) {
  // Register the UUID type before the function that returns it.
  registerUuidType();

  const std::string functionName = prefix + "uuid";
  registerFunction<UuidFunction, Uuid>({functionName});
}

} // namespace facebook::velox::functions
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* limitations under the License.
*/
#include <string>
#include "velox/functions/prestosql/UuidFunctions.h"

namespace facebook::velox::functions {

Expand Down Expand Up @@ -106,6 +107,7 @@ void registerAllScalarFunctions(const std::string& prefix) {
registerStringFunctions(prefix);
registerBinaryFunctions(prefix);
registerBitwiseFunctions(prefix);
registerUuidFunctions(prefix);
}

void registerMapAllowingDuplicates(
Expand Down
1 change: 1 addition & 0 deletions velox/functions/prestosql/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ add_executable(
TypeOfTest.cpp
URLFunctionsTest.cpp
Utf8Test.cpp
UuidFunctionsTest.cpp
WidthBucketArrayTest.cpp
WordStemTest.cpp
ZipTest.cpp
Expand Down
92 changes: 92 additions & 0 deletions velox/functions/prestosql/tests/UuidFunctionsTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/common/base/tests/GTestUtils.h"
#include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h"

namespace facebook::velox::functions::prestosql {

namespace {

// Fixture for the UUID function and cast tests in this file. It adds no
// members of its own on top of FunctionBaseTest.
class UuidFunctionsTest : public functions::test::FunctionBaseTest {};

// Evaluates uuid() over a batch of rows and verifies that every row received
// a different value.
TEST_F(UuidFunctionsTest, uuid) {
  constexpr int kNumRows = 10;

  const auto generated = evaluate<FlatVector<int128_t>>(
      "uuid()", makeRowVector(ROW({}), kNumRows));

  // Sanity check: inserting each value into a set must never find a duplicate.
  std::unordered_set<int128_t> seen;
  for (int row = 0; row < kNumRows; ++row) {
    const bool isNew = seen.insert(generated->valueAt(row)).second;
    ASSERT_TRUE(isNew);
  }
  ASSERT_EQ(kNumRows, seen.size());
}

// typeof() must report "uuid" for a UUID value and "array(uuid)" for an array
// of UUID values.
TEST_F(UuidFunctionsTest, typeof) {
  constexpr int kNumRows = 10;

  // Scalar UUID.
  const auto scalarTypeName =
      evaluate("typeof(uuid())", makeRowVector(ROW({}), kNumRows));
  velox::test::assertEqualVectors(
      makeConstant("uuid", kNumRows), scalarTypeName);

  // UUID nested inside an array.
  const auto arrayTypeName = evaluate(
      "typeof(array_constructor(uuid(), uuid()))",
      makeRowVector(ROW({}), kNumRows));
  velox::test::assertEqualVectors(
      makeConstant("array(uuid)", kNumRows), arrayTypeName);
}

// Casting freshly generated UUIDs to VARCHAR must produce 36-byte strings
// that are all different from each other.
TEST_F(UuidFunctionsTest, castAsVarchar) {
  constexpr int kNumRows = 10;

  const auto rendered = evaluate<FlatVector<StringView>>(
      "cast(uuid() as varchar)", makeRowVector(ROW({}), kNumRows));

  // Sanity check each row: expected length and no repeats.
  std::unordered_set<std::string> seen;
  for (int row = 0; row < kNumRows; ++row) {
    const std::string text = rendered->valueAt(row).str();
    ASSERT_EQ(36, text.size());
    const bool isNew = seen.insert(text).second;
    ASSERT_TRUE(isNew);
  }
  ASSERT_EQ(kNumRows, seen.size());
}

// varchar -> uuid -> varchar -> uuid must reproduce both the original strings
// and the original UUID values.
TEST_F(UuidFunctionsTest, castRoundTrip) {
  const auto originalStrings = makeFlatVector<std::string>({
      "33355449-2c7d-43d7-967a-f53cd23215ad",
      "eed9f812-4b0c-472f-8a10-4ae7bff79a47",
      "f768f36d-4f09-4da7-a298-3564d8f3c986",
  });

  // Each step feeds the previous result back in as column c0.
  const auto parsed =
      evaluate("cast(c0 as uuid)", makeRowVector({originalStrings}));
  const auto printed = evaluate("cast(c0 as varchar)", makeRowVector({parsed}));
  const auto reparsed = evaluate("cast(c0 as uuid)", makeRowVector({printed}));

  velox::test::assertEqualVectors(originalStrings, printed);
  velox::test::assertEqualVectors(parsed, reparsed);
}

// Casts between UUID and anything other than VARCHAR must be rejected with a
// "Cannot cast ..." error, in both directions.
TEST_F(UuidFunctionsTest, unsupportedCast) {
  auto input = makeRowVector(ROW({}), 10);

  // UUID -> non-VARCHAR target.
  VELOX_ASSERT_THROW(
      evaluate("cast(uuid() as integer)", input),
      "Cannot cast UUID to INTEGER");

  // Non-VARCHAR source -> UUID. The target is the type name 'uuid', not a
  // call to the uuid() function.
  VELOX_ASSERT_THROW(
      evaluate("cast(123 as uuid)", input), "Cannot cast BIGINT to UUID.");
}

} // namespace
} // namespace facebook::velox::functions::prestosql
2 changes: 1 addition & 1 deletion velox/functions/prestosql/types/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
add_library(velox_presto_types HyperLogLogType.cpp JsonType.cpp
TimestampWithTimeZoneType.cpp)
TimestampWithTimeZoneType.cpp UuidType.cpp)

target_link_libraries(
velox_presto_types velox_memory velox_expression velox_functions_util
Expand Down
131 changes: 131 additions & 0 deletions velox/functions/prestosql/types/UuidType.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/functions/prestosql/types/UuidType.h"
#include <boost/lexical_cast.hpp>
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_io.hpp>

namespace facebook::velox {

namespace {

class UuidCastOperator : public exec::CastOperator {
public:
bool isSupportedFromType(const TypePtr& other) const override {
return VARCHAR()->equivalent(*other);
}

bool isSupportedToType(const TypePtr& other) const override {
return VARCHAR()->equivalent(*other);
}

void castTo(
const BaseVector& input,
exec::EvalCtx& context,
const SelectivityVector& rows,
const TypePtr& resultType,
VectorPtr& result) const override {
context.ensureWritable(rows, resultType, result);

if (input.typeKind() == TypeKind::VARCHAR) {
castFromString(input, context, rows, *result);
} else {
VELOX_UNSUPPORTED(
"Cast from {} to UUID not yet supported", resultType->toString());
}
}

void castFrom(
const BaseVector& input,
exec::EvalCtx& context,
const SelectivityVector& rows,
const TypePtr& resultType,
VectorPtr& result) const override {
context.ensureWritable(rows, resultType, result);

if (resultType->kind() == TypeKind::VARCHAR) {
castToString(input, context, rows, *result);
} else {
VELOX_UNSUPPORTED(
"Cast from UUID to {} not yet supported", resultType->toString());
}
}

private:
static void castToString(
const BaseVector& input,
exec::EvalCtx& context,
const SelectivityVector& rows,
BaseVector& result) {
auto* flatResult = result.as<FlatVector<StringView>>();
const auto* uuids = input.as<SimpleVector<int128_t>>();

context.applyToSelectedNoThrow(rows, [&](auto row) {
const auto uuid = uuids->valueAt(row);

boost::uuids::uuid u;
memcpy(&u, &uuid, 16);

std::string s = boost::lexical_cast<std::string>(u);

exec::StringWriter<false> result(flatResult, row);
result.append(s);
result.finalize();
});
}

static void castFromString(
const BaseVector& input,
exec::EvalCtx& context,
const SelectivityVector& rows,
BaseVector& result) {
auto* flatResult = result.as<FlatVector<int128_t>>();
const auto* uuidStrings = input.as<SimpleVector<StringView>>();

context.applyToSelectedNoThrow(rows, [&](auto row) {
const auto uuidString = uuidStrings->valueAt(row);

auto uuid = boost::lexical_cast<boost::uuids::uuid>(uuidString);

int128_t u;
memcpy(&u, &uuid, 16);

flatResult->set(row, u);
});
}
};

/// Factories registered for the UUID custom type: one hands out the type
/// instance, the other the cast operator used for UUID casts.
class UuidTypeFactories final : public CustomTypeFactories {
 public:
  UuidTypeFactories() = default;

  /// Creates a new cast operator for UUID on every call.
  exec::CastOperatorPtr getCastOperator() const override {
    return std::make_shared<UuidCastOperator>();
  }

  /// Returns the UUID type as provided by UUID().
  TypePtr getType() const override {
    return UUID();
  }
};

} // namespace

/// Registers the UUID custom type under the name "uuid", handing ownership of
/// its factories to the custom type registry.
void registerUuidType() {
  auto factories = std::make_unique<const UuidTypeFactories>();
  registerCustomType("uuid", std::move(factories));
}

} // namespace facebook::velox
Loading

0 comments on commit 179b108

Please sign in to comment.