Skip to content

Commit

Permalink
Add IPAddress + IPPrefix and functions
Browse files Browse the repository at this point in the history
  • Loading branch information
mohsaka committed Jul 16, 2024
1 parent 0adc62e commit d4b1e78
Show file tree
Hide file tree
Showing 16 changed files with 1,274 additions and 5 deletions.
2 changes: 2 additions & 0 deletions velox/docs/develop/types.rst
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ HYPERLOGLOG VARBINARY
JSON VARCHAR
TIMESTAMP WITH TIME ZONE BIGINT
UUID HUGEINT
IPADDRESS HUGEINT
IPPREFIX (HUGEINT, BIGINT)
======================== =====================

TIMESTAMP WITH TIME ZONE represents a time point in milliseconds precision
Expand Down
51 changes: 51 additions & 0 deletions velox/docs/functions/presto/ipaddress.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
===================
IP Functions
===================

.. function:: ip_prefix(ip_address, prefix_bits) -> ipprefix

Returns the IP prefix of the given ``ip_address`` with a subnet size of ``prefix_bits``.
``ip_address`` can be either of type ``VARCHAR`` or type ``IPADDRESS``. ::

SELECT ip_prefix(CAST('192.168.255.255' AS IPADDRESS), 9); -- {192.128.0.0/9}
SELECT ip_prefix('2001:0db8:85a3:0001:0001:8a2e:0370:7334', 48); -- {2001:db8:85a3::/48}

.. function:: ip_subnet_min(ip_prefix) -> ip_address

Returns the smallest IP address of type ``IPADDRESS`` in the subnet
specified by ``ip_prefix``. ::

SELECT ip_subnet_min(IPPREFIX '192.168.255.255/9'); -- {192.128.0.0}
SELECT ip_subnet_min(IPPREFIX '2001:0db8:85a3:0001:0001:8a2e:0370:7334/48'); -- {2001:db8:85a3::}

.. function:: ip_subnet_max(ip_prefix) -> ip_address

Returns the largest IP address of type ``IPADDRESS`` in the subnet
specified by ``ip_prefix``. ::

SELECT ip_subnet_max(IPPREFIX '192.64.0.0/9'); -- {192.127.255.255}
SELECT ip_subnet_max(IPPREFIX '2001:0db8:85a3:0001:0001:8a2e:0370:7334/48'); -- {2001:db8:85a3:ffff:ffff:ffff:ffff:ffff}

.. function:: ip_subnet_range(ip_prefix) -> array(ip_address)

Returns an array of 2 IP addresses.
The array contains the smallest and the largest IP address
in the subnet specified by ``ip_prefix``. ::

SELECT ip_subnet_range(IPPREFIX '1.2.3.160/24'); -- [{1.2.3.0}, {1.2.3.255}]
SELECT ip_subnet_range(IPPREFIX '64:ff9b::52f4/120'); -- [{64:ff9b::5200}, {64:ff9b::52ff}]

.. function:: is_subnet_of(ip_prefix, ip_address) -> boolean

Returns ``true`` if the ``ip_address`` is in the subnet of ``ip_prefix``. ::

SELECT is_subnet_of(IPPREFIX '1.2.3.128/26', IPADDRESS '1.2.3.129'); -- true
SELECT is_subnet_of(IPPREFIX '64:fa9b::17/64', IPADDRESS '64:ffff::17'); -- false

.. function:: is_subnet_of(ip_prefix1, ip_prefix2) -> boolean

Returns ``true`` if ``ip_prefix2`` is a subnet of ``ip_prefix1``. ::

SELECT is_subnet_of(IPPREFIX '192.168.3.131/26', IPPREFIX '192.168.3.144/30'); -- true
SELECT is_subnet_of(IPPREFIX '64:ff9b::17/64', IPPREFIX '64:ffff::17/64'); -- false
SELECT is_subnet_of(IPPREFIX '192.168.3.131/26', IPPREFIX '192.168.3.131/26'); -- true
5 changes: 4 additions & 1 deletion velox/expression/tests/CustomTypeTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,8 @@ TEST_F(CustomTypeTest, getCustomTypeNames) {
"HYPERLOGLOG",
"TIMESTAMP WITH TIME ZONE",
"UUID",
}),
"IPADDRESS",
"IPPREFIX"}),
names);

ASSERT_TRUE(registerCustomType(
Expand All @@ -229,6 +230,8 @@ TEST_F(CustomTypeTest, getCustomTypeNames) {
"HYPERLOGLOG",
"TIMESTAMP WITH TIME ZONE",
"UUID",
"IPADDRESS",
"IPPREFIX",
"FANCY_INT",
}),
names);
Expand Down
185 changes: 185 additions & 0 deletions velox/functions/prestosql/IPAddressFunctions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include "velox/common/base/Exceptions.h"
#include "velox/functions/Macros.h"
#include "velox/functions/Registerer.h"
#include "velox/functions/lib/string/StringImpl.h"
#include "velox/functions/prestosql/types/IPPrefixType.h"

namespace facebook::velox::functions {

/// Returns true when the 128-bit address `ip` has the IPv4-mapped IPv6 layout
/// (::ffff:a.b.c.d): the upper 80 bits are zero and the next 16 bits are all
/// ones. The low 32 bits (the IPv4 address itself) are ignored.
inline bool isIPV4(int128_t ip) {
  // Drop the 32 IPv4 address bits; what is left must be exactly 0xFFFF.
  const uint128_t upperBits = static_cast<uint128_t>(ip) >> 32;
  return upperBits == 0xFFFF;
}

/// Implements ip_prefix(ip_address, prefix_bits) -> ipprefix.
///
/// The address is accepted either as an IPADDRESS value or as a VARCHAR that
/// is parsed by folly::IPAddress. The result holds the address with all bits
/// past `prefix_bits` cleared, together with the prefix length.
template <typename T>
struct IPPrefixFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// @param result Receives the newly created IPPrefix.
  /// @param ip 128-bit address value.
  /// @param prefixBits Requested prefix length. Must be in [0, 32] for
  /// IPv4-mapped addresses and in [0, 128] otherwise; a user error is raised
  /// for anything else.
  FOLLY_ALWAYS_INLINE void call(
      out_type<TheIPPrefix>& result,
      const arg_type<IPAddress>& ip,
      const arg_type<int64_t> prefixBits) {
    folly::ByteArray16 addrBytes;
    memcpy(&addrBytes, &ip, 16);
    // NOTE(review): bigEndianByteArray is defined elsewhere; presumably it
    // converts between the integer byte order and the network byte order
    // folly expects - confirm.
    bigEndianByteArray(addrBytes);

    // All IPs are stored as V6
    folly::IPAddressV6 v6Addr(addrBytes);
    const bool isV4 = v6Addr.isIPv4Mapped();

    // Validate before narrowing. Without this check the cast below silently
    // wraps out-of-range values (e.g. 256 becomes 0 and -1 becomes 255).
    const int64_t maxPrefixBits = isV4 ? 32 : 128;
    VELOX_USER_CHECK(
        prefixBits >= 0 && prefixBits <= maxPrefixBits,
        "IPv{} subnet size must be in range [0, {}]",
        isV4 ? 4 : 6,
        maxPrefixBits);

    // Presto stores prefixBits in one signed byte; it is handled as unsigned
    // here so that 128 stays representable.
    const auto prefix = static_cast<uint8_t>(prefixBits);

    // For return
    folly::ByteArray16 canonicalBytes;
    int128_t canonicalAddrInt;

    if (isV4) {
      // Mask in the 32-bit IPv4 space, then convert back to the IPv6 form.
      canonicalBytes =
          v6Addr.createIPv4().mask(prefix).createIPv6().toByteArray();
    } else {
      canonicalBytes = v6Addr.mask(prefix).toByteArray();
    }
    bigEndianByteArray(canonicalBytes);
    memcpy(&canonicalAddrInt, &canonicalBytes, 16);

    result = std::make_shared<IPPrefix>(canonicalAddrInt, prefix);
  }

  /// Overload for a textual address: parses `ip` with folly::IPAddress,
  /// converts it to its IPv6 form and delegates to the IPADDRESS overload,
  /// which also validates `prefixBits`.
  FOLLY_ALWAYS_INLINE void call(
      out_type<TheIPPrefix>& result,
      const arg_type<Varchar>& ip,
      const arg_type<int64_t> prefixBits) {
    int128_t intAddr;
    folly::IPAddress addr(ip);
    auto addrBytes = folly::IPAddress::createIPv6(addr).toByteArray();

    bigEndianByteArray(addrBytes);
    memcpy(&intAddr, &addrBytes, 16);

    call(result, intAddr, prefixBits);
  }
};

/// Implements ip_subnet_min(ip_prefix) -> ipaddress.
template <typename T>
struct IPSubnetMinFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// Copies the address stored in `ipPrefix` into `result`.
  FOLLY_ALWAYS_INLINE void call(
      out_type<IPAddress>& result,
      const arg_type<TheIPPrefix>& ipPrefix) {
    // The IPPrefix value is expected to already hold the smallest
    // (canonical) address of its subnet, so no masking is applied here.
    result = ipPrefix->ip;
  }
};

/// Returns the largest address of the subnet that starts at `ip` and has a
/// prefix length of `prefix` bits, i.e. `ip` with every host bit set.
///
/// The address family is chosen with isIPV4(ip): IPv4-mapped addresses have
/// 32 address bits, everything else has 128. A `prefix` that is equal to or
/// larger than the family width leaves no host bits, so `ip` is returned
/// unchanged.
inline int128_t getIPSubnetMax(int128_t ip, uint8_t prefix) {
  const int addressBits = isIPV4(ip) ? 32 : 128;

  // Clamp so the shift count below can never be negative, which would be
  // undefined behaviour for an out-of-range prefix.
  const int hostBits = prefix >= addressBits ? 0 : addressBits - prefix;

  // Shifting a 128-bit value by 128 is undefined as well; a /0 IPv6 subnet
  // simply ends at the all-ones address.
  if (hostBits == 128) {
    return -1;
  }

  const uint128_t hostMask = (static_cast<uint128_t>(1) << hostBits) - 1;
  return static_cast<int128_t>(static_cast<uint128_t>(ip) | hostMask);
}

/// Implements ip_subnet_max(ip_prefix) -> ipaddress by delegating to
/// getIPSubnetMax.
template <typename T>
struct IPSubnetMaxFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  FOLLY_ALWAYS_INLINE void call(
      out_type<IPAddress>& result,
      const arg_type<TheIPPrefix>& ipPrefix) {
    // The prefix length is interpreted as an unsigned byte.
    const auto prefixLength = static_cast<uint8_t>(ipPrefix->prefix);
    result = getIPSubnetMax(ipPrefix->ip, prefixLength);
  }
};

/// Implements ip_subnet_range(ip_prefix) -> array(ipaddress).
/// Appends two elements to the output array: the address stored in the
/// prefix, followed by the value computed by getIPSubnetMax.
template <typename T>
struct IPSubnetRangeFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  FOLLY_ALWAYS_INLINE void call(
      out_type<Array<IPAddress>>& result,
      const arg_type<TheIPPrefix>& ipPrefix) {
    const auto prefixLength = static_cast<uint8_t>(ipPrefix->prefix);

    // Lower bound first, upper bound second.
    result.push_back(ipPrefix->ip);
    result.push_back(getIPSubnetMax(ipPrefix->ip, prefixLength));
  }
};

/// Implements is_subnet_of(ip_prefix, ip_address) and
/// is_subnet_of(ip_prefix1, ip_prefix2).
template <typename T>
struct IPSubnetOfFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// Sets `result` to true when clearing the host bits of `ip` (as defined by
  /// the prefix length and address family of `ipPrefix`) yields exactly the
  /// address stored in `ipPrefix`.
  FOLLY_ALWAYS_INLINE void call(
      out_type<bool>& result,
      const arg_type<TheIPPrefix>& ipPrefix,
      const arg_type<IPAddress>& ip) {
    const auto prefix = static_cast<uint8_t>(ipPrefix->prefix);
    const int addressBits = isIPV4(ipPrefix->ip) ? 32 : 128;

    // Clamp so an out-of-range prefix cannot produce a negative shift count,
    // which would be undefined behaviour.
    const int hostBits = prefix >= addressBits ? 0 : addressBits - prefix;

    int128_t checkIP = ip;
    if (hostBits == 128) {
      // Shifting by the full width is undefined; a /0 IPv6 prefix keeps no
      // network bits at all.
      checkIP = 0;
    } else {
      const uint128_t hostMask = (static_cast<uint128_t>(1) << hostBits) - 1;
      checkIP =
          static_cast<int128_t>(static_cast<uint128_t>(checkIP) & ~hostMask);
    }
    result = (ipPrefix->ip == checkIP);
  }

  /// Sets `result` to true when the address of `ipPrefix2` lies inside
  /// `ipPrefix` and `ipPrefix2` is at least as specific (its prefix length is
  /// greater than or equal to that of `ipPrefix`).
  FOLLY_ALWAYS_INLINE void call(
      out_type<bool>& result,
      const arg_type<TheIPPrefix>& ipPrefix,
      const arg_type<TheIPPrefix>& ipPrefix2) {
    call(result, ipPrefix, ipPrefix2->ip);

    // Compare the lengths as unsigned bytes, like every other use of the
    // field in this file. If the field is stored in a signed byte, a /128
    // prefix would otherwise compare as negative.
    const auto outerBits = static_cast<uint8_t>(ipPrefix->prefix);
    const auto innerBits = static_cast<uint8_t>(ipPrefix2->prefix);
    result = result && (innerBits >= outerBits);
  }
};

/// Registers the IPADDRESS and IPPREFIX custom types and every IP function
/// defined in this header, each under the name `prefix` + function name.
///
/// Declared `inline` because the definition lives in a header: without it,
/// including this header from more than one translation unit produces
/// multiple-definition link errors.
///
/// @param prefix String prepended to every registered function name.
inline void registerIPAddressFunctions(const std::string& prefix) {
  registerIPAddressType();
  registerIPPrefixType();

  // ip_prefix accepts the address either as IPADDRESS or as VARCHAR.
  registerFunction<IPPrefixFunction, TheIPPrefix, IPAddress, int64_t>(
      {prefix + "ip_prefix"});
  registerFunction<IPPrefixFunction, TheIPPrefix, Varchar, int64_t>(
      {prefix + "ip_prefix"});

  registerFunction<IPSubnetMinFunction, IPAddress, TheIPPrefix>(
      {prefix + "ip_subnet_min"});
  registerFunction<IPSubnetMaxFunction, IPAddress, TheIPPrefix>(
      {prefix + "ip_subnet_max"});
  registerFunction<IPSubnetRangeFunction, Array<IPAddress>, TheIPPrefix>(
      {prefix + "ip_subnet_range"});

  // is_subnet_of accepts either an address or another prefix as its second
  // argument.
  registerFunction<IPSubnetOfFunction, bool, TheIPPrefix, IPAddress>(
      {prefix + "is_subnet_of"});
  registerFunction<IPSubnetOfFunction, bool, TheIPPrefix, TheIPPrefix>(
      {prefix + "is_subnet_of"});
}

} // namespace facebook::velox::functions
9 changes: 9 additions & 0 deletions velox/functions/prestosql/TypeOf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#include "velox/expression/VectorFunction.h"
#include "velox/functions/prestosql/types/HyperLogLogType.h"
#include "velox/functions/prestosql/types/JsonType.h"
#include "velox/functions/prestosql/types/IPAddressType.h"
#include "velox/functions/prestosql/types/IPPrefixType.h"
#include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h"
#include "velox/functions/prestosql/types/UuidType.h"

Expand Down Expand Up @@ -54,6 +56,8 @@ std::string typeName(const TypePtr& type) {
case TypeKind::HUGEINT: {
if (isUuidType(type)) {
return "uuid";
} else if (isIPAddressType(type)) {
return "ipaddress";
}
VELOX_USER_CHECK(
type->isDecimal(),
Expand Down Expand Up @@ -104,6 +108,11 @@ std::string typeName(const TypePtr& type) {
}
case TypeKind::UNKNOWN:
return "unknown";
case TypeKind::OPAQUE:
if (isIPPrefixType(type)) {
return "ipprefix";
}
return "opaque";
default:
VELOX_UNSUPPORTED("Unsupported type: {}", type->toString())
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
#include <string>
#include "velox/functions/prestosql/UuidFunctions.h"
#include "velox/functions/prestosql/IPAddressFunctions.h"

namespace facebook::velox::functions {

Expand Down Expand Up @@ -104,6 +105,7 @@ void registerAllScalarFunctions(const std::string& prefix) {
registerGeneralFunctions(prefix);
registerDateTimeFunctions(prefix);
registerURLFunctions(prefix);
registerIPAddressFunctions(prefix);
registerStringFunctions(prefix);
registerBinaryFunctions(prefix);
registerBitwiseFunctions(prefix);
Expand Down
1 change: 1 addition & 0 deletions velox/functions/prestosql/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ add_executable(
GreatestLeastTest.cpp
HyperLogLogCastTest.cpp
HyperLogLogFunctionsTest.cpp
IPAddressFunctionsTest.cpp
InPredicateTest.cpp
JsonCastTest.cpp
JsonExtractScalarTest.cpp
Expand Down
Loading

0 comments on commit d4b1e78

Please sign in to comment.