Skip to content

Commit

Permalink
Min/Max functions
Browse files Browse the repository at this point in the history
  • Loading branch information
mohsaka committed Jun 26, 2024
1 parent 22f8e70 commit a7f3336
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 22 deletions.
74 changes: 69 additions & 5 deletions velox/functions/prestosql/IPAddressFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,18 @@ template <typename T>
struct IPPrefixFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

// TODO: Template to varchar doesn't work for some reason
//template <typename TInput>
FOLLY_ALWAYS_INLINE void call(
out_type<TheIPPrefix>& result,
const arg_type<IPAddress>& ip,
const arg_type<int8_t> prefixBits) {

// Presto stores prefixBits in one signed byte. Cast to unsigned
uint8_t prefix = (uint8_t)prefixBits;
boost::asio::ip::address_v6::bytes_type addrBytes;
memcpy(&addrBytes, &ip, 16);
bigEndianByteArray(addrBytes);

// All IPs are stored as V6
auto v6Addr = boost::asio::ip::make_address_v6(addrBytes);
Expand All @@ -61,35 +67,93 @@ struct IPPrefixFunction {
// For return
int128_t canonicalAddrInt;

// Presto stores prefixBits in one byte. Cast to unsigned
// Convert to V4/V6 respectively and create network to get canonical
// address as well as check validity of the prefix.
if (v6Addr.is_v4_mapped()) {
auto v4Addr =
boost::asio::ip::make_address_v4(boost::asio::ip::v4_mapped, v6Addr);
auto v4Network =
boost::asio::ip::make_network_v4(v4Addr, (uint8_t)prefixBits);
boost::asio::ip::make_network_v4(v4Addr, prefix);
v6CanonicalAddr = boost::asio::ip::make_address_v6(
boost::asio::ip::v4_mapped, v4Network.canonical().address());
} else {
auto v6Network =
boost::asio::ip::make_network_v6(v6Addr, (uint8_t)prefixBits);
boost::asio::ip::make_network_v6(v6Addr, prefix);
v6CanonicalAddr = v6Network.canonical().address();
}

auto canonicalBytes = v6CanonicalAddr.to_bytes();
bigEndianByteArray(canonicalBytes);
memcpy(&canonicalAddrInt, &canonicalBytes, 16);

result = std::make_shared<IPPrefix>(canonicalAddrInt, (uint8_t)prefixBits);
result = std::make_shared<IPPrefix>(canonicalAddrInt, prefix);
}
};

/// Scalar function producing the first (lowest) address of an IP prefix.
template <typename T>
struct IPSubnetMinFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// Copies the address held by `ipPrefix` into `result` unchanged.
  ///
  /// @param result Receives the 16 address bytes of the prefix.
  /// @param ipPrefix Prefix whose stored address is returned.
  FOLLY_ALWAYS_INLINE void call(
      out_type<IPAddress>& result,
      const arg_type<TheIPPrefix>& ipPrefix) {
    // NOTE(review): this relies on the IPPrefix value already holding the
    // canonical (smallest) address of the subnet, so no masking is done here
    // -- confirm against the code that constructs IPPrefix values.
    const auto& canonicalIp = ipPrefix->ip;
    memcpy(&result, &canonicalIp, 16);
  }
};

/// Scalar function producing the last (highest) address of an IP prefix.
template <typename T>
struct IPSubnetMaxFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// Starts from the address stored in `ipPrefix` and sets every host bit
  /// (the bits below the prefix length) to 1.
  ///
  /// IPv4-mapped addresses are treated as 32 bits wide, all other addresses
  /// as 128 bits wide. A prefix length that covers the whole address (or
  /// exceeds it) leaves the stored address untouched.
  ///
  /// @param result Receives the resulting address.
  /// @param ipPrefix Prefix whose stored address and length are used.
  FOLLY_ALWAYS_INLINE void call(
      out_type<IPAddress>& result,
      const arg_type<TheIPPrefix>& ipPrefix) {
    // Presto stores the prefix length in one signed byte; reinterpret it as
    // unsigned before doing arithmetic on it.
    const auto prefix = static_cast<uint8_t>(ipPrefix->prefix);

    // The result begins as a copy of the stored address.
    memcpy(&result, &ipPrefix->ip, 16);

    // Rebuild the address object only to find out whether it is v4-mapped.
    boost::asio::ip::address_v6::bytes_type addrBytes;
    memcpy(&addrBytes, &ipPrefix->ip, 16);
    bigEndianByteArray(addrBytes);
    const auto v6Addr = boost::asio::ip::make_address_v6(addrBytes);

    // Number of trailing bits that belong to the host part.
    int hostBits = 0;
    if (v6Addr.is_v4_mapped()) {
      assert(prefix <= 32);
      hostBits = 32 - prefix;
    } else {
      assert(prefix <= 128);
      hostBits = 128 - prefix;
    }

    // Nothing to fill in when the prefix spans the full address width.
    if (hostBits <= 0) {
      return;
    }

    // A 128-bit shift of a 128-bit value is not allowed, so the "every bit is
    // a host bit" case (IPv6 with prefix 0) is handled by setting all bits.
    if (hostBits == 128) {
      result = -1;
      return;
    }

    const uint128_t lowBit = 1;
    result |= (lowBit << hostBits) - 1;
  }
};

inline void registerIPAddressFunctions(const std::string& prefix) {
void registerIPAddressFunctions(const std::string& prefix) {
registerIPAddressType();
registerIPPrefixType();
registerFunction<IPAddressFunction, IPAddress>({prefix + "ipaddress"});
registerFunction<IPPrefixFunction, TheIPPrefix, IPAddress, int8_t>(
{prefix + "ip_prefix"});
registerFunction<IPSubnetMinFunction, IPAddress, TheIPPrefix>(
{prefix + "ip_subset_min"});
registerFunction<IPSubnetMaxFunction, IPAddress, TheIPPrefix>(
{prefix + "ip_subset_max"});
//registerFunction<IPPrefixFunction, TheIPPrefix, Varchar, int8_t>(
// {prefix + "ip_prefix"});

}

} // namespace facebook::velox::functions
66 changes: 61 additions & 5 deletions velox/functions/prestosql/tests/IPAddressFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,31 @@ class IPAddressTest : public functions::test::FunctionBaseTest {
"cast(ip_prefix(cast(c0 as ipaddress), c1) as varchar)", input, mask);
return result;
}

// Evaluates ip_prefix directly on the varchar input `input` with prefix
// length `mask` and returns the result cast to varchar.
std::optional<std::string> getIPPrefixUsingVarchar(
    const std::optional<std::string> input,
    std::optional<int8_t> mask) {
  const std::string expression = "cast(ip_prefix(c0, c1) as varchar)";
  return evaluateOnce<std::string>(expression, input, mask);
}

// Casts `input` to ipprefix, applies ip_subset_min and returns the resulting
// address cast to varchar.
std::optional<std::string> getIPSubnetMin(
    const std::optional<std::string> input) {
  const std::string expression =
      "cast(ip_subset_min(cast(c0 as ipprefix)) as varchar)";
  return evaluateOnce<std::string>(expression, input);
}

// Casts `input` to ipprefix, applies ip_subset_max and returns the resulting
// address cast to varchar.
std::optional<std::string> getIPSubnetMax(
    const std::optional<std::string> input) {
  const std::string expression =
      "cast(ip_subset_max(cast(c0 as ipprefix)) as varchar)";
  return evaluateOnce<std::string>(expression, input);
}
};


TEST_F(IPAddressTest, castAsVarchar) {
auto result = evaluate<FlatVector<StringView>>(
"cast(ipaddress() as varchar)", makeRowVector(ROW({}), 10));
Expand All @@ -45,6 +68,7 @@ TEST_F(IPAddressTest, castAsVarchar) {
ASSERT_EQ(10, ipaddresses.size());
}


TEST_F(IPAddressTest, castRoundTrip) {
auto strings = makeFlatVector<std::string>(
{"87a0:ce14:8989:44c9:826e:b4d8:73f9:1542",
Expand All @@ -63,6 +87,8 @@ TEST_F(IPAddressTest, castRoundTrip) {
}

TEST_F(IPAddressTest, IPPrefixv4) {
//EXPECT_EQ("10.0.0.0/8", getIPPrefixUsingVarchar("10.135.23.12", 8));

EXPECT_EQ("10.0.0.0/8", getIPPrefix("10.135.23.12", 8));
EXPECT_EQ("192.128.0.0/9", getIPPrefix("192.168.255.255", 9));
EXPECT_EQ("192.168.255.255/32", getIPPrefix("192.168.255.255", 32));
Expand All @@ -76,6 +102,9 @@ TEST_F(IPAddressTest, IPPrefixv4) {
}

TEST_F(IPAddressTest, IPPrefixv6) {
//EXPECT_EQ(
// "2001:db8:85a3::/48",
// getIPPrefixUsingVarchar("2001:0db8:85a3:0001:0001:8a2e:0370:7334", 48));
EXPECT_EQ(
"2001:db8:85a3::/48",
getIPPrefix("2001:0db8:85a3:0001:0001:8a2e:0370:7334", 48));
Expand Down Expand Up @@ -105,21 +134,48 @@ TEST_F(IPAddressTest, IPPrefixv6) {

// Round-trips prefix strings: varchar -> ipprefix -> varchar -> ipprefix, and
// checks that the varchar produced by the first round trip equals the input.
TEST_F(IPAddressTest, castRoundTripPrefix) {
auto strings = makeFlatVector<std::string>(
{"87a0:ce14:8989:44c9:826e:b4d8:73f9:1542/48",
"7cd6:bcec:1216:5c20:4b67:b1bd:173:ced/5",
"192.128.0.0/5"});
{"87a0:ce14:8989::/48",
"7800::/5",
"192.0.0.0/5"});

auto ipprefixes = evaluate("cast(c0 as ipprefix)", makeRowVector({strings}));
auto stringsCopy =
evaluate("cast(c0 as varchar)", makeRowVector({ipprefixes}));
auto ipprefixesCopy =
evaluate("cast(c0 as ipprefix)", makeRowVector({stringsCopy}));

// assertEqualVectors compares the shared pointers, so ipprefixes and
// ipprefixesCopy cannot be compared directly; compare the varchar forms.
velox::test::assertEqualVectors(strings, stringsCopy);
}

// Checks the lowest address reported for IPv4 and IPv6 prefixes, including
// the boundary prefix lengths (0, 1, width - 1, width).
TEST_F(IPAddressTest, IPSubsetMin) {
  struct SubnetCase {
    const char* expected;
    const char* prefix;
  };

  const SubnetCase cases[] = {
      // IPv4
      {"192.0.0.0", "192.64.1.1/9"},
      {"0.0.0.0", "192.64.1.1/0"},
      {"128.0.0.0", "192.64.1.1/1"},
      {"192.64.1.0", "192.64.1.1/31"},
      {"192.64.1.1", "192.64.1.1/32"},
      // IPv6
      {"2001:db8:85a3::", "2001:0db8:85a3:0001:0001:8a2e:0370:7334/48"},
      {"::", "2001:0db8:85a3:0001:0001:8a2e:0370:7334/0"},
      {"::", "2001:0db8:85a3:0001:0001:8a2e:0370:7334/1"},
      {"2001:db8:85a3:1:1:8a2e:370:7334",
       "2001:0db8:85a3:0001:0001:8a2e:0370:7334/127"},
      {"2001:db8:85a3:1:1:8a2e:370:7334",
       "2001:0db8:85a3:0001:0001:8a2e:0370:7334/128"},
  };

  for (const auto& testCase : cases) {
    EXPECT_EQ(testCase.expected, getIPSubnetMin(testCase.prefix));
  }
}

// Checks the highest address reported for IPv4 and IPv6 prefixes, including
// the boundary prefix lengths (0, 1, width - 1, width).
TEST_F(IPAddressTest, IPSubsetMax) {
  struct SubnetCase {
    const char* expected;
    const char* prefix;
  };

  const SubnetCase cases[] = {
      // IPv4
      {"192.127.255.255", "192.64.1.1/9"},
      {"255.255.255.255", "192.64.1.1/0"},
      {"255.255.255.255", "192.64.1.1/1"},
      {"192.64.1.1", "192.64.1.1/31"},
      {"192.64.1.1", "192.64.1.1/32"},
      // IPv6
      {"2001:db8:85a3:ffff:ffff:ffff:ffff:ffff",
       "2001:0db8:85a3:0001:0001:8a2e:0370:7334/48"},
      {"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
       "2001:0db8:85a3:0001:0001:8a2e:0370:7334/0"},
      {"7fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
       "2001:0db8:85a3:0001:0001:8a2e:0370:7334/1"},
      {"2001:db8:85a3:1:1:8a2e:370:7335",
       "2001:0db8:85a3:0001:0001:8a2e:0370:7334/127"},
      {"2001:db8:85a3:1:1:8a2e:370:7334",
       "2001:0db8:85a3:0001:0001:8a2e:0370:7334/128"},
  };

  for (const auto& testCase : cases) {
    EXPECT_EQ(testCase.expected, getIPSubnetMax(testCase.prefix));
  }
}

} // namespace

} // namespace facebook::velox::functions::prestosql
2 changes: 2 additions & 0 deletions velox/functions/prestosql/types/IPAddressType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class IPAddressCastOperator : public exec::CastOperator {
std::string s;
memcpy(&addrBytes, &intAddr, 16);

bigEndianByteArray(addrBytes);
auto v6Addr = boost::asio::ip::make_address_v6(addrBytes);

if (v6Addr.is_v4_mapped()) {
Expand Down Expand Up @@ -117,6 +118,7 @@ class IPAddressCastOperator : public exec::CastOperator {
addrBytes = addr.to_v6().to_bytes();
}

bigEndianByteArray(addrBytes);
memcpy(&intAddr, &addrBytes, 16);

flatResult->set(row, intAddr);
Expand Down
14 changes: 9 additions & 5 deletions velox/functions/prestosql/types/IPAddressType.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,21 @@
#include <boost/asio/ip/network_v4.hpp>
#include <boost/asio/ip/network_v6.hpp>
#include <boost/lexical_cast.hpp>
#include <folly/IPAddress.h>
#include <folly/Bits.h>
#include "velox/expression/CastExpr.h"
#include "velox/type/SimpleFunctionApi.h"
#include "velox/type/Type.h"

namespace facebook::velox {

/// Represents a UUID (Universally Unique IDentifier), also known as a
/// GUID (Globally Unique IDentifier), using the format defined in :rfc:`4122`.
///
/// Example: UUID '12151fd2-7586-11e9-8f9e-2a86e4085a59'
/// Switches an address byte array between big-endian and native byte order,
/// in place. On little-endian hosts the bytes are reversed; on big-endian
/// hosts the array is left as is. Calling it twice restores the input.
inline void bigEndianByteArray(
    boost::asio::ip::address_v6::bytes_type& addrBytes) {
  if (!folly::kIsLittleEndian) {
    return;
  }
  std::reverse(addrBytes.data(), addrBytes.data() + addrBytes.size());
}

class IPAddressType : public HugeintType {
IPAddressType() = default;

Expand Down
18 changes: 11 additions & 7 deletions velox/functions/prestosql/types/IPPrefixType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,9 @@ class IPPrefixCastOperator : public exec::CastOperator {
std::static_pointer_cast<IPPrefix>(ipaddresses->valueAt(row));
boost::asio::ip::address_v6::bytes_type addrBytes;
std::string s;

memcpy(&addrBytes, &intAddr->ip, 16);
bigEndianByteArray(addrBytes);
auto v6Addr = boost::asio::ip::make_address_v6(addrBytes);

if (v6Addr.is_v4_mapped()) {
Expand Down Expand Up @@ -116,18 +118,20 @@ class IPPrefixCastOperator : public exec::CastOperator {
auto addr = boost::asio::ip::make_address(ipOnly);
IPPrefix res(0, 0);
if (addr.is_v4()) {
res.prefix = (uint8_t)boost::asio::ip::make_network_v4(ipAddressString)
.prefix_length();
auto v4Net = boost::asio::ip::make_network_v4(ipAddressString);
res.prefix = (uint8_t)v4Net.prefix_length();
addrBytes = boost::asio::ip::make_address_v6(
boost::asio::ip::v4_mapped, addr.to_v4())
boost::asio::ip::v4_mapped, v4Net.canonical().address())
.to_bytes();
} else {
res.prefix = (uint8_t)boost::asio::ip::make_network_v6(ipAddressString)
.prefix_length();
addrBytes = addr.to_v6().to_bytes();
auto v6Net = boost::asio::ip::make_network_v6(ipAddressString);
res.prefix = (uint8_t)v6Net.prefix_length();
addrBytes = v6Net.canonical().address().to_bytes();
}

bigEndianByteArray(addrBytes);
memcpy(&res.ip, &addrBytes, 16);

flatResult->set(
row, std::make_shared<IPPrefix>(res.ip, (uint8_t)res.prefix));
});
Expand Down

0 comments on commit a7f3336

Please sign in to comment.