From 2298a16514f8b86aa05c03c5b81acecc37494a6d Mon Sep 17 00:00:00 2001 From: mohsaka <135669458+mohsaka@users.noreply.github.com> Date: Thu, 29 Aug 2024 01:08:06 -0700 Subject: [PATCH] Add IPPREFIX --- velox/docs/develop/types.rst | 18 + velox/docs/functions/presto/conversion.rst | 125 ++++++- velox/expression/tests/CustomTypeTest.cpp | 2 + .../functions/prestosql/IPAddressFunctions.h | 2 + velox/functions/prestosql/TypeOf.cpp | 3 + .../functions/prestosql/tests/CMakeLists.txt | 1 + .../prestosql/tests/IPAddressCastTest.cpp | 10 +- .../prestosql/tests/IPPrefixCastTest.cpp | 183 ++++++++++ .../functions/prestosql/types/CMakeLists.txt | 3 +- .../prestosql/types/IPAddressType.cpp | 67 +++- .../functions/prestosql/types/IPAddressType.h | 5 + .../prestosql/types/IPPrefixType.cpp | 329 ++++++++++++++++++ .../functions/prestosql/types/IPPrefixType.h | 73 ++++ velox/functions/prestosql/types/UuidType.cpp | 2 +- .../prestosql/types/tests/CMakeLists.txt | 3 +- .../types/tests/IPPrefixTypeTest.cpp | 41 +++ 16 files changed, 854 insertions(+), 13 deletions(-) create mode 100644 velox/functions/prestosql/tests/IPPrefixCastTest.cpp create mode 100644 velox/functions/prestosql/types/IPPrefixType.cpp create mode 100644 velox/functions/prestosql/types/IPPrefixType.h create mode 100644 velox/functions/prestosql/types/tests/IPPrefixTypeTest.cpp diff --git a/velox/docs/develop/types.rst b/velox/docs/develop/types.rst index ac8cefd322d1a..ad2cedbdc8da7 100644 --- a/velox/docs/develop/types.rst +++ b/velox/docs/develop/types.rst @@ -137,6 +137,7 @@ JSON VARCHAR TIMESTAMP WITH TIME ZONE BIGINT UUID HUGEINT IPADDRESS HUGEINT +IPPREFIX VARBINARY ======================== ===================== TIMESTAMP WITH TIME ZONE represents a time point in milliseconds precision @@ -155,6 +156,23 @@ used in IPADDRESS/IPPREFIX related functions. This type can be used to create IPPREFIX networks as well as to check IPADDRESS validity within IPPREFIX networks. 
+IPPREFIX represents an IPV6 or IPV4 formatted IPV6 address along with a one byte +prefix length. Its physical type is VARBINARY but has a fixed length of 17 bytes. +The format that the address is stored in is defined as part of `(RFC 4291#section-2.5.5.2) <https://datatracker.ietf.org/doc/html/rfc4291.html#section-2.5.5.2>`_. +The prefix length is stored in the last byte of the VARBINARY array. +The IP address stored is the canonical (smallest) IP address in the +subnet range. This type can be used in IP subnet functions. + +Example: + +In this example the first 32 bits (*FFFF:FFFF*) represent the network prefix. +As a result the IPPREFIX object stores *FFFF:FFFF::* and the length 32 for both of these IPPREFIX objects. + +:: + + IPPREFIX 'FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF/32' -- IPPREFIX 'FFFF:FFFF:0000:0000:0000:0000:0000:0000/32' + IPPREFIX 'FFFF:FFFF:4455:6677:8899:AABB:CCDD:EEFF/32' -- IPPREFIX 'FFFF:FFFF:0000:0000:0000:0000:0000:0000/32' + Spark Types ~~~~~~~~~~~~ The `data types `_ in Spark have some semantic differences compared to those in diff --git a/velox/docs/functions/presto/conversion.rst b/velox/docs/functions/presto/conversion.rst index b24117ae3068a..bf9221d9a9744 100644 --- a/velox/docs/functions/presto/conversion.rst +++ b/velox/docs/functions/presto/conversion.rst @@ -30,7 +30,7 @@ are supported if the conversion of their element types are supported. In additio supported conversions to/from JSON are listed in :doc:`json`. .. list-table:: - :widths: 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 + :widths: 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 :header-rows: 1 * - @@ -49,6 +49,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - interval day to second - decimal - ipaddress + - ipprefix * - tinyint - Y - Y @@ -65,6 +66,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - smallint - Y - Y @@ -81,6 +83,7 @@ supported conversions to/from JSON are listed in :doc:`json`. 
- - Y - + - * - integer - Y - Y @@ -97,6 +100,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - bigint - Y - Y @@ -113,6 +117,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - boolean - Y - Y @@ -129,6 +134,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - real - Y - Y @@ -145,6 +151,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - double - Y - Y @@ -161,6 +168,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - varchar - Y - Y @@ -177,6 +185,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - Y + - Y * - varbinary - - @@ -193,6 +202,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - Y + - * - timestamp - - @@ -209,6 +219,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - + - * - timestamp with time zone - - @@ -225,6 +236,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - + - * - date - - @@ -241,6 +253,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - + - * - interval day to second - - @@ -257,6 +270,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - + - * - decimal - Y - Y @@ -273,6 +287,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - ipaddress - - @@ -288,7 +303,25 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - + - Y + - Y + * - ipprefix + - + - + - + - + - - + - + - Y + - + - + - + - + - + - + - Y + - Y Cast to Integral Types ---------------------- @@ -689,6 +722,33 @@ IPV4 mapped IPV6: SELECT cast(ipaddress '::ffff:ffff:ffff' as varchar); -- '255.255.255.255' +From IPPrefix +^^^^^^^^^^^^^^ + +Casting from IPPREFIX to VARCHAR returns a string formatted as *x.x.x.x/* for IPV4 formatted IPV6 addresses. 
+ +For all other IPV6 addresses it will be formatted in compressed alternate form IPV6 defined in `RFC 4291#section-2.2 <https://datatracker.ietf.org/doc/html/rfc4291.html#section-2.2>`_ +followed by */<prefix-length>*. `[RFC 4291#section-2.3] <https://datatracker.ietf.org/doc/html/rfc4291.html#section-2.3>`_ + +IPV4: + +:: + + SELECT cast(ipprefix '1.2.0.0/16' as varchar); -- '1.2.0.0/16' + +IPV6: + +:: + + SELECT cast(ipprefix '2001:db8::ff00:42:8329/128' as varchar); -- '2001:db8::ff00:42:8329/128' + SELECT cast(ipprefix '0:0:0:0:0:0:13.1.68.3/32' as varchar); -- '::/32' + +IPV4 mapped IPV6: + +:: + + SELECT cast(ipprefix '::ffff:ffff:0000/16' as varchar); -- '255.255.0.0/16' + Cast to VARBINARY ----------------- @@ -1036,6 +1096,8 @@ Invalid example Cast to IPADDRESS ----------------- +.. _ipaddress-varchar: + From VARCHAR ^^^^^^^^^^^^ @@ -1128,6 +1190,67 @@ Invalid examples: SELECT cast(from_hex('f000001100') as ipaddress); -- Invalid IP address binary length: 5 +From IPPREFIX +^^^^^^^^^^^^^ + +Returns the canonical (lowest) IPADDRESS in the subnet range. + +Examples: + +:: + + SELECT cast(ipprefix '1.2.3.4/24' as ipaddress) -- ipaddress '1.2.3.0' + SELECT cast(ipprefix '2001:db8::ff00:42:8329/64' as ipaddress) -- ipaddress '2001:db8::' + +Cast to IPPREFIX +---------------- + +From VARCHAR +^^^^^^^^^^^^ + +The IPPREFIX string must be in the form of *<ip_address>/<ip_prefix>* as defined in `(RFC 4291#section-2.3) <https://datatracker.ietf.org/doc/html/rfc4291.html#section-2.3>`_. +The IPADDRESS portion of the IPPREFIX follows the same rules as casting +`VARCHAR to IPADDRESS <#ipaddress-varchar>`_. + +The prefix portion must be <= 32 if the IP is an IPV4 address or <= 128 for an IPV6 address. +As with IPADDRESS, any IPV6 address in the form of an IPV4 mapped IPV6 address will be +interpreted as an IPV4 address. Only the canonical (smallest) IP address will be stored +in the IPPREFIX. 
+ +Examples: + +Valid examples: + +:: + + SELECT cast('2001:0db8:0000:0000:0000:ff00:0042:8329/32' as ipprefix); -- ipprefix '2001:0db8::/32' + SELECT cast('1.2.3.4/24' as ipprefix); -- ipprefix '1.2.3.0/24' + SELECT cast('::ffff:ffff:ffff/16' as ipprefix); -- ipprefix '255.255.0.0/16' + +Invalid examples: + +:: + + SELECT cast('2001:db8::1::1/1' as ipprefix); -- Invalid IP address '2001:db8::1::1' + SELECT cast('2001:0db8:0000:0000:0000:ff00:0042:8329/129' as ipprefix); -- CIDR value '129' is > network bit count '128' + SELECT cast('2001:0db8:0000:0000:0000:ff00:0042:8329/-1' as ipprefix); -- Mask value '-1' not a valid mask + SELECT cast('255.2.3.4/33' as ipprefix); -- CIDR value '33' is > network bit count '32' + SELECT cast('::ffff:ffff:ffff/33' as ipprefix); -- CIDR value '33' is > network bit count '32' + + +From IPADDRESS +^^^^^^^^^^^^^^ + +Returns an IPPREFIX where the prefix length is the length of the entire IP Address. +Prefix length for IPV4 is 32 and for IPV6 it is 128. + +Examples: + +:: + + SELECT cast(ipaddress '1.2.3.4' as ipprefix) -- ipprefix '1.2.3.4/32' + SELECT cast(ipaddress '2001:db8::ff00:42:8329' as ipprefix) -- ipprefix '2001:db8::ff00:42:8329/128' + Miscellaneous ------------- diff --git a/velox/expression/tests/CustomTypeTest.cpp b/velox/expression/tests/CustomTypeTest.cpp index c55015d85900e..97afc1f6c79d8 100644 --- a/velox/expression/tests/CustomTypeTest.cpp +++ b/velox/expression/tests/CustomTypeTest.cpp @@ -217,6 +217,7 @@ TEST_F(CustomTypeTest, getCustomTypeNames) { "TIMESTAMP WITH TIME ZONE", "UUID", "IPADDRESS", + "IPPREFIX", }), names); @@ -231,6 +232,7 @@ TEST_F(CustomTypeTest, getCustomTypeNames) { "TIMESTAMP WITH TIME ZONE", "UUID", "IPADDRESS", + "IPPREFIX", "FANCY_INT", }), names); diff --git a/velox/functions/prestosql/IPAddressFunctions.h b/velox/functions/prestosql/IPAddressFunctions.h index abd214a9aba47..a08ef92de5343 100644 --- a/velox/functions/prestosql/IPAddressFunctions.h +++ b/velox/functions/prestosql/IPAddressFunctions.h 
@@ -16,11 +16,13 @@ #pragma once #include "velox/functions/prestosql/types/IPAddressType.h" +#include "velox/functions/prestosql/types/IPPrefixType.h" namespace facebook::velox::functions { void registerIPAddressFunctions(const std::string& prefix) { registerIPAddressType(); + registerIPPrefixType(); } } // namespace facebook::velox::functions diff --git a/velox/functions/prestosql/TypeOf.cpp b/velox/functions/prestosql/TypeOf.cpp index 77a4e65387965..cffdd038f3289 100644 --- a/velox/functions/prestosql/TypeOf.cpp +++ b/velox/functions/prestosql/TypeOf.cpp @@ -16,6 +16,7 @@ #include "velox/expression/VectorFunction.h" #include "velox/functions/prestosql/types/HyperLogLogType.h" #include "velox/functions/prestosql/types/IPAddressType.h" +#include "velox/functions/prestosql/types/IPPrefixType.h" #include "velox/functions/prestosql/types/JsonType.h" #include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h" #include "velox/functions/prestosql/types/UuidType.h" @@ -78,6 +79,8 @@ std::string typeName(const TypePtr& type) { case TypeKind::VARBINARY: if (isHyperLogLogType(type)) { return "HyperLogLog"; + } else if (isIPPrefixType(type)) { + return "ipprefix"; } return "varbinary"; case TypeKind::TIMESTAMP: diff --git a/velox/functions/prestosql/tests/CMakeLists.txt b/velox/functions/prestosql/tests/CMakeLists.txt index 2c0d1696c275e..477cf20937694 100644 --- a/velox/functions/prestosql/tests/CMakeLists.txt +++ b/velox/functions/prestosql/tests/CMakeLists.txt @@ -65,6 +65,7 @@ add_executable( HyperLogLogFunctionsTest.cpp InPredicateTest.cpp IPAddressCastTest.cpp + IPPrefixCastTest.cpp JsonCastTest.cpp JsonExtractScalarTest.cpp JsonFunctionsTest.cpp diff --git a/velox/functions/prestosql/tests/IPAddressCastTest.cpp b/velox/functions/prestosql/tests/IPAddressCastTest.cpp index 0f62b6b724808..70553de205637 100644 --- a/velox/functions/prestosql/tests/IPAddressCastTest.cpp +++ b/velox/functions/prestosql/tests/IPAddressCastTest.cpp @@ -24,20 +24,20 @@ namespace { 
class IPAddressCastTest : public functions::test::FunctionBaseTest { protected: std::optional castToVarchar( - const std::optional input) { + const std::optional& input) { auto result = evaluateOnce( - "cast(cast(c0 as ipaddress) as varchar)", input); + "cast(cast(cast(c0 as ipaddress) as ipaddress) as varchar)", input); return result; } std::optional castFromVarbinary( - const std::optional input) { + const std::optional& input) { auto result = evaluateOnce("cast(from_hex(c0) as ipaddress)", input); return result; } - std::optional allCasts(const std::optional input) { + std::optional allCasts(const std::optional& input) { auto result = evaluateOnce( "cast(cast(cast(cast(c0 as ipaddress) as varbinary) as ipaddress) as varchar)", input); @@ -45,7 +45,7 @@ class IPAddressCastTest : public functions::test::FunctionBaseTest { } }; -int128_t stringToInt128(std::string value) { +int128_t stringToInt128(const std::string& value) { int128_t res = 0; for (char c : value) { res = res * 10 + c - '0'; diff --git a/velox/functions/prestosql/tests/IPPrefixCastTest.cpp b/velox/functions/prestosql/tests/IPPrefixCastTest.cpp new file mode 100644 index 0000000000000..3defabc4c142f --- /dev/null +++ b/velox/functions/prestosql/tests/IPPrefixCastTest.cpp @@ -0,0 +1,183 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/common/base/tests/GTestUtils.h" +#include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h" + +namespace facebook::velox::functions::prestosql { + +namespace { + +class IPPrefixCastTest : public functions::test::FunctionBaseTest { + protected: + std::optional<std::string> castToVarchar( + const std::optional<std::string>& input) { + auto result = evaluateOnce<std::string>( + "cast(cast(c0 as ipprefix) as varchar)", input); + return result; + } + + std::optional<std::string> castFromIPAddress( + const std::optional<std::string>& input) { + auto result = evaluateOnce<std::string>( + "cast(cast(cast(c0 as ipaddress) as ipprefix) as varchar)", input); + return result; + } + + std::optional<int128_t> castToIPAddress( + const std::optional<std::string>& input) { + auto result = evaluateOnce<int128_t>( + "cast(cast(c0 as ipprefix) as ipaddress)", input); + return result; + } +}; + +int128_t stringToInt128(const std::string& value) { + int128_t res = 0; + for (char c : value) { + res = res * 10 + c - '0'; + } + return res; +} + +TEST_F(IPPrefixCastTest, varcharCast) { + EXPECT_EQ(castToVarchar("::ffff:1.2.3.4/24"), "1.2.3.0/24"); + EXPECT_EQ(castToVarchar("192.168.0.0/24"), "192.168.0.0/24"); + EXPECT_EQ(castToVarchar("255.2.3.4/0"), "0.0.0.0/0"); + EXPECT_EQ(castToVarchar("255.2.3.4/1"), "128.0.0.0/1"); + EXPECT_EQ(castToVarchar("255.2.3.4/2"), "192.0.0.0/2"); + EXPECT_EQ(castToVarchar("255.2.3.4/4"), "240.0.0.0/4"); + EXPECT_EQ(castToVarchar("1.2.3.4/8"), "1.0.0.0/8"); + EXPECT_EQ(castToVarchar("1.2.3.4/16"), "1.2.0.0/16"); + EXPECT_EQ(castToVarchar("1.2.3.4/24"), "1.2.3.0/24"); + EXPECT_EQ(castToVarchar("1.2.3.255/25"), "1.2.3.128/25"); + EXPECT_EQ(castToVarchar("1.2.3.255/26"), "1.2.3.192/26"); + EXPECT_EQ(castToVarchar("1.2.3.255/28"), "1.2.3.240/28"); + EXPECT_EQ(castToVarchar("1.2.3.255/30"), "1.2.3.252/30"); + EXPECT_EQ(castToVarchar("1.2.3.255/32"), "1.2.3.255/32"); + EXPECT_EQ( + castToVarchar("2001:0db8:0000:0000:0000:ff00:0042:8329/128"), + "2001:db8::ff00:42:8329/128"); + EXPECT_EQ( + castToVarchar("2001:db8::ff00:42:8329/128"), + 
"2001:db8::ff00:42:8329/128"); + EXPECT_EQ(castToVarchar("2001:db8:0:0:1:0:0:1/128"), "2001:db8::1:0:0:1/128"); + EXPECT_EQ(castToVarchar("2001:db8:0:0:1::1/128"), "2001:db8::1:0:0:1/128"); + EXPECT_EQ(castToVarchar("2001:db8::1:0:0:1/128"), "2001:db8::1:0:0:1/128"); + EXPECT_EQ( + castToVarchar("2001:DB8::FF00:ABCD:12EF/128"), + "2001:db8::ff00:abcd:12ef/128"); + EXPECT_EQ(castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/0"), "::/0"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/1"), "8000::/1"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/2"), "c000::/2"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/4"), "f000::/4"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/8"), "ff00::/8"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/16"), "ffff::/16"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/32"), + "ffff:ffff::/32"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/48"), + "ffff:ffff:ffff::/48"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/64"), + "ffff:ffff:ffff:ffff::/64"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/80"), + "ffff:ffff:ffff:ffff:ffff::/80"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/96"), + "ffff:ffff:ffff:ffff:ffff:ffff::/96"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/112"), + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:0/112"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/120"), + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff00/120"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/124"), + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff0/124"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/126"), + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffc/126"); + EXPECT_EQ( + 
castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/127"), + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe/127"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128"), + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128"); + EXPECT_EQ(castToVarchar("10.0.0.0/32"), "10.0.0.0/32"); + EXPECT_EQ(castToVarchar("64:ff9b::10.0.0.0/128"), "64:ff9b::a00:0/128"); + VELOX_ASSERT_THROW( + castToVarchar("facebook.com/32"), "Invalid IP address 'facebook.com'"); + VELOX_ASSERT_THROW( + castToVarchar("localhost/32"), "Invalid IP address 'localhost'"); + VELOX_ASSERT_THROW( + castToVarchar("2001:db8::1::1/128"), + "Invalid IP address '2001:db8::1::1'"); + VELOX_ASSERT_THROW( + castToVarchar("2001:zxy::1::1/128"), + "Invalid IP address '2001:zxy::1::1'"); + VELOX_ASSERT_THROW( + castToVarchar("789.1.1.1/32"), "Invalid IP address '789.1.1.1'"); + VELOX_ASSERT_THROW( + castToVarchar("192.1.1.1"), + "Invalid ipSlashCidr specified. Expected IP/CIDR format, got '192.1.1.1'"); + VELOX_ASSERT_THROW( + castToVarchar("192.1.1.1/128"), + "CIDR value '128' is > network bit count '32'"); + VELOX_ASSERT_THROW( + castToVarchar("192.1.1.1/-1"), "Mask value '-1' not a valid mask"); + VELOX_ASSERT_THROW( + castToVarchar("::ffff:ffff:ffff/33"), + "CIDR value '33' is > network bit count '32'"); + VELOX_ASSERT_THROW( + castToVarchar("::ffff:ffff:ffff/-1"), "Mask value '-1' not a valid mask"); + VELOX_ASSERT_THROW( + castToVarchar("::/129"), "CIDR value '129' is > network bit count '128'"); + VELOX_ASSERT_THROW( + castToVarchar("::/-1"), "Mask value '-1' not a valid mask"); +} + +TEST_F(IPPrefixCastTest, fromIPAddressCast) { + EXPECT_EQ(castFromIPAddress("1.2.3.4"), "1.2.3.4/32"); + EXPECT_EQ(castFromIPAddress("::ffff:102:304"), "1.2.3.4/32"); + EXPECT_EQ(castFromIPAddress("::1"), "::1/128"); + EXPECT_EQ( + castFromIPAddress("2001:db8::ff00:42:8329"), + "2001:db8::ff00:42:8329/128"); +} + +TEST_F(IPPrefixCastTest, toIPAddressCast) { + EXPECT_EQ(castToIPAddress("1.2.3.4/32"), 
stringToInt128("281470698652420")); + EXPECT_EQ(castToIPAddress("1.2.3.4/24"), stringToInt128("281470698652416")); + EXPECT_EQ(castToIPAddress("::1/128"), stringToInt128("1")); + EXPECT_EQ( + castToIPAddress("2001:db8::ff00:42:8329/128"), + stringToInt128("42540766411282592856904265327123268393")); + EXPECT_EQ( + castToIPAddress("2001:db8::ff00:42:8329/64"), + stringToInt128("42540766411282592856903984951653826560")); +} + +} // namespace + +} // namespace facebook::velox::functions::prestosql diff --git a/velox/functions/prestosql/types/CMakeLists.txt b/velox/functions/prestosql/types/CMakeLists.txt index 0089307a4f071..14c407ba89cec 100644 --- a/velox/functions/prestosql/types/CMakeLists.txt +++ b/velox/functions/prestosql/types/CMakeLists.txt @@ -17,7 +17,8 @@ velox_add_library( JsonType.cpp TimestampWithTimeZoneType.cpp UuidType.cpp - IPAddressType.cpp) + IPAddressType.cpp + IPPrefixType.cpp) velox_link_libraries( velox_presto_types diff --git a/velox/functions/prestosql/types/IPAddressType.cpp b/velox/functions/prestosql/types/IPAddressType.cpp index 691ca0a28ce2b..4df773ff04c00 100644 --- a/velox/functions/prestosql/types/IPAddressType.cpp +++ b/velox/functions/prestosql/types/IPAddressType.cpp @@ -17,11 +17,11 @@ #include "velox/functions/prestosql/types/IPAddressType.h" #include #include "velox/expression/CastExpr.h" +#include "velox/functions/prestosql/types/IPPrefixType.h" static constexpr int kIPV4AddressBytes = 4; static constexpr int kIPV4ToV6FFIndex = 10; static constexpr int kIPV4ToV6Index = 12; -static constexpr int kIPAddressBytes = 16; namespace facebook::velox { @@ -60,10 +60,14 @@ class IPAddressCastOperator : public exec::CastOperator { if (input.typeKind() == TypeKind::VARCHAR) { castFromString(input, context, rows, *result); } else if (input.typeKind() == TypeKind::VARBINARY) { - castFromVarbinary(input, context, rows, *result); + if (isIPPrefixType(input.type())) { + castFromIPPrefix(input, context, rows, *result); + } else { + 
castFromVarbinary(input, context, rows, *result); + } } else { VELOX_UNSUPPORTED( - "Cast from {} to IPAddress not supported", resultType->toString()); + "Cast from {} to IPAddress not supported", input.type()->toString()); } } @@ -78,7 +82,11 @@ class IPAddressCastOperator : public exec::CastOperator { if (resultType->kind() == TypeKind::VARCHAR) { castToString(input, context, rows, *result); } else if (resultType->kind() == TypeKind::VARBINARY) { - castToVarbinary(input, context, rows, *result); + if (isIPPrefixType(resultType)) { + castToIPPrefix(input, context, rows, *result); + } else { + castToVarbinary(input, context, rows, *result); + } } else { VELOX_UNSUPPORTED( "Cast from IPAddress to {} not supported", resultType->toString()); @@ -206,6 +214,57 @@ class IPAddressCastOperator : public exec::CastOperator { flatResult->set(row, intAddr); }); } + + static void castFromIPPrefix( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + BaseVector& result) { + auto* flatResult = result.as>(); + const auto* ipaddresses = input.as>(); + + context.applyToSelectedNoThrow(rows, [&](auto row) { + const auto intAddr = ipaddresses->valueAt(row); + int128_t addrResult = 0; + folly::ByteArray16 addrBytes; + + memcpy(&addrBytes, intAddr.data(), kIPAddressBytes); + std::reverse(addrBytes.begin(), addrBytes.end()); + + memcpy(&addrResult, &addrBytes, kIPAddressBytes); + flatResult->set(row, addrResult); + }); + } + + static void castToIPPrefix( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + BaseVector& result) { + auto* flatResult = result.as>(); + const auto* ipAddresses = input.as>(); + + context.applyToSelectedNoThrow(rows, [&](auto row) { + auto ipAddress = ipAddresses->valueAt(row); + folly::ByteArray16 addrBytes; + + exec::StringWriter result(flatResult, row); + result.resize(kIPPrefixBytes); + + memcpy(&addrBytes, &ipAddress, kIPAddressBytes); + std::reverse(addrBytes.begin(), addrBytes.end()); 
+ memcpy(result.data(), &addrBytes, kIPAddressBytes); + + folly::IPAddressV6 v6Addr(addrBytes); + if (v6Addr.isIPv4Mapped()) { + result.data()[kIPAddressBytes] = kIPV4Bits; + } else { + result.data()[kIPAddressBytes] = (unsigned char)kIPV6Bits; + } + + result.finalize(); + }); + } }; class IPAddressTypeFactories : public CustomTypeFactories { diff --git a/velox/functions/prestosql/types/IPAddressType.h b/velox/functions/prestosql/types/IPAddressType.h index e1e2d9fc1bf28..4c6dbe0af9c05 100644 --- a/velox/functions/prestosql/types/IPAddressType.h +++ b/velox/functions/prestosql/types/IPAddressType.h @@ -18,6 +18,11 @@ #include "velox/type/SimpleFunctionApi.h" #include "velox/type/Type.h" +static constexpr int kIPAddressBytes = 16; +static constexpr int kIPPrefixBytes = 17; +static constexpr uint8_t kIPV4Bits = 32; +static constexpr uint8_t kIPV6Bits = 128; + namespace facebook::velox { class IPAddressType : public HugeintType { diff --git a/velox/functions/prestosql/types/IPPrefixType.cpp b/velox/functions/prestosql/types/IPPrefixType.cpp new file mode 100644 index 0000000000000..b92e5e6174603 --- /dev/null +++ b/velox/functions/prestosql/types/IPPrefixType.cpp @@ -0,0 +1,329 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/functions/prestosql/types/IPPrefixType.h" +#include +#include +#include "velox/expression/CastExpr.h" +#include "velox/functions/prestosql/types/IPAddressType.h" + +namespace facebook::velox { + +namespace { + +class IPPrefixCastOperator : public exec::CastOperator { + public: + bool isSupportedFromType(const TypePtr& other) const override { + switch (other->kind()) { + case TypeKind::VARCHAR: + return true; + case TypeKind::HUGEINT: + if (isIPAddressType(other)) { + return true; + } + default: + return false; + } + } + + bool isSupportedToType(const TypePtr& other) const override { + switch (other->kind()) { + case TypeKind::VARCHAR: + return true; + case TypeKind::HUGEINT: + if (isIPAddressType(other)) { + return true; + } + default: + return false; + } + } + + void castTo( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + const TypePtr& resultType, + VectorPtr& result) const override { + context.ensureWritable(rows, resultType, result); + + if (input.typeKind() == TypeKind::VARCHAR) { + castFromString(input, context, rows, *result); + } else if (isIPAddressType(input.type())) { + castFromIPAddress(input, context, rows, *result); + } else { + VELOX_UNSUPPORTED( + "Cast from {} to IPPrefix not yet supported", + input.type()->toString()); + } + } + + void castFrom( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + const TypePtr& resultType, + VectorPtr& result) const override { + context.ensureWritable(rows, resultType, result); + + if (resultType->kind() == TypeKind::VARCHAR) { + castToString(input, context, rows, *result); + } else if (isIPAddressType(resultType)) { + castToIPAddress(input, context, rows, *result); + } else { + VELOX_UNSUPPORTED( + "Cast from IPPrefix to {} not yet supported", resultType->toString()); + } + } + + private: + static void castToString( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + BaseVector& 
result) { + auto* flatResult = result.as>(); + const auto* ipaddresses = input.as>(); + + context.applyToSelectedNoThrow(rows, [&](auto row) { + const auto intAddr = ipaddresses->valueAt(row); + folly::ByteArray16 addrBytes; + std::string s; + + memcpy(&addrBytes, intAddr.data(), kIPAddressBytes); + folly::IPAddressV6 v6Addr(addrBytes); + + exec::StringWriter result(flatResult, row); + if (v6Addr.isIPv4Mapped()) { + result.append( + v6Addr.createIPv4().str() + "/" + + std::to_string((uint8_t)intAddr.data()[kIPAddressBytes])); + } else { + result.append( + v6Addr.str() + "/" + + std::to_string((uint8_t)intAddr.data()[kIPAddressBytes])); + } + result.finalize(); + }); + } + + static auto splitIpSlashCidr(folly::StringPiece ipSlashCidr) { + folly::small_vector vec; + folly::split('/', ipSlashCidr, vec); + return vec; + } + + static void castFromString( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + BaseVector& result) { + auto* flatResult = result.as>(); + const auto* ipAddressStrings = input.as>(); + + context.applyToSelectedNoThrow(rows, [&](auto row) { + auto ipAddressString = ipAddressStrings->valueAt(row); + + // Folly allows for creation of networks without a "/" + // Check to make sure that we have one + if (ipAddressString.str().find('/') == std::string::npos) { + context.setStatus( + row, + threadSkipErrorDetails() + ? Status::UserError() + : Status::UserError( + "Invalid ipSlashCidr specified. 
Expected IP/CIDR format, got '{}'", + ipAddressString.str())); + return; + } + + folly::ByteArray16 addrBytes; + auto const maybeNet = + folly::IPAddress::tryCreateNetwork(ipAddressString, -1, false); + + // All errors from folly createNetwork() to avoid throwing + if (maybeNet.hasError()) { + if (threadSkipErrorDetails()) { + context.setStatus(row, Status::UserError()); + } else { + switch (maybeNet.error()) { + case folly::CIDRNetworkError::INVALID_DEFAULT_CIDR: + context.setStatus( + row, Status::UserError("defaultCidr must be <= UINT8_MAX")); + break; + case folly::CIDRNetworkError::INVALID_IP_SLASH_CIDR: + context.setStatus( + row, + Status::UserError( + "Invalid ipSlashCidr specified. Expected IP/CIDR format, got '{}'", + ipAddressString.str())); + break; + case folly::CIDRNetworkError::INVALID_IP: { + auto const vec = splitIpSlashCidr(ipAddressString); + context.setStatus( + row, + Status::UserError( + "Invalid IP address '{}'", + vec.size() > 0 ? vec.at(0) : "")); + break; + } + case folly::CIDRNetworkError::INVALID_CIDR: { + auto const vec = splitIpSlashCidr(ipAddressString); + context.setStatus( + row, + Status::UserError( + "Mask value '{}' not a valid mask", + vec.size() > 1 ? vec.at(1) : "")); + break; + } + case folly::CIDRNetworkError::CIDR_MISMATCH: { + auto const vec = splitIpSlashCidr(ipAddressString); + auto const subnet = + folly::IPAddress::tryFromString(vec.at(0)).value(); + context.setStatus( + row, + Status::UserError( + "CIDR value '{}' is > network bit count '{}'", + vec.size() == 2 + ? vec.at(1) + : folly::to( + subnet.isV4() ? kIPV4Bits : kIPV6Bits), + subnet.bitCount())); + break; + } + default: + context.setStatus(row, Status::UserError()); + break; + } + } + return; + } + + auto net = maybeNet.value(); + if (net.first.isIPv4Mapped() || net.first.isV4()) { + // Take care of mask() throw condition + if (net.second > kIPV4Bits) { + context.setStatus( + row, + threadSkipErrorDetails() + ? 
Status::UserError()
+                  : Status::UserError(
+                        "CIDR value '{}' is > network bit count '{}'",
+                        net.second,
+                        kIPV4Bits));
+          return;
+        }
+        addrBytes = folly::IPAddress::createIPv4(net.first)
+                        .mask(net.second)
+                        .createIPv6()
+                        .toByteArray();
+      } else {
+        // Take care of mask() throw condition
+        if (net.second > kIPV6Bits) {
+          context.setStatus(
+              row,
+              threadSkipErrorDetails()
+                  ? Status::UserError()
+                  : Status::UserError(
+                        "CIDR value '{}' is > network bit count '{}'",
+                        net.second,
+                        kIPV6Bits));
+          return;
+        }
+        addrBytes = folly::IPAddress::createIPv6(net.first)
+                        .mask(net.second)
+                        .toByteArray();
+      }
+
+      exec::StringWriter result(flatResult, row);
+      result.resize(kIPPrefixBytes);
+      memcpy(result.data(), &addrBytes, kIPAddressBytes);
+      result.data()[kIPAddressBytes] = net.second;
+      result.finalize();
+    });
+  }
+
+  // Casts IPPREFIX values to IPADDRESS. For every selected row the first
+  // kIPAddressBytes bytes of the prefix value are copied, byte-reversed and
+  // stored as an int128_t; the prefix-length byte that follows them is not
+  // carried over.
+  // NOTE(review): the element types in the two as<...>() calls were missing
+  // from the patch text and are inferred from how the values are used below
+  // (set() of an int128_t, valueAt().data()) - confirm against upstream.
+  static void castToIPAddress(
+      const BaseVector& input,
+      exec::EvalCtx& context,
+      const SelectivityVector& rows,
+      BaseVector& result) {
+    auto* flatResult = result.as<FlatVector<int128_t>>();
+    const auto* ipaddresses = input.as<SimpleVector<StringView>>();
+
+    context.applyToSelectedNoThrow(rows, [&](auto row) {
+      const auto intAddr = ipaddresses->valueAt(row);
+      int128_t addrResult = 0;
+      folly::ByteArray16 addrBytes;
+
+      // NOTE(review): presumably the reversal converts between the byte order
+      // kept in IPPREFIX and the int128_t layout of IPADDRESS - confirm.
+      memcpy(&addrBytes, intAddr.data(), kIPAddressBytes);
+      std::reverse(addrBytes.begin(), addrBytes.end());
+
+      memcpy(&addrResult, &addrBytes, kIPAddressBytes);
+      flatResult->set(row, addrResult);
+    });
+  }
+
+  // Casts IPADDRESS values to IPPREFIX. Each result is kIPPrefixBytes long:
+  // the byte-reversed address followed by one prefix-length byte, which is
+  // kIPV4Bits when the address is IPv4-mapped and kIPV6Bits otherwise.
+  // NOTE(review): the element types in the two as<...>() calls were missing
+  // from the patch text and are inferred from how the values are used below
+  // (StringWriter on the result, a kIPAddressBytes memcpy from the input) -
+  // confirm against upstream.
+  static void castFromIPAddress(
+      const BaseVector& input,
+      exec::EvalCtx& context,
+      const SelectivityVector& rows,
+      BaseVector& result) {
+    auto* flatResult = result.as<FlatVector<StringView>>();
+    const auto* ipAddresses = input.as<SimpleVector<int128_t>>();
+
+    context.applyToSelectedNoThrow(rows, [&](auto row) {
+      auto ipAddress = ipAddresses->valueAt(row);
+      folly::ByteArray16 addrBytes;
+
+      exec::StringWriter result(flatResult, row);
+      result.resize(kIPPrefixBytes);
+
+      memcpy(&addrBytes, &ipAddress, kIPAddressBytes);
+      std::reverse(addrBytes.begin(), addrBytes.end());
+      memcpy(result.data(), &addrBytes, kIPAddressBytes);
+
+      // The prefix length lives in the byte right after the address bytes.
+      folly::IPAddressV6 v6Addr(addrBytes);
+      if (v6Addr.isIPv4Mapped()) {
+        result.data()[kIPAddressBytes] = kIPV4Bits;
+      } else {
+        result.data()[kIPAddressBytes] = kIPV6Bits;
+      }
+
+      result.finalize();
+    });
+  }
+};
+
+// Hands out the IPPREFIX type instance and a cast operator for it.
+class IPPrefixTypeFactories : public CustomTypeFactories {
+ public:
+  TypePtr getType() const override {
+    return IPPrefixType::get();
+  }
+
+  exec::CastOperatorPtr getCastOperator() const override {
+    // NOTE(review): the template argument was missing from the patch text;
+    // IPPrefixCastOperator is assumed from the <Type>CastOperator naming used
+    // by UuidCastOperator - confirm it matches the class defined above.
+    return std::make_shared<IPPrefixCastOperator>();
+  }
+};
+
+} // namespace
+
+// Registers the IPPREFIX custom type under the name "ipprefix".
+void registerIPPrefixType() {
+  registerCustomType(
+      "ipprefix", std::make_unique<const IPPrefixTypeFactories>());
+}
+
+} // namespace facebook::velox
diff --git a/velox/functions/prestosql/types/IPPrefixType.h b/velox/functions/prestosql/types/IPPrefixType.h
new file mode 100644
index 0000000000000..1e20c049ec0bd
--- /dev/null
+++ b/velox/functions/prestosql/types/IPPrefixType.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include "velox/type/SimpleFunctionApi.h"
+#include "velox/type/Type.h"
+
+namespace facebook::velox {
+
+/// Presto IPPREFIX custom type, built on VarbinaryType. Only one instance
+/// exists (see get()), which is why identity checks compare pointers.
+class IPPrefixType : public VarbinaryType {
+  IPPrefixType() = default;
+
+ public:
+  /// Returns the shared singleton instance.
+  static const std::shared_ptr<const IPPrefixType>& get() {
+    static const std::shared_ptr<const IPPrefixType> instance{
+        new IPPrefixType()};
+
+    return instance;
+  }
+
+  bool equivalent(const Type& other) const override {
+    // Pointer comparison works since this type is a singleton.
+    return this == &other;
+  }
+
+  const char* name() const override {
+    return "IPPREFIX";
+  }
+
+  std::string toString() const override {
+    return name();
+  }
+
+  /// Serializes as an object with "name" = "Type" and "type" = name().
+  folly::dynamic serialize() const override {
+    folly::dynamic obj = folly::dynamic::object;
+    obj["name"] = "Type";
+    obj["type"] = name();
+    return obj;
+  }
+};
+
+/// Returns true if `type` is the IPPREFIX singleton.
+FOLLY_ALWAYS_INLINE bool isIPPrefixType(const TypePtr& type) {
+  // Pointer comparison works since this type is a singleton.
+  return IPPrefixType::get() == type;
+}
+
+/// Shorthand accessor for the IPPREFIX type instance.
+FOLLY_ALWAYS_INLINE std::shared_ptr<const IPPrefixType> IPPREFIX() {
+  return IPPrefixType::get();
+}
+
+/// Tag struct naming the physical type (Varbinary) and the type name
+/// ("ipprefix") of IPPREFIX.
+struct IPPrefixT {
+  using type = Varbinary;
+  static constexpr const char* typeName = "ipprefix";
+};
+
+using IPPrefix = CustomType<IPPrefixT>;
+
+/// Registers the IPPREFIX custom type.
+void registerIPPrefixType();
+
+} // namespace facebook::velox
diff --git a/velox/functions/prestosql/types/UuidType.cpp b/velox/functions/prestosql/types/UuidType.cpp
index 8d0b5b5b22fa0..5aa420113a69d 100644
--- a/velox/functions/prestosql/types/UuidType.cpp
+++ b/velox/functions/prestosql/types/UuidType.cpp
@@ -45,7 +45,7 @@ class UuidCastOperator : public exec::CastOperator {
       castFromString(input, context, rows, *result);
     } else {
       VELOX_UNSUPPORTED(
-          "Cast from {} to UUID not yet supported", resultType->toString());
+          "Cast from {} to UUID not yet supported", input.type()->toString());
     }
   }
 
diff --git a/velox/functions/prestosql/types/tests/CMakeLists.txt b/velox/functions/prestosql/types/tests/CMakeLists.txt
index e2b8105d268ef..6dbffad027d55 100644
--- a/velox/functions/prestosql/types/tests/CMakeLists.txt
+++ b/velox/functions/prestosql/types/tests/CMakeLists.txt
@@ -19,7 +19,8 @@ add_executable(
   TimestampWithTimeZoneTypeTest.cpp
   TypeTestBase.cpp
   UuidTypeTest.cpp
-  IPAddressTypeTest.cpp)
+  IPAddressTypeTest.cpp
+  IPPrefixTypeTest.cpp)
 
 add_test(velox_presto_types_test velox_presto_types_test)
 
diff --git a/velox/functions/prestosql/types/tests/IPPrefixTypeTest.cpp b/velox/functions/prestosql/types/tests/IPPrefixTypeTest.cpp
new file mode 100644
index 
0000000000000..2e360bf48d114
--- /dev/null
+++ b/velox/functions/prestosql/types/tests/IPPrefixTypeTest.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "velox/functions/prestosql/types/IPPrefixType.h"
+#include "velox/functions/prestosql/types/tests/TypeTestBase.h"
+
+namespace facebook::velox::test {
+
+// Fixture whose constructor registers the IPPREFIX custom type.
+class IPPrefixTypeTest : public testing::Test, public TypeTestBase {
+ public:
+  IPPrefixTypeTest() {
+    registerIPPrefixType();
+  }
+};
+
+// Checks the type's name, physical kind, string form and lookup by name.
+TEST_F(IPPrefixTypeTest, basic) {
+  ASSERT_STREQ(IPPREFIX()->name(), "IPPREFIX");
+  ASSERT_STREQ(IPPREFIX()->kindName(), "VARBINARY");
+  // toString() returns a std::string, so ASSERT_EQ compares contents here;
+  // applied to the const char* from name() it would compare pointers.
+  ASSERT_EQ(IPPREFIX()->toString(), "IPPREFIX");
+  ASSERT_TRUE(IPPREFIX()->parameters().empty());
+
+  ASSERT_TRUE(hasType("IPPREFIX"));
+  ASSERT_EQ(*getType("IPPREFIX", {}), *IPPREFIX());
+}
+
+// Runs the shared testTypeSerde check on the IPPREFIX type.
+TEST_F(IPPrefixTypeTest, serde) {
+  testTypeSerde(IPPREFIX());
+}
+} // namespace facebook::velox::test