diff --git a/velox/docs/develop/types.rst b/velox/docs/develop/types.rst index 62489f48da9e8..f111c74fc81f7 100644 --- a/velox/docs/develop/types.rst +++ b/velox/docs/develop/types.rst @@ -164,6 +164,7 @@ JSON VARCHAR TIMESTAMP WITH TIME ZONE BIGINT UUID HUGEINT IPADDRESS HUGEINT +IPPREFIX VARBINARY ======================== ===================== TIMESTAMP WITH TIME ZONE represents a time point in milliseconds precision @@ -174,7 +175,7 @@ Supported range of milliseconds is [0xFFF8000000000000L, 0x7FFFFFFFFFFFF] store timezone ID. Supported range of timezone ID is [1, 1680]. The definition of timezone IDs can be found in ``TimeZoneDatabase.cpp``. -IPADDRESS represents an IPV6 or IPV4 formatted IPV6 address. Its physical +IPADDRESS represents an IPv6 or IPv4 formatted IPv6 address. Its physical type is HUGEINT. The format that the address is stored in is defined as part of `(RFC 4291#section-2.5.5.2) `_ As Velox is run on Little Endian systems and the standard is network byte(Big Endian) order, we reverse the bytes to allow for masking and other bit operations @@ -182,6 +183,23 @@ used in IPADDRESS/IPPREFIX related functions. This type can be used to create IPPREFIX networks as well as to check IPADDRESS validity within IPPREFIX networks. +IPPREFIX represents an IPv6 or IPv4 formatted IPv6 address along with a one byte +prefix length. Its physical type is VARBINARY but has a fixed length of 17 bytes. +The format that the address is stored in is defined as part of `(RFC 4291#section-2.5.5.2) `_. +The prefix length is stored in the last byte of the VARBINARY array. +The IP address stored is the canonical (smallest) IP address in the +subnet range. This type can be used in IP subnet functions. + +Example: + +In this example the first 32 bits (*FFFF:FFFF*) represent the network prefix. +As a result the IPPREFIX object stores *FFFF:FFFF::* and the length 32 for both of these IPPREFIX objects.
+ +:: + + IPPREFIX 'FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF/32' -- IPPREFIX 'FFFF:FFFF:0000:0000:0000:0000:0000:0000/32' + IPPREFIX 'FFFF:FFFF:4455:6677:8899:AABB:CCDD:EEFF/32' -- IPPREFIX 'FFFF:FFFF:0000:0000:0000:0000:0000:0000/32' + Spark Types ~~~~~~~~~~~~ The `data types `_ in Spark have some semantic differences compared to those in diff --git a/velox/docs/functions/presto/conversion.rst b/velox/docs/functions/presto/conversion.rst index b24117ae3068a..74a7ab637b5a8 100644 --- a/velox/docs/functions/presto/conversion.rst +++ b/velox/docs/functions/presto/conversion.rst @@ -30,7 +30,7 @@ are supported if the conversion of their element types are supported. In additio supported conversions to/from JSON are listed in :doc:`json`. .. list-table:: - :widths: 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 + :widths: 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 :header-rows: 1 * - @@ -49,6 +49,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - interval day to second - decimal - ipaddress + - ipprefix * - tinyint - Y - Y @@ -65,6 +66,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - smallint - Y - Y @@ -81,6 +83,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - integer - Y - Y @@ -97,6 +100,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - bigint - Y - Y @@ -113,6 +117,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - boolean - Y - Y @@ -129,6 +134,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - real - Y - Y @@ -145,6 +151,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - double - Y - Y @@ -161,6 +168,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - varchar - Y - Y @@ -177,6 +185,7 @@ supported conversions to/from JSON are listed in :doc:`json`. 
- - Y - Y + - Y * - varbinary - - @@ -193,6 +202,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - Y + - * - timestamp - - @@ -209,6 +219,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - + - * - timestamp with time zone - - @@ -225,6 +236,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - + - * - date - - @@ -241,6 +253,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - + - * - interval day to second - - @@ -257,6 +270,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - + - * - decimal - Y - Y @@ -273,6 +287,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - ipaddress - - @@ -288,7 +303,25 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - + - Y + - Y + * - ipprefix + - + - + - + - - + - + - + - Y + - + - + - + - + - + - + - Y + - Y Cast to Integral Types ---------------------- @@ -667,52 +700,79 @@ is the number of whole days in the interval, HH is then number of hours between From IPADDRESS ^^^^^^^^^^^^^^ -Casting from IPADDRESS to VARCHAR returns a string formatted as x.x.x.x for IPV4 formatted IPV6 addresses. -For all other IPV6 addresses it will be formatted in compressed alternate form IPV6 defined in `RFC 4291#section-2.2 `_ +Casting from IPADDRESS to VARCHAR returns a string formatted as x.x.x.x for IPv4 formatted IPv6 addresses. 
+For all other IPv6 addresses it will be formatted in compressed alternate form IPv6 defined in `RFC 4291#section-2.2 `_ -IPV4: +IPv4: :: SELECT cast(ipaddress '1.2.3.4' as varchar); -- '1.2.3.4' -IPV6: +IPv6: :: SELECT cast(ipaddress '2001:0db8:0000:0000:0000:ff00:0042:8329' as varchar); -- '2001:db8::ff00:42:8329' SELECT cast(ipaddress '0:0:0:0:0:0:13.1.68.3' as varchar); -- '::13.1.68.3' -IPV4 mapped IPV6: +IPv4 mapped IPv6: :: SELECT cast(ipaddress '::ffff:ffff:ffff' as varchar); -- '255.255.255.255' +From IPPREFIX +^^^^^^^^^^^^^ + +Casting from IPPREFIX to VARCHAR returns a string formatted as *x.x.x.x/<prefix-length>* for IPv4 formatted IPv6 addresses. + +For all other IPv6 addresses it will be formatted in compressed alternate form IPv6 defined in `RFC 4291#section-2.2 `_ +followed by */<prefix-length>*. `[RFC 4291#section-2.3] `_ + +IPv4: + +:: + + SELECT cast(ipprefix '1.2.0.0/16' as varchar); -- '1.2.0.0/16' + +IPv6: + +:: + + SELECT cast(ipprefix '2001:db8::ff00:42:8329/128' as varchar); -- '2001:db8::ff00:42:8329/128' + SELECT cast(ipprefix '0:0:0:0:0:0:13.1.68.3/32' as varchar); -- '::/32' + +IPv4 mapped IPv6: + +:: + + SELECT cast(ipprefix '::ffff:ffff:0000/16' as varchar); -- '255.255.0.0/16' + Cast to VARBINARY ----------------- From IPADDRESS ^^^^^^^^^^^^^^ -Returns the IPV6 address as a 16 byte varbinary string in network byte order. +Returns the IPv6 address as a 16 byte varbinary string in network byte order. Internally, the type is a pure IPv6 address. Support for IPv4 is handled using the IPv4-mapped IPv6 address range `(RFC 4291#section-2.5.5.2) `_. When creating an IPADDRESS, IPv4 addresses will be mapped into that range. -IPV6: +IPv6: :: SELECT cast(ipaddress '2001:0db8:0000:0000:0000:ff00:0042:8329' as varbinary); -- 0x20010db8000000000000ff0000428329 -IPV4: +IPv4: :: SELECT cast('1.2.3.4' as ipaddress); -- 0x00000000000000000000ffff01020304 -IPV4 mapped IPV6: +IPv4 mapped IPv6: :: @@ -1036,16 +1096,18 @@ Invalid example Cast to IPADDRESS ----------------- +..
_ipaddress-varchar: + From VARCHAR ^^^^^^^^^^^^ To cast a varchar to IPAddress input string must be in the form of either -IPV4 or IPV6. +IPv4 or IPv6. -For IPV4 it must be in the form of: +For IPv4 it must be in the form of: x.x.x.x where each x is an integer value between 0-255. -For IPV6 it must follow any of the forms defined in `RFC 4291#section-2.2 `_. +For IPv6 it must follow any of the forms defined in `RFC 4291#section-2.2 `_. Full form: @@ -1087,16 +1149,16 @@ Invalid examples: From VARBINARY ^^^^^^^^^^^^^^ -To cast a varbinary to IPAddress it must be either IPV4(4 Bytes) -or IPV6(16 Bytes) in network byte order. +To cast a varbinary to IPAddress it must be either IPv4(4 Bytes) +or IPv6(16 Bytes) in network byte order. -IPV4: +IPv4: :: [01, 02, 03, 04] -> 1.2.3.4 -IPV6: +IPv6: :: @@ -1108,7 +1170,7 @@ When creating an IPADDRESS, IPv4 addresses will be mapped into that range. When formatting an IPADDRESS, any address within the mapped range will be formatted as an IPv4 address. Other addresses will be formatted as IPv6 using the canonical format defined in `RFC 5952 `_. -IPV6 mapped IPV4 address: +IPv6 mapped IPv4 address: :: @@ -1128,6 +1190,67 @@ Invalid examples: SELECT cast(from_hex('f000001100') as ipaddress); -- Invalid IP address binary length: 5 +From IPPREFIX +^^^^^^^^^^^^^ + +Returns the canonical(lowest) IPADDRESS in the subnet range. + +Examples: + +:: + + SELECT cast(ipprefix '1.2.3.4/24' as ipaddress) -- ipaddress '1.2.3.0' + SELECT cast(ipprefix '2001:db8::ff00:42:8329/64' as ipaddress) -- ipaddress '2001:db8::' + +Cast to IPPREFIX +---------------- + +From VARCHAR +^^^^^^^^^^^^ + +The IPPREFIX string must be in the form of *<ip-address>/<prefix-length>* as defined in `(RFC 4291#section-2.3) `_. +The IPADDRESS portion of the IPPREFIX follows the same rules as casting +`VARCHAR to IPADDRESS <#ipaddress-varchar>`_. + +The prefix portion must be <= 32 if the IP is an IPv4 address or <= 128 for an IPv6 address.
+As with IPADDRESS, any IPv6 address in the form of an IPv4 mapped IPv6 address will be +interpreted as an IPv4 address. Only the canonical(smallest) IP address will be stored +in the IPPREFIX. + +Examples: + +Valid examples: + +:: + + SELECT cast('2001:0db8:0000:0000:0000:ff00:0042:8329/32' as ipprefix); -- ipprefix '2001:0db8::/32' + SELECT cast('1.2.3.4/24' as ipprefix); -- ipprefix '1.2.3.0/24' + SELECT cast('::ffff:ffff:ffff/16' as ipprefix); -- ipprefix '255.255.0.0/16' + +Invalid examples: + +:: + + SELECT cast('2001:db8::1::1/1' as ipprefix); -- Invalid IP address '2001:db8::1::1' + SELECT cast('2001:0db8:0000:0000:0000:ff00:0042:8329/129' as ipprefix); -- CIDR value '129' is > network bit count '128' + SELECT cast('2001:0db8:0000:0000:0000:ff00:0042:8329/-1' as ipprefix); -- Mask value '-1' not a valid mask + SELECT cast('255.2.3.4/33' as ipprefix); -- CIDR value '33' is > network bit count '32' + SELECT cast('::ffff:ffff:ffff/33' as ipprefix); -- CIDR value '33' is > network bit count '32' + + +From IPADDRESS +^^^^^^^^^^^^^^ + +Returns an IPPREFIX where the prefix length is the length of the entire IP Address. +Prefix length for IPv4 is 32 and for IPv6 it is 128. 
+ +Examples: + +:: + + SELECT cast(ipaddress '1.2.3.4' as ipprefix) -- ipprefix '1.2.3.4/32' + SELECT cast(ipaddress '2001:db8::ff00:42:8329' as ipprefix) -- ipprefix '2001:db8::ff00:42:8329/128' + Miscellaneous ------------- diff --git a/velox/expression/tests/CustomTypeTest.cpp b/velox/expression/tests/CustomTypeTest.cpp index c55015d85900e..97afc1f6c79d8 100644 --- a/velox/expression/tests/CustomTypeTest.cpp +++ b/velox/expression/tests/CustomTypeTest.cpp @@ -217,6 +217,7 @@ TEST_F(CustomTypeTest, getCustomTypeNames) { "TIMESTAMP WITH TIME ZONE", "UUID", "IPADDRESS", + "IPPREFIX", }), names); @@ -231,6 +232,7 @@ TEST_F(CustomTypeTest, getCustomTypeNames) { "TIMESTAMP WITH TIME ZONE", "UUID", "IPADDRESS", + "IPPREFIX", "FANCY_INT", }), names); diff --git a/velox/functions/prestosql/IPAddressFunctions.h b/velox/functions/prestosql/IPAddressFunctions.h index abd214a9aba47..a08ef92de5343 100644 --- a/velox/functions/prestosql/IPAddressFunctions.h +++ b/velox/functions/prestosql/IPAddressFunctions.h @@ -16,11 +16,13 @@ #pragma once #include "velox/functions/prestosql/types/IPAddressType.h" +#include "velox/functions/prestosql/types/IPPrefixType.h" namespace facebook::velox::functions { void registerIPAddressFunctions(const std::string& prefix) { registerIPAddressType(); + registerIPPrefixType(); } } // namespace facebook::velox::functions diff --git a/velox/functions/prestosql/TypeOf.cpp b/velox/functions/prestosql/TypeOf.cpp index 77a4e65387965..6df17f29ad61e 100644 --- a/velox/functions/prestosql/TypeOf.cpp +++ b/velox/functions/prestosql/TypeOf.cpp @@ -16,6 +16,7 @@ #include "velox/expression/VectorFunction.h" #include "velox/functions/prestosql/types/HyperLogLogType.h" #include "velox/functions/prestosql/types/IPAddressType.h" +#include "velox/functions/prestosql/types/IPPrefixType.h" #include "velox/functions/prestosql/types/JsonType.h" #include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h" #include "velox/functions/prestosql/types/UuidType.h" @@ 
-78,6 +79,8 @@ std::string typeName(const TypePtr& type) { case TypeKind::VARBINARY: if (isHyperLogLogType(type)) { return "HyperLogLog"; + } else if (isIPPrefixType(type)) { + return "ipprefix"; } return "varbinary"; case TypeKind::TIMESTAMP: diff --git a/velox/functions/prestosql/tests/CMakeLists.txt b/velox/functions/prestosql/tests/CMakeLists.txt index 2c0d1696c275e..477cf20937694 100644 --- a/velox/functions/prestosql/tests/CMakeLists.txt +++ b/velox/functions/prestosql/tests/CMakeLists.txt @@ -65,6 +65,7 @@ add_executable( HyperLogLogFunctionsTest.cpp InPredicateTest.cpp IPAddressCastTest.cpp + IPPrefixCastTest.cpp JsonCastTest.cpp JsonExtractScalarTest.cpp JsonFunctionsTest.cpp diff --git a/velox/functions/prestosql/tests/IPAddressCastTest.cpp b/velox/functions/prestosql/tests/IPAddressCastTest.cpp index 0f62b6b724808..a13ec9114e94a 100644 --- a/velox/functions/prestosql/tests/IPAddressCastTest.cpp +++ b/velox/functions/prestosql/tests/IPAddressCastTest.cpp @@ -24,20 +24,20 @@ namespace { class IPAddressCastTest : public functions::test::FunctionBaseTest { protected: std::optional castToVarchar( - const std::optional input) { + const std::optional& input) { auto result = evaluateOnce( "cast(cast(c0 as ipaddress) as varchar)", input); return result; } std::optional castFromVarbinary( - const std::optional input) { + const std::optional& input) { auto result = evaluateOnce("cast(from_hex(c0) as ipaddress)", input); return result; } - std::optional allCasts(const std::optional input) { + std::optional allCasts(const std::optional& input) { auto result = evaluateOnce( "cast(cast(cast(cast(c0 as ipaddress) as varbinary) as ipaddress) as varchar)", input); @@ -45,7 +45,7 @@ class IPAddressCastTest : public functions::test::FunctionBaseTest { } }; -int128_t stringToInt128(std::string value) { +int128_t stringToInt128(const std::string& value) { int128_t res = 0; for (char c : value) { res = res * 10 + c - '0'; diff --git 
a/velox/functions/prestosql/tests/IPPrefixCastTest.cpp b/velox/functions/prestosql/tests/IPPrefixCastTest.cpp new file mode 100644 index 0000000000000..419e03e2f048d --- /dev/null +++ b/velox/functions/prestosql/tests/IPPrefixCastTest.cpp @@ -0,0 +1,186 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/common/base/tests/GTestUtils.h" +#include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h" + +namespace facebook::velox::functions::prestosql { + +namespace { + +class IPPrefixCastTest : public functions::test::FunctionBaseTest { + protected: + std::optional castToVarchar( + const std::optional& input) { + auto result = evaluateOnce( + "cast(cast(c0 as ipprefix) as varchar)", input); + return result; + } + + std::optional castFromIPAddress( + const std::optional& input) { + auto result = evaluateOnce( + "cast(cast(cast(c0 as ipaddress) as ipprefix) as varchar)", input); + return result; + } + + std::optional castToIPAddress( + const std::optional& input) { + auto result = evaluateOnce( + "cast(cast(c0 as ipprefix) as ipaddress)", input); + return result; + } +}; + +int128_t stringToInt128(const std::string& value) { + int128_t res = 0; + for (char c : value) { + res = res * 10 + c - '0'; + } + return res; +} + +TEST_F(IPPrefixCastTest, varcharCast) { + EXPECT_EQ(castToVarchar("::ffff:1.2.3.4/24"), "1.2.3.0/24"); + EXPECT_EQ(castToVarchar("192.168.0.0/24"), 
"192.168.0.0/24"); + EXPECT_EQ(castToVarchar("255.2.3.4/0"), "0.0.0.0/0"); + EXPECT_EQ(castToVarchar("255.2.3.4/1"), "128.0.0.0/1"); + EXPECT_EQ(castToVarchar("255.2.3.4/2"), "192.0.0.0/2"); + EXPECT_EQ(castToVarchar("255.2.3.4/4"), "240.0.0.0/4"); + EXPECT_EQ(castToVarchar("1.2.3.4/8"), "1.0.0.0/8"); + EXPECT_EQ(castToVarchar("1.2.3.4/16"), "1.2.0.0/16"); + EXPECT_EQ(castToVarchar("1.2.3.4/24"), "1.2.3.0/24"); + EXPECT_EQ(castToVarchar("1.2.3.255/25"), "1.2.3.128/25"); + EXPECT_EQ(castToVarchar("1.2.3.255/26"), "1.2.3.192/26"); + EXPECT_EQ(castToVarchar("1.2.3.255/28"), "1.2.3.240/28"); + EXPECT_EQ(castToVarchar("1.2.3.255/30"), "1.2.3.252/30"); + EXPECT_EQ(castToVarchar("1.2.3.255/32"), "1.2.3.255/32"); + EXPECT_EQ( + castToVarchar("2001:0db8:0000:0000:0000:ff00:0042:8329/128"), + "2001:db8::ff00:42:8329/128"); + EXPECT_EQ( + castToVarchar("2001:db8::ff00:42:8329/128"), + "2001:db8::ff00:42:8329/128"); + EXPECT_EQ(castToVarchar("2001:db8:0:0:1:0:0:1/128"), "2001:db8::1:0:0:1/128"); + EXPECT_EQ(castToVarchar("2001:db8:0:0:1::1/128"), "2001:db8::1:0:0:1/128"); + EXPECT_EQ(castToVarchar("2001:db8::1:0:0:1/128"), "2001:db8::1:0:0:1/128"); + EXPECT_EQ( + castToVarchar("2001:DB8::FF00:ABCD:12EF/128"), + "2001:db8::ff00:abcd:12ef/128"); + EXPECT_EQ(castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/0"), "::/0"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/1"), "8000::/1"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/2"), "c000::/2"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/4"), "f000::/4"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/8"), "ff00::/8"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/16"), "ffff::/16"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/32"), + "ffff:ffff::/32"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/48"), + "ffff:ffff:ffff::/48"); + EXPECT_EQ( + 
castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/64"), + "ffff:ffff:ffff:ffff::/64"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/80"), + "ffff:ffff:ffff:ffff:ffff::/80"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/96"), + "ffff:ffff:ffff:ffff:ffff:ffff::/96"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/112"), + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:0/112"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/120"), + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff00/120"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/124"), + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff0/124"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/126"), + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffc/126"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/127"), + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe/127"); + EXPECT_EQ( + castToVarchar("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128"), + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128"); + EXPECT_EQ(castToVarchar("10.0.0.0/32"), "10.0.0.0/32"); + EXPECT_EQ(castToVarchar("64:ff9b::10.0.0.0/128"), "64:ff9b::a00:0/128"); +} + +TEST_F(IPPrefixCastTest, invalidIPPrefix) { + VELOX_ASSERT_THROW( + castToVarchar("facebook.com/32"), "Invalid IP address 'facebook.com'"); + VELOX_ASSERT_THROW( + castToVarchar("localhost/32"), "Invalid IP address 'localhost'"); + VELOX_ASSERT_THROW( + castToVarchar("2001:db8::1::1/128"), + "Invalid IP address '2001:db8::1::1'"); + VELOX_ASSERT_THROW( + castToVarchar("2001:zxy::1::1/128"), + "Invalid IP address '2001:zxy::1::1'"); + VELOX_ASSERT_THROW( + castToVarchar("789.1.1.1/32"), "Invalid IP address '789.1.1.1'"); + VELOX_ASSERT_THROW( + castToVarchar("192.1.1.1"), + "Invalid CIDR IP address specified. 
Expected IP/PREFIX format, got '192.1.1.1'"); + VELOX_ASSERT_THROW( + castToVarchar("192.1.1.1/128"), + "CIDR value '128' is > network bit count '32'"); + VELOX_ASSERT_THROW( + castToVarchar("192.1.1.1/-1"), "Mask value '-1' not a valid mask"); + VELOX_ASSERT_THROW( + castToVarchar("::ffff:ffff:ffff/33"), + "CIDR value '33' is > network bit count '32'"); + VELOX_ASSERT_THROW( + castToVarchar("::ffff:ffff:ffff/-1"), "Mask value '-1' not a valid mask"); + VELOX_ASSERT_THROW( + castToVarchar("::/129"), "CIDR value '129' is > network bit count '128'"); + VELOX_ASSERT_THROW( + castToVarchar("::/-1"), "Mask value '-1' not a valid mask"); +} + +TEST_F(IPPrefixCastTest, fromIPAddressCast) { + EXPECT_EQ(castFromIPAddress("1.2.3.4"), "1.2.3.4/32"); + EXPECT_EQ(castFromIPAddress("::ffff:102:304"), "1.2.3.4/32"); + EXPECT_EQ(castFromIPAddress("::1"), "::1/128"); + EXPECT_EQ( + castFromIPAddress("2001:db8::ff00:42:8329"), + "2001:db8::ff00:42:8329/128"); +} + +TEST_F(IPPrefixCastTest, toIPAddressCast) { + EXPECT_EQ(castToIPAddress("1.2.3.4/32"), stringToInt128("281470698652420")); + EXPECT_EQ(castToIPAddress("1.2.3.4/24"), stringToInt128("281470698652416")); + EXPECT_EQ(castToIPAddress("::1/128"), stringToInt128("1")); + EXPECT_EQ( + castToIPAddress("2001:db8::ff00:42:8329/128"), + stringToInt128("42540766411282592856904265327123268393")); + EXPECT_EQ( + castToIPAddress("2001:db8::ff00:42:8329/64"), + stringToInt128("42540766411282592856903984951653826560")); +} + +} // namespace + +} // namespace facebook::velox::functions::prestosql diff --git a/velox/functions/prestosql/types/CMakeLists.txt b/velox/functions/prestosql/types/CMakeLists.txt index 0089307a4f071..14c407ba89cec 100644 --- a/velox/functions/prestosql/types/CMakeLists.txt +++ b/velox/functions/prestosql/types/CMakeLists.txt @@ -17,7 +17,8 @@ velox_add_library( JsonType.cpp TimestampWithTimeZoneType.cpp UuidType.cpp - IPAddressType.cpp) + IPAddressType.cpp + IPPrefixType.cpp) velox_link_libraries( velox_presto_types 
diff --git a/velox/functions/prestosql/types/IPAddressType.cpp b/velox/functions/prestosql/types/IPAddressType.cpp index 691ca0a28ce2b..9bf87bd78dd53 100644 --- a/velox/functions/prestosql/types/IPAddressType.cpp +++ b/velox/functions/prestosql/types/IPAddressType.cpp @@ -17,11 +17,11 @@ #include "velox/functions/prestosql/types/IPAddressType.h" #include #include "velox/expression/CastExpr.h" +#include "velox/functions/prestosql/types/IPPrefixType.h" static constexpr int kIPV4AddressBytes = 4; static constexpr int kIPV4ToV6FFIndex = 10; static constexpr int kIPV4ToV6Index = 12; -static constexpr int kIPAddressBytes = 16; namespace facebook::velox { @@ -60,10 +60,14 @@ class IPAddressCastOperator : public exec::CastOperator { if (input.typeKind() == TypeKind::VARCHAR) { castFromString(input, context, rows, *result); } else if (input.typeKind() == TypeKind::VARBINARY) { - castFromVarbinary(input, context, rows, *result); + if (isIPPrefixType(input.type())) { + castFromIPPrefix(input, context, rows, *result); + } else { + castFromVarbinary(input, context, rows, *result); + } } else { VELOX_UNSUPPORTED( - "Cast from {} to IPAddress not supported", resultType->toString()); + "Cast from {} to IPAddress not supported", input.type()->toString()); } } @@ -78,7 +82,11 @@ class IPAddressCastOperator : public exec::CastOperator { if (resultType->kind() == TypeKind::VARCHAR) { castToString(input, context, rows, *result); } else if (resultType->kind() == TypeKind::VARBINARY) { - castToVarbinary(input, context, rows, *result); + if (isIPPrefixType(resultType)) { + castToIPPrefix(input, context, rows, *result); + } else { + castToVarbinary(input, context, rows, *result); + } } else { VELOX_UNSUPPORTED( "Cast from IPAddress to {} not supported", resultType->toString()); @@ -206,6 +214,57 @@ class IPAddressCastOperator : public exec::CastOperator { flatResult->set(row, intAddr); }); } + + static void castFromIPPrefix( + const BaseVector& input, + exec::EvalCtx& context, + const 
SelectivityVector& rows, + BaseVector& result) { + auto* flatResult = result.as>(); + const auto* prefixes = input.as>(); + + context.applyToSelectedNoThrow(rows, [&](auto row) { + const auto prefix = prefixes->valueAt(row); + int128_t addrResult = 0; + folly::ByteArray16 addrBytes; + + memcpy(&addrBytes, prefix.data(), kIPAddressBytes); + std::reverse(addrBytes.begin(), addrBytes.end()); + + memcpy(&addrResult, &addrBytes, kIPAddressBytes); + flatResult->set(row, addrResult); + }); + } + + static void castToIPPrefix( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + BaseVector& result) { + auto* flatResult = result.as>(); + const auto* ipAddresses = input.as>(); + + context.applyToSelectedNoThrow(rows, [&](auto row) { + auto ipAddress = ipAddresses->valueAt(row); + folly::ByteArray16 addrBytes; + + exec::StringWriter result(flatResult, row); + result.resize(kIPPrefixBytes); + + memcpy(&addrBytes, &ipAddress, kIPAddressBytes); + std::reverse(addrBytes.begin(), addrBytes.end()); + memcpy(result.data(), &addrBytes, kIPAddressBytes); + + folly::IPAddressV6 v6Addr(addrBytes); + if (v6Addr.isIPv4Mapped()) { + result.data()[kIPAddressBytes] = kIPV4Bits; + } else { + result.data()[kIPAddressBytes] = kIPV6Bits; + } + + result.finalize(); + }); + } }; class IPAddressTypeFactories : public CustomTypeFactories { diff --git a/velox/functions/prestosql/types/IPAddressType.h b/velox/functions/prestosql/types/IPAddressType.h index e1e2d9fc1bf28..4c6dbe0af9c05 100644 --- a/velox/functions/prestosql/types/IPAddressType.h +++ b/velox/functions/prestosql/types/IPAddressType.h @@ -18,6 +18,11 @@ #include "velox/type/SimpleFunctionApi.h" #include "velox/type/Type.h" +static constexpr int kIPAddressBytes = 16; +static constexpr int kIPPrefixBytes = 17; +static constexpr uint8_t kIPV4Bits = 32; +static constexpr uint8_t kIPV6Bits = 128; + namespace facebook::velox { class IPAddressType : public HugeintType { diff --git 
a/velox/functions/prestosql/types/IPPrefixType.cpp b/velox/functions/prestosql/types/IPPrefixType.cpp new file mode 100644 index 0000000000000..0a5da3bad7ed3 --- /dev/null +++ b/velox/functions/prestosql/types/IPPrefixType.cpp @@ -0,0 +1,326 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/functions/prestosql/types/IPPrefixType.h" +#include +#include +#include "velox/expression/CastExpr.h" +#include "velox/functions/prestosql/types/IPAddressType.h" + +namespace facebook::velox { + +namespace { + +class IPPrefixCastOperator : public exec::CastOperator { + public: + bool isSupportedFromType(const TypePtr& other) const override { + switch (other->kind()) { + case TypeKind::VARCHAR: + return true; + case TypeKind::HUGEINT: + if (isIPAddressType(other)) { + return true; + } + default: + return false; + } + } + + bool isSupportedToType(const TypePtr& other) const override { + switch (other->kind()) { + case TypeKind::VARCHAR: + return true; + case TypeKind::HUGEINT: + if (isIPAddressType(other)) { + return true; + } + default: + return false; + } + } + + void castTo( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + const TypePtr& resultType, + VectorPtr& result) const override { + context.ensureWritable(rows, resultType, result); + + if (input.typeKind() == TypeKind::VARCHAR) { + castFromString(input, context, rows, *result); + } else if 
(isIPAddressType(input.type())) {
+      castFromIPAddress(input, context, rows, *result);
+    } else {
+      VELOX_UNSUPPORTED(
+          "Cast from {} to IPPrefix not yet supported",
+          input.type()->toString());
+    }
+  }
+
+  /// Casts IPPREFIX 'input' to 'resultType' for the selected 'rows'.
+  /// VARCHAR and IPADDRESS targets are handled; any other target type raises
+  /// VELOX_UNSUPPORTED.
+  void castFrom(
+      const BaseVector& input,
+      exec::EvalCtx& context,
+      const SelectivityVector& rows,
+      const TypePtr& resultType,
+      VectorPtr& result) const override {
+    context.ensureWritable(rows, resultType, result);
+
+    if (resultType->kind() == TypeKind::VARCHAR) {
+      castToString(input, context, rows, *result);
+    } else if (isIPAddressType(resultType)) {
+      castToIPAddress(input, context, rows, *result);
+    } else {
+      VELOX_UNSUPPORTED(
+          "Cast from IPPrefix to {} not yet supported", resultType->toString());
+    }
+  }
+
+ private:
+  /// Formats each selected IPPREFIX row as "address/prefix-length". The prefix
+  /// length is the byte stored right after the address bytes. IPv4-mapped
+  /// addresses are printed in their IPv4 form.
+  static void castToString(
+      const BaseVector& input,
+      exec::EvalCtx& context,
+      const SelectivityVector& rows,
+      BaseVector& result) {
+    auto* flatResult = result.as<FlatVector<StringView>>();
+    const auto* ipaddresses = input.as<SimpleVector<StringView>>();
+
+    context.applyToSelectedNoThrow(rows, [&](auto row) {
+      const auto intAddr = ipaddresses->valueAt(row);
+      folly::ByteArray16 addrBytes;
+
+      // NOTE(review): kIPAddressBytes is defined outside this view; it is
+      // presumably 16, matching folly::ByteArray16.
+      memcpy(&addrBytes, intAddr.data(), kIPAddressBytes);
+      folly::IPAddressV6 v6Addr(addrBytes);
+
+      exec::StringWriter result(flatResult, row);
+      if (v6Addr.isIPv4Mapped()) {
+        result.append(fmt::format(
+            "{}/{}",
+            v6Addr.createIPv4().str(),
+            static_cast<uint8_t>(intAddr.data()[kIPAddressBytes])));
+      } else {
+        result.append(fmt::format(
+            "{}/{}",
+            v6Addr.str(),
+            static_cast<uint8_t>(intAddr.data()[kIPAddressBytes])));
+      }
+      result.finalize();
+    });
+  }
+
+  /// Splits 'ipSlashCidr' on '/' and returns the pieces. Used only to build
+  /// error messages in castFromString.
+  static folly::small_vector<folly::StringPiece, 2> splitIpSlashCidr(
+      const folly::StringPiece& ipSlashCidr) {
+    folly::small_vector<folly::StringPiece, 2> vec;
+    folly::split('/', ipSlashCidr, vec);
+    return vec;
+  }
+
+  /// Parses each selected VARCHAR row as "IP/PREFIX" and writes the IPPREFIX
+  /// value: the address masked to the prefix length, followed by one byte
+  /// holding the prefix length. Rows that fail to parse get a user-error
+  /// status; when threadSkipErrorDetails() is true the status carries no
+  /// message.
+  static void castFromString(
+      const BaseVector& input,
+      exec::EvalCtx& context,
+      const SelectivityVector& rows,
+      BaseVector& result) {
+    auto* flatResult = result.as<FlatVector<StringView>>();
+    const auto* ipAddressStrings = input.as<SimpleVector<StringView>>();
+
+    context.applyToSelectedNoThrow(rows, [&](auto row) {
+      auto ipAddressString = ipAddressStrings->valueAt(row);
+
+      // Folly allows for creation of networks without a "/" so check to make
+      // sure that we have one.
+      if (ipAddressString.str().find('/') == std::string::npos) {
+        context.setStatus(
+            row,
+            threadSkipErrorDetails()
+                ? Status::UserError()
+                : Status::UserError(
+                      "Invalid CIDR IP address specified. Expected IP/PREFIX format, got '{}'",
+                      ipAddressString.str()));
+        return;
+      }
+
+      folly::ByteArray16 addrBytes;
+      auto const maybeNet =
+          folly::IPAddress::tryCreateNetwork(ipAddressString, -1, false);
+
+      if (maybeNet.hasError()) {
+        if (threadSkipErrorDetails()) {
+          context.setStatus(row, Status::UserError());
+        } else {
+          // Translate each folly parse error into a user-facing message.
+          switch (maybeNet.error()) {
+            case folly::CIDRNetworkError::INVALID_DEFAULT_CIDR:
+              context.setStatus(
+                  row, Status::UserError("defaultCidr must be <= UINT8_MAX"));
+              break;
+            case folly::CIDRNetworkError::INVALID_IP_SLASH_CIDR:
+              context.setStatus(
+                  row,
+                  Status::UserError(
+                      "Invalid CIDR IP address specified. Expected IP/PREFIX format, got '{}'",
+                      ipAddressString.str()));
+              break;
+            case folly::CIDRNetworkError::INVALID_IP: {
+              auto const vec = splitIpSlashCidr(ipAddressString);
+              context.setStatus(
+                  row,
+                  Status::UserError(
+                      "Invalid IP address '{}'",
+                      vec.size() > 0 ? vec.at(0) : ""));
+              break;
+            }
+            case folly::CIDRNetworkError::INVALID_CIDR: {
+              auto const vec = splitIpSlashCidr(ipAddressString);
+              context.setStatus(
+                  row,
+                  Status::UserError(
+                      "Mask value '{}' not a valid mask",
+                      vec.size() > 1 ? vec.at(1) : ""));
+              break;
+            }
+            case folly::CIDRNetworkError::CIDR_MISMATCH: {
+              auto const vec = splitIpSlashCidr(ipAddressString);
+              auto const subnet =
+                  folly::IPAddress::tryFromString(vec.at(0)).value();
+              context.setStatus(
+                  row,
+                  Status::UserError(
+                      "CIDR value '{}' is > network bit count '{}'",
+                      vec.size() == 2
+                          ? vec.at(1)
+                          : folly::to<std::string>(
+                                subnet.isV4() ? kIPV4Bits : kIPV6Bits),
+                      subnet.bitCount()));
+              break;
+            }
+            default:
+              context.setStatus(row, Status::UserError());
+              break;
+          }
+        }
+        return;
+      }
+
+      auto net = maybeNet.value();
+      if (net.first.isIPv4Mapped() || net.first.isV4()) {
+        // IPv4 and IPv4-mapped addresses are limited to kIPV4Bits of prefix.
+        if (net.second > kIPV4Bits) {
+          context.setStatus(
+              row,
+              threadSkipErrorDetails()
+                  ? Status::UserError()
+                  : Status::UserError(
+                        "CIDR value '{}' is > network bit count '{}'",
+                        net.second,
+                        kIPV4Bits));
+          return;
+        }
+        // Mask in IPv4 space, then convert to IPv6 form for storage.
+        addrBytes = folly::IPAddress::createIPv4(net.first)
+                        .mask(net.second)
+                        .createIPv6()
+                        .toByteArray();
+      } else {
+        if (net.second > kIPV6Bits) {
+          context.setStatus(
+              row,
+              threadSkipErrorDetails()
+                  ? Status::UserError()
+                  : Status::UserError(
+                        "CIDR value '{}' is > network bit count '{}'",
+                        net.second,
+                        kIPV6Bits));
+          return;
+        }
+        addrBytes = folly::IPAddress::createIPv6(net.first)
+                        .mask(net.second)
+                        .toByteArray();
+      }
+
+      // Layout: address bytes first, prefix length in the byte after them.
+      exec::StringWriter result(flatResult, row);
+      result.resize(kIPPrefixBytes);
+      memcpy(result.data(), &addrBytes, kIPAddressBytes);
+      result.data()[kIPAddressBytes] = net.second;
+      result.finalize();
+    });
+  }
+
+  /// Extracts the address part of each selected IPPREFIX row as an IPADDRESS
+  /// value. The prefix-length byte is dropped. The address bytes are reversed
+  /// before being copied into the 128-bit integer result.
+  static void castToIPAddress(
+      const BaseVector& input,
+      exec::EvalCtx& context,
+      const SelectivityVector& rows,
+      BaseVector& result) {
+    auto* flatResult = result.as<FlatVector<int128_t>>();
+    const auto* ipaddresses = input.as<SimpleVector<StringView>>();
+
+    context.applyToSelectedNoThrow(rows, [&](auto row) {
+      const auto intAddr = ipaddresses->valueAt(row);
+      int128_t addrResult = 0;
+      folly::ByteArray16 addrBytes;
+
+      memcpy(&addrBytes, intAddr.data(), kIPAddressBytes);
+      std::reverse(addrBytes.begin(), addrBytes.end());
+
+      memcpy(&addrResult, &addrBytes, kIPAddressBytes);
+      flatResult->set(row, addrResult);
+    });
+  }
+
+  /// Builds an IPPREFIX from each selected IPADDRESS row. The address bytes
+  /// are reversed into the IPPREFIX byte order and the prefix length is set
+  /// to kIPV4Bits for IPv4-mapped addresses and kIPV6Bits otherwise.
+  static void castFromIPAddress(
+      const BaseVector& input,
+      exec::EvalCtx& context,
+      const SelectivityVector& rows,
+      BaseVector& result) {
+    auto* flatResult = result.as<FlatVector<StringView>>();
+    const auto* ipAddresses = input.as<SimpleVector<int128_t>>();
+
+    context.applyToSelectedNoThrow(rows, [&](auto row) {
+      auto ipAddress = ipAddresses->valueAt(row);
+      folly::ByteArray16 addrBytes;
+
+      exec::StringWriter result(flatResult, row);
+      result.resize(kIPPrefixBytes);
+
+      memcpy(&addrBytes, &ipAddress, kIPAddressBytes);
+      std::reverse(addrBytes.begin(), addrBytes.end());
+      memcpy(result.data(), &addrBytes, kIPAddressBytes);
+
+      folly::IPAddressV6 v6Addr(addrBytes);
+      if (v6Addr.isIPv4Mapped()) {
+        result.data()[kIPAddressBytes] = kIPV4Bits;
+      } else {
+        result.data()[kIPAddressBytes] = kIPV6Bits;
+      }
+
+      result.finalize();
+    });
+  }
+};
+
+/// Factory bundle handed to registerCustomType: supplies the IPPREFIX type
+/// singleton and its cast operator.
+class IPPrefixTypeFactories : public CustomTypeFactories {
+ public:
+  TypePtr getType() const override {
+    return IPPrefixType::get();
+  }
+
+  exec::CastOperatorPtr getCastOperator() const override {
+    return std::make_shared<IPPrefixCastOperator>();
+  }
+};
+
+} // namespace
+
+/// Registers the IPPREFIX custom type under the name "ipprefix".
+void registerIPPrefixType() {
+  registerCustomType(
+      "ipprefix", std::make_unique<const IPPrefixTypeFactories>());
+}
+
+} // namespace facebook::velox
diff --git a/velox/functions/prestosql/types/IPPrefixType.h b/velox/functions/prestosql/types/IPPrefixType.h
new file mode 100644
index 0000000000000..1e20c049ec0bd
--- /dev/null
+++ b/velox/functions/prestosql/types/IPPrefixType.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include "velox/type/SimpleFunctionApi.h"
+#include "velox/type/Type.h"
+
+namespace facebook::velox {
+
+/// Logical type for an IP prefix: an IP address together with a prefix
+/// length. It derives from VarbinaryType and is a singleton; the constructor
+/// is private and the only instance is obtained through get().
+class IPPrefixType : public VarbinaryType {
+  IPPrefixType() = default;
+
+ public:
+  /// Returns the single shared instance of this type.
+  static const std::shared_ptr<const IPPrefixType>& get() {
+    static const std::shared_ptr<const IPPrefixType> instance{
+        new IPPrefixType()};
+
+    return instance;
+  }
+
+  bool equivalent(const Type& other) const override {
+    // Pointer comparison works since this type is a singleton.
+    return this == &other;
+  }
+
+  const char* name() const override {
+    return "IPPREFIX";
+  }
+
+  std::string toString() const override {
+    return name();
+  }
+
+  /// Serializes the type as an object with "name" set to "Type" and "type"
+  /// set to the type name.
+  folly::dynamic serialize() const override {
+    folly::dynamic obj = folly::dynamic::object;
+    obj["name"] = "Type";
+    obj["type"] = name();
+    return obj;
+  }
+};
+
+/// Returns true if 'type' is the IPPREFIX singleton.
+FOLLY_ALWAYS_INLINE bool isIPPrefixType(const TypePtr& type) {
+  // Pointer comparison works since this type is a singleton.
+  return IPPrefixType::get() == type;
+}
+
+/// Convenience accessor for the IPPREFIX type singleton.
+FOLLY_ALWAYS_INLINE std::shared_ptr<const IPPrefixType> IPPREFIX() {
+  return IPPrefixType::get();
+}
+
+/// Tag struct describing IPPREFIX: its underlying type is Varbinary and its
+/// type name is "ipprefix". Used as the argument of CustomType below.
+struct IPPrefixT {
+  using type = Varbinary;
+  static constexpr const char* typeName = "ipprefix";
+};
+
+using IPPrefix = CustomType<IPPrefixT>;
+
+/// Registers the IPPREFIX custom type; defined in IPPrefixType.cpp.
+void registerIPPrefixType();
+
+} // namespace facebook::velox
diff --git a/velox/functions/prestosql/types/UuidType.cpp b/velox/functions/prestosql/types/UuidType.cpp
index 8d0b5b5b22fa0..5aa420113a69d 100644
--- a/velox/functions/prestosql/types/UuidType.cpp
+++ b/velox/functions/prestosql/types/UuidType.cpp
@@ -45,7 +45,7 @@ class UuidCastOperator : public exec::CastOperator {
       castFromString(input, context, rows, *result);
     } else {
       VELOX_UNSUPPORTED(
-          "Cast from {} to UUID not yet supported", resultType->toString());
+          "Cast from {} to UUID not yet supported", input.type()->toString());
     }
   }
 
diff --git a/velox/functions/prestosql/types/tests/CMakeLists.txt b/velox/functions/prestosql/types/tests/CMakeLists.txt
index e2b8105d268ef..6dbffad027d55 100644
---
a/velox/functions/prestosql/types/tests/CMakeLists.txt
+++ b/velox/functions/prestosql/types/tests/CMakeLists.txt
@@ -19,7 +19,8 @@ add_executable(
   TimestampWithTimeZoneTypeTest.cpp
   TypeTestBase.cpp
   UuidTypeTest.cpp
-  IPAddressTypeTest.cpp)
+  IPAddressTypeTest.cpp
+  IPPrefixTypeTest.cpp)
 
 add_test(velox_presto_types_test velox_presto_types_test)
 
diff --git a/velox/functions/prestosql/types/tests/IPPrefixTypeTest.cpp b/velox/functions/prestosql/types/tests/IPPrefixTypeTest.cpp
new file mode 100644
index 0000000000000..2e360bf48d114
--- /dev/null
+++ b/velox/functions/prestosql/types/tests/IPPrefixTypeTest.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "velox/functions/prestosql/types/IPPrefixType.h"
+#include "velox/functions/prestosql/types/tests/TypeTestBase.h"
+
+namespace facebook::velox::test {
+
+/// Fixture whose constructor registers the IPPREFIX custom type.
+class IPPrefixTypeTest : public testing::Test, public TypeTestBase {
+ public:
+  IPPrefixTypeTest() {
+    registerIPPrefixType();
+  }
+};
+
+// Checks the type's name, physical kind, string form, absence of parameters
+// and lookup by name.
+TEST_F(IPPrefixTypeTest, basic) {
+  ASSERT_STREQ(IPPREFIX()->name(), "IPPREFIX");
+  ASSERT_STREQ(IPPREFIX()->kindName(), "VARBINARY");
+  // toString() returns std::string, so ASSERT_EQ compares contents here.
+  ASSERT_EQ(IPPREFIX()->toString(), "IPPREFIX");
+  ASSERT_TRUE(IPPREFIX()->parameters().empty());
+
+  ASSERT_TRUE(hasType("IPPREFIX"));
+  ASSERT_EQ(*getType("IPPREFIX", {}), *IPPREFIX());
+}
+
+// Exercises type serialization through the shared TypeTestBase helper.
+TEST_F(IPPrefixTypeTest, serde) {
+  testTypeSerde(IPPREFIX());
+}
+} // namespace facebook::velox::test