From bfcdffbb2b181df7470832fabaf2ab00c45c0ca0 Mon Sep 17 00:00:00 2001 From: mohsaka <135669458+mohsaka@users.noreply.github.com> Date: Mon, 28 Oct 2024 16:10:28 -0700 Subject: [PATCH] Add IPPREFIX <-> VARCHAR cast --- velox/docs/functions/presto/conversion.rst | 137 +++++++++++++--- velox/functions/prestosql/TypeOf.cpp | 5 +- .../functions/prestosql/tests/CMakeLists.txt | 2 +- .../prestosql/types/IPAddressType.cpp | 1 - .../functions/prestosql/types/IPAddressType.h | 2 + .../prestosql/types/IPPrefixType.cpp | 147 +++++++++++++++++- 6 files changed, 264 insertions(+), 30 deletions(-) diff --git a/velox/docs/functions/presto/conversion.rst b/velox/docs/functions/presto/conversion.rst index b24117ae3068a..1be330e1a0985 100644 --- a/velox/docs/functions/presto/conversion.rst +++ b/velox/docs/functions/presto/conversion.rst @@ -30,7 +30,7 @@ are supported if the conversion of their element types are supported. In additio supported conversions to/from JSON are listed in :doc:`json`. .. list-table:: - :widths: 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 + :widths: 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 :header-rows: 1 * - @@ -49,6 +49,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - interval day to second - decimal - ipaddress + - ipprefix * - tinyint - Y - Y @@ -65,6 +66,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - smallint - Y - Y @@ -81,6 +83,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - integer - Y - Y @@ -97,6 +100,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - bigint - Y - Y @@ -113,6 +117,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - boolean - Y - Y @@ -129,6 +134,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - real - Y - Y @@ -145,6 +151,7 @@ supported conversions to/from JSON are listed in :doc:`json`. 
- - Y - + - * - double - Y - Y @@ -161,6 +168,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - varchar - Y - Y @@ -177,6 +185,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - Y + - Y * - varbinary - - @@ -193,6 +202,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - Y + - * - timestamp - - @@ -209,6 +219,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - + - * - timestamp with time zone - - @@ -225,6 +236,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - + - * - date - - @@ -241,6 +253,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - + - * - interval day to second - - @@ -257,6 +270,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - + - * - decimal - Y - Y @@ -273,6 +287,7 @@ supported conversions to/from JSON are listed in :doc:`json`. - - Y - + - * - ipaddress - - @@ -287,8 +302,26 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - + - + - Y + - + * - ipprefix - - + - + - + - + - + - + - Y + - + - + - + - + - + - + - + - Y Cast to Integral Types ---------------------- @@ -667,52 +700,79 @@ is the number of whole days in the interval, HH is then number of hours between From IPADDRESS ^^^^^^^^^^^^^^ -Casting from IPADDRESS to VARCHAR returns a string formatted as x.x.x.x for IPV4 formatted IPV6 addresses. -For all other IPV6 addresses it will be formatted in compressed alternate form IPV6 defined in `RFC 4291#section-2.2 `_ +Casting from IPADDRESS to VARCHAR returns a string formatted as x.x.x.x for IPv4 formatted IPv6 addresses. +For all other IPv6 addresses it will be formatted in compressed alternate form IPv6 defined in `RFC 4291#section-2.2 `_. 
-IPV4: +IPv4: :: SELECT cast(ipaddress '1.2.3.4' as varchar); -- '1.2.3.4' -IPV6: +IPv6: :: SELECT cast(ipaddress '2001:0db8:0000:0000:0000:ff00:0042:8329' as varchar); -- '2001:db8::ff00:42:8329' SELECT cast(ipaddress '0:0:0:0:0:0:13.1.68.3' as varchar); -- '::13.1.68.3' -IPV4 mapped IPV6: +IPv4 mapped IPv6: :: SELECT cast(ipaddress '::ffff:ffff:ffff' as varchar); -- '255.255.255.255' +From IPPREFIX +^^^^^^^^^^^^^ + +Casting from IPPREFIX to VARCHAR returns a string formatted as *x.x.x.x/<prefix-length>* for IPv4 formatted IPv6 addresses. + +For all other IPv6 addresses it will be formatted in compressed alternate form IPv6 defined in `RFC 4291#section-2.2 `_ +followed by */<prefix-length>*. [`RFC 4291#section-2.3 `_] + +IPv4: + +:: + + SELECT cast(ipprefix '1.2.0.0/16' as varchar); -- '1.2.0.0/16' + +IPv6: + +:: + + SELECT cast(ipprefix '2001:db8::ff00:42:8329/128' as varchar); -- '2001:db8::ff00:42:8329/128' + SELECT cast(ipprefix '0:0:0:0:0:0:13.1.68.3/32' as varchar); -- '::/32' + +IPv4 mapped IPv6: + +:: + + SELECT cast(ipprefix '::ffff:ffff:0000/16' as varchar); -- '255.255.0.0/16' + Cast to VARBINARY ----------------- From IPADDRESS ^^^^^^^^^^^^^^ -Returns the IPV6 address as a 16 byte varbinary string in network byte order. +Returns the IPv6 address as a 16 byte varbinary string in network byte order. -Internally, the type is a pure IPv6 address. Support for IPv4 is handled using the IPv4-mapped IPv6 address range `(RFC 4291#section-2.5.5.2) `_. +Internally, the type is a pure IPv6 address. Support for IPv4 is handled using the IPv4-mapped IPv6 address range. [`RFC 4291#section-2.5.5.2 `_] When creating an IPADDRESS, IPv4 addresses will be mapped into that range.
-IPV6: +IPv6: :: SELECT cast(ipaddress '2001:0db8:0000:0000:0000:ff00:0042:8329' as varbinary); -- 0x20010db8000000000000ff0000428329 -IPV4: +IPv4: :: SELECT cast('1.2.3.4' as ipaddress); -- 0x00000000000000000000ffff01020304 -IPV4 mapped IPV6: +IPv4 mapped IPv6: :: @@ -1036,16 +1096,18 @@ Invalid example Cast to IPADDRESS ----------------- +.. _ipaddress-from-varchar: + From VARCHAR ^^^^^^^^^^^^ To cast a varchar to IPAddress input string must be in the form of either -IPV4 or IPV6. +IPv4 or IPv6. -For IPV4 it must be in the form of: +For IPv4 it must be in the form of: x.x.x.x where each x is an integer value between 0-255. -For IPV6 it must follow any of the forms defined in `RFC 4291#section-2.2 `_. +For IPv6 it must follow any of the forms defined in `RFC 4291#section-2.2 `_. Full form: @@ -1087,16 +1149,16 @@ Invalid examples: From VARBINARY ^^^^^^^^^^^^^^ -To cast a varbinary to IPAddress it must be either IPV4(4 Bytes) -or IPV6(16 Bytes) in network byte order. +To cast a varbinary to IPAddress it must be either IPv4(4 Bytes) +or IPv6(16 Bytes) in network byte order. -IPV4: +IPv4: :: [01, 02, 03, 04] -> 1.2.3.4 -IPV6: +IPv6: :: @@ -1108,7 +1170,7 @@ When creating an IPADDRESS, IPv4 addresses will be mapped into that range. When formatting an IPADDRESS, any address within the mapped range will be formatted as an IPv4 address. Other addresses will be formatted as IPv6 using the canonical format defined in `RFC 5952 `_. -IPV6 mapped IPV4 address: +IPv4 mapped IPv6 address: :: @@ -1128,6 +1190,41 @@ Invalid examples: SELECT cast(from_hex('f000001100') as ipaddress); -- Invalid IP address binary length: 5 +Cast to IPPREFIX +---------------- + +From VARCHAR +^^^^^^^^^^^^ + +The IPPREFIX string must be in the form of *<ip-address>/<prefix-length>* as defined in `RFC 4291#section-2.3 `_. +The IPADDRESS portion of the IPPREFIX follows the same rules as casting +`IPADDRESS from VARCHAR <#ipaddress-from-varchar>`_.
+ +The prefix portion must be <= 32 if the IP is an IPv4 address or <= 128 for an IPv6 address. +As with IPADDRESS, any IPv6 address in the form of an IPv4 mapped IPv6 address will be +interpreted as an IPv4 address. Only the canonical (smallest) IP address will be stored +in the IPPREFIX. + +Examples: + +Valid examples: + +:: + + SELECT cast('2001:0db8:0000:0000:0000:ff00:0042:8329/32' as ipprefix); -- ipprefix '2001:db8::/32' + SELECT cast('1.2.3.4/24' as ipprefix); -- ipprefix '1.2.3.0/24' + SELECT cast('::ffff:ffff:ffff/16' as ipprefix); -- ipprefix '255.255.0.0/16' + +Invalid examples: + +:: + + SELECT cast('2001:db8::1::1/1' as ipprefix); -- Cannot cast value to IPPREFIX: 2001:db8::1::1/1 + SELECT cast('2001:0db8:0000:0000:0000:ff00:0042:8329/129' as ipprefix); -- Cannot cast value to IPPREFIX: 2001:0db8:0000:0000:0000:ff00:0042:8329/129 + SELECT cast('2001:0db8:0000:0000:0000:ff00:0042:8329/-1' as ipprefix); -- Cannot cast value to IPPREFIX: 2001:0db8:0000:0000:0000:ff00:0042:8329/-1 + SELECT cast('255.2.3.4/33' as ipprefix); -- Cannot cast value to IPPREFIX: 255.2.3.4/33 + SELECT cast('::ffff:ffff:ffff/33' as ipprefix); -- Cannot cast value to IPPREFIX: ::ffff:ffff:ffff/33 + Miscellaneous ------------- @@ -1137,4 +1234,4 @@ Miscellaneous SELECT typeof(123); -- integer SELECT typeof(1.5); -- double - SELECT typeof(array[1,2,3]); -- array(integer) + SELECT typeof(array[1,2,3]); -- array(integer) \ No newline at end of file diff --git a/velox/functions/prestosql/TypeOf.cpp b/velox/functions/prestosql/TypeOf.cpp index 85cf19128f758..f2a16af5ac2ff 100644 --- a/velox/functions/prestosql/TypeOf.cpp +++ b/velox/functions/prestosql/TypeOf.cpp @@ -79,8 +79,6 @@ std::string typeName(const TypePtr& type) { case TypeKind::VARBINARY: if (isHyperLogLogType(type)) { return "HyperLogLog"; - } else if (isIPPrefixType(type)) { - return "ipprefix"; } return "varbinary"; case TypeKind::TIMESTAMP: @@ -93,6 +91,9 @@ std::string typeName(const TypePtr& type) {
typeName(type->childAt(0)), typeName(type->childAt(1))); case TypeKind::ROW: { + if (isIPPrefixType(type)) { + return "ipprefix"; + } const auto& rowType = type->asRow(); std::ostringstream out; out << "row("; diff --git a/velox/functions/prestosql/tests/CMakeLists.txt b/velox/functions/prestosql/tests/CMakeLists.txt index 2c0d1696c275e..61702674c4530 100644 --- a/velox/functions/prestosql/tests/CMakeLists.txt +++ b/velox/functions/prestosql/tests/CMakeLists.txt @@ -65,6 +65,7 @@ add_executable( HyperLogLogFunctionsTest.cpp InPredicateTest.cpp IPAddressCastTest.cpp + IPPrefixCastTest.cpp JsonCastTest.cpp JsonExtractScalarTest.cpp JsonFunctionsTest.cpp @@ -105,7 +106,6 @@ add_executable( WordStemTest.cpp ZipTest.cpp ZipWithTest.cpp) - add_test(velox_functions_test velox_functions_test) target_link_libraries( diff --git a/velox/functions/prestosql/types/IPAddressType.cpp b/velox/functions/prestosql/types/IPAddressType.cpp index 691ca0a28ce2b..69808499eccc3 100644 --- a/velox/functions/prestosql/types/IPAddressType.cpp +++ b/velox/functions/prestosql/types/IPAddressType.cpp @@ -21,7 +21,6 @@ static constexpr int kIPV4AddressBytes = 4; static constexpr int kIPV4ToV6FFIndex = 10; static constexpr int kIPV4ToV6Index = 12; -static constexpr int kIPAddressBytes = 16; namespace facebook::velox { diff --git a/velox/functions/prestosql/types/IPAddressType.h b/velox/functions/prestosql/types/IPAddressType.h index e1e2d9fc1bf28..078e31e857656 100644 --- a/velox/functions/prestosql/types/IPAddressType.h +++ b/velox/functions/prestosql/types/IPAddressType.h @@ -18,6 +18,8 @@ #include "velox/type/SimpleFunctionApi.h" #include "velox/type/Type.h" +static constexpr int kIPAddressBytes = 16; + namespace facebook::velox { class IPAddressType : public HugeintType { diff --git a/velox/functions/prestosql/types/IPPrefixType.cpp b/velox/functions/prestosql/types/IPPrefixType.cpp index aad808d7cfbc2..69e587df91517 100644 --- a/velox/functions/prestosql/types/IPPrefixType.cpp +++ 
b/velox/functions/prestosql/types/IPPrefixType.cpp @@ -14,11 +14,16 @@ * limitations under the License. */ +#include #include #include "velox/expression/CastExpr.h" +#include "velox/functions/prestosql/types/IPAddressType.h" #include "velox/functions/prestosql/types/IPPrefixType.h" +static constexpr uint8_t kIPV4Bits = 32; /* Largest prefix length castFromString accepts for an IPv4 or IPv4-mapped address. */ +static constexpr uint8_t kIPV6Bits = 128; /* Largest prefix length castFromString accepts for any other address. */ + namespace facebook::velox { namespace { @@ -26,11 +31,29 @@ namespace { class IPPrefixCastOperator : public exec::CastOperator { public: bool isSupportedFromType(const TypePtr& other) const override { - return false; + switch (other->kind()) { /* Source types reported as castable to IPPREFIX: VARCHAR and IPADDRESS (a HUGEINT-kind type); everything else returns false. */ + case TypeKind::VARCHAR: + return true; + case TypeKind::HUGEINT: /* A HUGEINT that is not IPADDRESS has no return here, so control falls through to default and returns false. */ + if (isIPAddressType(other)) { + return true; + } + default: + return false; + } } bool isSupportedToType(const TypePtr& other) const override { - return false; + switch (other->kind()) { /* Target types reported as castable from IPPREFIX: VARCHAR and IPADDRESS; everything else returns false. */ + case TypeKind::VARCHAR: + return true; + case TypeKind::HUGEINT: /* Falls through to default (false) when the HUGEINT type is not IPADDRESS. */ + if (isIPAddressType(other)) { + return true; + } + default: + return false; + } } void castTo( @@ -40,8 +63,14 @@ class IPPrefixCastOperator : public exec::CastOperator { const TypePtr& resultType, VectorPtr& result) const override { context.ensureWritable(rows, resultType, result); - VELOX_NYI( - "Cast from {} to IPPrefix not yet supported", input.type()->toString()); + + if (input.typeKind() == TypeKind::VARCHAR) { /* Only VARCHAR input is implemented; every other input type reaches VELOX_NYI below. NOTE(review): isSupportedFromType also returns true for IPADDRESS - confirm that mismatch is intended. */ + castFromString(input, context, rows, *result); + } else { + VELOX_NYI( + "Cast from {} to IPPrefix not yet supported", + input.type()->toString()); + } } void castFrom( @@ -51,8 +80,114 @@ class IPPrefixCastOperator : public exec::CastOperator { const TypePtr& resultType, VectorPtr& result) const override { context.ensureWritable(rows, resultType, result); - VELOX_NYI( - "Cast from IPPrefix to {} not yet supported", resultType->toString()); + + if (resultType->kind() == TypeKind::VARCHAR) { /* Only a VARCHAR result is implemented; every other result type reaches VELOX_NYI below. NOTE(review): isSupportedToType also returns true for IPADDRESS - confirm that mismatch is intended. */ + castToString(input, context, rows, *result); + } else { + VELOX_NYI( + "Cast from IPPrefix to {} not yet supported", 
resultType->toString()); + } + } + + private: /* castToString writes each selected IPPREFIX row to the string result as address/prefix; IPv4-mapped addresses are printed in IPv4 form. */ + static void castToString( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + BaseVector& result) { + auto* flatResult = result.as>(); + const auto* ipprefixes = input.as(); + const auto* ip = ipprefixes->childAt(0)->as>(); + const auto* prefix = ipprefixes->childAt(1)->as>(); + + context.applyToSelectedNoThrow(rows, [&](auto row) { + const auto intAddr = ip->valueAt(row); + folly::ByteArray16 addrBytes; /* The address bytes are copied out of the integer value and reversed before constructing folly::IPAddressV6 - presumably converting host little-endian order to network order; TODO confirm. */ + + memcpy(&addrBytes, &intAddr, kIPAddressBytes); + std::reverse(addrBytes.begin(), addrBytes.end()); + folly::IPAddressV6 v6Addr(addrBytes); + + exec::StringWriter resultWriter(flatResult, row); + if (v6Addr.isIPv4Mapped()) { + resultWriter.append(fmt::format( + "{}/{}", v6Addr.createIPv4().str(), prefix->valueAt(row))); + } else { /* NOTE(review): only this branch casts the prefix to uint8_t before formatting; the IPv4 branch above formats the raw value - confirm the difference is intended. */ + resultWriter.append( + fmt::format("{}/{}", v6Addr.str(), (uint8_t)prefix->valueAt(row))); + } + resultWriter.finalize(); + }); + } + + static folly::small_vector splitIpSlashCidr( + const folly::StringPiece& ipSlashCidr) { /* Splits ipSlashCidr on the slash character and returns the pieces. NOTE(review): no call to this helper is visible in this patch - confirm it is needed. */ + folly::small_vector vec; + folly::split('/', ipSlashCidr, vec); + return vec; + } + + static void castFromString( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + BaseVector& result) { /* Parses each selected VARCHAR row as address/prefix, masks the address to the prefix length, and stores address and prefix in children 0 and 1 of the result row vector. Rows that fail validation get a UserError status and are skipped. */ + int128_t intAddr; + folly::ByteArray16 addrBytes; /* intAddr and addrBytes are declared outside the per-row lambda and captured by reference, so the same storage is reused for every row. */ + auto* rowResult = result.as(); + const auto* ipAddressStrings = input.as>(); + + context.applyToSelectedNoThrow(rows, [&](auto row) { + auto ipAddressString = ipAddressStrings->valueAt(row); + + // Folly allows for creation of networks without a "/" so check to make + // sure that we have one. + if (ipAddressString.str().find('/') == std::string::npos) { + context.setStatus( + row, + threadSkipErrorDetails() ? 
Status::UserError() + : Status::UserError( + "Cannot cast value to IPPREFIX: {}", + ipAddressString.str())); + return; + } + + auto const maybeNet = + folly::IPAddress::tryCreateNetwork(ipAddressString, -1, false); /* NOTE(review): the meaning of the -1 and false arguments is not visible in this patch - presumably the default prefix length and a do-not-mask flag; confirm against the folly declaration. */ + + if (maybeNet.hasError()) { + context.setStatus( + row, + threadSkipErrorDetails() ? Status::UserError() + : Status::UserError( + "Cannot cast value to IPPREFIX: {}", + ipAddressString.str())); + return; + } + + auto [ip, prefix] = maybeNet.value(); /* The check below rejects a prefix longer than kIPV4Bits for IPv4 or IPv4-mapped addresses, or longer than kIPV6Bits otherwise. */ + if (prefix > ((ip.isIPv4Mapped() || ip.isV4()) ? kIPV4Bits : kIPV6Bits)) { + context.setStatus( + row, + threadSkipErrorDetails() ? Status::UserError() + : Status::UserError( + "Cannot cast value to IPPREFIX: {}", + ipAddressString.str())); + return; + } + + addrBytes = (ip.isIPv4Mapped() || ip.isV4()) /* IPv4 and IPv4-mapped addresses are masked as IPv4 and converted back to IPv6 bytes; other addresses are masked as IPv6. */ + ? folly::IPAddress::createIPv4(ip) + .mask(prefix) + .createIPv6() + .toByteArray() + : folly::IPAddress::createIPv6(ip).mask(prefix).toByteArray(); + + std::reverse(addrBytes.begin(), addrBytes.end()); /* Reverse the bytes before copying them into the integer, mirroring the reversal done in castToString. */ + memcpy(&intAddr, &addrBytes, kIPAddressBytes); + rowResult->childAt(0)->as>()->set(row, intAddr); + rowResult->childAt(1)->as>()->set(row, prefix); + }); } };