Skip to content

Commit

Permalink
Add ipprefix cast operators for varchar [1/n] (facebookincubator#11460)
Browse files Browse the repository at this point in the history
Summary:

Add support for casting between ipprefix and varchar. Based on facebookincubator#11122.

IPAddress casts will follow in a separate diff.

Differential Revision: D65449935
  • Loading branch information
yuandagits authored and facebook-github-bot committed Nov 7, 2024
1 parent d1bf9da commit 87f9769
Show file tree
Hide file tree
Showing 3 changed files with 341 additions and 8 deletions.
110 changes: 110 additions & 0 deletions velox/functions/prestosql/tests/IPPrefixCastTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h"

namespace facebook::velox::functions::prestosql {

/// Fixture for IPPREFIX cast tests.
class IPPrefixTypeTest : public functions::test::FunctionBaseTest {
 protected:
  /// Evaluates `cast(cast(c0 as ipprefix) as varchar)` with `input` bound to
  /// c0 and returns whatever the expression produced.
  std::optional<std::string> castToVarchar(
      const std::optional<std::string>& input) {
    return evaluateOnce<std::string>(
        "cast(cast(c0 as ipprefix) as varchar)", input);
  }
};

TEST_F(IPPrefixTypeTest, castToVarchar) {
  // One varchar -> ipprefix -> varchar round trip and the text it must yield.
  struct RoundTrip {
    const char* input;
    const char* expected;
  };

  const RoundTrip roundTrips[] = {
      // IPv4-mapped IPv6 input is rendered in IPv4 form.
      {"::ffff:1.2.3.4/24", "1.2.3.0/24"},
      // IPv4: bits past the prefix length are cleared.
      {"192.168.0.0/24", "192.168.0.0/24"},
      {"255.2.3.4/0", "0.0.0.0/0"},
      {"255.2.3.4/1", "128.0.0.0/1"},
      {"255.2.3.4/2", "192.0.0.0/2"},
      {"255.2.3.4/4", "240.0.0.0/4"},
      {"1.2.3.4/8", "1.0.0.0/8"},
      {"1.2.3.4/16", "1.2.0.0/16"},
      {"1.2.3.4/24", "1.2.3.0/24"},
      {"1.2.3.255/25", "1.2.3.128/25"},
      {"1.2.3.255/26", "1.2.3.192/26"},
      {"1.2.3.255/28", "1.2.3.240/28"},
      {"1.2.3.255/30", "1.2.3.252/30"},
      {"1.2.3.255/32", "1.2.3.255/32"},
      // IPv6: output is lower-case with zero runs compressed.
      {"2001:0db8:0000:0000:0000:ff00:0042:8329/128",
       "2001:db8::ff00:42:8329/128"},
      {"2001:db8::ff00:42:8329/128", "2001:db8::ff00:42:8329/128"},
      {"2001:db8:0:0:1:0:0:1/128", "2001:db8::1:0:0:1/128"},
      {"2001:db8:0:0:1::1/128", "2001:db8::1:0:0:1/128"},
      {"2001:db8::1:0:0:1/128", "2001:db8::1:0:0:1/128"},
      {"2001:DB8::FF00:ABCD:12EF/128", "2001:db8::ff00:abcd:12ef/128"},
      // IPv6: bits past the prefix length are cleared.
      {"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/0", "::/0"},
      {"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/1", "8000::/1"},
      {"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/2", "c000::/2"},
      {"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/4", "f000::/4"},
      {"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/8", "ff00::/8"},
      {"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/16", "ffff::/16"},
      {"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/32", "ffff:ffff::/32"},
      {"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/48", "ffff:ffff:ffff::/48"},
      {"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/64",
       "ffff:ffff:ffff:ffff::/64"},
      {"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/80",
       "ffff:ffff:ffff:ffff:ffff::/80"},
      {"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/96",
       "ffff:ffff:ffff:ffff:ffff:ffff::/96"},
      {"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/112",
       "ffff:ffff:ffff:ffff:ffff:ffff:ffff:0/112"},
      {"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/120",
       "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff00/120"},
      {"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/124",
       "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff0/124"},
      {"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/126",
       "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffc/126"},
      {"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/127",
       "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe/127"},
      {"ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128",
       "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128"},
  };
  for (const auto& roundTrip : roundTrips) {
    // Identify the failing input, since every case shares this source line.
    SCOPED_TRACE(roundTrip.input);
    EXPECT_EQ(castToVarchar(roundTrip.input), roundTrip.expected);
  }

  // Host names, malformed addresses, a missing "/prefix" part and an
  // over-long IPv4 prefix must all be rejected with a user error.
  const char* const invalidInputs[] = {
      "facebook.com/32",
      "localhost/32",
      "2001:db8::1::1/128",
      "2001:zxy::1::1/128",
      "789.1.1.1/32",
      "192.1.1.1",
      "192.1.1.1/128",
  };
  for (const char* invalidInput : invalidInputs) {
    SCOPED_TRACE(invalidInput);
    EXPECT_THROW(castToVarchar(invalidInput), VeloxUserError);
  }
}
} // namespace facebook::velox::functions::prestosql
105 changes: 98 additions & 7 deletions velox/functions/prestosql/types/IPPrefixType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <folly/small_vector.h>

#include "velox/expression/CastExpr.h"
#include "velox/expression/VectorWriters.h"
#include "velox/functions/prestosql/types/IPPrefixType.h"

namespace facebook::velox {
Expand All @@ -26,11 +26,21 @@ namespace {
class IPPrefixCastOperator : public exec::CastOperator {
public:
// Returns true when a value of type `other` can be cast to IPPREFIX.
// Only VARCHAR sources are accepted here. The unconditional `return false;`
// that used to precede the switch made the VARCHAR case unreachable and has
// been removed.
bool isSupportedFromType(const TypePtr& other) const override {
  switch (other->kind()) {
    case TypeKind::VARCHAR:
      return true;
    default:
      return false;
  }
}

// Returns true when an IPPREFIX value can be cast to type `other`.
// Only VARCHAR targets are accepted here. The unconditional `return false;`
// that used to precede the switch made the VARCHAR case unreachable and has
// been removed.
bool isSupportedToType(const TypePtr& other) const override {
  switch (other->kind()) {
    case TypeKind::VARCHAR:
      return true;
    default:
      return false;
  }
}

void castTo(
Expand All @@ -40,8 +50,14 @@ class IPPrefixCastOperator : public exec::CastOperator {
const TypePtr& resultType,
VectorPtr& result) const override {
context.ensureWritable(rows, resultType, result);
VELOX_NYI(
"Cast from {} to IPPrefix not yet supported", input.type()->toString());
switch (input.typeKind()) {
case TypeKind::VARCHAR:
return castFromString(input, context, rows, *result);
default:
VELOX_NYI(
"Cast from {} to IPPrefix not yet supported",
input.type()->toString());
}
}

void castFrom(
Expand All @@ -51,8 +67,83 @@ class IPPrefixCastOperator : public exec::CastOperator {
const TypePtr& resultType,
VectorPtr& result) const override {
context.ensureWritable(rows, resultType, result);
VELOX_NYI(
"Cast from IPPrefix to {} not yet supported", resultType->toString());
switch (resultType->kind()) {
case TypeKind::VARCHAR:
return castToString(input, context, rows, *result);
default:
VELOX_NYI(
"Cast from IPPrefix to {} not yet supported",
resultType->toString());
}
}

private:
static void castToString(
const BaseVector& input,
exec::EvalCtx& context,
const SelectivityVector& rows,
BaseVector& result) {
auto* flatResult = result.as<FlatVector<StringView>>();
auto rowVector = input.as<RowVector>();
auto rowType = rowVector->type();
const auto* ipaddr = rowVector->childAt(ipaddress::kIpRowIndex)
->as<SimpleVector<int128_t>>();
const auto* prefix = rowVector->childAt(ipaddress::kIpPrefixRowIndex)
->as<SimpleVector<int8_t>>();
context.applyToSelectedNoThrow(rows, [&](auto row) {
const auto ipAddrVal = ipaddr->valueAt(row);
// The string representation of the last byte needs
// to be unsigned
const uint8_t prefixVal = prefix->valueAt(row);

// Copy the first 16 bytes into a ByteArray16.
folly::ByteArray16 addrBytes;
memcpy(&addrBytes, &ipAddrVal, ipaddress::kIPAddressBytes);
// Reverse the bytes to get the correct order. Similar to
// IPAddressType. We assume we're ALWAYS on a little endian machine.
// Note: for big endian, we should not reverse the bytes.
std::reverse(addrBytes.begin(), addrBytes.end());
// // Construct a V6 address from the ByteArray16.
folly::IPAddressV6 v6Addr(addrBytes);

// Inline func to get string for ipv4 or ipv6 string
const auto ipString =
(v6Addr.isIPv4Mapped()) ? v6Addr.createIPv4().str() : v6Addr.str();

// Format of string is {ipString}/{mask}
auto stringRet = fmt::format("{}/{}", ipString, prefixVal);

// Write the string to the result vector
exec::StringWriter<false> result(flatResult, row);
result.append(stringRet);
result.finalize();
});
}

// Parses every selected row of `input` (read as a vector of strings in
// "<ip>/<prefix length>" form) and writes the resulting (address, prefix
// length) pair into `result`, which is written as a RowVector.
//
// A row that fails to parse gets the parse error recorded through
// `context.setStatus` and is then skipped; nothing is written for it.
static void castFromString(
    const BaseVector& input,
    exec::EvalCtx& context,
    const SelectivityVector& rows,
    BaseVector& result) {
  auto* rowVectorResult = result.as<RowVector>();
  const auto* ipPrefixStrings = input.as<SimpleVector<StringView>>();

  context.applyToSelectedNoThrow(rows, [&](auto row) {
    auto ipAddressStringView = ipPrefixStrings->valueAt(row);
    auto tryIpPrefix = ipaddress::tryParseIpPrefixString(ipAddressStringView);
    if (tryIpPrefix.hasError()) {
      context.setStatus(row, std::move(tryIpPrefix.error()));
      // Stop here: the Expected holds an error (now moved-from), so calling
      // value() below would be invalid and would replace the recorded status
      // with an unrelated bad-access failure.
      return;
    }

    const auto& ipPrefix = tryIpPrefix.value();
    auto writer = exec::VectorWriter<Row<int128_t, int8_t>>();
    writer.init(*rowVectorResult);
    writer.setOffset(row);
    auto& rowWriter = writer.current();
    rowWriter.get_writer_at<0>() = ipPrefix.first;
    rowWriter.get_writer_at<1>() = ipPrefix.second;
    writer.commit();
  });
}
};

Expand Down
134 changes: 133 additions & 1 deletion velox/functions/prestosql/types/IPPrefixType.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,145 @@
*/
#pragma once

#include <folly/small_vector.h>

#include "velox/common/base/Status.h"
#include "velox/functions/prestosql/types/IPAddressType.h"
#include "velox/type/SimpleFunctionApi.h"
#include "velox/type/Type.h"

namespace facebook::velox {

namespace ipaddress {
constexpr uint8_t kIPV4Bits = 32;
constexpr uint8_t kIPV6Bits = 128;
constexpr int kIPPrefixLengthIndex = 16;
constexpr int kIPPrefixBytes = 17;
constexpr auto kIpRowIndex = "ip";
constexpr auto kIpPrefixRowIndex = "prefix";

namespace {
auto splitIpSlashCidr(folly::StringPiece ipSlashCidr) {
folly::small_vector<folly::StringPiece, 2> vec;
folly::split('/', ipSlashCidr, vec);
return vec;
}

Status handleFailedToCreateNetworkError(
folly::StringPiece ipaddress,
folly::CIDRNetworkError error) {
if (threadSkipErrorDetails()) {
return Status::UserError();
}

switch (error) {
case folly::CIDRNetworkError::INVALID_DEFAULT_CIDR: {
return Status::UserError(
"defaultCidr must be <= std::numeric_limits<uint8_t>::max()");
}
case folly::CIDRNetworkError::INVALID_IP_SLASH_CIDR: {
return Status::UserError(
"Invalid IP address string received. Received string:{} of length:{}",
ipaddress,
ipaddress.size());
}
case folly::CIDRNetworkError::INVALID_IP: {
const auto vec = splitIpSlashCidr(ipaddress);
return Status::UserError(
"Invalid IP address '{}'", vec.size() > 0 ? vec.at(0) : "");
}
case folly::CIDRNetworkError::INVALID_CIDR: {
auto const vec = splitIpSlashCidr(ipaddress);
return Status::UserError(
"Mask value '{}' not a valid mask", vec.size() > 1 ? vec.at(1) : "");
}
case folly::CIDRNetworkError::CIDR_MISMATCH: {
const auto vec = splitIpSlashCidr(ipaddress);
if (!vec.empty()) {
const auto subnet = folly::IPAddress::tryFromString(vec.at(0)).value();
return Status::UserError(
"CIDR value '{}' is > network bit count '{}'",
vec.size() == 2 ? vec.at(1)
: folly::to<std::string>(
subnet.isV4() ? ipaddress::kIPV4Bits
: ipaddress::kIPV6Bits),
subnet.bitCount());
}
return Status::UserError(
"Invalid IP address of size:{} received", ipaddress.size());
}
default:
return Status::UserError(
"Unknown parsing error when parsing IP address: {} ", ipaddress);
}
}
} // namespace

/// Parses an "<ip>/<prefix length>" string into the (address, prefix length)
/// pair used to store an IPPREFIX value.
///
/// The address is masked to the prefix length. IPv4 and IPv4-mapped
/// addresses are limited to kIPV4Bits and stored in their IPv6 form; all
/// other addresses are limited to kIPV6Bits. The 16 address bytes are
/// reversed before being copied into the int128_t.
///
/// @return The parsed pair, or a user-error Status when the '/' is missing,
/// folly rejects the text, or the prefix length exceeds the limit. The
/// Status carries no message when threadSkipErrorDetails() is true.
inline folly::Expected<std::pair<int128_t, int8_t>, Status>
tryParseIpPrefixString(folly::StringPiece ipprefixString) {
  // Builds the error shared by the IPv4 and IPv6 prefix-length checks.
  const auto prefixTooLong = [](uint8_t prefixLength, uint8_t maxBits) {
    return folly::makeUnexpected(
        threadSkipErrorDetails()
            ? Status::UserError()
            : Status::UserError(
                  "CIDR value '{}' is > network bit count '{}'",
                  prefixLength,
                  maxBits));
  };

  // A bare address without "/<prefix length>" is rejected up front.
  if (ipprefixString.find('/') == std::string::npos) {
    return folly::makeUnexpected(
        threadSkipErrorDetails()
            ? Status::UserError()
            : Status::UserError(
                  "Invalid CIDR IP address specified. Expected IP/PREFIX format, got: {}",
                  ipprefixString));
  }

  // Masking is requested off here; it is applied below.
  auto parsedNetwork = folly::IPAddress::tryCreateNetwork(
      ipprefixString, /*defaultCidr*/ -1, /*applyMask*/ false);
  if (parsedNetwork.hasError()) {
    return folly::makeUnexpected(handleFailedToCreateNetworkError(
        ipprefixString, std::move(parsedNetwork.error())));
  }

  const auto& network = parsedNetwork.value();
  const auto& address = network.first;
  const auto prefixLength = network.second;

  folly::ByteArray16 addrBytes;
  if (address.isIPv4Mapped() || address.isV4()) {
    // IPv4 (plain or mapped): the prefix length must be <= 32.
    if (prefixLength > ipaddress::kIPV4Bits) {
      return prefixTooLong(prefixLength, ipaddress::kIPV4Bits);
    }
    // Mask as IPv4, then convert to IPv6 for storage.
    addrBytes = folly::IPAddress::createIPv4(address)
                    .mask(prefixLength)
                    .createIPv6()
                    .toByteArray();
  } else {
    // IPv6: the prefix length must be <= 128.
    if (prefixLength > ipaddress::kIPV6Bits) {
      return prefixTooLong(prefixLength, ipaddress::kIPV6Bits);
    }
    addrBytes =
        folly::IPAddress::createIPv6(address).mask(prefixLength).toByteArray();
  }

  // Similar to IPAddressType, assume Velox is always on little-endian
  // systems and reverse the bytes before copying them into the integer.
  int128_t intAddr;
  std::reverse(addrBytes.begin(), addrBytes.end());
  memcpy(&intAddr, &addrBytes, ipaddress::kIPAddressBytes);
  return std::make_pair(intAddr, prefixLength);
}
}; // namespace ipaddress

class IPPrefixType : public RowType {
IPPrefixType() : RowType({"ip", "prefix"}, {HUGEINT(), TINYINT()}) {}
IPPrefixType()
: RowType(
{ipaddress::kIpRowIndex, ipaddress::kIpPrefixRowIndex},
{HUGEINT(), TINYINT()}) {}

public:
static const std::shared_ptr<const IPPrefixType>& get() {
Expand Down

0 comments on commit 87f9769

Please sign in to comment.