Skip to content

Commit

Permalink
Merge pull request ClickHouse#62085 from JackyWoo/support_minmax_inde…
Browse files Browse the repository at this point in the history
…x_for_pointInPolygon

Support minmax index for `pointInPolygon`
  • Loading branch information
alexey-milovidov authored Sep 28, 2024
2 parents cbfbdbf + 19bbc17 commit 6126310
Show file tree
Hide file tree
Showing 6 changed files with 239 additions and 9 deletions.
2 changes: 2 additions & 0 deletions src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,8 @@ static void buildORCSearchArgumentImpl(
}
/// There is no optimization with space-filling curves for ORC.
case KeyCondition::RPNElement::FUNCTION_ARGS_IN_HYPERRECTANGLE:
/// There is no optimization with pointInPolygon for ORC.
case KeyCondition::RPNElement::FUNCTION_POINT_IN_POLYGON:
case KeyCondition::RPNElement::FUNCTION_UNKNOWN:
{
builder.literal(orc::TruthValue::YES_NO_NULL);
Expand Down
152 changes: 146 additions & 6 deletions src/Storages/MergeTree/KeyCondition.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#include <Storages/MergeTree/KeyCondition.h>
#include <Storages/MergeTree/BoolMask.h>
#include <Core/PlainRanges.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/FieldToDataType.h>
#include <DataTypes/getLeastSupertype.h>
#include <DataTypes/Utils.h>
Expand All @@ -17,8 +17,10 @@
#include <Functions/indexHint.h>
#include <Functions/CastOverloadResolver.h>
#include <Functions/IFunction.h>
#include <Functions/geometryConverters.h>
#include <Common/FieldVisitorToString.h>
#include <Common/HilbertUtils.h>
#include <Common/FieldVisitorConvertToNumber.h>
#include <Common/MortonUtils.h>
#include <Common/typeid_cast.h>
#include <DataTypes/DataTypeTuple.h>
Expand All @@ -28,18 +30,20 @@
#include <Interpreters/convertFieldToType.h>
#include <Interpreters/Set.h>
#include <Parsers/queryToString.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSelectQuery.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <Storages/MergeTree/MergeTreeIndexUtils.h>

#include <algorithm>
#include <cassert>
#include <stack>
#include <limits>

#include <boost/geometry.hpp>
#include <boost/geometry/geometries/polygon.hpp>
#include <boost/geometry/geometries/multi_polygon.hpp>


namespace DB
{
Expand Down Expand Up @@ -459,12 +463,20 @@ const KeyCondition::AtomMap KeyCondition::atom_map
out.range = Range::createWholeUniverseWithoutNull();
return true;
}
},
{
"pointInPolygon",
[] (RPNElement & out, const Field &)
{
out.function = RPNElement::FUNCTION_POINT_IN_POLYGON;
return true;
}
}
};

static const std::set<std::string_view> always_relaxed_atom_functions = {"match"};
static const std::set<KeyCondition::RPNElement::Function> always_relaxed_atom_elements
= {KeyCondition::RPNElement::FUNCTION_UNKNOWN, KeyCondition::RPNElement::FUNCTION_ARGS_IN_HYPERRECTANGLE};
= {KeyCondition::RPNElement::FUNCTION_UNKNOWN, KeyCondition::RPNElement::FUNCTION_ARGS_IN_HYPERRECTANGLE, KeyCondition::RPNElement::FUNCTION_POINT_IN_POLYGON};

/// Functions with range inversion cannot be relaxed. It will become stricter instead.
/// For example:
Expand Down Expand Up @@ -1850,6 +1862,54 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme
if (atom_map.find(func_name) == std::end(atom_map))
return false;

auto analyze_point_in_polygon = [&, this]() -> bool
{
/// pointInPolygon((x, y), [(0, 0), (8, 4), (5, 8), (0, 2)])
if (func.getArgumentAt(0).tryGetConstant(const_value, const_type))
return false;
if (!func.getArgumentAt(1).tryGetConstant(const_value, const_type))
return false;

const auto atom_it = atom_map.find(func_name);

/// Analyze (x, y)
RPNElement::MultiColumnsFunctionDescription column_desc;
column_desc.function_name = func_name;
auto first_argument = func.getArgumentAt(0).toFunctionNode();

if (first_argument.getArgumentsSize() != 2 || first_argument.getFunctionName() != "tuple")
return false;

for (size_t i = 0; i < 2; ++i)
{
auto name = first_argument.getArgumentAt(i).getColumnName();
auto it = key_columns.find(name);
if (it == key_columns.end())
return false;
column_desc.key_columns.push_back(name);
column_desc.key_column_positions.push_back(key_columns[name]);
}
out.point_in_polygon_column_description = column_desc;

/// Analyze [(0, 0), (8, 4), (5, 8), (0, 2)]
chassert(WhichDataType(const_type).isArray());
for (const auto & elem : const_value.safeGet<Array>())
{
if (elem.getType() != Field::Types::Tuple)
return false;

const auto & elem_tuple = elem.safeGet<Tuple>();
if (elem_tuple.size() != 2)
return false;

auto x = applyVisitor(FieldVisitorConvertToNumber<Float64>(), elem_tuple[0]);
auto y = applyVisitor(FieldVisitorConvertToNumber<Float64>(), elem_tuple[1]);
out.polygon.outer().push_back({x, y});
}
boost::geometry::correct(out.polygon);
return atom_it->second(out, const_value);
};

if (always_relaxed_atom_functions.contains(func_name))
relaxed = true;

Expand Down Expand Up @@ -1879,6 +1939,11 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme
else
return false;
}
else if (func_name == "pointInPolygon")
{
/// Case1 no holes in polygon
return analyze_point_in_polygon();
}
else if (func.getArgumentAt(1).tryGetConstant(const_value, const_type))
{
/// If the const operand is null, the atom will be always false
Expand Down Expand Up @@ -2047,7 +2112,15 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme

}
else
return false;
{
if (func_name == "pointInPolygon")
{
/// Case2 has holes in polygon, when checking skip index, the hole will be ignored.
return analyze_point_in_polygon();
}
else
return false;
}

const auto atom_it = atom_map.find(func_name);

Expand Down Expand Up @@ -3059,6 +3132,46 @@ BoolMask KeyCondition::checkInHyperrectangle(
* represented by a set of hyperrectangles.
*/
}
else if (element.function == RPNElement::FUNCTION_POINT_IN_POLYGON)
{
/** There are 2 kinds of polygons:
* 1. The polygon derived from the minmax index
* 2. The polygon provided by the user
*
* Polygon derived from the minmax index:
* For hyperrectangle [1, 2] × [3, 4] we can create a polygon with 4 points: (1, 3), (1, 4), (2, 4), (2, 3)
*
* Algorithm:
* Check whether the 2 polygons intersect. If they do, return {true, true}, otherwise return {false, true}.
*/
const auto & key_column_positions = element.point_in_polygon_column_description->key_column_positions;

Float64 x_min = applyVisitor(FieldVisitorConvertToNumber<Float64>(), hyperrectangle[key_column_positions[0]].left);
Float64 x_max = applyVisitor(FieldVisitorConvertToNumber<Float64>(), hyperrectangle[key_column_positions[0]].right);
Float64 y_min = applyVisitor(FieldVisitorConvertToNumber<Float64>(), hyperrectangle[key_column_positions[1]].left);
Float64 y_max = applyVisitor(FieldVisitorConvertToNumber<Float64>(), hyperrectangle[key_column_positions[1]].right);

if (unlikely(isNaN(x_min) || isNaN(x_max) || isNaN(y_min) || isNaN(y_max)))
{
rpn_stack.emplace_back(true, true);
continue;
}

using Point = boost::geometry::model::d2::point_xy<Float64>;
using Polygon = boost::geometry::model::polygon<Point>;
Polygon polygon_by_minmax_index;
polygon_by_minmax_index.outer().emplace_back(x_min, y_min);
polygon_by_minmax_index.outer().emplace_back(x_min, y_max);
polygon_by_minmax_index.outer().emplace_back(x_max, y_max);
polygon_by_minmax_index.outer().emplace_back(x_max, y_min);

/// Close ring
boost::geometry::correct(polygon_by_minmax_index);

/// Because the polygon may have a hole so the "can_be_false" should always be true.
rpn_stack.emplace_back(
boost::geometry::intersects(polygon_by_minmax_index, element.polygon), true);
}
else if (
element.function == RPNElement::FUNCTION_IS_NULL
|| element.function == RPNElement::FUNCTION_IS_NOT_NULL)
Expand Down Expand Up @@ -3138,7 +3251,16 @@ bool KeyCondition::mayBeTrueInRange(
String KeyCondition::RPNElement::toString() const
{
if (argument_num_of_space_filling_curve)
return toString(fmt::format("argument {} of column {}", *argument_num_of_space_filling_curve, key_column), true);
return toString(fmt::format("argument {} of column {}", *argument_num_of_space_filling_curve, key_column), false);
else if (point_in_polygon_column_description)
{
return toString(
fmt::format(
"column ({}, {})",
point_in_polygon_column_description->key_columns[0],
point_in_polygon_column_description->key_columns[1]),
false);
}
else
return toString(fmt::format("column {}", key_column), true);
}
Expand Down Expand Up @@ -3218,6 +3340,23 @@ String KeyCondition::RPNElement::toString(std::string_view column_name, bool pri
buf << ")";
return buf.str();
}
case FUNCTION_POINT_IN_POLYGON:
{
    /// Printed as "(<column> in [(x1, y1), (x2, y2), ...])" from the outer ring of the polygon.
    /// Bind by reference: the ring is only read here, so there is no need to copy it.
    const auto & points_in_polygon = polygon.outer();
    buf << "(";
    print_wrapped_column(buf);
    buf << " in ";
    buf << "[";
    for (size_t i = 0; i < points_in_polygon.size(); ++i)
    {
        /// Separator goes between points, not before the first one.
        if (i != 0)
            buf << ", ";
        buf << "(" << points_in_polygon[i].x() << ", " << points_in_polygon[i].y() << ")";
    }
    buf << "]";
    buf << ")";
    return buf.str();
}
case FUNCTION_IS_NULL:
case FUNCTION_IS_NOT_NULL:
{
Expand Down Expand Up @@ -3269,6 +3408,7 @@ bool KeyCondition::unknownOrAlwaysTrue(bool unknown_any) const
|| element.function == RPNElement::FUNCTION_IN_SET
|| element.function == RPNElement::FUNCTION_NOT_IN_SET
|| element.function == RPNElement::FUNCTION_ARGS_IN_HYPERRECTANGLE
|| element.function == RPNElement::FUNCTION_POINT_IN_POLYGON
|| element.function == RPNElement::FUNCTION_IS_NULL
|| element.function == RPNElement::FUNCTION_IS_NOT_NULL
|| element.function == RPNElement::ALWAYS_FALSE)
Expand Down
23 changes: 20 additions & 3 deletions src/Storages/MergeTree/KeyCondition.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@

#include <optional>

#include <boost/geometry.hpp>

#include <Core/SortDescription.h>
#include <Core/Range.h>
#include <Core/PlainRanges.h>

#include <DataTypes/Serializations/ISerialization.h>

#include <Parsers/ASTExpressionList.h>

#include <Interpreters/Set.h>
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/TreeRewriter.h>
Expand Down Expand Up @@ -168,6 +167,9 @@ class KeyCondition
/// this expression will be analyzed and then represented by following:
/// args in hyperrectangle [10, 20] × [20, 30].
FUNCTION_ARGS_IN_HYPERRECTANGLE,
/// Special for pointInPolygon to utilize minmax indices.
/// For example: pointInPolygon((x, y), [(0, 0), (0, 2), (2, 2), (2, 0)])
FUNCTION_POINT_IN_POLYGON,
/// Can take any value.
FUNCTION_UNKNOWN,
/// Operators of the logical expression.
Expand Down Expand Up @@ -206,6 +208,21 @@ class KeyCondition
/// For FUNCTION_ARGS_IN_HYPERRECTANGLE
Hyperrectangle space_filling_curve_args_hyperrectangle;

/// For FUNCTION_POINT_IN_POLYGON.
/// A function like 'pointInPolygon' involves several key columns at once.
/// This struct describes the column part of such a function, e.g. (x, y) in 'pointInPolygon((x, y), [...])'.
struct MultiColumnsFunctionDescription
{
/// Name of the analyzed function, e.g. "pointInPolygon".
String function_name;
/// Position of each column within the key, in argument order (x first, then y);
/// used to pick the matching ranges out of the hyperrectangle. Parallel to key_columns.
std::vector<size_t> key_column_positions;
/// Names of the key columns, in argument order (x first, then y).
std::vector<String> key_columns;
};
std::optional<MultiColumnsFunctionDescription> point_in_polygon_column_description;

using Point = boost::geometry::model::d2::point_xy<Float64>;
using Polygon = boost::geometry::model::polygon<Point>;
Polygon polygon;

MonotonicFunctionsChain monotonic_functions_chain;
};

Expand Down
Loading

0 comments on commit 6126310

Please sign in to comment.