Skip to content

Commit

Permalink
Merge pull request #96 from Altinity/TMP/fix_20.3_bloom_filter_hasAny
Browse files Browse the repository at this point in the history
[port of ClickHouse#24900 to Altinity 20.3] Added support for the hasAny function to the bloom_filter index.

Validated by manual build and manual test run
  • Loading branch information
Enmk authored Jun 7, 2021
2 parents db57016 + 22bf534 commit f3e28cb
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 9 deletions.
48 changes: 39 additions & 9 deletions src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,12 @@ bool MergeTreeIndexConditionBloomFilter::alwaysUnknownOrTrue() const
rpn_stack.push_back(true);
}
else if (element.function == RPNElement::FUNCTION_EQUALS
|| element.function == RPNElement::FUNCTION_NOT_EQUALS
|| element.function == RPNElement::FUNCTION_HAS
|| element.function == RPNElement::FUNCTION_IN
|| element.function == RPNElement::FUNCTION_NOT_IN
|| element.function == RPNElement::ALWAYS_FALSE)
|| element.function == RPNElement::FUNCTION_NOT_EQUALS
|| element.function == RPNElement::FUNCTION_HAS
|| element.function == RPNElement::FUNCTION_HAS_ANY
|| element.function == RPNElement::FUNCTION_IN
|| element.function == RPNElement::FUNCTION_NOT_IN
|| element.function == RPNElement::ALWAYS_FALSE)
{
rpn_stack.push_back(false);
}
Expand Down Expand Up @@ -153,7 +154,8 @@ bool MergeTreeIndexConditionBloomFilter::mayBeTrueOnGranule(const MergeTreeIndex
|| element.function == RPNElement::FUNCTION_NOT_IN
|| element.function == RPNElement::FUNCTION_EQUALS
|| element.function == RPNElement::FUNCTION_NOT_EQUALS
|| element.function == RPNElement::FUNCTION_HAS)
|| element.function == RPNElement::FUNCTION_HAS
|| element.function == RPNElement::FUNCTION_HAS_ANY)
{
bool match_rows = true;
const auto & predicate = element.predicate;
Expand Down Expand Up @@ -216,7 +218,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseAtomAST(const ASTPtr & node, Bl
const_value.getType() == Field::Types::Float64)
{
/// Zero in all types is represented in memory the same way as in UInt64.
out.function = const_value.get<UInt64>() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE;
out.function = const_value.reinterpret<UInt64>() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE;
return true;
}
}
Expand All @@ -234,7 +236,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseAtomAST(const ASTPtr & node, Bl
if (const auto & prepared_set = getPreparedSet(arguments[1]))
return traverseASTIn(function->name, arguments[0], prepared_set, out);
}
else if (function->name == "equals" || function->name == "notEquals" || function->name == "has")
else if (function->name == "equals" || function->name == "notEquals" || function->name == "has" || function->name == "hasAny")
{
Field const_value;
DataTypePtr const_type;
Expand Down Expand Up @@ -322,10 +324,38 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
Field converted_field = convertFieldToType(value_field, *actual_type, value_type.get());
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), converted_field)));
}
else if (function_name == "hasAny")
{
if (!array_type)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be an array.", function_name);

if (value_field.getType() != Field::Types::Array)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} must be an array.", function_name);

const DataTypePtr actual_type = BloomFilter::getPrimitiveType(array_type->getNestedType());
ColumnPtr column;
{
const auto is_nullable = actual_type->isNullable();
auto mutable_column = actual_type->createColumn();

for (const auto & f : value_field.get<Array>())
{
if (f.isNull() && !is_nullable)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} can't contain NULLs.", function_name);

mutable_column->insert(convertFieldToType(f, *actual_type, value_type.get()));
}

column = std::move(mutable_column);
}

out.function = RPNElement::FUNCTION_HAS_ANY;
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(actual_type, column, 0, column->size())));
}
else
{
if (array_type)
throw Exception("An array type of bloom_filter supports only has() function.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
throw Exception("An array type of bloom_filter supports only has() and hasAny() functions.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

out.function = function_name == "equals" ? RPNElement::FUNCTION_EQUALS : RPNElement::FUNCTION_NOT_EQUALS;
const DataTypePtr actual_type = BloomFilter::getPrimitiveType(index_type);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class MergeTreeIndexConditionBloomFilter : public IMergeTreeIndexCondition
FUNCTION_EQUALS,
FUNCTION_NOT_EQUALS,
FUNCTION_HAS,
FUNCTION_HAS_ANY,
FUNCTION_IN,
FUNCTION_NOT_IN,
FUNCTION_UNKNOWN, /// Can take any value.
Expand Down
Empty file.
30 changes: 30 additions & 0 deletions tests/queries/0_stateless/01888_bloom_filter_hasAny.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
-- Exercises `hasAny` (alongside `has`) against a bloom_filter skipping index on an Array(Int64) column.
-- Every SELECT ends with FORMAT Null, so successful queries emit no rows; the only
-- explicit expectations in this script are the `serverError` annotations.
CREATE TABLE bftest (
k Int64,
x Array(Int64),
index ix1(x) TYPE bloom_filter GRANULARITY 3
)
Engine=MergeTree
ORDER BY k;

-- 1000 rows; each row gets an array of 10 pseudo-random values in [0, 565656).
INSERT INTO bftest SELECT number, arrayMap(i->rand64()%565656, range(10)) FROM numbers(1000);

-- NOTE(review): presumably makes each query below fail unless index ix1 is actually used -- confirm against the settings documentation.
SET force_data_skipping_indices='ix1';
-- Baseline with two `has` calls, then `hasAny` with a two-element, an empty and a single-element constant array.
SELECT count() FROM bftest WHERE has (x, 42) or has(x, -42) FORMAT Null;
SELECT count() FROM bftest WHERE hasAny(x, [42,-42]) FORMAT Null;
SELECT count() FROM bftest WHERE hasAny(x, []) FORMAT Null;
SELECT count() FROM bftest WHERE hasAny(x, [1]) FORMAT Null;

-- can't use bloom_filter with `hasAny` on non-constant arguments (just like `has`)
SELECT count() FROM bftest WHERE hasAny(x, materialize([1,2,3])) FORMAT Null; -- { serverError 277 }

-- NULL elements in the constant array are rejected: the indexed column is Array(Int64), i.e. its elements are not Nullable.
SELECT count() FROM bftest WHERE hasAny(x, [NULL,-42]) FORMAT Null; -- { serverError 43 }
SELECT count() FROM bftest WHERE hasAny(x, [0,NULL]) FORMAT Null; -- { serverError 43 }

-- Constant arrays whose elements have incompatible types are rejected.
SELECT count() FROM bftest WHERE hasAny(x, [[123], -42]) FORMAT Null; -- { serverError 386 }
SELECT count() FROM bftest WHERE hasAny(x, [toDecimal32(123, 3), 2]) FORMAT Null; -- { serverError 53 }

-- Bug discovered by AST fuzzer (fixed, shouldn't crash): a floating-point constant OR-ed with an index predicate.
SELECT 1 FROM bftest WHERE has(x, -0.) OR 0. FORMAT Null;
SELECT count() FROM bftest WHERE hasAny(x, [0, 1]) OR 0. FORMAT Null;

0 comments on commit f3e28cb

Please sign in to comment.