Skip to content

Commit

Permalink
fix(query): fix match function panic in native format (databendlabs#1…
Browse files Browse the repository at this point in the history
…5402)

* fix(query): fix match function panic in native format

* fix tests
  • Loading branch information
b41sh authored May 6, 2024
1 parent ff5c3f4 commit 62ff0b9
Show file tree
Hide file tree
Showing 5 changed files with 185 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ use std::sync::Arc;
use databend_common_exception::ErrorCode;
use databend_common_exception::Result;
use databend_common_expression::TableSchemaRef;
use databend_common_expression::SEARCH_MATCHED_COL_NAME;
use databend_common_expression::SEARCH_SCORE_COL_NAME;

use crate::optimizer::extract::Matcher;
use crate::optimizer::rule::Rule;
Expand Down Expand Up @@ -77,6 +79,13 @@ impl RulePushDownPrewhere {
.index_of(column.column.column_name.as_str())
.is_ok())
{
if column.column.column_name == SEARCH_SCORE_COL_NAME
|| column.column.column_name == SEARCH_MATCHED_COL_NAME
{
return Err(ErrorCode::StorageUnsupported(
"Prewhere don't support search functions".to_string(),
));
}
self.columns.insert(column.column.index);
return Ok(());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,50 +188,6 @@ SELECT id, score(), content FROM t WHERE match(content, 'fly')
----


# Test pruning status
#
# row_per_block=2 with 10 inserted rows yields the 5 partitions reported as
# "partitions total: 5" in the plan below, so block-level pruning by the
# inverted index is observable.

statement ok
CREATE TABLE t_small_blocks (id int, content string) row_per_block=2

# The index is created before the INSERT and uses the 'chinese' tokenizer.
statement ok
CREATE INVERTED INDEX IF NOT EXISTS inverted_idx2 ON t_small_blocks(content) tokenizer = 'chinese'

statement ok
INSERT INTO t_small_blocks VALUES
(1, 'The quick brown fox jumps over the lazy dog'),
(2, 'A picture is worth a thousand words'),
(3, 'The early bird catches the worm'),
(4, 'Actions speak louder than words'),
(5, 'Time flies like an arrow; fruit flies like a banana'),
(6, 'Beauty is in the eye of the beholder'),
(7, 'When life gives you lemons, make lemonade'),
(8, 'Put all your eggs in one basket'),
(9, 'You can not judge a book by its cover'),
(10, 'An apple a day keeps the doctor away')

# Phrase query on the indexed column: only row 3 contains "early bird".
query IT
SELECT id, content FROM t_small_blocks WHERE query('content:"early bird"')
----
3 The early bird catches the worm

# The plan must show the inverted index pruning 5 blocks down to 1
# ("inverted pruning: 5 to 1") and only 2 rows being read.
query T
EXPLAIN SELECT id, content FROM t_small_blocks WHERE query('content:"early bird"')
----
Filter
├── output columns: [t_small_blocks.id (#0), t_small_blocks.content (#1)]
├── filters: [t_small_blocks._search_matched (#2)]
├── estimated rows: 10.00
└── TableScan
├── table: default.test_index.t_small_blocks
├── output columns: [id (#0), content (#1), _search_matched (#2)]
├── read rows: 2
├── read size: < 1 KiB
├── partitions total: 5
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 5 to 5, inverted pruning: 5 to 1>]
├── push downs: [filters: [t_small_blocks._search_matched (#2)], limit: NONE]
└── estimated rows: 10.00

statement ok
CREATE TABLE books(
id int,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
## Copyright 2023 Databend Cloud
##
## Licensed under the Elastic License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
## https://www.elastic.co/licensing/elastic-license
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.

# Start from a clean, dedicated database so the table path in the expected
# plans (default.test_inverted_index_db.*) is stable.
statement ok
DROP DATABASE IF EXISTS test_inverted_index_db

statement ok
CREATE DATABASE test_inverted_index_db

statement ok
USE test_inverted_index_db

statement ok
DROP TABLE IF EXISTS t1

# Case 1: storage_format='parquet'.
# row_per_block=2 with the 10 rows inserted below yields the 5 partitions
# reported as "partitions total: 5" in the expected plan.
statement ok
CREATE TABLE t1 (id int, content string) row_per_block=2 storage_format='parquet'

# The inverted index is created before the data is inserted.
statement ok
CREATE INVERTED INDEX IF NOT EXISTS idx1 ON t1(content)

statement ok
INSERT INTO t1 VALUES
(1, 'The quick brown fox jumps over the lazy dog'),
(2, 'A picture is worth a thousand words'),
(3, 'The early bird catches the worm'),
(4, 'Actions speak louder than words'),
(5, 'Time flies like an arrow; fruit flies like a banana'),
(6, 'Beauty is in the eye of the beholder'),
(7, 'When life gives you lemons, make lemonade'),
(8, 'Put all your eggs in one basket'),
(9, 'You can not judge a book by its cover'),
(10, 'An apple a day keeps the doctor away')

# The search predicate appears as the internal _search_matched column (#2) in
# both the Filter and the pushed-down filters. The inverted index prunes
# 5 blocks down to 1, so only 2 rows are read.
query T
EXPLAIN SELECT id, content FROM t1 WHERE query('content:"early bird"')
----
Filter
├── output columns: [t1.id (#0), t1.content (#1)]
├── filters: [t1._search_matched (#2)]
├── estimated rows: 10.00
└── TableScan
├── table: default.test_inverted_index_db.t1
├── output columns: [id (#0), content (#1), _search_matched (#2)]
├── read rows: 2
├── read size: < 1 KiB
├── partitions total: 5
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 5 to 5, inverted pruning: 5 to 1>]
├── push downs: [filters: [t1._search_matched (#2)], limit: NONE]
└── estimated rows: 10.00

statement ok
DROP TABLE IF EXISTS t2

# Case 2: same schema and block size, but storage_format='native'.
statement ok
CREATE TABLE t2 (id int, content string) row_per_block=2 storage_format='native'

# NOTE(review): the index name idx1 is also used for another table's index in
# this file; presumably inverted index names are scoped per table - confirm.
statement ok
CREATE INVERTED INDEX IF NOT EXISTS idx1 ON t2(content)

statement ok
INSERT INTO t2 VALUES
(1, 'The quick brown fox jumps over the lazy dog'),
(2, 'A picture is worth a thousand words'),
(3, 'The early bird catches the worm'),
(4, 'Actions speak louder than words'),
(5, 'Time flies like an arrow; fruit flies like a banana'),
(6, 'Beauty is in the eye of the beholder'),
(7, 'When life gives you lemons, make lemonade'),
(8, 'Put all your eggs in one basket'),
(9, 'You can not judge a book by its cover'),
(10, 'An apple a day keeps the doctor away')

# With the native format the plan still keeps a Filter node on
# _search_matched (#2) above the TableScan, and the inverted index prunes
# 5 blocks down to 1 with 2 rows read.
query T
EXPLAIN SELECT id, content FROM t2 WHERE query('content:"early bird"')
----
Filter
├── output columns: [t2.id (#0), t2.content (#1)]
├── filters: [t2._search_matched (#2)]
├── estimated rows: 10.00
└── TableScan
├── table: default.test_inverted_index_db.t2
├── output columns: [id (#0), content (#1), _search_matched (#2)]
├── read rows: 2
├── read size: < 1 KiB
├── partitions total: 5
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 5 to 5, inverted pruning: 5 to 1>]
├── push downs: [filters: [t2._search_matched (#2)], limit: NONE]
└── estimated rows: 10.00

# Teardown: switch back to the default database before dropping the test one.
statement ok
USE default

statement ok
DROP DATABASE IF EXISTS test_inverted_index_db
33 changes: 33 additions & 0 deletions tests/sqllogictests/suites/mode/standalone/explain/explain.test
Original file line number Diff line number Diff line change
Expand Up @@ -1438,12 +1438,45 @@ EvalScalar
└── limit: NONE


# Bloom pruning on a map-key lookup.
# t3 has a nullable map(string, string) column; the two separate INSERTs
# below produce the two partitions reported as "partitions total: 2".
statement ok
drop table if exists t3;

statement ok
CREATE TABLE t3(a int, b map(string, string) null);

statement ok
INSERT INTO t3 VALUES (1, {'k1':'a', 'k2':'b'}), (2, null), (3, {'k3':'z'});

# Only this second batch contains a row with b['k2'] = 'm'.
statement ok
INSERT INTO t3 VALUES (4, {'k1':'a', 'k2':'m'}), (5, null), (6, {'k3':'z'});

# EXPLAIN returns plan text, so the column type string is T (text), not I.
# Expected: a Filter stays above the TableScan, and bloom pruning drops one
# of the two blocks ("bloom pruning: 2 to 1"), leaving 3 rows to read.
query T
EXPLAIN SELECT * FROM t3 WHERE b['k2'] = 'm';
----
Filter
├── output columns: [t3.a (#0), t3.b (#1)]
├── filters: [is_true(get(t3.b (#1), 'k2') = 'm')]
├── estimated rows: 1.20
└── TableScan
├── table: default.default.t3
├── output columns: [a (#0), b (#1)]
├── read rows: 3
├── read size: < 1 KiB
├── partitions total: 2
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 2 to 2>, blocks: <range pruning: 2 to 2, bloom pruning: 2 to 1>]
├── push downs: [filters: [is_true(get(t3.b (#1), 'k2') = 'm')], limit: NONE]
└── estimated rows: 6.00

# Cleanup. t1 and t2 are presumably created earlier in this file (outside the
# visible hunk) - they are dropped without IF EXISTS.
statement ok
drop table t1;

statement ok
drop table t2;

statement ok
drop table t3;

statement ok
CREATE TABLE customers AS SELECT
number % 100 AS customer_id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1078,3 +1078,38 @@ Sort
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
├── push downs: [filters: [], limit: NONE]
└── estimated rows: 3.00

# Bloom pruning on a map-key lookup.
# t3 has a nullable map(string, string) column; the two separate INSERTs
# below produce the two partitions reported as "partitions total: 2".
statement ok
drop table if exists t3;

statement ok
CREATE TABLE t3(a int, b map(string, string) null);

statement ok
INSERT INTO t3 VALUES (1, {'k1':'a', 'k2':'b'}), (2, null), (3, {'k3':'z'});

# Only this second batch contains a row with b['k2'] = 'm'.
statement ok
INSERT INTO t3 VALUES (4, {'k1':'a', 'k2':'m'}), (5, null), (6, {'k3':'z'});

# EXPLAIN returns plan text, so the column type string is T (text), not I.
# Expected: the plan is a bare TableScan (no Filter node above it) with the
# predicate listed under push downs, and bloom pruning drops one of the two
# blocks ("bloom pruning: 2 to 1").
query T
EXPLAIN SELECT * FROM t3 WHERE b['k2'] = 'm';
----
TableScan
├── table: default.default.t3
├── output columns: [a (#0), b (#1)]
├── read rows: 3
├── read size: < 1 KiB
├── partitions total: 2
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 2 to 2>, blocks: <range pruning: 2 to 2, bloom pruning: 2 to 1>]
├── push downs: [filters: [is_true(get(t3.b (#1), 'k2') = 'm')], limit: NONE]
└── estimated rows: 1.20

# Cleanup. t1 and t2 are presumably created earlier in this file (outside the
# visible hunk) - they are dropped without IF EXISTS.
statement ok
drop table t1;

statement ok
drop table t2;

statement ok
drop table t3;

0 comments on commit 62ff0b9

Please sign in to comment.