Skip to content

Commit

Permalink
feat(query): match function support multiple fields with boost (databendlabs#15196)
Browse files Browse the repository at this point in the history

* feat(query): match function support multiple fields with boost

* fix

* fix test
  • Loading branch information
b41sh authored Apr 16, 2024
1 parent 3305810 commit 1c5286b
Show file tree
Hide file tree
Showing 8 changed files with 506 additions and 89 deletions.
13 changes: 12 additions & 1 deletion src/query/catalog/src/plan/pushdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
use std::fmt::Debug;

use databend_common_expression::types::DataType;
use databend_common_expression::types::F32;
use databend_common_expression::DataSchema;
use databend_common_expression::RemoteExpr;
use databend_common_expression::Scalar;
Expand Down Expand Up @@ -69,12 +70,22 @@ pub struct PrewhereInfo {
pub virtual_columns: Option<Vec<VirtualColumnInfo>>,
}

/// Information about an inverted index, together with the fields and text of a search query.
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)]
pub struct InvertedIndexInfo {
/// Name of the index.
pub index_name: String,
/// Version of the index.
pub index_version: String,
/// Schema of the index.
pub index_schema: DataSchema,
// NOTE(review): `query_columns` and `query_fields` appear side by side here. This text
// looks like a rendered diff, so `query_columns` is presumably the line the commit
// replaced with `query_fields` — confirm against the real file.
pub query_columns: Vec<String>,
/// Names of the fields to query, each paired with an optional boost value.
/// When a boost is set, the score for that field is multiplied by the boost value.
/// For example, `title^5.0, description^2.0` means that the score for the
/// `title` field is multiplied by 5.0 and the score for the `description`
/// field is multiplied by 2.0.
pub query_fields: Vec<(String, Option<F32>)>,
/// The search query text, written in the query syntax.
pub query_text: String,
}

Expand Down
4 changes: 2 additions & 2 deletions src/query/ee/tests/it/inverted_index/index_refresh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ async fn test_fuse_do_refresh_inverted_index() -> Result<()> {

let dal = new_fuse_table.get_operator_ref();
let schema = DataSchema::from(table_schema);
let query_columns = vec!["title".to_string(), "content".to_string()];
let query_fields = vec![("title".to_string(), None), ("content".to_string(), None)];

let index_loc = TableMetaLocationGenerator::gen_inverted_index_location_from_block_location(
&block_meta.location.0,
Expand All @@ -135,7 +135,7 @@ async fn test_fuse_do_refresh_inverted_index() -> Result<()> {
);

let index_reader =
InvertedIndexReader::try_create(dal.clone(), &schema, &query_columns, &index_loc).await?;
InvertedIndexReader::try_create(dal.clone(), &schema, &query_fields, &index_loc).await?;

let query = "rust";
let matched_rows = index_reader.do_filter(query, block_meta.row_count)?;
Expand Down
50 changes: 40 additions & 10 deletions src/query/ee/tests/it/inverted_index/pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use databend_common_exception::Result;
use databend_common_expression::types::number::UInt64Type;
use databend_common_expression::types::NumberDataType;
use databend_common_expression::types::StringType;
use databend_common_expression::types::F32;
use databend_common_expression::DataBlock;
use databend_common_expression::DataSchema;
use databend_common_expression::FromData;
Expand Down Expand Up @@ -446,7 +447,7 @@ async fn test_block_pruner() -> Result<()> {
index_name: index_name.clone(),
index_version: index_version.clone(),
index_schema: index_schema.clone(),
query_columns: vec!["idiom".to_string()],
query_fields: vec![("idiom".to_string(), None)],
query_text: "test".to_string(),
}),
..Default::default()
Expand All @@ -456,7 +457,7 @@ async fn test_block_pruner() -> Result<()> {
index_name: index_name.clone(),
index_version: index_version.clone(),
index_schema: index_schema.clone(),
query_columns: vec!["idiom".to_string()],
query_fields: vec![("idiom".to_string(), None)],
query_text: "save".to_string(),
}),
..Default::default()
Expand All @@ -466,7 +467,7 @@ async fn test_block_pruner() -> Result<()> {
index_name: index_name.clone(),
index_version: index_version.clone(),
index_schema: index_schema.clone(),
query_columns: vec!["idiom".to_string()],
query_fields: vec![("idiom".to_string(), None)],
query_text: "one".to_string(),
}),
..Default::default()
Expand All @@ -476,7 +477,7 @@ async fn test_block_pruner() -> Result<()> {
index_name: index_name.clone(),
index_version: index_version.clone(),
index_schema: index_schema.clone(),
query_columns: vec!["idiom".to_string()],
query_fields: vec![("idiom".to_string(), None)],
query_text: "the".to_string(),
}),
..Default::default()
Expand All @@ -486,7 +487,7 @@ async fn test_block_pruner() -> Result<()> {
index_name: index_name.clone(),
index_version: index_version.clone(),
index_schema: index_schema.clone(),
query_columns: vec!["idiom".to_string()],
query_fields: vec![("idiom".to_string(), None)],
query_text: "光阴".to_string(),
}),
..Default::default()
Expand All @@ -496,7 +497,7 @@ async fn test_block_pruner() -> Result<()> {
index_name: index_name.clone(),
index_version: index_version.clone(),
index_schema: index_schema.clone(),
query_columns: vec!["idiom".to_string()],
query_fields: vec![("idiom".to_string(), None)],
query_text: "人生".to_string(),
}),
..Default::default()
Expand All @@ -506,7 +507,7 @@ async fn test_block_pruner() -> Result<()> {
index_name: index_name.clone(),
index_version: index_version.clone(),
index_schema: index_schema.clone(),
query_columns: vec!["meaning".to_string()],
query_fields: vec![("meaning".to_string(), None)],
query_text: "people".to_string(),
}),
..Default::default()
Expand All @@ -516,7 +517,7 @@ async fn test_block_pruner() -> Result<()> {
index_name: index_name.clone(),
index_version: index_version.clone(),
index_schema: index_schema.clone(),
query_columns: vec!["meaning".to_string()],
query_fields: vec![("meaning".to_string(), None)],
query_text: "bad".to_string(),
}),
..Default::default()
Expand All @@ -526,7 +527,7 @@ async fn test_block_pruner() -> Result<()> {
index_name: index_name.clone(),
index_version: index_version.clone(),
index_schema: index_schema.clone(),
query_columns: vec!["meaning".to_string()],
query_fields: vec![("meaning".to_string(), None)],
query_text: "黄金".to_string(),
}),
..Default::default()
Expand All @@ -536,11 +537,37 @@ async fn test_block_pruner() -> Result<()> {
index_name: index_name.clone(),
index_version: index_version.clone(),
index_schema: index_schema.clone(),
query_columns: vec!["meaning".to_string()],
query_fields: vec![("meaning".to_string(), None)],
query_text: "时间".to_string(),
}),
..Default::default()
};
let e11 = PushDownInfo {
inverted_index: Some(InvertedIndexInfo {
index_name: index_name.clone(),
index_version: index_version.clone(),
index_schema: index_schema.clone(),
query_fields: vec![
("idiom".to_string(), Some(F32::from(5.0))),
("meaning".to_string(), Some(F32::from(1.0))),
],
query_text: "you".to_string(),
}),
..Default::default()
};
let e12 = PushDownInfo {
inverted_index: Some(InvertedIndexInfo {
index_name: index_name.clone(),
index_version: index_version.clone(),
index_schema: index_schema.clone(),
query_fields: vec![
("idiom".to_string(), Some(F32::from(5.0))),
("meaning".to_string(), Some(F32::from(1.0))),
],
query_text: "光阴".to_string(),
}),
..Default::default()
};
let extras = vec![
(Some(e1), 0, 0),
(Some(e2), 2, 2),
Expand All @@ -552,7 +579,10 @@ async fn test_block_pruner() -> Result<()> {
(Some(e8), 4, 4),
(Some(e9), 1, 2),
(Some(e10), 2, 2),
(Some(e11), 9, 15),
(Some(e12), 2, 2),
];

for (extra, expected_blocks, expected_rows) in extras {
let block_metas = apply_block_pruning(
snapshot.clone(),
Expand Down
2 changes: 1 addition & 1 deletion src/query/functions/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ pub const GENERAL_LAMBDA_FUNCTIONS: [&str; 5] = [
"array_reduce",
];

pub const GENERAL_SEARCH_FUNCTIONS: [&str; 2] = ["match", "score"];
/// Names of the general search functions: `match`, `query` and `score`.
pub const GENERAL_SEARCH_FUNCTIONS: [&str; 3] = ["match", "query", "score"];

fn builtin_functions() -> FunctionRegistry {
let mut registry = FunctionRegistry::empty();
Expand Down
Loading

0 comments on commit 1c5286b

Please sign in to comment.