From 6628c41c3663bb22b41fee3dd10f0edd0769802d Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Tue, 20 Feb 2024 10:38:35 +0800 Subject: [PATCH] feat(metric-engine): set index options for data region (#3330) Signed-off-by: Zhenchi --- Cargo.lock | 1 + src/metric-engine/Cargo.toml | 1 + src/metric-engine/src/engine.rs | 1 + src/metric-engine/src/engine/create.rs | 4 +++ src/metric-engine/src/engine/open.rs | 6 +++- src/metric-engine/src/engine/options.rs | 44 +++++++++++++++++++++++++ 6 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 src/metric-engine/src/engine/options.rs diff --git a/Cargo.lock b/Cargo.lock index 939dd99813fe..3cc6ead1d7a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5210,6 +5210,7 @@ dependencies = [ "common-time", "datafusion", "datatypes", + "itertools 0.10.5", "lazy_static", "mito2", "mur3", diff --git a/src/metric-engine/Cargo.toml b/src/metric-engine/Cargo.toml index def5885cf908..4722fa81e2fc 100644 --- a/src/metric-engine/Cargo.toml +++ b/src/metric-engine/Cargo.toml @@ -17,6 +17,7 @@ common-telemetry.workspace = true common-time.workspace = true datafusion.workspace = true datatypes.workspace = true +itertools.workspace = true lazy_static = "1.4" mito2.workspace = true mur3 = "0.1" diff --git a/src/metric-engine/src/engine.rs b/src/metric-engine/src/engine.rs index 7cf5dc4e266f..1240b7cd6f0b 100644 --- a/src/metric-engine/src/engine.rs +++ b/src/metric-engine/src/engine.rs @@ -17,6 +17,7 @@ mod close; mod create; mod drop; mod open; +mod options; mod put; mod read; mod region_metadata; diff --git a/src/metric-engine/src/engine/create.rs b/src/metric-engine/src/engine/create.rs index 9ba33121c8f3..fbadbf5d2e18 100644 --- a/src/metric-engine/src/engine/create.rs +++ b/src/metric-engine/src/engine/create.rs @@ -36,6 +36,7 @@ use store_api::region_request::{AffectedRows, RegionCreateRequest, RegionRequest use store_api::storage::consts::ReservedColumnId; use store_api::storage::RegionId; +use crate::engine::options::set_index_options_for_data_region; use crate::engine::MetricEngineInner; use crate::error::{ ConflictRegionOptionSnafu, CreateMitoRegionSnafu, InternalColumnOccupiedSnafu, @@ -376,6 +377,9 @@ impl MetricEngineInner { data_region_request.primary_key = vec![ReservedColumnId::table_id(), ReservedColumnId::tsid()]; + // set index options + set_index_options_for_data_region(&mut data_region_request.options); + data_region_request } diff --git a/src/metric-engine/src/engine/open.rs b/src/metric-engine/src/engine/open.rs index df41d1cf12ef..952c923487bf 100644 --- a/src/metric-engine/src/engine/open.rs +++ b/src/metric-engine/src/engine/open.rs @@ -26,6 +26,7 @@ use store_api::region_request::{AffectedRows, RegionOpenRequest, RegionRequest}; use store_api::storage::RegionId; use super::MetricEngineInner; +use crate::engine::options::set_index_options_for_data_region; use crate::error::{OpenMitoRegionSnafu, Result}; use crate::metrics::{LOGICAL_REGION_COUNT, PHYSICAL_REGION_COUNT}; use crate::utils; @@ -77,9 +78,12 @@ impl MetricEngineInner { engine: MITO_ENGINE_NAME.to_string(), skip_wal_replay: request.skip_wal_replay, }; + + let mut data_region_options = request.options; + set_index_options_for_data_region(&mut data_region_options); let open_data_region_request = RegionOpenRequest { region_dir: data_region_dir, - options: request.options.clone(), + options: data_region_options, engine: MITO_ENGINE_NAME.to_string(), skip_wal_replay: request.skip_wal_replay, }; diff --git a/src/metric-engine/src/engine/options.rs b/src/metric-engine/src/engine/options.rs new file mode 100644 index 000000000000..ee071e8d48e5 --- /dev/null +++ b/src/metric-engine/src/engine/options.rs @@ -0,0 +1,44 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Specific options for the metric engine to create or open a region. + +use std::collections::HashMap; + +use itertools::Itertools as _; +use store_api::storage::consts::ReservedColumnId; +use store_api::storage::ColumnId; + +/// Ignore building index on the column `tsid` which is unfriendly to the inverted index and +/// will occupy excessive space if indexed. +const IGNORE_COLUMN_IDS_FOR_DATA_REGION: [ColumnId; 1] = [ReservedColumnId::tsid()]; + +/// The empirical value for the seg row count of the metric data region. +/// Compared to the mito engine, the pattern of the metric engine constructs smaller indices. +/// Therefore, compared to the default seg row count of 1024, by adjusting it to a smaller +/// value and appropriately increasing the size of the index, it results in an improved indexing effect. +const SEG_ROW_COUNT_FOR_DATA_REGION: u32 = 256; + +/// Set the index options for the data region. +pub fn set_index_options_for_data_region(options: &mut HashMap) { + options.insert( + "index.inverted_index.ignore_column_ids".to_string(), + IGNORE_COLUMN_IDS_FOR_DATA_REGION.iter().join(","), + ); + + options.insert( + "index.inverted_index.segment_row_count".to_string(), + SEG_ROW_COUNT_FOR_DATA_REGION.to_string(), + ); +}