From 754b66a02d71c1ebd14e1b7efa3d850746cbfcfe Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Thu, 26 Sep 2024 16:09:21 +0800 Subject: [PATCH 1/6] rename expected_vnode_count to max_parallelism --- proto/catalog.proto | 2 +- proto/stream_plan.proto | 5 +++-- .../stream_fragmenter/graph/fragment_graph.rs | 2 +- src/frontend/src/stream_fragmenter/mod.rs | 2 +- src/meta/src/rpc/ddl_controller.rs | 2 +- src/meta/src/stream/stream_graph/actor.rs | 2 +- src/meta/src/stream/stream_graph/fragment.rs | 17 +++++++++-------- src/meta/src/stream/test_fragmenter.rs | 2 +- 8 files changed, 18 insertions(+), 16 deletions(-) diff --git a/proto/catalog.proto b/proto/catalog.proto index 2bbfa39c10350..f792eccc0cab6 100644 --- a/proto/catalog.proto +++ b/proto/catalog.proto @@ -428,7 +428,7 @@ message Table { // Use `VnodeCountCompat::vnode_count` to access it. // // Please note that this field is not intended to describe the expected vnode count - // for a streaming job. Instead, refer to `stream_plan.StreamFragmentGraph.expected_vnode_count`. + // for a streaming job. Instead, refer to `stream_plan.StreamFragmentGraph.max_parallelism`. optional uint32 maybe_vnode_count = 40; // Per-table catalog version, used by schema change. `None` for internal diff --git a/proto/stream_plan.proto b/proto/stream_plan.proto index 7fe63054c565c..2323bc8b8ba5f 100644 --- a/proto/stream_plan.proto +++ b/proto/stream_plan.proto @@ -1041,7 +1041,8 @@ message StreamFragmentGraph { // If none, default parallelism will be applied. Parallelism parallelism = 6; - // Expected vnode count for the graph. + // Specified max parallelism, i.e., expected vnode count for the graph. + // // The scheduler on the meta service will use this as a hint to decide the vnode count // for each fragment. // @@ -1049,5 +1050,5 @@ message StreamFragmentGraph { // For example, a no-shuffle exchange between current fragment graph and an existing // upstream fragment graph requires two fragments to be in the same distribution, // thus the same vnode count. - uint32 expected_vnode_count = 7; + uint32 max_parallelism = 7; } diff --git a/src/frontend/src/stream_fragmenter/graph/fragment_graph.rs b/src/frontend/src/stream_fragmenter/graph/fragment_graph.rs index 6f2a07a5f3d72..78f79f1b13012 100644 --- a/src/frontend/src/stream_fragmenter/graph/fragment_graph.rs +++ b/src/frontend/src/stream_fragmenter/graph/fragment_graph.rs @@ -108,7 +108,7 @@ impl StreamFragmentGraph { dependent_table_ids: vec![], table_ids_cnt: 0, parallelism: None, - expected_vnode_count: 0, + max_parallelism: 0, } } diff --git a/src/frontend/src/stream_fragmenter/mod.rs b/src/frontend/src/stream_fragmenter/mod.rs index b671d0792e073..daa48d99969ca 100644 --- a/src/frontend/src/stream_fragmenter/mod.rs +++ b/src/frontend/src/stream_fragmenter/mod.rs @@ -144,7 +144,7 @@ pub fn build_graph(plan_node: PlanRef) -> SchedulerResult MetaResult { - let expected_vnode_count = fragment_graph.expected_vnode_count(); + let expected_vnode_count = fragment_graph.max_parallelism(); let existing_distributions = fragment_graph.existing_distribution(); // Schedule the distribution of all building fragments. diff --git a/src/meta/src/stream/stream_graph/fragment.rs b/src/meta/src/stream/stream_graph/fragment.rs index b997b54a3bbe4..a28567560b4c1 100644 --- a/src/meta/src/stream/stream_graph/fragment.rs +++ b/src/meta/src/stream/stream_graph/fragment.rs @@ -326,7 +326,8 @@ pub struct StreamFragmentGraph { /// variable. If not specified, all active worker slots will be used. specified_parallelism: Option, - /// Expected vnode count for the graph. + /// Specified max parallelism, i.e., expected vnode count for the graph. + /// /// The scheduler on the meta service will use this as a hint to decide the vnode count /// for each fragment. /// @@ -334,7 +335,7 @@ pub struct StreamFragmentGraph { /// For example, a no-shuffle exchange between current fragment graph and an existing /// upstream fragment graph requires two fragments to be in the same distribution, /// thus the same vnode count. - expected_vnode_count: usize, + max_parallelism: usize, } impl StreamFragmentGraph { @@ -410,7 +411,7 @@ impl StreamFragmentGraph { None }; - let expected_vnode_count = proto.expected_vnode_count as usize; + let max_parallelism = proto.max_parallelism as usize; Ok(Self { fragments, @@ -418,7 +419,7 @@ impl StreamFragmentGraph { upstreams, dependent_table_ids, specified_parallelism, - expected_vnode_count, + max_parallelism, }) } @@ -519,8 +520,8 @@ impl StreamFragmentGraph { } /// Get the expected vnode count of the graph. See documentation of the field for more details. - pub fn expected_vnode_count(&self) -> usize { - self.expected_vnode_count + pub fn max_parallelism(&self) -> usize { + self.max_parallelism } /// Get downstreams of a fragment. @@ -1184,7 +1185,7 @@ impl CompleteStreamFragmentGraph { } /// Get the expected vnode count of the building graph. See documentation of the field for more details. - pub(super) fn expected_vnode_count(&self) -> usize { - self.building_graph.expected_vnode_count() + pub(super) fn max_parallelism(&self) -> usize { + self.building_graph.max_parallelism() } } diff --git a/src/meta/src/stream/test_fragmenter.rs b/src/meta/src/stream/test_fragmenter.rs index 040ca0046e30f..f4bbd9ad186fa 100644 --- a/src/meta/src/stream/test_fragmenter.rs +++ b/src/meta/src/stream/test_fragmenter.rs @@ -416,7 +416,7 @@ fn make_stream_graph() -> StreamFragmentGraphProto { dependent_table_ids: vec![], table_ids_cnt: 3, parallelism: None, - expected_vnode_count: VirtualNode::COUNT_FOR_TEST as _, + max_parallelism: VirtualNode::COUNT_FOR_TEST as _, } } From 59e6c8e073ae062f00ce62b0fb98b96c8d0162ec Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Thu, 26 Sep 2024 17:35:55 +0800 Subject: [PATCH 2/6] persist table fragments max parallelism Signed-off-by: Bugen Zhao --- proto/meta.proto | 15 +++++++ src/ctl/src/cmd_impl/meta/migration.rs | 1 + .../m20240911_083152_variable_vnode_count.rs | 44 +++++++++++++------ src/meta/model_v2/src/streaming_job.rs | 1 + src/meta/src/controller/fragment.rs | 4 ++ src/meta/src/controller/streaming_job.rs | 10 +++++ src/meta/src/model/stream.rs | 21 ++++++++- src/meta/src/rpc/ddl_controller.rs | 3 ++ src/meta/src/rpc/ddl_controller_v2.rs | 9 +++- src/meta/src/stream/stream_manager.rs | 1 + 10 files changed, 94 insertions(+), 15 deletions(-) diff --git a/proto/meta.proto b/proto/meta.proto index 75a9b5d5caf0b..19f8eeb025a93 100644 --- a/proto/meta.proto +++ b/proto/meta.proto @@ -111,7 +111,22 @@ message TableFragments { map actor_splits = 5; stream_plan.StreamContext ctx = 6; + TableParallelism parallelism = 7; + // The max parallelism specified when the streaming job was created, i.e., expected vnode count. + // + // The reason for persisting this value is mainly to check if a parallelism change (via `ALTER + // .. SET PARALLELISM`) is valid, so that the behavior can be consistent with the creation of + // the streaming job. + // + // Note that the actual vnode count, denoted by `vnode_count` in `fragments`, may be different + // from this value (see `StreamFragmentGraph.max_parallelism` for more details.). As a result, + // checking the parallelism change with this value can be inaccurate in some cases. However, + // when generating resizing plans, we still take the `vnode_count` of each fragment into account. + // + // Can be unset if the fragment is created in older versions where variable vnode count is not + // supported, in which case a default value of 256 should be used. + optional uint32 max_parallelism = 10; // Actors of a materialize view, sink, or table can only be scheduled on nodes with matching node_label. string node_label = 8; diff --git a/src/ctl/src/cmd_impl/meta/migration.rs b/src/ctl/src/cmd_impl/meta/migration.rs index 3e8da33e044ff..868e6a218fe78 100644 --- a/src/ctl/src/cmd_impl/meta/migration.rs +++ b/src/ctl/src/cmd_impl/meta/migration.rs @@ -439,6 +439,7 @@ pub async fn migrate(from: EtcdBackend, target: String, force_clean: bool) -> an create_type: Set(CreateType::Foreground), timezone: Set(table_fragment.timezone()), parallelism: Set(streaming_parallelism), + max_parallelism: Set(table_fragment.max_parallelism as _), }) .exec(&meta_store_sql.conn) .await?; diff --git a/src/meta/model_v2/migration/src/m20240911_083152_variable_vnode_count.rs b/src/meta/model_v2/migration/src/m20240911_083152_variable_vnode_count.rs index 4a30b0828f9ba..5b0aad3ea5586 100644 --- a/src/meta/model_v2/migration/src/m20240911_083152_variable_vnode_count.rs +++ b/src/meta/model_v2/migration/src/m20240911_083152_variable_vnode_count.rs @@ -1,6 +1,10 @@ use sea_orm_migration::prelude::{Table as MigrationTable, *}; -const VNODE_COUNT: i32 = 256; +macro_rules! col { + ($name:expr) => { + ColumnDef::new($name).integer().not_null().default(256) // compat vnode count + }; +} #[derive(DeriveMigrationName)] pub struct Migration; @@ -12,12 +16,7 @@ impl MigrationTrait for Migration { .alter_table( MigrationTable::alter() .table(Table::Table) - .add_column( - ColumnDef::new(Table::VnodeCount) - .integer() - .not_null() - .default(VNODE_COUNT), - ) + .add_column(col!(Table::VnodeCount)) .to_owned(), ) .await?; @@ -26,12 +25,16 @@ impl MigrationTrait for Migration { .alter_table( MigrationTable::alter() .table(Fragment::Table) - .add_column( - ColumnDef::new(Fragment::VnodeCount) - .integer() - .not_null() - .default(VNODE_COUNT), - ) + .add_column(col!(Fragment::VnodeCount)) + .to_owned(), + ) + .await?; + + manager + .alter_table( + MigrationTable::alter() + .table(StreamingJob::Table) + .add_column(col!(StreamingJob::MaxParallelism)) .to_owned(), ) .await @@ -54,6 +57,15 @@ impl MigrationTrait for Migration { .drop_column(Fragment::VnodeCount) .to_owned(), ) + .await?; + + manager + .alter_table( + MigrationTable::alter() + .table(StreamingJob::Table) + .drop_column(StreamingJob::MaxParallelism) + .to_owned(), + ) .await } } @@ -69,3 +81,9 @@ enum Table { Table, VnodeCount, } + +#[derive(DeriveIden)] +enum StreamingJob { + Table, + MaxParallelism, +} diff --git a/src/meta/model_v2/src/streaming_job.rs b/src/meta/model_v2/src/streaming_job.rs index 1ab9225f43cda..d3d0b2b48cc13 100644 --- a/src/meta/model_v2/src/streaming_job.rs +++ b/src/meta/model_v2/src/streaming_job.rs @@ -26,6 +26,7 @@ pub struct Model { pub create_type: CreateType, pub timezone: Option, pub parallelism: StreamingParallelism, + pub max_parallelism: i32, } #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] diff --git a/src/meta/src/controller/fragment.rs b/src/meta/src/controller/fragment.rs index 6d1d9ceba40e2..89e293540bd80 100644 --- a/src/meta/src/controller/fragment.rs +++ b/src/meta/src/controller/fragment.rs @@ -315,6 +315,7 @@ impl CatalogController { HashMap>, )>, parallelism: StreamingParallelism, + max_parallelism: usize, ) -> MetaResult { let mut pb_fragments = HashMap::new(); let mut pb_actor_splits = HashMap::new(); @@ -347,6 +348,7 @@ impl CatalogController { ), node_label: "".to_string(), backfill_done: true, + max_parallelism: Some(max_parallelism as _), }; Ok(table_fragments) @@ -669,6 +671,7 @@ impl CatalogController { job_info.timezone.map(|tz| PbStreamContext { timezone: tz }), fragment_info, job_info.parallelism.clone(), + job_info.max_parallelism as _, ) } @@ -790,6 +793,7 @@ impl CatalogController { job.timezone.map(|tz| PbStreamContext { timezone: tz }), fragment_info, job.parallelism.clone(), + job.max_parallelism as _, )?, ); } diff --git a/src/meta/src/controller/streaming_job.rs b/src/meta/src/controller/streaming_job.rs index e3f4f711bb277..6bf2606a9354b 100644 --- a/src/meta/src/controller/streaming_job.rs +++ b/src/meta/src/controller/streaming_job.rs @@ -83,6 +83,7 @@ impl CatalogController { create_type: PbCreateType, ctx: &StreamContext, streaming_parallelism: StreamingParallelism, + max_parallelism: usize, ) -> MetaResult { let obj = Self::create_object(txn, obj_type, owner_id, database_id, schema_id).await?; let job = streaming_job::ActiveModel { @@ -91,6 +92,7 @@ impl CatalogController { create_type: Set(create_type.into()), timezone: Set(ctx.timezone.clone()), parallelism: Set(streaming_parallelism), + max_parallelism: Set(max_parallelism as _), }; job.insert(txn).await?; @@ -102,6 +104,7 @@ impl CatalogController { streaming_job: &mut StreamingJob, ctx: &StreamContext, parallelism: &Option, + max_parallelism: usize, ) -> MetaResult<()> { let inner = self.inner.write().await; let txn = inner.db.begin().await?; @@ -169,6 +172,7 @@ impl CatalogController { create_type, ctx, streaming_parallelism, + max_parallelism, ) .await?; table.id = job_id as _; @@ -204,6 +208,7 @@ impl CatalogController { create_type, ctx, streaming_parallelism, + max_parallelism, ) .await?; sink.id = job_id as _; @@ -220,6 +225,7 @@ impl CatalogController { create_type, ctx, streaming_parallelism, + max_parallelism, ) .await?; table.id = job_id as _; @@ -255,6 +261,7 @@ impl CatalogController { create_type, ctx, streaming_parallelism, + max_parallelism, ) .await?; // to be compatible with old implementation. @@ -285,6 +292,7 @@ impl CatalogController { create_type, ctx, streaming_parallelism, + max_parallelism, ) .await?; src.id = job_id as _; @@ -631,6 +639,7 @@ impl CatalogController { ctx: &StreamContext, version: &PbTableVersion, specified_parallelism: &Option, + max_parallelism: usize, ) -> MetaResult { let id = streaming_job.id(); let inner = self.inner.write().await; @@ -685,6 +694,7 @@ impl CatalogController { PbCreateType::Foreground, ctx, parallelism, + max_parallelism, ) .await?; diff --git a/src/meta/src/model/stream.rs b/src/meta/src/model/stream.rs index aaff076688785..954ee820e6a7f 100644 --- a/src/meta/src/model/stream.rs +++ b/src/meta/src/model/stream.rs @@ -17,7 +17,7 @@ use std::ops::AddAssign; use itertools::Itertools; use risingwave_common::catalog::TableId; -use risingwave_common::hash::WorkerSlotId; +use risingwave_common::hash::{VirtualNode, WorkerSlotId}; use risingwave_connector::source::SplitImpl; use risingwave_pb::common::PbActorLocation; use risingwave_pb::meta::table_fragments::actor_status::ActorState; @@ -115,6 +115,18 @@ pub struct TableFragments { /// The parallelism assigned to this table fragments pub assigned_parallelism: TableParallelism, + + /// The max parallelism specified when the streaming job was created, i.e., expected vnode count. + /// + /// The reason for persisting this value is mainly to check if a parallelism change (via `ALTER + /// .. SET PARALLELISM`) is valid, so that the behavior can be consistent with the creation of + /// the streaming job. + /// + /// Note that the actual vnode count, denoted by `vnode_count` in `fragments`, may be different + /// from this value (see `StreamFragmentGraph.max_parallelism` for more details.). As a result, + /// checking the parallelism change with this value can be inaccurate in some cases. However, + /// when generating resizing plans, we still take the `vnode_count` of each fragment into account. + pub max_parallelism: usize, } #[derive(Debug, Clone, Default)] @@ -167,6 +179,7 @@ impl MetadataModel for TableFragments { parallelism: Some(self.assigned_parallelism.into()), node_label: "".to_string(), backfill_done: true, + max_parallelism: Some(self.max_parallelism as _), } } @@ -187,6 +200,9 @@ impl MetadataModel for TableFragments { actor_splits: build_actor_split_impls(&prost.actor_splits), ctx, assigned_parallelism: prost.parallelism.unwrap_or(default_parallelism).into(), + max_parallelism: prost + .max_parallelism + .map_or(VirtualNode::COUNT_FOR_COMPAT, |v| v as _), } } @@ -204,6 +220,7 @@ impl TableFragments { &BTreeMap::new(), StreamContext::default(), TableParallelism::Adaptive, + VirtualNode::COUNT_FOR_TEST, ) } @@ -215,6 +232,7 @@ impl TableFragments { actor_locations: &BTreeMap, ctx: StreamContext, table_parallelism: TableParallelism, + max_parallelism: usize, ) -> Self { let actor_status = actor_locations .iter() @@ -237,6 +255,7 @@ impl TableFragments { actor_splits: HashMap::default(), ctx, assigned_parallelism: table_parallelism, + max_parallelism, } } diff --git a/src/meta/src/rpc/ddl_controller.rs b/src/meta/src/rpc/ddl_controller.rs index 6e4b89591868e..2b288c25a8a1a 100644 --- a/src/meta/src/rpc/ddl_controller.rs +++ b/src/meta/src/rpc/ddl_controller.rs @@ -1693,6 +1693,7 @@ impl DdlController { &building_locations.actor_locations, stream_ctx.clone(), table_parallelism, + max_parallelism.get(), ); if let Some(mview_fragment) = table_fragments.mview_fragment() { @@ -1727,6 +1728,7 @@ impl DdlController { &stream_ctx, table.get_version()?, &fragment_graph.specified_parallelism(), + fragment_graph.max_parallelism(), ) .await? as u32 } @@ -2178,6 +2180,7 @@ impl DdlController { &building_locations.actor_locations, stream_ctx, old_table_fragments.assigned_parallelism, + old_table_fragments.max_parallelism, ); // Note: no need to set `vnode_count` as it's already set by the frontend. diff --git a/src/meta/src/rpc/ddl_controller_v2.rs b/src/meta/src/rpc/ddl_controller_v2.rs index 5e83e49b767a7..43abd22fba8dd 100644 --- a/src/meta/src/rpc/ddl_controller_v2.rs +++ b/src/meta/src/rpc/ddl_controller_v2.rs @@ -46,7 +46,12 @@ impl DdlController { let ctx = StreamContext::from_protobuf(fragment_graph.get_ctx().unwrap()); mgr.catalog_controller - .create_job_catalog(&mut streaming_job, &ctx, &fragment_graph.parallelism) + .create_job_catalog( + &mut streaming_job, + &ctx, + &fragment_graph.parallelism, + fragment_graph.max_parallelism as _, + ) .await?; let job_id = streaming_job.id(); @@ -296,6 +301,7 @@ impl DdlController { &stream_ctx, table.get_version()?, &fragment_graph.specified_parallelism(), + fragment_graph.max_parallelism(), ) .await? as u32; @@ -440,6 +446,7 @@ impl DdlController { &ctx, table.get_version()?, &fragment_graph.specified_parallelism(), + fragment_graph.max_parallelism(), ) .await?; diff --git a/src/meta/src/stream/stream_manager.rs b/src/meta/src/stream/stream_manager.rs index f12de49cccbbe..0511e0e4de379 100644 --- a/src/meta/src/stream/stream_manager.rs +++ b/src/meta/src/stream/stream_manager.rs @@ -1098,6 +1098,7 @@ mod tests { &locations.actor_locations, Default::default(), TableParallelism::Adaptive, + VirtualNode::COUNT_FOR_TEST, ); let ctx = CreateStreamingJobContext { building_locations: locations, From 4d745eae5f5c9f842172216f3aa99f46e8d9177c Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Thu, 26 Sep 2024 18:01:05 +0800 Subject: [PATCH 3/6] fetch max parallelism to check alter parallelism Signed-off-by: Bugen Zhao --- src/meta/src/controller/fragment.rs | 9 +++++++++ src/meta/src/manager/metadata.rs | 20 +++++++++++++++++++- src/meta/src/stream/stream_manager.rs | 11 +++++++---- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/meta/src/controller/fragment.rs b/src/meta/src/controller/fragment.rs index 89e293540bd80..22cab97ce6941 100644 --- a/src/meta/src/controller/fragment.rs +++ b/src/meta/src/controller/fragment.rs @@ -692,6 +692,15 @@ impl CatalogController { Ok(job_states) } + pub async fn get_max_parallelism_by_id(&self, job_id: ObjectId) -> MetaResult { + let inner = self.inner.read().await; + let job = StreamingJob::find_by_id(job_id) + .one(&inner.db) + .await? + .ok_or_else(|| anyhow::anyhow!("job {} not found in database", job_id))?; + Ok(job.max_parallelism as usize) + } + /// Get all actor ids in the target streaming jobs. pub async fn get_job_actor_mapping( &self, diff --git a/src/meta/src/manager/metadata.rs b/src/meta/src/manager/metadata.rs index 78df862e1d8e5..a3d7f6492c6d8 100644 --- a/src/meta/src/manager/metadata.rs +++ b/src/meta/src/manager/metadata.rs @@ -16,7 +16,7 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::pin::pin; use std::time::Duration; -use anyhow::anyhow; +use anyhow::{anyhow, Context}; use futures::future::{select, Either}; use risingwave_common::catalog::{TableId, TableOption}; use risingwave_meta_model_v2::{ObjectId, SourceId}; @@ -892,6 +892,24 @@ impl MetadataManager { } } + pub async fn get_table_max_parallelism(&self, table_id: TableId) -> MetaResult { + match self { + MetadataManager::V1(mgr) => { + let fragments = mgr.fragment_manager.get_fragment_read_guard().await; + Ok(fragments + .table_fragments() + .get(&table_id) + .map(|tf| tf.max_parallelism) + .with_context(|| format!("job {table_id} not found"))?) + } + MetadataManager::V2(mgr) => { + mgr.catalog_controller + .get_max_parallelism_by_id(table_id.table_id as _) + .await + } + } + } + pub fn cluster_id(&self) -> &ClusterId { match self { MetadataManager::V1(mgr) => mgr.cluster_manager.cluster_id(), diff --git a/src/meta/src/stream/stream_manager.rs b/src/meta/src/stream/stream_manager.rs index 0511e0e4de379..2e035cfafd51d 100644 --- a/src/meta/src/stream/stream_manager.rs +++ b/src/meta/src/stream/stream_manager.rs @@ -19,7 +19,6 @@ use futures::future::join_all; use itertools::Itertools; use risingwave_common::bail; use risingwave_common::catalog::TableId; -use risingwave_common::hash::VirtualNode; use risingwave_meta_model_v2::ObjectId; use risingwave_pb::catalog::{CreateType, Subscription, Table}; use risingwave_pb::stream_plan::update_mutation::MergeUpdate; @@ -665,6 +664,8 @@ impl GlobalStreamManager { ) -> MetaResult<()> { let _reschedule_job_lock = self.reschedule_lock_write_guard().await; + let table_id = TableId::new(table_id); + let worker_nodes = self .metadata_manager .list_active_streaming_compute_nodes() @@ -683,8 +684,10 @@ impl GlobalStreamManager { .iter() .map(|w| w.parallelism as usize) .sum::(); - // TODO(var-vnode): get correct max parallelism from catalogs. - let max_parallelism = VirtualNode::COUNT_FOR_COMPAT; + let max_parallelism = self + .metadata_manager + .get_table_max_parallelism(table_id) + .await?; match parallelism { TableParallelism::Adaptive => { @@ -709,7 +712,7 @@ impl GlobalStreamManager { } } - let table_parallelism_assignment = HashMap::from([(TableId::new(table_id), parallelism)]); + let table_parallelism_assignment = HashMap::from([(table_id, parallelism)]); if deferred { tracing::debug!( From 48f5f3c6c9536a6a808af5e12dae030f19520ac8 Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Fri, 27 Sep 2024 16:44:17 +0800 Subject: [PATCH 4/6] update test Signed-off-by: Bugen Zhao --- .../scale/streaming_parallelism.rs | 41 +++++++++++++++---- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/src/tests/simulation/tests/integration_tests/scale/streaming_parallelism.rs b/src/tests/simulation/tests/integration_tests/scale/streaming_parallelism.rs index 1604de034b6da..4dc1564ead2bd 100644 --- a/src/tests/simulation/tests/integration_tests/scale/streaming_parallelism.rs +++ b/src/tests/simulation/tests/integration_tests/scale/streaming_parallelism.rs @@ -177,12 +177,20 @@ async fn test_parallelism_exceed_virtual_node_max_create() -> Result<()> { #[tokio::test] async fn test_parallelism_exceed_virtual_node_max_alter_fixed() -> Result<()> { - let vnode_max = VirtualNode::COUNT_FOR_COMPAT; + const MAX_PARALLELISM: usize = 512; + let mut configuration = Configuration::for_scale(); configuration.compute_nodes = 1; - configuration.compute_node_cores = vnode_max + 100; + configuration.compute_node_cores = MAX_PARALLELISM + 100; let mut cluster = Cluster::start(configuration).await?; let mut session = cluster.start_session(); + + session + .run(format!( + "set streaming_max_parallelism = {}", + MAX_PARALLELISM + )) + .await?; session.run("set streaming_parallelism = 1").await?; session.run("create table t(v int)").await?; session @@ -190,13 +198,30 @@ async fn test_parallelism_exceed_virtual_node_max_alter_fixed() -> Result<()> { .await? .assert_result_eq("FIXED(1)"); - let result = session - .run(format!("alter table t set parallelism = {}", vnode_max + 1)) - .await; + { + // Set to the max parallelism, should be accepted. + session + .run(format!( + "alter table t set parallelism = {}", + MAX_PARALLELISM + )) + .await?; + session + .run("select parallelism from rw_streaming_parallelism where name = 't'") + .await? + .assert_result_eq(format!("FIXED({})", MAX_PARALLELISM)); + } - // This should be rejected. - // TODO(var-vnode): showing that it's rejected for different vnode counts. - assert!(result.is_err(), "{result:?}"); + { + // Exceed the max parallelism, should be rejected. + let result = session + .run(format!( + "alter table t set parallelism = {}", + MAX_PARALLELISM + 1 + )) + .await; + assert!(result.is_err(), "{result:?}"); + } Ok(()) } From 9736d32b23d27612c9ef5aa87e8f4232856b4947 Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Fri, 27 Sep 2024 17:10:46 +0800 Subject: [PATCH 5/6] add e2e test Signed-off-by: Bugen Zhao --- e2e_test/ddl/max_parallelism.slt | 70 ++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 e2e_test/ddl/max_parallelism.slt diff --git a/e2e_test/ddl/max_parallelism.slt b/e2e_test/ddl/max_parallelism.slt new file mode 100644 index 0000000000000..45aa58185f31b --- /dev/null +++ b/e2e_test/ddl/max_parallelism.slt @@ -0,0 +1,70 @@ +statement ok +create view table_parallelism as select t.name, tf.parallelism from rw_tables t, rw_table_fragments tf where t.id = tf.table_id; + +#### BEGIN + + +statement ok +set streaming_max_parallelism to 4; + +# When the parallelism is specified to a value greater than the max parallelism, return an error. +statement ok +set streaming_parallelism to 6; + +statement error specified parallelism 6 should not exceed max parallelism 4 +create table t; + +# When the parallelism is specified to an valid value, ok. +statement ok +set streaming_parallelism to 4; + +statement ok +create table t; + +query T +select parallelism from table_parallelism where name = 't'; +---- +FIXED(4) + +statement ok +drop table t; + +# When no parallelism is specified, ok, and the parallelism will be adaptive. + +statement ok +set streaming_parallelism to default; + +statement ok +create table t; + +query T +select parallelism from table_parallelism where name = 't'; +---- +ADAPTIVE + +# Alter parallelism to a valid value, ok. +statement ok +alter table t set parallelism to 4; + +query T +select parallelism from table_parallelism where name = 't'; +---- +FIXED(4) + +# Alter parallelism to an invalid value, return an error. +statement error specified parallelism 8 should not exceed max parallelism 4 +alter table t set parallelism to 8; + +statement ok +drop table t; + +#### END + +statement ok +set streaming_max_parallelism to default; + +statement ok +set streaming_parallelism to default; + +statement ok +drop view table_parallelism; From 10f8a87606c2f85acb2aa895665a5203063ec4e2 Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Sun, 29 Sep 2024 17:20:31 +0800 Subject: [PATCH 6/6] rename method Signed-off-by: Bugen Zhao --- src/meta/src/manager/metadata.rs | 2 +- src/meta/src/stream/stream_manager.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/meta/src/manager/metadata.rs b/src/meta/src/manager/metadata.rs index a3d7f6492c6d8..11d407ff39e1b 100644 --- a/src/meta/src/manager/metadata.rs +++ b/src/meta/src/manager/metadata.rs @@ -892,7 +892,7 @@ impl MetadataManager { } } - pub async fn get_table_max_parallelism(&self, table_id: TableId) -> MetaResult { + pub async fn get_job_max_parallelism(&self, table_id: TableId) -> MetaResult { match self { MetadataManager::V1(mgr) => { let fragments = mgr.fragment_manager.get_fragment_read_guard().await; diff --git a/src/meta/src/stream/stream_manager.rs b/src/meta/src/stream/stream_manager.rs index 2e035cfafd51d..63a6b3e228b1a 100644 --- a/src/meta/src/stream/stream_manager.rs +++ b/src/meta/src/stream/stream_manager.rs @@ -686,7 +686,7 @@ impl GlobalStreamManager { .sum::(); let max_parallelism = self .metadata_manager - .get_table_max_parallelism(table_id) + .get_job_max_parallelism(table_id) .await?; match parallelism {