From 780cccb5201f9c37eba647419c63a2a55625adfc Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 1 Sep 2024 09:16:14 -0400 Subject: [PATCH] Minor: Improve ExecutionMode documentation (#12214) * Minor: Improve ExecutionMode documentation * Update datafusion/physical-plan/src/execution_plan.rs Co-authored-by: Mehmet Ozan Kabak * Update datafusion/physical-plan/src/execution_plan.rs Co-authored-by: Mehmet Ozan Kabak --------- Co-authored-by: Mehmet Ozan Kabak --- .../physical-plan/src/execution_plan.rs | 45 +++++++++++++------ 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/datafusion/physical-plan/src/execution_plan.rs b/datafusion/physical-plan/src/execution_plan.rs index c1c66f6d3923..53ae59f7072f 100644 --- a/datafusion/physical-plan/src/execution_plan.rs +++ b/datafusion/physical-plan/src/execution_plan.rs @@ -482,24 +482,43 @@ impl ExecutionPlanProperties for &dyn ExecutionPlan { } } -/// Describes the execution mode of an operator's resulting stream with respect -/// to its size and behavior. There are three possible execution modes: `Bounded`, -/// `Unbounded` and `PipelineBreaking`. +/// Describes the execution mode of the result of calling +/// [`ExecutionPlan::execute`] with respect to its size and behavior. +/// +/// The mode of the execution plan is determined by the mode of its input +/// execution plans and the details of the operator itself. For example, a +/// `FilterExec` operator will have the same execution mode as its input, but a +/// `SortExec` operator may have a different execution mode than its input, +/// depending on how the input stream is sorted. +/// +/// There are three possible execution modes: `Bounded`, `Unbounded` and +/// `PipelineBreaking`. #[derive(Clone, Copy, PartialEq, Debug)] pub enum ExecutionMode { - /// Represents the mode where generated stream is bounded, e.g. finite. + /// The stream is bounded / finite. + /// + /// In this case the stream will eventually return `None` to indicate that + /// there are no more records to process. Bounded, - /// Represents the mode where generated stream is unbounded, e.g. infinite. - /// Even though the operator generates an unbounded stream of results, it - /// works with bounded memory and execution can still continue successfully. + /// The stream is unbounded / infinite. /// - /// The stream that results from calling `execute` on an `ExecutionPlan` that is `Unbounded` - /// will never be done (return `None`), except in case of error. + /// In this case, the stream will never be done (never return `None`), + /// except in case of error. + /// + /// This mode is often used in "Steaming" use cases where data is + /// incrementally processed as it arrives. + /// + /// Note that even though the operator generates an unbounded stream of + /// results, it can execute with bounded memory and incrementally produces + /// output. Unbounded, - /// Represents the mode where some of the operator's input stream(s) are - /// unbounded; however, the operator cannot generate streaming results from - /// these streaming inputs. In this case, the execution mode will be pipeline - /// breaking, e.g. the operator requires unbounded memory to generate results. + /// Some of the operator's input stream(s) are unbounded, but the operator + /// cannot generate streaming results from these streaming inputs. + /// + /// In this case, the execution mode will be pipeline breaking, e.g. the + /// operator requires unbounded memory to generate results. This + /// information is used by the planner when performing sanity checks + /// on plans processings unbounded data sources. PipelineBreaking, }