From 018afe8aceb4f149e62f4315a47294dffcfc18d3 Mon Sep 17 00:00:00 2001
From: Adrian Tanase
Date: Thu, 23 May 2024 12:11:08 +0300
Subject: [PATCH] clippy-tracing

---
 datafusion/core/src/bin/print_config_docs.rs | 1 +
 .../core/src/catalog/information_schema.rs | 40 ++++
 datafusion/core/src/catalog/listing_schema.rs | 9 +
 datafusion/core/src/catalog/mod.rs | 16 ++
 datafusion/core/src/catalog/schema.rs | 8 +
 datafusion/core/src/dataframe/mod.rs | 61 ++++++
 datafusion/core/src/dataframe/parquet.rs | 1 +
 .../avro_to_arrow/arrow_array_reader.rs | 26 +++
 .../core/src/datasource/avro_to_arrow/mod.rs | 2 +
 .../src/datasource/avro_to_arrow/reader.rs | 12 ++
 .../src/datasource/avro_to_arrow/schema.rs | 7 +
 .../core/src/datasource/cte_worktable.rs | 7 +
 .../src/datasource/default_table_source.rs | 9 +
 datafusion/core/src/datasource/empty.rs | 6 +
 .../core/src/datasource/file_format/arrow.rs | 14 ++
 .../core/src/datasource/file_format/avro.rs | 5 +
 .../core/src/datasource/file_format/csv.rs | 37 ++++
 .../file_format/file_compression_type.rs | 9 +
 .../core/src/datasource/file_format/json.rs | 23 +++
 .../core/src/datasource/file_format/mod.rs | 16 ++
 .../src/datasource/file_format/options.rs | 42 +++++
 .../src/datasource/file_format/parquet.rs | 60 ++++++
 .../src/datasource/file_format/write/demux.rs | 9 +
 .../src/datasource/file_format/write/mod.rs | 4 +
 .../file_format/write/orchestration.rs | 3 +
 datafusion/core/src/datasource/function.rs | 3 +
 .../core/src/datasource/listing/helpers.rs | 7 +
 datafusion/core/src/datasource/listing/mod.rs | 6 +
 .../core/src/datasource/listing/table.rs | 42 +++++
 datafusion/core/src/datasource/listing/url.rs | 18 ++
 .../src/datasource/listing_table_factory.rs | 3 +
 datafusion/core/src/datasource/memory.rs | 20 ++
 datafusion/core/src/datasource/mod.rs | 1 +
 .../datasource/physical_plan/arrow_file.rs | 16 ++
 .../core/src/datasource/physical_plan/avro.rs | 16 ++
 .../core/src/datasource/physical_plan/csv.rs | 29 +++
 .../datasource/physical_plan/file_groups.rs | 14 ++
 .../physical_plan/file_scan_config.rs | 18 ++
 .../datasource/physical_plan/file_stream.rs | 20 ++
 .../core/src/datasource/physical_plan/json.rs | 21 +++
 .../core/src/datasource/physical_plan/mod.rs | 17 ++
 .../physical_plan/parquet/metrics.rs | 1 +
 .../datasource/physical_plan/parquet/mod.rs | 55 ++++++
 .../physical_plan/parquet/page_filter.rs | 13 ++
 .../physical_plan/parquet/row_filter.rs | 12 ++
 .../physical_plan/parquet/row_groups.rs | 31 ++++
 .../physical_plan/parquet/statistics.rs | 23 +++
 .../datasource/physical_plan/statistics.rs | 8 +
 datafusion/core/src/datasource/provider.rs | 2 +
 datafusion/core/src/datasource/statistics.rs | 7 +
 datafusion/core/src/datasource/stream.rs | 23 +++
 datafusion/core/src/datasource/streaming.rs | 6 +
 datafusion/core/src/datasource/view.rs | 10 +
 datafusion/core/src/execution/context/avro.rs | 2 +
 datafusion/core/src/execution/context/csv.rs | 3 +
 datafusion/core/src/execution/context/json.rs | 3 +
 datafusion/core/src/execution/context/mod.rs | 174 ++++++++++++++++++
 .../core/src/execution/context/parquet.rs | 3 +
 .../aggregate_statistics.rs | 17 ++
 .../physical_optimizer/coalesce_batches.rs | 4 +
 .../combine_partial_final_agg.rs | 13 ++
 .../physical_optimizer/convert_first_last.rs | 7 +
 .../enforce_distribution.rs | 54 ++++++
 .../src/physical_optimizer/enforce_sorting.rs | 18 ++
 .../src/physical_optimizer/join_selection.rs | 33 ++++
 .../limited_distinct_aggregation.rs | 11 ++
 .../core/src/physical_optimizer/optimizer.rs | 3 +
 .../physical_optimizer/output_requirements.rs | 22 +++
 .../physical_optimizer/pipeline_checker.rs | 6 +
 .../physical_optimizer/projection_pushdown.rs | 49 +++++
 .../core/src/physical_optimizer/pruning.rs | 84 +++++++++
 .../replace_with_order_preserving_variants.rs | 18 ++
 .../src/physical_optimizer/sort_pushdown.rs | 7 +
 .../core/src/physical_optimizer/test_utils.rs | 28 +++
 .../physical_optimizer/topk_aggregation.rs | 7 +
 .../core/src/physical_optimizer/utils.rs | 9 +
 datafusion/core/src/physical_planner.rs | 54 ++++++
 datafusion/core/src/test/mod.rs | 24 +++
 datafusion/core/src/test/object_store.rs | 3 +
 datafusion/core/src/test/variable.rs | 6 +
 datafusion/core/src/test_util/mod.rs | 24 +++
 datafusion/core/src/test_util/parquet.rs | 6 +
 .../src/aggregates/group_values/bytes.rs | 7 +
 .../src/aggregates/group_values/mod.rs | 1 +
 .../src/aggregates/group_values/primitive.rs | 8 +
 .../src/aggregates/group_values/row.rs | 7 +
 .../physical-plan/src/aggregates/mod.rs | 75 ++++++
 .../src/aggregates/no_grouping.rs | 4 +
 .../src/aggregates/order/full.rs | 6 +
 .../physical-plan/src/aggregates/order/mod.rs | 6 +
 .../src/aggregates/order/partial.rs | 7 +
 .../physical-plan/src/aggregates/row_hash.rs | 15 ++
 .../src/aggregates/topk/hash_table.rs | 25 +++
 .../physical-plan/src/aggregates/topk/heap.rs | 32 ++++
 .../src/aggregates/topk/priority_map.rs | 6 +
 .../src/aggregates/topk_stream.rs | 4 +
 datafusion/physical-plan/src/analyze.rs | 14 ++
 .../physical-plan/src/coalesce_batches.rs | 25 +++
 .../physical-plan/src/coalesce_partitions.rs | 13 ++
 datafusion/physical-plan/src/common.rs | 15 ++
 datafusion/physical-plan/src/display.rs | 32 ++++
 datafusion/physical-plan/src/empty.rs | 13 ++
 datafusion/physical-plan/src/explain.rs | 12 ++
 datafusion/physical-plan/src/filter.rs | 24 +++
 datafusion/physical-plan/src/insert.rs | 22 +++
 .../physical-plan/src/joins/cross_join.rs | 26 +++
 .../physical-plan/src/joins/hash_join.rs | 62 +++++++
 .../src/joins/nested_loop_join.rs | 33 ++++
 .../src/joins/sort_merge_join.rs | 66 +++++++
 .../src/joins/stream_join_utils.rs | 29 +++
 .../src/joins/symmetric_hash_join.rs | 62 +++++++
 .../physical-plan/src/joins/test_utils.rs | 10 +
 datafusion/physical-plan/src/joins/utils.rs | 47 +++++
 datafusion/physical-plan/src/lib.rs | 44 +++++
 datafusion/physical-plan/src/limit.rs | 43 +++++
 datafusion/physical-plan/src/memory.rs | 21 +++
 .../physical-plan/src/metrics/baseline.rs | 15 ++
 .../physical-plan/src/metrics/builder.rs | 18 ++
 datafusion/physical-plan/src/metrics/mod.rs | 30 +++
 datafusion/physical-plan/src/metrics/value.rs | 44 +++++
 .../physical-plan/src/placeholder_row.rs | 13 ++
 datafusion/physical-plan/src/projection.rs | 23 +++
 .../physical-plan/src/recursive_query.rs | 21 +++
 .../src/repartition/distributor_channels.rs | 20 ++
 .../physical-plan/src/repartition/mod.rs | 43 +++++
 datafusion/physical-plan/src/sorts/builder.rs | 7 +
 datafusion/physical-plan/src/sorts/cursor.rs | 25 +++
 datafusion/physical-plan/src/sorts/merge.rs | 12 ++
 .../physical-plan/src/sorts/partial_sort.rs | 27 +++
 datafusion/physical-plan/src/sorts/sort.rs | 42 +++++
 .../src/sorts/sort_preserving_merge.rs | 26 +++
 datafusion/physical-plan/src/sorts/stream.rs | 11 ++
 .../src/sorts/streaming_merge.rs | 1 +
 datafusion/physical-plan/src/stream.rs | 25 +++
 datafusion/physical-plan/src/streaming.rs | 23 +++
 datafusion/physical-plan/src/test.rs | 10 +
 datafusion/physical-plan/src/test/exec.rs | 69 +++++++
 datafusion/physical-plan/src/topk/mod.rs | 31 ++++
 datafusion/physical-plan/src/tree_node.rs | 9 +
 datafusion/physical-plan/src/union.rs | 37 ++++
datafusion/physical-plan/src/unnest.rs | 31 ++++ datafusion/physical-plan/src/values.rs | 12 ++ datafusion/physical-plan/src/visitor.rs | 2 + .../src/windows/bounded_window_agg_exec.rs | 63 +++++++ datafusion/physical-plan/src/windows/mod.rs | 25 +++ .../src/windows/window_agg_exec.rs | 24 +++ datafusion/physical-plan/src/work_table.rs | 18 ++ 147 files changed, 3134 insertions(+) diff --git a/datafusion/core/src/bin/print_config_docs.rs b/datafusion/core/src/bin/print_config_docs.rs index f0390f2f668f9..a29648c7e3a11 100644 --- a/datafusion/core/src/bin/print_config_docs.rs +++ b/datafusion/core/src/bin/print_config_docs.rs @@ -17,6 +17,7 @@ use datafusion::config::ConfigOptions; +#[tracing::instrument(level = "trace", skip())] fn main() { let docs = ConfigOptions::generate_config_markdown(); println!("{docs}"); diff --git a/datafusion/core/src/catalog/information_schema.rs b/datafusion/core/src/catalog/information_schema.rs index a9d4590a5e282..5e5f6bd07705b 100644 --- a/datafusion/core/src/catalog/information_schema.rs +++ b/datafusion/core/src/catalog/information_schema.rs @@ -64,6 +64,7 @@ pub struct InformationSchemaProvider { } impl InformationSchemaProvider { + #[tracing::instrument(level = "trace", skip(catalog_list))] /// Creates a new [`InformationSchemaProvider`] for the provided `catalog_list` pub fn new(catalog_list: Arc) -> Self { Self { @@ -78,6 +79,7 @@ struct InformationSchemaConfig { } impl InformationSchemaConfig { + #[tracing::instrument(level = "trace", skip(self, builder))] /// Construct the `information_schema.tables` virtual table async fn make_tables( &self, @@ -120,6 +122,7 @@ impl InformationSchemaConfig { Ok(()) } + #[tracing::instrument(level = "trace", skip(self, builder))] async fn make_schemata(&self, builder: &mut InformationSchemataBuilder) { for catalog_name in self.catalog_list.catalog_names() { let catalog = self.catalog_list.catalog(&catalog_name).unwrap(); @@ -135,6 +138,7 @@ impl InformationSchemaConfig { } } + #[tracing::instrument(level = "trace", skip(self, builder))] async fn make_views( &self, builder: &mut InformationSchemaViewBuilder, @@ -164,6 +168,7 @@ impl InformationSchemaConfig { Ok(()) } + #[tracing::instrument(level = "trace", skip(self, builder))] /// Construct the `information_schema.columns` virtual table async fn make_columns( &self, @@ -199,6 +204,7 @@ impl InformationSchemaConfig { Ok(()) } + #[tracing::instrument(level = "trace", skip(self, config_options, builder))] /// Construct the `information_schema.df_settings` virtual table fn make_df_settings( &self, @@ -213,10 +219,12 @@ impl InformationSchemaConfig { #[async_trait] impl SchemaProvider for InformationSchemaProvider { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn table_names(&self) -> Vec { INFORMATION_SCHEMA_TABLES .iter() @@ -224,6 +232,7 @@ impl SchemaProvider for InformationSchemaProvider { .collect() } + #[tracing::instrument(level = "trace", skip(self, name))] async fn table( &self, name: &str, @@ -243,6 +252,7 @@ impl SchemaProvider for InformationSchemaProvider { ))) } + #[tracing::instrument(level = "trace", skip(self, name))] fn table_exist(&self, name: &str) -> bool { INFORMATION_SCHEMA_TABLES.contains(&name.to_ascii_lowercase().as_str()) } @@ -254,6 +264,7 @@ struct InformationSchemaTables { } impl InformationSchemaTables { + #[tracing::instrument(level = "trace", skip(config))] fn new(config: InformationSchemaConfig) -> Self { let schema = 
Arc::new(Schema::new(vec![ Field::new("table_catalog", DataType::Utf8, false), @@ -265,6 +276,7 @@ impl InformationSchemaTables { Self { schema, config } } + #[tracing::instrument(level = "trace", skip(self))] fn builder(&self) -> InformationSchemaTablesBuilder { InformationSchemaTablesBuilder { catalog_names: StringBuilder::new(), @@ -277,10 +289,12 @@ impl InformationSchemaTables { } impl PartitionStream for InformationSchemaTables { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> &SchemaRef { &self.schema } + #[tracing::instrument(level = "trace", skip(self, _ctx))] fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { let mut builder = self.builder(); let config = self.config.clone(); @@ -307,6 +321,7 @@ struct InformationSchemaTablesBuilder { } impl InformationSchemaTablesBuilder { + #[tracing::instrument(level = "trace", skip(self, catalog_name, schema_name, table_name, table_type))] fn add_table( &mut self, catalog_name: impl AsRef, @@ -325,6 +340,7 @@ impl InformationSchemaTablesBuilder { }); } + #[tracing::instrument(level = "trace", skip(self))] fn finish(&mut self) -> RecordBatch { RecordBatch::try_new( self.schema.clone(), @@ -345,6 +361,7 @@ struct InformationSchemaViews { } impl InformationSchemaViews { + #[tracing::instrument(level = "trace", skip(config))] fn new(config: InformationSchemaConfig) -> Self { let schema = Arc::new(Schema::new(vec![ Field::new("table_catalog", DataType::Utf8, false), @@ -356,6 +373,7 @@ impl InformationSchemaViews { Self { schema, config } } + #[tracing::instrument(level = "trace", skip(self))] fn builder(&self) -> InformationSchemaViewBuilder { InformationSchemaViewBuilder { catalog_names: StringBuilder::new(), @@ -368,10 +386,12 @@ impl InformationSchemaViews { } impl PartitionStream for InformationSchemaViews { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> &SchemaRef { &self.schema } + #[tracing::instrument(level = "trace", skip(self, _ctx))] fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { let mut builder = self.builder(); let config = self.config.clone(); @@ -398,6 +418,7 @@ struct InformationSchemaViewBuilder { } impl InformationSchemaViewBuilder { + #[tracing::instrument(level = "trace", skip(self, catalog_name, schema_name, table_name, definition))] fn add_view( &mut self, catalog_name: impl AsRef, @@ -412,6 +433,7 @@ impl InformationSchemaViewBuilder { self.definitions.append_option(definition.as_ref()); } + #[tracing::instrument(level = "trace", skip(self))] fn finish(&mut self) -> RecordBatch { RecordBatch::try_new( self.schema.clone(), @@ -432,6 +454,7 @@ struct InformationSchemaColumns { } impl InformationSchemaColumns { + #[tracing::instrument(level = "trace", skip(config))] fn new(config: InformationSchemaConfig) -> Self { let schema = Arc::new(Schema::new(vec![ Field::new("table_catalog", DataType::Utf8, false), @@ -454,6 +477,7 @@ impl InformationSchemaColumns { Self { schema, config } } + #[tracing::instrument(level = "trace", skip(self))] fn builder(&self) -> InformationSchemaColumnsBuilder { // StringBuilder requires providing an initial capacity, so // pick 10 here arbitrarily as this is not performance @@ -482,10 +506,12 @@ impl InformationSchemaColumns { } impl PartitionStream for InformationSchemaColumns { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> &SchemaRef { &self.schema } + #[tracing::instrument(level = "trace", skip(self, _ctx))] fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { let mut builder = 
self.builder(); let config = self.config.clone(); @@ -523,6 +549,7 @@ struct InformationSchemaColumnsBuilder { } impl InformationSchemaColumnsBuilder { + #[tracing::instrument(level = "trace", skip(self, catalog_name, schema_name, table_name, field_position, field))] fn add_column( &mut self, catalog_name: &str, @@ -617,6 +644,7 @@ impl InformationSchemaColumnsBuilder { self.interval_types.append_null(); } + #[tracing::instrument(level = "trace", skip(self))] fn finish(&mut self) -> RecordBatch { RecordBatch::try_new( self.schema.clone(), @@ -648,6 +676,7 @@ struct InformationSchemata { } impl InformationSchemata { + #[tracing::instrument(level = "trace", skip(config))] fn new(config: InformationSchemaConfig) -> Self { let schema = Arc::new(Schema::new(vec![ Field::new("catalog_name", DataType::Utf8, false), @@ -661,6 +690,7 @@ impl InformationSchemata { Self { schema, config } } + #[tracing::instrument(level = "trace", skip(self))] fn builder(&self) -> InformationSchemataBuilder { InformationSchemataBuilder { schema: self.schema.clone(), @@ -687,6 +717,7 @@ struct InformationSchemataBuilder { } impl InformationSchemataBuilder { + #[tracing::instrument(level = "trace", skip(self, catalog_name, schema_name, schema_owner))] fn add_schemata( &mut self, catalog_name: &str, @@ -707,6 +738,7 @@ impl InformationSchemataBuilder { self.sql_path.append_null(); } + #[tracing::instrument(level = "trace", skip(self))] fn finish(&mut self) -> RecordBatch { RecordBatch::try_new( self.schema.clone(), @@ -725,10 +757,12 @@ impl InformationSchemataBuilder { } impl PartitionStream for InformationSchemata { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> &SchemaRef { &self.schema } + #[tracing::instrument(level = "trace", skip(self, _ctx))] fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { let mut builder = self.builder(); let config = self.config.clone(); @@ -749,6 +783,7 @@ struct InformationSchemaDfSettings { } impl InformationSchemaDfSettings { + #[tracing::instrument(level = "trace", skip(config))] fn new(config: InformationSchemaConfig) -> Self { let schema = Arc::new(Schema::new(vec![ Field::new("name", DataType::Utf8, false), @@ -759,6 +794,7 @@ impl InformationSchemaDfSettings { Self { schema, config } } + #[tracing::instrument(level = "trace", skip(self))] fn builder(&self) -> InformationSchemaDfSettingsBuilder { InformationSchemaDfSettingsBuilder { names: StringBuilder::new(), @@ -770,10 +806,12 @@ impl InformationSchemaDfSettings { } impl PartitionStream for InformationSchemaDfSettings { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> &SchemaRef { &self.schema } + #[tracing::instrument(level = "trace", skip(self, ctx))] fn execute(&self, ctx: Arc) -> SendableRecordBatchStream { let config = self.config.clone(); let mut builder = self.builder(); @@ -797,12 +835,14 @@ struct InformationSchemaDfSettingsBuilder { } impl InformationSchemaDfSettingsBuilder { + #[tracing::instrument(level = "trace", skip(self, entry))] fn add_setting(&mut self, entry: ConfigEntry) { self.names.append_value(entry.key); self.values.append_option(entry.value); self.descriptions.append_value(entry.description); } + #[tracing::instrument(level = "trace", skip(self))] fn finish(&mut self) -> RecordBatch { RecordBatch::try_new( self.schema.clone(), diff --git a/datafusion/core/src/catalog/listing_schema.rs b/datafusion/core/src/catalog/listing_schema.rs index 29f3e4ad81819..c020eb259798f 100644 --- a/datafusion/core/src/catalog/listing_schema.rs +++ 
b/datafusion/core/src/catalog/listing_schema.rs @@ -60,6 +60,7 @@ pub struct ListingSchemaProvider { } impl ListingSchemaProvider { + #[tracing::instrument(level = "trace", skip(authority, path, factory, store, format))] /// Create a new `ListingSchemaProvider` /// /// Arguments: @@ -86,6 +87,7 @@ impl ListingSchemaProvider { } } + #[tracing::instrument(level = "trace", skip(self, state))] /// Reload table information from ObjectStore pub async fn refresh(&self, state: &SessionState) -> datafusion_common::Result<()> { let entries: Vec<_> = self.store.list(Some(&self.path)).try_collect().await?; @@ -155,10 +157,12 @@ impl ListingSchemaProvider { #[async_trait] impl SchemaProvider for ListingSchemaProvider { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn table_names(&self) -> Vec { self.tables .lock() @@ -168,6 +172,7 @@ impl SchemaProvider for ListingSchemaProvider { .collect() } + #[tracing::instrument(level = "trace", skip(self, name))] async fn table( &self, name: &str, @@ -180,6 +185,7 @@ impl SchemaProvider for ListingSchemaProvider { .cloned()) } + #[tracing::instrument(level = "trace", skip(self, name, table))] fn register_table( &self, name: String, @@ -192,6 +198,7 @@ impl SchemaProvider for ListingSchemaProvider { Ok(Some(table)) } + #[tracing::instrument(level = "trace", skip(self, name))] fn deregister_table( &self, name: &str, @@ -199,6 +206,7 @@ impl SchemaProvider for ListingSchemaProvider { Ok(self.tables.lock().expect("Can't lock tables").remove(name)) } + #[tracing::instrument(level = "trace", skip(self, name))] fn table_exist(&self, name: &str) -> bool { self.tables .lock() @@ -216,6 +224,7 @@ struct TablePath<'a> { } impl TablePath<'_> { + #[tracing::instrument(level = "trace", skip(self))] /// Format the path with a '/' appended if its a directory. /// Clients (eg. 
object_store listing) can and will use the presence of trailing slash as a heuristic fn to_string(&self) -> Option { diff --git a/datafusion/core/src/catalog/mod.rs b/datafusion/core/src/catalog/mod.rs index 209d9b2af297b..c52299e07a0f8 100644 --- a/datafusion/core/src/catalog/mod.rs +++ b/datafusion/core/src/catalog/mod.rs @@ -64,6 +64,7 @@ pub struct MemoryCatalogProviderList { } impl MemoryCatalogProviderList { + #[tracing::instrument(level = "trace", skip())] /// Instantiates a new `MemoryCatalogProviderList` with an empty collection of catalogs pub fn new() -> Self { Self { @@ -73,16 +74,19 @@ impl MemoryCatalogProviderList { } impl Default for MemoryCatalogProviderList { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self::new() } } impl CatalogProviderList for MemoryCatalogProviderList { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self, name, catalog))] fn register_catalog( &self, name: String, @@ -91,10 +95,12 @@ impl CatalogProviderList for MemoryCatalogProviderList { self.catalogs.insert(name, catalog) } + #[tracing::instrument(level = "trace", skip(self))] fn catalog_names(&self) -> Vec { self.catalogs.iter().map(|c| c.key().clone()).collect() } + #[tracing::instrument(level = "trace", skip(self, name))] fn catalog(&self, name: &str) -> Option> { self.catalogs.get(name).map(|c| c.value().clone()) } @@ -236,6 +242,7 @@ pub struct MemoryCatalogProvider { } impl MemoryCatalogProvider { + #[tracing::instrument(level = "trace", skip())] /// Instantiates a new MemoryCatalogProvider with an empty collection of schemas. pub fn new() -> Self { Self { @@ -245,24 +252,29 @@ impl MemoryCatalogProvider { } impl Default for MemoryCatalogProvider { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self::new() } } impl CatalogProvider for MemoryCatalogProvider { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn schema_names(&self) -> Vec { self.schemas.iter().map(|s| s.key().clone()).collect() } + #[tracing::instrument(level = "trace", skip(self, name))] fn schema(&self, name: &str) -> Option> { self.schemas.get(name).map(|s| s.value().clone()) } + #[tracing::instrument(level = "trace", skip(self, name, schema))] fn register_schema( &self, name: &str, @@ -271,6 +283,7 @@ impl CatalogProvider for MemoryCatalogProvider { Ok(self.schemas.insert(name.into(), schema)) } + #[tracing::instrument(level = "trace", skip(self, name, cascade))] fn deregister_schema( &self, name: &str, @@ -308,14 +321,17 @@ mod tests { // mimic a new CatalogProvider and ensure it does not support registering schemas struct TestProvider {} impl CatalogProvider for TestProvider { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn schema_names(&self) -> Vec { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self, _name))] fn schema(&self, _name: &str) -> Option> { unimplemented!() } diff --git a/datafusion/core/src/catalog/schema.rs b/datafusion/core/src/catalog/schema.rs index 8249c3a5330f9..5f9f1c0065abc 100644 --- a/datafusion/core/src/catalog/schema.rs +++ b/datafusion/core/src/catalog/schema.rs @@ -87,6 +87,7 @@ pub struct MemorySchemaProvider { } impl MemorySchemaProvider { + #[tracing::instrument(level = "trace", skip())] /// Instantiates a new MemorySchemaProvider with 
an empty collection of tables. pub fn new() -> Self { Self { @@ -96,6 +97,7 @@ impl MemorySchemaProvider { } impl Default for MemorySchemaProvider { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self::new() } @@ -103,10 +105,12 @@ impl Default for MemorySchemaProvider { #[async_trait] impl SchemaProvider for MemorySchemaProvider { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn table_names(&self) -> Vec { self.tables .iter() @@ -114,6 +118,7 @@ impl SchemaProvider for MemorySchemaProvider { .collect() } + #[tracing::instrument(level = "trace", skip(self, name))] async fn table( &self, name: &str, @@ -121,6 +126,7 @@ impl SchemaProvider for MemorySchemaProvider { Ok(self.tables.get(name).map(|table| table.value().clone())) } + #[tracing::instrument(level = "trace", skip(self, name, table))] fn register_table( &self, name: String, @@ -132,10 +138,12 @@ impl SchemaProvider for MemorySchemaProvider { Ok(self.tables.insert(name, table)) } + #[tracing::instrument(level = "trace", skip(self, name))] fn deregister_table(&self, name: &str) -> Result>> { Ok(self.tables.remove(name).map(|(_, table)| table)) } + #[tracing::instrument(level = "trace", skip(self, name))] fn table_exist(&self, name: &str) -> bool { self.tables.contains_key(name) } diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 04aaf5a890a80..062ef4dfb3977 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -71,6 +71,7 @@ pub struct DataFrameWriteOptions { } impl DataFrameWriteOptions { + #[tracing::instrument(level = "trace", skip())] /// Create a new DataFrameWriteOptions with default values pub fn new() -> Self { DataFrameWriteOptions { @@ -79,18 +80,21 @@ impl DataFrameWriteOptions { partition_by: vec![], } } + #[tracing::instrument(level = "trace", skip(self, overwrite))] /// Set the overwrite option to true or false pub fn with_overwrite(mut self, overwrite: bool) -> Self { self.overwrite = overwrite; self } + #[tracing::instrument(level = "trace", skip(self, single_file_output))] /// Set the single_file_output value to true or false pub fn with_single_file_output(mut self, single_file_output: bool) -> Self { self.single_file_output = single_file_output; self } + #[tracing::instrument(level = "trace", skip(self, partition_by))] /// Sets the partition_by columns for output partitioning pub fn with_partition_by(mut self, partition_by: Vec) -> Self { self.partition_by = partition_by; @@ -99,6 +103,7 @@ impl DataFrameWriteOptions { } impl Default for DataFrameWriteOptions { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self::new() } @@ -164,6 +169,7 @@ pub struct DataFrame { } impl DataFrame { + #[tracing::instrument(level = "trace", skip(session_state, plan))] /// Create a new `DataFrame ` based on an existing `LogicalPlan` /// /// This is a low-level method and is not typically used by end users. See @@ -176,11 +182,13 @@ impl DataFrame { } } + #[tracing::instrument(level = "trace", skip(self))] /// Consume the DataFrame and produce a physical plan pub async fn create_physical_plan(self) -> Result> { self.session_state.create_physical_plan(&self.plan).await } + #[tracing::instrument(level = "trace", skip(self, columns))] /// Filter the DataFrame by column. Returns a new DataFrame only containing the /// specified columns. 
/// @@ -211,6 +219,7 @@ impl DataFrame { self.select(expr) } + #[tracing::instrument(level = "trace", skip(self, expr_list))] /// Project arbitrary expressions (like SQL SELECT expressions) into a new /// `DataFrame`. /// @@ -243,12 +252,14 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self, column))] /// Expand each list element of a column to multiple rows. #[deprecated(since = "37.0.0", note = "use unnest_columns instead")] pub fn unnest_column(self, column: &str) -> Result { self.unnest_columns(&[column]) } + #[tracing::instrument(level = "trace", skip(self, column, options))] /// Expand each list element of a column to multiple rows, with /// behavior controlled by [`UnnestOptions`]. /// @@ -263,6 +274,7 @@ impl DataFrame { self.unnest_columns_with_options(&[column], options) } + #[tracing::instrument(level = "trace", skip(self, columns))] /// Expand multiple list columns into a set of rows. /// /// See also: @@ -286,6 +298,7 @@ impl DataFrame { self.unnest_columns_with_options(columns, UnnestOptions::new()) } + #[tracing::instrument(level = "trace", skip(self, columns, options))] /// Expand multiple list columns into a set of rows, with /// behavior controlled by [`UnnestOptions`]. /// @@ -306,6 +319,7 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self, predicate))] /// Return a DataFrame with only rows for which `predicate` evaluates to /// `true`. /// @@ -334,6 +348,7 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self, group_expr, aggr_expr))] /// Return a new `DataFrame` that aggregates the rows of the current /// `DataFrame`, first optionally grouping by the given expressions. /// @@ -368,6 +383,7 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self, window_exprs))] /// Return a new DataFrame that adds the result of evaluating one or more /// window functions ([`Expr::WindowFunction`]) to the existing columns pub fn window(self, window_exprs: Vec) -> Result { @@ -380,6 +396,7 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self, skip, fetch))] /// Returns a new `DataFrame` with a limited number of rows. /// /// # Arguments @@ -408,6 +425,7 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self, dataframe))] /// Calculate the union of two [`DataFrame`]s, preserving duplicate rows. /// /// The two [`DataFrame`]s must have exactly the same schema @@ -435,6 +453,7 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self, dataframe))] /// Calculate the distinct union of two [`DataFrame`]s. /// /// The two [`DataFrame`]s must have exactly the same schema. Any duplicate @@ -463,6 +482,7 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self))] /// Return a new `DataFrame` with all duplicated rows removed. /// /// # Example @@ -485,6 +505,7 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self))] /// Return a new `DataFrame` that has statistics for a DataFrame. /// /// Only summarizes numeric datatypes at the moment and returns nulls for @@ -670,6 +691,7 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self, expr))] /// Sort the DataFrame by the specified sorting expressions. 
/// /// Note that any expression can be turned into @@ -699,6 +721,7 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self, right, join_type, left_cols, right_cols, filter))] /// Join this `DataFrame` with another `DataFrame` using explicitly specified /// columns and an optional filter expression. /// @@ -756,6 +779,7 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self, right, join_type, on_exprs))] /// Join this `DataFrame` with another `DataFrame` using the specified /// expressions. /// @@ -809,6 +833,7 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self, partitioning_scheme))] /// Repartition a DataFrame based on a logical partitioning scheme. /// /// # Example @@ -833,6 +858,7 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self))] /// Return the total number of rows in this `DataFrame`. /// /// Note that this method will actually run a plan to calculate the count, @@ -869,6 +895,7 @@ impl DataFrame { Ok(len) } + #[tracing::instrument(level = "trace", skip(self))] /// Execute this `DataFrame` and buffer all resulting `RecordBatch`es into memory. /// /// Prior to calling `collect`, modifying a DataFrame simply updates a plan @@ -894,6 +921,7 @@ impl DataFrame { collect(plan, task_ctx).await } + #[tracing::instrument(level = "trace", skip(self))] /// Execute the `DataFrame` and print the results to the console. /// /// # Example @@ -913,6 +941,7 @@ impl DataFrame { Ok(pretty::print_batches(&results)?) } + #[tracing::instrument(level = "trace", skip(self, num))] /// Execute the `DataFrame` and print only the first `num` rows of the /// result to the console. /// @@ -933,11 +962,13 @@ impl DataFrame { Ok(pretty::print_batches(&results)?) } + #[tracing::instrument(level = "trace", skip(self))] /// Return a new [`TaskContext`] which would be used to execute this DataFrame pub fn task_ctx(&self) -> TaskContext { TaskContext::from(self.session_state.as_ref()) } + #[tracing::instrument(level = "trace", skip(self))] /// Executes this DataFrame and returns a stream over a single partition /// /// See [Self::collect] to buffer the `RecordBatch`es in memory. @@ -965,6 +996,7 @@ impl DataFrame { execute_stream(plan, task_ctx) } + #[tracing::instrument(level = "trace", skip(self))] /// Executes this DataFrame and collects all results into a vector of vector of RecordBatch /// maintaining the input partitioning. /// @@ -986,6 +1018,7 @@ impl DataFrame { collect_partitioned(plan, task_ctx).await } + #[tracing::instrument(level = "trace", skip(self))] /// Executes this DataFrame and returns one stream per partition. /// /// # Example @@ -1012,6 +1045,7 @@ impl DataFrame { execute_stream_partitioned(plan, task_ctx) } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the `DFSchema` describing the output of this DataFrame. /// /// The output `DFSchema` contains information on the name, data type, and @@ -1033,17 +1067,20 @@ impl DataFrame { self.plan.schema() } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to the unoptimized [`LogicalPlan`] that comprises /// this DataFrame. See [`Self::into_unoptimized_plan`] for more details. 
pub fn logical_plan(&self) -> &LogicalPlan { &self.plan } + #[tracing::instrument(level = "trace", skip(self))] /// Returns both the [`LogicalPlan`] and [`SessionState`] that comprise this [`DataFrame`] pub fn into_parts(self) -> (SessionState, LogicalPlan) { (*self.session_state, self.plan) } + #[tracing::instrument(level = "trace", skip(self))] /// Return the [`LogicalPlan`] represented by this DataFrame without running /// any optimizers /// @@ -1055,6 +1092,7 @@ impl DataFrame { self.plan } + #[tracing::instrument(level = "trace", skip(self))] /// Return the optimized [`LogicalPlan`] represented by this DataFrame. /// /// Note: This method should not be used outside testing -- see @@ -1064,6 +1102,7 @@ impl DataFrame { self.session_state.optimize(&self.plan) } + #[tracing::instrument(level = "trace", skip(self))] /// Converts this [`DataFrame`] into a [`TableProvider`] that can be registered /// as a table view using [`SessionContext::register_table`]. /// @@ -1073,6 +1112,7 @@ impl DataFrame { Arc::new(DataFrameTableProvider { plan: self.plan }) } + #[tracing::instrument(level = "trace", skip(self, verbose, analyze))] /// Return a DataFrame with the explanation of its plan so far. /// /// if `analyze` is specified, runs the plan and reports metrics @@ -1101,6 +1141,7 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self))] /// Return a `FunctionRegistry` used to plan udf's calls /// /// # Example @@ -1120,6 +1161,7 @@ impl DataFrame { self.session_state.as_ref() } + #[tracing::instrument(level = "trace", skip(self, dataframe))] /// Calculate the intersection of two [`DataFrame`]s. The two [`DataFrame`]s must have exactly the same schema /// /// ``` @@ -1144,6 +1186,7 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self, dataframe))] /// Calculate the exception of two [`DataFrame`]s. The two [`DataFrame`]s must have exactly the same schema /// /// ``` @@ -1168,6 +1211,7 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self, table_name, write_options))] /// Execute this `DataFrame` and write the results to `table_name`. /// /// Returns a single [RecordBatch] containing a single column and @@ -1199,6 +1243,7 @@ impl DataFrame { .await } + #[tracing::instrument(level = "trace", skip(self, path, options, writer_options))] /// Execute the `DataFrame` and write the results to CSV file(s). /// /// # Example @@ -1253,6 +1298,7 @@ impl DataFrame { .await } + #[tracing::instrument(level = "trace", skip(self, path, options, writer_options))] /// Execute the `DataFrame` and write the results to JSON file(s). /// /// # Example @@ -1308,6 +1354,7 @@ impl DataFrame { .await } + #[tracing::instrument(level = "trace", skip(self, name, expr))] /// Add an additional column to the DataFrame. /// /// # Example @@ -1357,6 +1404,7 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self, old_name, new_name))] /// Rename one column by applying a new projection. This is a no-op if the column to be /// renamed does not exist. /// @@ -1425,6 +1473,7 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self, query_values))] /// Replace all parameters in logical plan with the specified /// values, in preparation for execution. /// @@ -1490,6 +1539,7 @@ impl DataFrame { }) } + #[tracing::instrument(level = "trace", skip(self))] /// Cache DataFrame as a memory table. 
/// /// ``` @@ -1521,14 +1571,17 @@ struct DataFrameTableProvider { #[async_trait] impl TableProvider for DataFrameTableProvider { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn get_logical_plan(&self) -> Option<&LogicalPlan> { Some(&self.plan) } + #[tracing::instrument(level = "trace", skip(self, filters))] fn supports_filters_pushdown( &self, filters: &[&Expr], @@ -1537,15 +1590,18 @@ impl TableProvider for DataFrameTableProvider { Ok(vec![TableProviderFilterPushDown::Exact; filters.len()]) } + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { let schema: Schema = self.plan.schema().as_ref().into(); Arc::new(schema) } + #[tracing::instrument(level = "trace", skip(self))] fn table_type(&self) -> TableType { TableType::View } + #[tracing::instrument(level = "trace", skip(self, state, projection, filters, limit))] async fn scan( &self, state: &SessionState, @@ -1595,6 +1651,7 @@ mod tests { use datafusion_physical_plan::{get_plan_string, ExecutionPlanProperties}; // Get string representation of the plan + #[tracing::instrument(level = "trace", skip(df, expected))] async fn assert_physical_plan(df: &DataFrame, expected: Vec<&str>) { let physical_plan = df .clone() @@ -1609,6 +1666,7 @@ mod tests { ); } + #[tracing::instrument(level = "trace", skip())] pub fn table_with_constraints() -> Arc { let dual_schema = Arc::new(Schema::new(vec![ Field::new("id", DataType::Int32, false), @@ -1630,6 +1688,7 @@ mod tests { Arc::new(provider) } + #[tracing::instrument(level = "trace", skip(df))] async fn assert_logical_expr_schema_eq_physical_expr_schema( df: DataFrame, ) -> Result<()> { @@ -2419,11 +2478,13 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(plan1, plan2))] /// Compare the formatted string representation of two plans for equality fn assert_same_plan(plan1: &LogicalPlan, plan2: &LogicalPlan) { assert_eq!(format!("{plan1:?}"), format!("{plan2:?}")); } + #[tracing::instrument(level = "trace", skip(sql))] /// Create a logical plan from a SQL query async fn create_plan(sql: &str) -> Result { let mut ctx = SessionContext::new(); diff --git a/datafusion/core/src/dataframe/parquet.rs b/datafusion/core/src/dataframe/parquet.rs index 0ec46df0ae5d3..2bb2c8ec6371a 100644 --- a/datafusion/core/src/dataframe/parquet.rs +++ b/datafusion/core/src/dataframe/parquet.rs @@ -22,6 +22,7 @@ use super::{ use datafusion_common::config::{FormatOptions, TableParquetOptions}; impl DataFrame { + #[tracing::instrument(level = "trace", skip(self, path, options, writer_options))] /// Execute the `DataFrame` and write the results to Parquet file(s). 
/// /// # Example diff --git a/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs b/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs index 3a5d50bba07fc..739594d6bad55 100644 --- a/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs +++ b/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs @@ -61,6 +61,7 @@ pub struct AvroArrowArrayReader<'a, R: Read> { } impl<'a, R: Read> AvroArrowArrayReader<'a, R> { + #[tracing::instrument(level = "trace", skip(reader, schema, projection))] pub fn try_new( reader: R, schema: SchemaRef, @@ -77,6 +78,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> { }) } + #[tracing::instrument(level = "trace", skip(schema))] pub fn schema_lookup(schema: AvroSchema) -> Result> { match schema { AvroSchema::Record(RecordSchema { @@ -93,6 +95,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> { } } + #[tracing::instrument(level = "trace", skip(parent_field_name, schema, schema_lookup))] fn child_schema_lookup<'b>( parent_field_name: &str, schema: &AvroSchema, @@ -145,6 +148,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> { Ok(schema_lookup) } + #[tracing::instrument(level = "trace", skip(self, batch_size))] /// Read the next batch of records pub fn next_batch(&mut self, batch_size: usize) -> Option> { let rows_result = self @@ -187,6 +191,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> { Some(arrays.and_then(|arr| RecordBatch::try_new(projected_schema, arr))) } + #[tracing::instrument(level = "trace", skip(self, rows, col_name))] fn build_boolean_array(&self, rows: RecordSlice, col_name: &str) -> ArrayRef { let mut builder = BooleanBuilder::with_capacity(rows.len()); for row in rows { @@ -203,6 +208,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> { Arc::new(builder.finish()) } + #[tracing::instrument(level = "trace", skip(self, rows, col_name))] fn build_primitive_array(&self, rows: RecordSlice, col_name: &str) -> ArrayRef where T: ArrowNumericType + Resolver, @@ -218,6 +224,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> { ) } + #[tracing::instrument(level = "trace", skip(self, row_len))] #[inline(always)] fn build_string_dictionary_builder( &self, @@ -229,6 +236,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> { StringDictionaryBuilder::with_capacity(row_len, row_len, row_len) } + #[tracing::instrument(level = "trace", skip(self, rows, col_name, key_type))] fn build_wrapped_list_array( &self, rows: RecordSlice, @@ -298,6 +306,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> { } } + #[tracing::instrument(level = "trace", skip(self, data_type, col_name, rows))] #[inline(always)] fn list_array_string_array_builder( &self, @@ -395,6 +404,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> { Ok(builder.finish() as ArrayRef) } + #[tracing::instrument(level = "trace", skip(self, rows, col_name))] #[inline(always)] fn build_dictionary_array( &self, @@ -421,6 +431,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> { Ok(Arc::new(builder.finish()) as ArrayRef) } + #[tracing::instrument(level = "trace", skip(self, rows, col_name, key_type, value_type))] #[inline(always)] fn build_string_dictionary_array( &self, @@ -464,6 +475,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> { } } + #[tracing::instrument(level = "trace", skip(self, parent_field_name, rows, list_field))] /// Build a nested GenericListArray from a list of unnested `Value`s fn build_nested_list_array( &self, @@ -639,6 +651,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> { Ok(Arc::new(GenericListArray::::from(list_data))) } + 
#[tracing::instrument(level = "trace", skip(self, rows, parent_field_name, struct_fields, projection))] /// Builds the child values of a `StructArray`, falling short of constructing the StructArray. /// The function does not construct the StructArray as some callers would want the child arrays. /// @@ -866,6 +879,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> { arrays } + #[tracing::instrument(level = "trace", skip(self, rows))] /// Read the primitive list's values into ArrayData fn read_primitive_list_values(&self, rows: &[&Value]) -> ArrayData where @@ -892,6 +906,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> { array.to_data() } + #[tracing::instrument(level = "trace", skip(self, name, row))] fn field_lookup<'b>( &self, name: &str, @@ -904,6 +919,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> { } } +#[tracing::instrument(level = "trace", skip(values))] /// Flattens a list of Avro values, by flattening lists, and treating all other values as /// single-value lists. /// This is used to read into nested lists (list of list, list of struct) and non-dictionary lists. @@ -923,6 +939,7 @@ fn flatten_values<'a>(values: &[&'a Value]) -> Vec<&'a Value> { .collect() } +#[tracing::instrument(level = "trace", skip(values))] /// Flattens a list into string values, dropping Value::Null in the process. /// This is useful for interpreting any Avro array as string, dropping nulls. /// See `value_as_string`. @@ -946,6 +963,7 @@ fn flatten_string_values(values: &[&Value]) -> Vec> { .collect::>>() } +#[tracing::instrument(level = "trace", skip(v))] /// Reads an Avro value as a string, regardless of its type. /// This is useful if the expected datatype is a string, in which case we preserve /// all the values regardless of they type. @@ -963,6 +981,7 @@ fn resolve_string(v: &Value) -> ArrowResult> { .map_err(|e| SchemaError(format!("expected resolvable string : {e:?}"))) } +#[tracing::instrument(level = "trace", skip(v))] fn resolve_u8(v: &Value) -> AvroResult { let int = match v { Value::Int(n) => Ok(Value::Int(*n)), @@ -978,6 +997,7 @@ fn resolve_u8(v: &Value) -> AvroResult { Err(AvroError::GetU8(int.into())) } +#[tracing::instrument(level = "trace", skip(v))] fn resolve_bytes(v: &Value) -> Option> { let v = if let Value::Union(_, b) = v { b } else { v }; match v { @@ -999,6 +1019,7 @@ fn resolve_bytes(v: &Value) -> Option> { }) } +#[tracing::instrument(level = "trace", skip(v, size))] fn resolve_fixed(v: &Value, size: usize) -> Option> { let v = if let Value::Union(_, b) = v { b } else { v }; match v { @@ -1013,6 +1034,7 @@ fn resolve_fixed(v: &Value, size: usize) -> Option> { } } +#[tracing::instrument(level = "trace", skip(value))] fn resolve_boolean(value: &Value) -> Option { let v = if let Value::Union(_, b) = value { b @@ -1029,10 +1051,12 @@ trait Resolver: ArrowPrimitiveType { fn resolve(value: &Value) -> Option; } +#[tracing::instrument(level = "trace", skip(value))] fn resolve_item(value: &Value) -> Option { T::resolve(value) } +#[tracing::instrument(level = "trace", skip(value))] fn maybe_resolve_union(value: &Value) -> &Value { if SchemaKind::from(value) == SchemaKind::Union { // Pull out the Union, and attempt to resolve against it. 
@@ -1050,6 +1074,7 @@ where N: ArrowNumericType, N::Native: num_traits::cast::NumCast, { + #[tracing::instrument(level = "trace", skip(value))] fn resolve(value: &Value) -> Option { let value = maybe_resolve_union(value); match value { @@ -1080,6 +1105,7 @@ mod test { use std::fs::File; use std::sync::Arc; + #[tracing::instrument(level = "trace", skip(name, batch_size))] fn build_reader(name: &str, batch_size: usize) -> Reader { let testdata = crate::test_util::arrow_test_data(); let filename = format!("{testdata}/avro/{name}"); diff --git a/datafusion/core/src/datasource/avro_to_arrow/mod.rs b/datafusion/core/src/datasource/avro_to_arrow/mod.rs index af0bb86a3e273..af26ce13de6ab 100644 --- a/datafusion/core/src/datasource/avro_to_arrow/mod.rs +++ b/datafusion/core/src/datasource/avro_to_arrow/mod.rs @@ -32,6 +32,7 @@ use crate::error::Result; pub use reader::{Reader, ReaderBuilder}; use std::io::Read; +#[tracing::instrument(level = "trace", skip(reader))] #[cfg(feature = "avro")] /// Read Avro schema given a reader pub fn read_avro_schema_from_reader(reader: &mut R) -> Result { @@ -40,6 +41,7 @@ pub fn read_avro_schema_from_reader(reader: &mut R) -> Result { schema::to_arrow_schema(schema) } +#[tracing::instrument(level = "trace", skip())] #[cfg(not(feature = "avro"))] /// Read Avro schema given a reader (requires the avro feature) pub fn read_avro_schema_from_reader(_: &mut R) -> Result { diff --git a/datafusion/core/src/datasource/avro_to_arrow/reader.rs b/datafusion/core/src/datasource/avro_to_arrow/reader.rs index 5dc53c5c86c87..098de2f206962 100644 --- a/datafusion/core/src/datasource/avro_to_arrow/reader.rs +++ b/datafusion/core/src/datasource/avro_to_arrow/reader.rs @@ -39,6 +39,7 @@ pub struct ReaderBuilder { } impl Default for ReaderBuilder { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self { schema: None, @@ -49,6 +50,7 @@ impl Default for ReaderBuilder { } impl ReaderBuilder { + #[tracing::instrument(level = "trace", skip())] /// Create a new builder for configuring Avro parsing options. /// /// To convert a builder into a reader, call `Reader::from_builder` @@ -79,12 +81,14 @@ impl ReaderBuilder { Self::default() } + #[tracing::instrument(level = "trace", skip(self, schema))] /// Set the Avro file's schema pub fn with_schema(mut self, schema: SchemaRef) -> Self { self.schema = Some(schema); self } + #[tracing::instrument(level = "trace", skip(self))] /// Set the Avro reader to infer the schema of the file pub fn read_schema(mut self) -> Self { // remove any schema that is set @@ -92,18 +96,21 @@ impl ReaderBuilder { self } + #[tracing::instrument(level = "trace", skip(self, batch_size))] /// Set the batch size (number of records to load at one time) pub fn with_batch_size(mut self, batch_size: usize) -> Self { self.batch_size = batch_size; self } + #[tracing::instrument(level = "trace", skip(self, projection))] /// Set the reader's column projection pub fn with_projection(mut self, projection: Vec) -> Self { self.projection = Some(projection); self } + #[tracing::instrument(level = "trace", skip(self, source))] /// Create a new `Reader` from the `ReaderBuilder` pub fn build<'a, R>(self, source: R) -> Result> where @@ -129,6 +136,7 @@ pub struct Reader<'a, R: Read> { } impl<'a, R: Read> Reader<'a, R> { + #[tracing::instrument(level = "trace", skip(reader, schema, batch_size, projection))] /// Create a new Avro Reader from any value that implements the `Read` trait. 
/// /// If reading a `File`, you can customise the Reader, such as to enable schema @@ -150,6 +158,7 @@ impl<'a, R: Read> Reader<'a, R> { }) } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the schema of the reader, useful for getting the schema without reading /// record batches pub fn schema(&self) -> SchemaRef { @@ -160,6 +169,7 @@ impl<'a, R: Read> Reader<'a, R> { impl<'a, R: Read> Iterator for Reader<'a, R> { type Item = ArrowResult; + #[tracing::instrument(level = "trace", skip(self))] /// Returns the next batch of results (defined by `self.batch_size`), or `None` if there /// are no more results. fn next(&mut self) -> Option { @@ -175,6 +185,7 @@ mod tests { use arrow::datatypes::TimeUnit; use std::fs::File; + #[tracing::instrument(level = "trace", skip(name))] fn build_reader(name: &str) -> Reader { let testdata = crate::test_util::arrow_test_data(); let filename = format!("{testdata}/avro/{name}"); @@ -182,6 +193,7 @@ mod tests { builder.build(File::open(filename).unwrap()).unwrap() } + #[tracing::instrument(level = "trace", skip(batch, col))] fn get_col<'a, T: 'static>( batch: &'a RecordBatch, col: (usize, &Field), diff --git a/datafusion/core/src/datasource/avro_to_arrow/schema.rs b/datafusion/core/src/datasource/avro_to_arrow/schema.rs index 039a6aacc07eb..90052759d0e33 100644 --- a/datafusion/core/src/datasource/avro_to_arrow/schema.rs +++ b/datafusion/core/src/datasource/avro_to_arrow/schema.rs @@ -26,6 +26,7 @@ use arrow::datatypes::{Field, UnionFields}; use std::collections::HashMap; use std::sync::Arc; +#[tracing::instrument(level = "trace", skip(avro_schema))] /// Converts an avro schema to an arrow schema pub fn to_arrow_schema(avro_schema: &apache_avro::Schema) -> Result { let mut schema_fields = vec![]; @@ -47,6 +48,7 @@ pub fn to_arrow_schema(avro_schema: &apache_avro::Schema) -> Result { Ok(schema) } +#[tracing::instrument(level = "trace", skip(schema, name, nullable))] fn schema_to_field( schema: &apache_avro::Schema, name: Option<&str>, @@ -55,6 +57,7 @@ fn schema_to_field( schema_to_field_with_props(schema, name, nullable, Default::default()) } +#[tracing::instrument(level = "trace", skip(schema, name, nullable, props))] fn schema_to_field_with_props( schema: &AvroSchema, name: Option<&str>, @@ -163,6 +166,7 @@ fn schema_to_field_with_props( Ok(field) } +#[tracing::instrument(level = "trace", skip(dt))] fn default_field_name(dt: &DataType) -> &str { match dt { DataType::Null => "null", @@ -235,6 +239,7 @@ fn default_field_name(dt: &DataType) -> &str { } } +#[tracing::instrument(level = "trace", skip(schema))] fn external_props(schema: &AvroSchema) -> HashMap { let mut props = HashMap::new(); match &schema { @@ -281,6 +286,7 @@ fn external_props(schema: &AvroSchema) -> HashMap { props } +#[tracing::instrument(level = "trace", skip(alias, namespace, default_namespace))] /// Returns the fully qualified name for a field pub fn aliased( alias: &Alias, @@ -309,6 +315,7 @@ mod test { use apache_avro::Schema as AvroSchema; use arrow::datatypes::DataType::{Boolean, Int32, Int64}; + #[tracing::instrument(level = "trace", skip(name))] fn alias(name: &str) -> Alias { Alias::new(name).unwrap() } diff --git a/datafusion/core/src/datasource/cte_worktable.rs b/datafusion/core/src/datasource/cte_worktable.rs index afc4536f068e2..639be3c034d0b 100644 --- a/datafusion/core/src/datasource/cte_worktable.rs +++ b/datafusion/core/src/datasource/cte_worktable.rs @@ -46,6 +46,7 @@ pub struct CteWorkTable { } impl CteWorkTable { + #[tracing::instrument(level = "trace", 
skip(name, table_schema))] /// construct a new CteWorkTable with the given name and schema /// This schema must match the schema of the recursive term of the query /// Since the scan method will contain an physical plan that assumes this schema @@ -59,22 +60,27 @@ impl CteWorkTable { #[async_trait] impl TableProvider for CteWorkTable { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn get_logical_plan(&self) -> Option<&LogicalPlan> { None } + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.table_schema.clone() } + #[tracing::instrument(level = "trace", skip(self))] fn table_type(&self) -> TableType { TableType::Temporary } + #[tracing::instrument(level = "trace", skip(self, _state, _projection, _filters, _limit))] async fn scan( &self, _state: &SessionState, @@ -89,6 +95,7 @@ impl TableProvider for CteWorkTable { ))) } + #[tracing::instrument(level = "trace", skip(self, filters))] fn supports_filters_pushdown( &self, filters: &[&Expr], diff --git a/datafusion/core/src/datasource/default_table_source.rs b/datafusion/core/src/datasource/default_table_source.rs index 977e681d66413..6ec4dd7140ab1 100644 --- a/datafusion/core/src/datasource/default_table_source.rs +++ b/datafusion/core/src/datasource/default_table_source.rs @@ -38,6 +38,7 @@ pub struct DefaultTableSource { } impl DefaultTableSource { + #[tracing::instrument(level = "trace", skip(table_provider))] /// Create a new DefaultTableSource to wrap a TableProvider pub fn new(table_provider: Arc) -> Self { Self { table_provider } @@ -45,22 +46,26 @@ impl DefaultTableSource { } impl TableSource for DefaultTableSource { + #[tracing::instrument(level = "trace", skip(self))] /// Returns the table source as [`Any`] so that it can be /// downcast to a specific implementation. fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] /// Get a reference to the schema for this table fn schema(&self) -> SchemaRef { self.table_provider.schema() } + #[tracing::instrument(level = "trace", skip(self))] /// Get a reference to applicable constraints, if any exists. fn constraints(&self) -> Option<&Constraints> { self.table_provider.constraints() } + #[tracing::instrument(level = "trace", skip(self, filter))] /// Tests whether the table provider can make use of any or all filter expressions /// to optimise data retrieval. fn supports_filters_pushdown( @@ -70,15 +75,18 @@ impl TableSource for DefaultTableSource { self.table_provider.supports_filters_pushdown(filter) } + #[tracing::instrument(level = "trace", skip(self))] fn get_logical_plan(&self) -> Option<&datafusion_expr::LogicalPlan> { self.table_provider.get_logical_plan() } + #[tracing::instrument(level = "trace", skip(self, column))] fn get_column_default(&self, column: &str) -> Option<&Expr> { self.table_provider.get_column_default(column) } } +#[tracing::instrument(level = "trace", skip(table_provider))] /// Wrap TableProvider in TableSource pub fn provider_as_source( table_provider: Arc, @@ -86,6 +94,7 @@ pub fn provider_as_source( Arc::new(DefaultTableSource::new(table_provider)) } +#[tracing::instrument(level = "trace", skip(source))] /// Attempt to downcast a TableSource to DefaultTableSource and access the /// TableProvider. This will only work with a TableSource created by DataFusion. 
pub fn source_as_provider( diff --git a/datafusion/core/src/datasource/empty.rs b/datafusion/core/src/datasource/empty.rs index 5100987520ee1..34809617bca9f 100644 --- a/datafusion/core/src/datasource/empty.rs +++ b/datafusion/core/src/datasource/empty.rs @@ -38,6 +38,7 @@ pub struct EmptyTable { } impl EmptyTable { + #[tracing::instrument(level = "trace", skip(schema))] /// Initialize a new `EmptyTable` from a schema. pub fn new(schema: SchemaRef) -> Self { Self { @@ -46,6 +47,7 @@ impl EmptyTable { } } + #[tracing::instrument(level = "trace", skip(self, partitions))] /// Creates a new EmptyTable with specified partition number. pub fn with_partitions(mut self, partitions: usize) -> Self { self.partitions = partitions; @@ -55,18 +57,22 @@ impl EmptyTable { #[async_trait] impl TableProvider for EmptyTable { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.schema.clone() } + #[tracing::instrument(level = "trace", skip(self))] fn table_type(&self) -> TableType { TableType::Base } + #[tracing::instrument(level = "trace", skip(self, _state, projection, _filters, _limit))] async fn scan( &self, _state: &SessionState, diff --git a/datafusion/core/src/datasource/file_format/arrow.rs b/datafusion/core/src/datasource/file_format/arrow.rs index c09f8843932ac..de13ce3a21f58 100644 --- a/datafusion/core/src/datasource/file_format/arrow.rs +++ b/datafusion/core/src/datasource/file_format/arrow.rs @@ -67,10 +67,12 @@ pub struct ArrowFormat; #[async_trait] impl FileFormat for ArrowFormat { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self, _state, store, objects, _columns))] async fn infer_schema( &self, _state: &SessionState, @@ -96,6 +98,7 @@ impl FileFormat for ArrowFormat { Ok(Arc::new(merged_schema)) } + #[tracing::instrument(level = "trace", skip(self, _state, _store, table_schema, _object))] async fn infer_stats( &self, _state: &SessionState, @@ -106,6 +109,7 @@ impl FileFormat for ArrowFormat { Ok(Statistics::new_unknown(&table_schema)) } + #[tracing::instrument(level = "trace", skip(self, _state, conf, _filters))] async fn create_physical_plan( &self, _state: &SessionState, @@ -116,6 +120,7 @@ impl FileFormat for ArrowFormat { Ok(Arc::new(exec)) } + #[tracing::instrument(level = "trace", skip(self, input, _state, conf, order_requirements))] async fn create_writer_physical_plan( &self, input: Arc, @@ -145,10 +150,12 @@ struct ArrowFileSink { } impl ArrowFileSink { + #[tracing::instrument(level = "trace", skip(config))] fn new(config: FileSinkConfig) -> Self { Self { config } } + #[tracing::instrument(level = "trace", skip(self))] /// Converts table schema to writer schema, which may differ in the case /// of hive style partitioning where some columns are removed from the /// underlying files. 
@@ -176,12 +183,14 @@ impl ArrowFileSink { } impl Debug for ArrowFileSink { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("ArrowFileSink").finish() } } impl DisplayAs for ArrowFileSink { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { @@ -195,14 +204,17 @@ impl DisplayAs for ArrowFileSink { #[async_trait] impl DataSink for ArrowFileSink { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { None } + #[tracing::instrument(level = "trace", skip(self, data, context))] async fn write_all( &self, data: SendableRecordBatchStream, @@ -291,6 +303,7 @@ impl DataSink for ArrowFileSink { const ARROW_MAGIC: [u8; 6] = [b'A', b'R', b'R', b'O', b'W', b'1']; const CONTINUATION_MARKER: [u8; 4] = [0xff; 4]; +#[tracing::instrument(level = "trace", skip(stream))] /// Custom implementation of inferring schema. Should eventually be moved upstream to arrow-rs. /// See async fn infer_schema_from_file_stream( @@ -354,6 +367,7 @@ async fn infer_schema_from_file_stream( Ok(Arc::new(schema)) } +#[tracing::instrument(level = "trace", skip(stream, n, extend_from))] async fn collect_at_least_n_bytes( stream: &mut BoxStream<'static, object_store::Result>, n: usize, diff --git a/datafusion/core/src/datasource/file_format/avro.rs b/datafusion/core/src/datasource/file_format/avro.rs index 0c56ead98da7c..ed794d52ad5af 100644 --- a/datafusion/core/src/datasource/file_format/avro.rs +++ b/datafusion/core/src/datasource/file_format/avro.rs @@ -40,10 +40,12 @@ pub struct AvroFormat; #[async_trait] impl FileFormat for AvroFormat { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self, _state, store, objects, _columns))] async fn infer_schema( &self, _state: &SessionState, @@ -70,6 +72,7 @@ impl FileFormat for AvroFormat { Ok(Arc::new(merged_schema)) } + #[tracing::instrument(level = "trace", skip(self, _state, _store, table_schema, _object))] async fn infer_stats( &self, _state: &SessionState, @@ -80,6 +83,7 @@ impl FileFormat for AvroFormat { Ok(Statistics::new_unknown(&table_schema)) } + #[tracing::instrument(level = "trace", skip(self, _state, conf, _filters))] async fn create_physical_plan( &self, _state: &SessionState, @@ -435,6 +439,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(state, file_name, projection, limit))] async fn get_exec( state: &SessionState, file_name: &str, diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index 30008b4547169..5560d319bc320 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -60,6 +60,7 @@ pub struct CsvFormat { } impl CsvFormat { + #[tracing::instrument(level = "trace", skip(self, store, object))] /// Return a newline delimited stream from the specified file on /// Stream, decompressing if necessary /// Each returned `Bytes` has a whole number of newline delimited rows @@ -91,6 +92,7 @@ impl CsvFormat { stream.boxed() } + #[tracing::instrument(level = "trace", skip(self, stream))] async fn read_to_delimited_chunks_from_stream( &self, stream: BoxStream<'static, Result>, @@ 
-116,17 +118,20 @@ impl CsvFormat { steam.boxed() } + #[tracing::instrument(level = "trace", skip(self, options))] /// Set the csv options pub fn with_options(mut self, options: CsvOptions) -> Self { self.options = options; self } + #[tracing::instrument(level = "trace", skip(self))] /// Retrieve the csv options pub fn options(&self) -> &CsvOptions { &self.options } + #[tracing::instrument(level = "trace", skip(self, max_rec))] /// Set a limit in terms of records to scan to infer the schema /// - default to `DEFAULT_SCHEMA_INFER_MAX_RECORD` pub fn with_schema_infer_max_rec(mut self, max_rec: usize) -> Self { @@ -134,6 +139,7 @@ impl CsvFormat { self } + #[tracing::instrument(level = "trace", skip(self, has_header))] /// Set true to indicate that the first line is a header. /// - default to true pub fn with_has_header(mut self, has_header: bool) -> Self { @@ -141,12 +147,14 @@ impl CsvFormat { self } + #[tracing::instrument(level = "trace", skip(self))] /// Returns `Some(true)` if the first line is a header, `Some(false)` if /// it is not, and `None` if it is not specified. pub fn has_header(&self) -> Option { self.options.has_header } + #[tracing::instrument(level = "trace", skip(self, delimiter))] /// The character separating values within a row. /// - default to ',' pub fn with_delimiter(mut self, delimiter: u8) -> Self { @@ -154,6 +162,7 @@ impl CsvFormat { self } + #[tracing::instrument(level = "trace", skip(self, quote))] /// The quote character in a row. /// - default to '"' pub fn with_quote(mut self, quote: u8) -> Self { @@ -161,6 +170,7 @@ impl CsvFormat { self } + #[tracing::instrument(level = "trace", skip(self, escape))] /// The escape character in a row. /// - default is None pub fn with_escape(mut self, escape: Option) -> Self { @@ -168,6 +178,7 @@ impl CsvFormat { self } + #[tracing::instrument(level = "trace", skip(self, file_compression_type))] /// Set a `FileCompressionType` of CSV /// - defaults to `FileCompressionType::UNCOMPRESSED` pub fn with_file_compression_type( @@ -178,16 +189,19 @@ impl CsvFormat { self } + #[tracing::instrument(level = "trace", skip(self))] /// The delimiter character. pub fn delimiter(&self) -> u8 { self.options.delimiter } + #[tracing::instrument(level = "trace", skip(self))] /// The quote character. pub fn quote(&self) -> u8 { self.options.quote } + #[tracing::instrument(level = "trace", skip(self))] /// The escape character. 
pub fn escape(&self) -> Option { self.options.escape @@ -196,10 +210,12 @@ impl CsvFormat { #[async_trait] impl FileFormat for CsvFormat { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self, state, store, objects, _columns))] async fn infer_schema( &self, state: &SessionState, @@ -227,6 +243,7 @@ impl FileFormat for CsvFormat { Ok(Arc::new(merged_schema)) } + #[tracing::instrument(level = "trace", skip(self, _state, _store, table_schema, _object))] async fn infer_stats( &self, _state: &SessionState, @@ -237,6 +254,7 @@ impl FileFormat for CsvFormat { Ok(Statistics::new_unknown(&table_schema)) } + #[tracing::instrument(level = "trace", skip(self, state, conf, _filters))] async fn create_physical_plan( &self, state: &SessionState, @@ -258,6 +276,7 @@ impl FileFormat for CsvFormat { Ok(Arc::new(exec)) } + #[tracing::instrument(level = "trace", skip(self, input, _state, conf, order_requirements))] async fn create_writer_physical_plan( &self, input: Arc, @@ -284,6 +303,7 @@ impl FileFormat for CsvFormat { } impl CsvFormat { + #[tracing::instrument(level = "trace", skip(self, state, records_to_read, stream))] /// Return the inferred schema reading up to records_to_read from a /// stream of delimited chunks returning the inferred schema and the /// number of lines that were read @@ -358,6 +378,7 @@ impl CsvFormat { } } +#[tracing::instrument(level = "trace", skip(names, types))] fn build_schema_helper(names: Vec, types: &[HashSet]) -> Schema { let fields = names .into_iter() @@ -391,6 +412,7 @@ fn build_schema_helper(names: Vec, types: &[HashSet]) -> Schem } impl Default for CsvSerializer { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self::new() } @@ -405,6 +427,7 @@ pub struct CsvSerializer { } impl CsvSerializer { + #[tracing::instrument(level = "trace", skip())] /// Constructor for the CsvSerializer object pub fn new() -> Self { Self { @@ -413,12 +436,14 @@ impl CsvSerializer { } } + #[tracing::instrument(level = "trace", skip(self, builder))] /// Method for setting the CSV writer builder pub fn with_builder(mut self, builder: WriterBuilder) -> Self { self.builder = builder; self } + #[tracing::instrument(level = "trace", skip(self, header))] /// Method for setting the CSV writer header status pub fn with_header(mut self, header: bool) -> Self { self.header = header; @@ -427,6 +452,7 @@ impl CsvSerializer { } impl BatchSerializer for CsvSerializer { + #[tracing::instrument(level = "trace", skip(self, batch, initial))] fn serialize(&self, batch: RecordBatch, initial: bool) -> Result { let mut buffer = Vec::with_capacity(4096); let builder = self.builder.clone(); @@ -446,12 +472,14 @@ pub struct CsvSink { } impl Debug for CsvSink { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("CsvSink").finish() } } impl DisplayAs for CsvSink { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { @@ -464,6 +492,7 @@ impl DisplayAs for CsvSink { } impl CsvSink { + #[tracing::instrument(level = "trace", skip(config, writer_options))] /// Create from config. 
pub fn new(config: FileSinkConfig, writer_options: CsvWriterOptions) -> Self { Self { @@ -472,11 +501,13 @@ impl CsvSink { } } + #[tracing::instrument(level = "trace", skip(self))] /// Retrieve the inner [`FileSinkConfig`]. pub fn config(&self) -> &FileSinkConfig { &self.config } + #[tracing::instrument(level = "trace", skip(self, data, context))] async fn multipartput_all( &self, data: SendableRecordBatchStream, @@ -505,6 +536,7 @@ impl CsvSink { .await } + #[tracing::instrument(level = "trace", skip(self))] /// Retrieve the writer options pub fn writer_options(&self) -> &CsvWriterOptions { &self.writer_options @@ -513,14 +545,17 @@ impl CsvSink { #[async_trait] impl DataSink for CsvSink { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { None } + #[tracing::instrument(level = "trace", skip(self, data, context))] async fn write_all( &self, data: SendableRecordBatchStream, @@ -819,6 +854,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(state, file_name, projection, limit, has_header))] async fn get_exec( state: &SessionState, file_name: &str, @@ -879,6 +915,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(ctx, sql))] /// Explain the `sql` query under `ctx` to make sure the underlying csv scan is parallelized /// e.g. "CsvExec: file_groups={2 groups:" in plan means 2 CsvExec runs concurrently async fn count_query_csv_partitions( diff --git a/datafusion/core/src/datasource/file_format/file_compression_type.rs b/datafusion/core/src/datasource/file_format/file_compression_type.rs index c1fbe352d37bf..fb4e98a697f4f 100644 --- a/datafusion/core/src/datasource/file_format/file_compression_type.rs +++ b/datafusion/core/src/datasource/file_format/file_compression_type.rs @@ -59,6 +59,7 @@ pub struct FileCompressionType { } impl GetExt for FileCompressionType { + #[tracing::instrument(level = "trace", skip(self))] fn get_ext(&self) -> String { match self.variant { GZIP => ".gz".to_owned(), @@ -71,12 +72,14 @@ impl GetExt for FileCompressionType { } impl From for FileCompressionType { + #[tracing::instrument(level = "trace", skip(t))] fn from(t: CompressionTypeVariant) -> Self { Self { variant: t } } } impl From for CompressionTypeVariant { + #[tracing::instrument(level = "trace", skip(t))] fn from(t: FileCompressionType) -> Self { t.variant } @@ -85,6 +88,7 @@ impl From for CompressionTypeVariant { impl FromStr for FileCompressionType { type Err = DataFusionError; + #[tracing::instrument(level = "trace", skip(s))] fn from_str(s: &str) -> Result { let variant = CompressionTypeVariant::from_str(s).map_err(|_| { DataFusionError::NotImplemented(format!("Unknown FileCompressionType: {s}")) @@ -117,6 +121,7 @@ impl FileCompressionType { self.variant.is_compressed() } + #[tracing::instrument(level = "trace", skip(self, s))] /// Given a `Stream`, create a `Stream` which data are compressed with `FileCompressionType`. pub fn convert_to_compress_stream( &self, @@ -149,6 +154,7 @@ impl FileCompressionType { }) } + #[tracing::instrument(level = "trace", skip(self, w))] /// Wrap the given `BufWriter` so that it performs compressed writes /// according to this `FileCompressionType`. pub fn convert_async_writer( @@ -174,6 +180,7 @@ impl FileCompressionType { }) } + #[tracing::instrument(level = "trace", skip(self, s))] /// Given a `Stream`, create a `Stream` which data are decompressed with `FileCompressionType`. 
pub fn convert_stream( &self, @@ -211,6 +218,7 @@ impl FileCompressionType { }) } + #[tracing::instrument(level = "trace", skip(self, r))] /// Given a `Read`, create a `Read` which data are decompressed with `FileCompressionType`. pub fn convert_read( &self, @@ -246,6 +254,7 @@ pub trait FileTypeExt { } impl FileTypeExt for FileType { + #[tracing::instrument(level = "trace", skip(self, c))] fn get_ext_with_compression(&self, c: FileCompressionType) -> Result { let ext = self.get_ext(); diff --git a/datafusion/core/src/datasource/file_format/json.rs b/datafusion/core/src/datasource/file_format/json.rs index 2f68ddde7e20d..f59685410b36b 100644 --- a/datafusion/core/src/datasource/file_format/json.rs +++ b/datafusion/core/src/datasource/file_format/json.rs @@ -60,17 +60,20 @@ pub struct JsonFormat { } impl JsonFormat { + #[tracing::instrument(level = "trace", skip(self, options))] /// Set JSON options pub fn with_options(mut self, options: JsonOptions) -> Self { self.options = options; self } + #[tracing::instrument(level = "trace", skip(self))] /// Retrieve JSON options pub fn options(&self) -> &JsonOptions { &self.options } + #[tracing::instrument(level = "trace", skip(self, max_rec))] /// Set a limit in terms of records to scan to infer the schema /// - defaults to `DEFAULT_SCHEMA_INFER_MAX_RECORD` pub fn with_schema_infer_max_rec(mut self, max_rec: usize) -> Self { @@ -78,6 +81,7 @@ impl JsonFormat { self } + #[tracing::instrument(level = "trace", skip(self, file_compression_type))] /// Set a `FileCompressionType` of JSON /// - defaults to `FileCompressionType::UNCOMPRESSED` pub fn with_file_compression_type( @@ -91,10 +95,12 @@ impl JsonFormat { #[async_trait] impl FileFormat for JsonFormat { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self, _state, store, objects, _columns))] async fn infer_schema( &self, _state: &SessionState, @@ -141,6 +147,7 @@ impl FileFormat for JsonFormat { Ok(Arc::new(schema)) } + #[tracing::instrument(level = "trace", skip(self, _state, _store, table_schema, _object))] async fn infer_stats( &self, _state: &SessionState, @@ -151,6 +158,7 @@ impl FileFormat for JsonFormat { Ok(Statistics::new_unknown(&table_schema)) } + #[tracing::instrument(level = "trace", skip(self, _state, conf, _filters))] async fn create_physical_plan( &self, _state: &SessionState, @@ -162,6 +170,7 @@ impl FileFormat for JsonFormat { Ok(Arc::new(exec)) } + #[tracing::instrument(level = "trace", skip(self, input, _state, conf, order_requirements))] async fn create_writer_physical_plan( &self, input: Arc, @@ -188,6 +197,7 @@ impl FileFormat for JsonFormat { } impl Default for JsonSerializer { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self::new() } @@ -197,6 +207,7 @@ impl Default for JsonSerializer { pub struct JsonSerializer {} impl JsonSerializer { + #[tracing::instrument(level = "trace", skip())] /// Constructor for the JsonSerializer object pub fn new() -> Self { Self {} @@ -204,6 +215,7 @@ impl JsonSerializer { } impl BatchSerializer for JsonSerializer { + #[tracing::instrument(level = "trace", skip(self, batch, _initial))] fn serialize(&self, batch: RecordBatch, _initial: bool) -> Result { let mut buffer = Vec::with_capacity(4096); let mut writer = json::LineDelimitedWriter::new(&mut buffer); @@ -221,12 +233,14 @@ pub struct JsonSink { } impl Debug for JsonSink { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> 
fmt::Result { f.debug_struct("JsonSink").finish() } } impl DisplayAs for JsonSink { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { @@ -239,6 +253,7 @@ impl DisplayAs for JsonSink { } impl JsonSink { + #[tracing::instrument(level = "trace", skip(config, writer_options))] /// Create from config. pub fn new(config: FileSinkConfig, writer_options: JsonWriterOptions) -> Self { Self { @@ -247,11 +262,13 @@ impl JsonSink { } } + #[tracing::instrument(level = "trace", skip(self))] /// Retrieve the inner [`FileSinkConfig`]. pub fn config(&self) -> &FileSinkConfig { &self.config } + #[tracing::instrument(level = "trace", skip(self, data, context))] async fn multipartput_all( &self, data: SendableRecordBatchStream, @@ -269,6 +286,7 @@ impl JsonSink { ) .await } + #[tracing::instrument(level = "trace", skip(self))] /// Retrieve the writer options pub fn writer_options(&self) -> &JsonWriterOptions { &self.writer_options @@ -277,14 +295,17 @@ impl JsonSink { #[async_trait] impl DataSink for JsonSink { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { None } + #[tracing::instrument(level = "trace", skip(self, data, context))] async fn write_all( &self, data: SendableRecordBatchStream, @@ -403,6 +424,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(state, projection, limit))] async fn get_exec( state: &SessionState, projection: Option>, @@ -434,6 +456,7 @@ mod tests { assert_eq!(vec!["a: Int64", "b: Float64", "c: Boolean"], fields); } + #[tracing::instrument(level = "trace", skip(ctx, query))] async fn count_num_partitions(ctx: &SessionContext, query: &str) -> Result { let result = ctx .sql(&format!("EXPLAIN {query}")) diff --git a/datafusion/core/src/datasource/file_format/mod.rs b/datafusion/core/src/datasource/file_format/mod.rs index 804159e6342ee..f1fde8c529a54 100644 --- a/datafusion/core/src/datasource/file_format/mod.rs +++ b/datafusion/core/src/datasource/file_format/mod.rs @@ -127,6 +127,7 @@ pub(crate) mod test_util { }; use tokio::io::AsyncWrite; + #[tracing::instrument(level = "trace", skip(state, format, store_root, file_name, projection, limit))] pub async fn scan_format( state: &SessionState, format: &dyn FileFormat, @@ -182,6 +183,7 @@ pub(crate) mod test_util { } impl std::fmt::Display for VariableStream { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "VariableStream") } @@ -189,6 +191,7 @@ pub(crate) mod test_util { #[async_trait] impl ObjectStore for VariableStream { + #[tracing::instrument(level = "trace", skip(self, _location, _bytes, _opts))] async fn put_opts( &self, _location: &Path, @@ -198,6 +201,7 @@ pub(crate) mod test_util { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self, _location))] async fn put_multipart( &self, _location: &Path, @@ -206,6 +210,7 @@ pub(crate) mod test_util { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self, _location, _multipart_id))] async fn abort_multipart( &self, _location: &Path, @@ -214,6 +219,7 @@ pub(crate) mod test_util { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self, location))] async fn get(&self, location: &Path) -> object_store::Result { let bytes = 
self.bytes_to_repeat.clone(); let range = 0..bytes.len() * self.max_iterations; @@ -239,6 +245,7 @@ pub(crate) mod test_util { }) } + #[tracing::instrument(level = "trace", skip(self, _location, _opts))] async fn get_opts( &self, _location: &Path, @@ -247,6 +254,7 @@ pub(crate) mod test_util { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self, _location, _ranges))] async fn get_ranges( &self, _location: &Path, @@ -255,14 +263,17 @@ pub(crate) mod test_util { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self, _location))] async fn head(&self, _location: &Path) -> object_store::Result { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self, _location))] async fn delete(&self, _location: &Path) -> object_store::Result<()> { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self, _prefix))] fn list( &self, _prefix: Option<&Path>, @@ -270,6 +281,7 @@ pub(crate) mod test_util { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self, _prefix))] async fn list_with_delimiter( &self, _prefix: Option<&Path>, @@ -277,10 +289,12 @@ pub(crate) mod test_util { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self, _from, _to))] async fn copy(&self, _from: &Path, _to: &Path) -> object_store::Result<()> { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self, _from, _to))] async fn copy_if_not_exists( &self, _from: &Path, @@ -291,6 +305,7 @@ pub(crate) mod test_util { } impl VariableStream { + #[tracing::instrument(level = "trace", skip(bytes_to_repeat, max_iterations))] pub fn new(bytes_to_repeat: Bytes, max_iterations: usize) -> Self { Self { bytes_to_repeat, @@ -299,6 +314,7 @@ pub(crate) mod test_util { } } + #[tracing::instrument(level = "trace", skip(self))] pub fn get_iterations_detected(&self) -> usize { *self.iterations_detected.lock().unwrap() } diff --git a/datafusion/core/src/datasource/file_format/options.rs b/datafusion/core/src/datasource/file_format/options.rs index c8399d780dfcb..c10e78b415978 100644 --- a/datafusion/core/src/datasource/file_format/options.rs +++ b/datafusion/core/src/datasource/file_format/options.rs @@ -78,12 +78,14 @@ pub struct CsvReadOptions<'a> { } impl<'a> Default for CsvReadOptions<'a> { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self::new() } } impl<'a> CsvReadOptions<'a> { + #[tracing::instrument(level = "trace", skip())] /// Create a CSV read option with default presets pub fn new() -> Self { Self { @@ -100,36 +102,42 @@ impl<'a> CsvReadOptions<'a> { } } + #[tracing::instrument(level = "trace", skip(self, has_header))] /// Configure has_header setting pub fn has_header(mut self, has_header: bool) -> Self { self.has_header = has_header; self } + #[tracing::instrument(level = "trace", skip(self, delimiter))] /// Specify delimiter to use for CSV read pub fn delimiter(mut self, delimiter: u8) -> Self { self.delimiter = delimiter; self } + #[tracing::instrument(level = "trace", skip(self, quote))] /// Specify quote to use for CSV read pub fn quote(mut self, quote: u8) -> Self { self.quote = quote; self } + #[tracing::instrument(level = "trace", skip(self, escape))] /// Specify delimiter to use for CSV read pub fn escape(mut self, escape: u8) -> Self { self.escape = Some(escape); self } + #[tracing::instrument(level = "trace", skip(self, file_extension))] /// Specify the file extension for CSV file selection pub fn file_extension(mut self, file_extension: &'a str) -> Self { self.file_extension = file_extension; self } + 
#[tracing::instrument(level = "trace", skip(self, delimiter))] /// Configure delimiter setting with Option, None value will be ignored pub fn delimiter_option(mut self, delimiter: Option) -> Self { if let Some(d) = delimiter { @@ -138,12 +146,14 @@ impl<'a> CsvReadOptions<'a> { self } + #[tracing::instrument(level = "trace", skip(self, schema))] /// Specify schema to use for CSV read pub fn schema(mut self, schema: &'a Schema) -> Self { self.schema = Some(schema); self } + #[tracing::instrument(level = "trace", skip(self, table_partition_cols))] /// Specify table_partition_cols for partition pruning pub fn table_partition_cols( mut self, @@ -153,12 +163,14 @@ impl<'a> CsvReadOptions<'a> { self } + #[tracing::instrument(level = "trace", skip(self, max_records))] /// Configure number of max records to read for schema inference pub fn schema_infer_max_records(mut self, max_records: usize) -> Self { self.schema_infer_max_records = max_records; self } + #[tracing::instrument(level = "trace", skip(self, file_compression_type))] /// Configure file compression type pub fn file_compression_type( mut self, @@ -168,6 +180,7 @@ impl<'a> CsvReadOptions<'a> { self } + #[tracing::instrument(level = "trace", skip(self, file_sort_order))] /// Configure if file has known sort order pub fn file_sort_order(mut self, file_sort_order: Vec>) -> Self { self.file_sort_order = file_sort_order; @@ -207,6 +220,7 @@ pub struct ParquetReadOptions<'a> { } impl<'a> Default for ParquetReadOptions<'a> { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self { file_extension: DEFAULT_PARQUET_EXTENSION, @@ -221,12 +235,14 @@ impl<'a> Default for ParquetReadOptions<'a> { } impl<'a> ParquetReadOptions<'a> { + #[tracing::instrument(level = "trace", skip(self, parquet_pruning))] /// Specify parquet_pruning pub fn parquet_pruning(mut self, parquet_pruning: bool) -> Self { self.parquet_pruning = Some(parquet_pruning); self } + #[tracing::instrument(level = "trace", skip(self, skip_metadata))] /// Tell the parquet reader to skip any metadata that may be in /// the file Schema. This can help avoid schema conflicts due to /// metadata. Defaults to true. 
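// ---------------------------------------------------------------------------
// Editor's note (sketch, not from this patch): every annotation in this diff
// lists all arguments in `skip(...)`. By default `instrument` records each
// argument with its `Debug` impl, which does not compile for stream or trait
// object parameters and can be expensive for large values. An alternative the
// patch does not use is `skip_all` plus explicitly chosen cheap fields; the
// `Batch`/`Sink` types below are hypothetical stand-ins.

struct Batch {
    rows: usize,
}

struct Sink;

impl Sink {
    // Record only a hand-picked, cheap field instead of whole arguments.
    #[tracing::instrument(level = "trace", skip_all, fields(num_rows = batch.rows))]
    fn write(&self, batch: &Batch) -> usize {
        batch.rows
    }
}

fn main() {
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::TRACE)
        .init();
    Sink.write(&Batch { rows: 3 });
}
// ---------------------------------------------------------------------------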
@@ -235,12 +251,14 @@ impl<'a> ParquetReadOptions<'a> { self } + #[tracing::instrument(level = "trace", skip(self, schema))] /// Specify schema to use for parquet read pub fn schema(mut self, schema: &'a Schema) -> Self { self.schema = Some(schema); self } + #[tracing::instrument(level = "trace", skip(self, table_partition_cols))] /// Specify table_partition_cols for partition pruning pub fn table_partition_cols( mut self, @@ -250,6 +268,7 @@ impl<'a> ParquetReadOptions<'a> { self } + #[tracing::instrument(level = "trace", skip(self, file_sort_order))] /// Configure if file has known sort order pub fn file_sort_order(mut self, file_sort_order: Vec>) -> Self { self.file_sort_order = file_sort_order; @@ -277,6 +296,7 @@ pub struct ArrowReadOptions<'a> { } impl<'a> Default for ArrowReadOptions<'a> { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self { schema: None, @@ -287,6 +307,7 @@ impl<'a> Default for ArrowReadOptions<'a> { } impl<'a> ArrowReadOptions<'a> { + #[tracing::instrument(level = "trace", skip(self, table_partition_cols))] /// Specify table_partition_cols for partition pruning pub fn table_partition_cols( mut self, @@ -296,6 +317,7 @@ impl<'a> ArrowReadOptions<'a> { self } + #[tracing::instrument(level = "trace", skip(self, schema))] /// Specify schema to use for AVRO read pub fn schema(mut self, schema: &'a Schema) -> Self { self.schema = Some(schema); @@ -322,6 +344,7 @@ pub struct AvroReadOptions<'a> { } impl<'a> Default for AvroReadOptions<'a> { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self { schema: None, @@ -332,6 +355,7 @@ impl<'a> Default for AvroReadOptions<'a> { } impl<'a> AvroReadOptions<'a> { + #[tracing::instrument(level = "trace", skip(self, table_partition_cols))] /// Specify table_partition_cols for partition pruning pub fn table_partition_cols( mut self, @@ -341,6 +365,7 @@ impl<'a> AvroReadOptions<'a> { self } + #[tracing::instrument(level = "trace", skip(self, schema))] /// Specify schema to use for AVRO read pub fn schema(mut self, schema: &'a Schema) -> Self { self.schema = Some(schema); @@ -374,6 +399,7 @@ pub struct NdJsonReadOptions<'a> { } impl<'a> Default for NdJsonReadOptions<'a> { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self { schema: None, @@ -388,6 +414,7 @@ impl<'a> Default for NdJsonReadOptions<'a> { } impl<'a> NdJsonReadOptions<'a> { + #[tracing::instrument(level = "trace", skip(self, table_partition_cols))] /// Specify table_partition_cols for partition pruning pub fn table_partition_cols( mut self, @@ -397,18 +424,21 @@ impl<'a> NdJsonReadOptions<'a> { self } + #[tracing::instrument(level = "trace", skip(self, file_extension))] /// Specify file_extension pub fn file_extension(mut self, file_extension: &'a str) -> Self { self.file_extension = file_extension; self } + #[tracing::instrument(level = "trace", skip(self, infinite))] /// Configure mark_infinite setting pub fn mark_infinite(mut self, infinite: bool) -> Self { self.infinite = infinite; self } + #[tracing::instrument(level = "trace", skip(self, file_compression_type))] /// Specify file_compression_type pub fn file_compression_type( mut self, @@ -418,12 +448,14 @@ impl<'a> NdJsonReadOptions<'a> { self } + #[tracing::instrument(level = "trace", skip(self, schema))] /// Specify schema to use for NdJson read pub fn schema(mut self, schema: &'a Schema) -> Self { self.schema = Some(schema); self } + #[tracing::instrument(level = "trace", skip(self, file_sort_order))] /// Configure if file has known sort 
order pub fn file_sort_order(mut self, file_sort_order: Vec>) -> Self { self.file_sort_order = file_sort_order; @@ -472,6 +504,7 @@ pub trait ReadOptions<'a> { #[async_trait] impl ReadOptions<'_> for CsvReadOptions<'_> { + #[tracing::instrument(level = "trace", skip(self, config, table_options))] fn to_listing_options( &self, config: &SessionConfig, @@ -493,6 +526,7 @@ impl ReadOptions<'_> for CsvReadOptions<'_> { .with_file_sort_order(self.file_sort_order.clone()) } + #[tracing::instrument(level = "trace", skip(self, config, state, table_path))] async fn get_resolved_schema( &self, config: &SessionConfig, @@ -507,6 +541,7 @@ impl ReadOptions<'_> for CsvReadOptions<'_> { #[cfg(feature = "parquet")] #[async_trait] impl ReadOptions<'_> for ParquetReadOptions<'_> { + #[tracing::instrument(level = "trace", skip(self, config, table_options))] fn to_listing_options( &self, config: &SessionConfig, @@ -529,6 +564,7 @@ impl ReadOptions<'_> for ParquetReadOptions<'_> { .with_column_hints(self.column_hints.clone()) } + #[tracing::instrument(level = "trace", skip(self, config, state, table_path))] async fn get_resolved_schema( &self, config: &SessionConfig, @@ -542,6 +578,7 @@ impl ReadOptions<'_> for ParquetReadOptions<'_> { #[async_trait] impl ReadOptions<'_> for NdJsonReadOptions<'_> { + #[tracing::instrument(level = "trace", skip(self, config, table_options))] fn to_listing_options( &self, config: &SessionConfig, @@ -559,6 +596,7 @@ impl ReadOptions<'_> for NdJsonReadOptions<'_> { .with_file_sort_order(self.file_sort_order.clone()) } + #[tracing::instrument(level = "trace", skip(self, config, state, table_path))] async fn get_resolved_schema( &self, config: &SessionConfig, @@ -572,6 +610,7 @@ impl ReadOptions<'_> for NdJsonReadOptions<'_> { #[async_trait] impl ReadOptions<'_> for AvroReadOptions<'_> { + #[tracing::instrument(level = "trace", skip(self, config, _table_options))] fn to_listing_options( &self, config: &SessionConfig, @@ -585,6 +624,7 @@ impl ReadOptions<'_> for AvroReadOptions<'_> { .with_table_partition_cols(self.table_partition_cols.clone()) } + #[tracing::instrument(level = "trace", skip(self, config, state, table_path))] async fn get_resolved_schema( &self, config: &SessionConfig, @@ -598,6 +638,7 @@ impl ReadOptions<'_> for AvroReadOptions<'_> { #[async_trait] impl ReadOptions<'_> for ArrowReadOptions<'_> { + #[tracing::instrument(level = "trace", skip(self, config, _table_options))] fn to_listing_options( &self, config: &SessionConfig, @@ -611,6 +652,7 @@ impl ReadOptions<'_> for ArrowReadOptions<'_> { .with_table_partition_cols(self.table_partition_cols.clone()) } + #[tracing::instrument(level = "trace", skip(self, config, state, table_path))] async fn get_resolved_schema( &self, config: &SessionConfig, diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 7cc79c3fed053..bf59416230dc8 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -94,11 +94,13 @@ pub struct ParquetFormat { } impl ParquetFormat { + #[tracing::instrument(level = "trace", skip())] /// Construct a new Format with no local overrides pub fn new() -> Self { Self::default() } + #[tracing::instrument(level = "trace", skip(self, enable))] /// Activate statistics based row group level pruning /// - If `None`, defaults to value on `config_options` pub fn with_enable_pruning(mut self, enable: bool) -> Self { @@ -106,11 +108,13 @@ impl ParquetFormat { self } + 
#[tracing::instrument(level = "trace", skip(self))] /// Return `true` if pruning is enabled pub fn enable_pruning(&self) -> bool { self.options.global.pruning } + #[tracing::instrument(level = "trace", skip(self, size_hint))] /// Provide a hint to the size of the file metadata. If a hint is provided /// the reader will try and fetch the last `size_hint` bytes of the parquet file optimistically. /// Without a hint, two read are required. One read to fetch the 8-byte parquet footer and then @@ -122,11 +126,13 @@ impl ParquetFormat { self } + #[tracing::instrument(level = "trace", skip(self))] /// Return the metadata size hint if set pub fn metadata_size_hint(&self) -> Option { self.options.global.metadata_size_hint } + #[tracing::instrument(level = "trace", skip(self, skip_metadata))] /// Tell the parquet reader to skip any metadata that may be in /// the file Schema. This can help avoid schema conflicts due to /// metadata. @@ -137,24 +143,28 @@ impl ParquetFormat { self } + #[tracing::instrument(level = "trace", skip(self))] /// Returns `true` if schema metadata will be cleared prior to /// schema merging. pub fn skip_metadata(&self) -> bool { self.options.global.skip_metadata } + #[tracing::instrument(level = "trace", skip(self, options))] /// Set Parquet options for the ParquetFormat pub fn with_options(mut self, options: TableParquetOptions) -> Self { self.options = options; self } + #[tracing::instrument(level = "trace", skip(self))] /// Parquet options pub fn options(&self) -> &TableParquetOptions { &self.options } } +#[tracing::instrument(level = "trace", skip(schemas))] /// Clears all metadata (Schema level and field level) on an iterator /// of Schemas fn clear_metadata( @@ -172,6 +182,7 @@ fn clear_metadata( }) } +#[tracing::instrument(level = "trace", skip(store, file, metadata_size_hint, columns))] async fn fetch_schema_with_location( store: &dyn ObjectStore, file: &ObjectMeta, @@ -185,10 +196,12 @@ async fn fetch_schema_with_location( #[async_trait] impl FileFormat for ParquetFormat { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self, state, store, objects, columns))] async fn infer_schema( &self, state: &SessionState, @@ -233,6 +246,7 @@ impl FileFormat for ParquetFormat { Ok(Arc::new(schema)) } + #[tracing::instrument(level = "trace", skip(self, _state, store, table_schema, object))] async fn infer_stats( &self, _state: &SessionState, @@ -250,6 +264,7 @@ impl FileFormat for ParquetFormat { Ok(stats) } + #[tracing::instrument(level = "trace", skip(self, _state, conf, filters))] async fn create_physical_plan( &self, _state: &SessionState, @@ -269,6 +284,7 @@ impl FileFormat for ParquetFormat { ))) } + #[tracing::instrument(level = "trace", skip(self, input, _state, conf, order_requirements))] async fn create_writer_physical_plan( &self, input: Arc, @@ -292,6 +308,7 @@ impl FileFormat for ParquetFormat { } } +#[tracing::instrument(level = "trace", skip(max_values, min_values, fields, i, stat))] fn summarize_min_max( max_values: &mut [Option], min_values: &mut [Option], @@ -372,6 +389,7 @@ fn summarize_min_max( } } +#[tracing::instrument(level = "trace", skip(store, meta, size_hint))] /// Fetches parquet metadata from ObjectStore for given object /// /// This component is a subject to **change** in near future and is exposed for low level integrations @@ -436,6 +454,7 @@ pub async fn fetch_parquet_metadata( } } +#[tracing::instrument(level = "trace", skip(schema_desc_ptr, name))] /// 
Returns the index from the schema descriptor for a certain column path pub fn find_leaf_id(schema_desc_ptr: &SchemaDescriptor, name: &str) -> Result { let pos = schema_desc_ptr @@ -446,6 +465,7 @@ pub fn find_leaf_id(schema_desc_ptr: &SchemaDescriptor, name: &str) -> Result) -> fmt::Result { f.debug_struct("ParquetSink").finish() } } impl DisplayAs for ParquetSink { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { @@ -600,6 +624,7 @@ impl DisplayAs for ParquetSink { } impl ParquetSink { + #[tracing::instrument(level = "trace", skip(config, parquet_options))] /// Create from config. pub fn new(config: FileSinkConfig, parquet_options: TableParquetOptions) -> Self { Self { @@ -609,17 +634,20 @@ impl ParquetSink { } } + #[tracing::instrument(level = "trace", skip(self))] /// Retrieve the inner [`FileSinkConfig`]. pub fn config(&self) -> &FileSinkConfig { &self.config } + #[tracing::instrument(level = "trace", skip(self))] /// Retrieve the file metadata for the written files, keyed to the path /// which may be partitioned (in the case of hive style partitioning). pub fn written(&self) -> HashMap { self.written.lock().clone() } + #[tracing::instrument(level = "trace", skip(self))] /// Converts table schema to writer schema, which may differ in the case /// of hive style partitioning where some columns are removed from the /// underlying files. @@ -645,6 +673,7 @@ impl ParquetSink { } } + #[tracing::instrument(level = "trace", skip(self, location, object_store, parquet_props))] /// Creates an AsyncArrowWriter which serializes a parquet file to an ObjectStore /// AsyncArrowWriters are used when individual parquet file serialization is not parallelized async fn create_async_arrow_writer( @@ -662,6 +691,7 @@ impl ParquetSink { Ok(writer) } + #[tracing::instrument(level = "trace", skip(self))] /// Parquet options pub fn parquet_options(&self) -> &TableParquetOptions { &self.parquet_options @@ -670,14 +700,17 @@ impl ParquetSink { #[async_trait] impl DataSink for ParquetSink { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { None } + #[tracing::instrument(level = "trace", skip(self, data, context))] async fn write_all( &self, data: SendableRecordBatchStream, @@ -793,6 +826,7 @@ impl DataSink for ParquetSink { } } +#[tracing::instrument(level = "trace", skip(rx, writer))] /// Consumes a stream of [ArrowLeafColumn] via a channel and serializes them using an [ArrowColumnWriter] /// Once the channel is exhausted, returns the ArrowColumnWriter. async fn column_serializer_task( @@ -808,6 +842,7 @@ async fn column_serializer_task( type ColumnWriterTask = SpawnedTask>; type ColSender = Sender; +#[tracing::instrument(level = "trace", skip(schema, parquet_props, max_buffer_size))] /// Spawns a parallel serialization task for each column /// Returns join handles for each columns serialization task along with a send channel /// to send arrow arrays to each serialization task. @@ -846,6 +881,7 @@ struct ParallelParquetWriterOptions { /// i.e. 
the Vec of encoded columns which can be appended to a row group type RBStreamSerializeResult = Result<(Vec, usize)>; +#[tracing::instrument(level = "trace", skip(col_array_channels, rb, schema))] /// Sends the ArrowArrays in passed [RecordBatch] through the channels to their respective /// parallel column serializers. async fn send_arrays_to_col_writers( @@ -870,6 +906,7 @@ async fn send_arrays_to_col_writers( Ok(()) } +#[tracing::instrument(level = "trace", skip(column_writer_tasks, rg_rows))] /// Spawns a tokio task which joins the parallel column writer tasks, /// and finalizes the row group fn spawn_rg_join_and_finalize_task( @@ -888,6 +925,7 @@ fn spawn_rg_join_and_finalize_task( }) } +#[tracing::instrument(level = "trace", skip(data, serialize_tx, schema, writer_props, parallel_options))] /// This task coordinates the serialization of a parquet file in parallel. /// As the query produces RecordBatches, these are written to a RowGroup /// via parallel [ArrowColumnWriter] tasks. Once the desired max rows per @@ -975,6 +1013,7 @@ fn spawn_parquet_parallel_serialization_task( }) } +#[tracing::instrument(level = "trace", skip(serialize_rx, schema, writer_props, object_store_writer))] /// Consume RowGroups serialized by other parallel tasks and concatenate them in /// to the final parquet file, while flushing finalized bytes to an [ObjectStore] async fn concatenate_parallel_row_groups( @@ -1018,6 +1057,7 @@ async fn concatenate_parallel_row_groups( Ok(file_metadata) } +#[tracing::instrument(level = "trace", skip(object_store_writer, data, output_schema, parquet_props, parallel_options))] /// Parallelizes the serialization of a single parquet file, by first serializing N /// independent RecordBatch streams in parallel to RowGroups in memory. Another /// task then stitches these independent RowGroups together and streams this large @@ -1065,6 +1105,7 @@ pub(crate) mod test_util { /// How many rows per page should be written const ROWS_PER_PAGE: usize = 2; + #[tracing::instrument(level = "trace", skip(batches, multi_page))] /// Writes `batches` to a temporary parquet file /// /// If multi_page is set to `true`, the parquet file(s) are written @@ -1118,6 +1159,7 @@ pub(crate) mod test_util { Ok((meta, files)) } + #[tracing::instrument(level = "trace", skip(writer, batch, chunk_size))] /// write batches chunk_size rows at a time fn write_in_chunks( writer: &mut ArrowWriter, @@ -1259,12 +1301,14 @@ mod tests { } impl Display for RequestCountingObjectStore { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "RequestCounting({})", self.inner) } } impl RequestCountingObjectStore { + #[tracing::instrument(level = "trace", skip(inner))] pub fn new(inner: Arc) -> Self { Self { inner, @@ -1272,10 +1316,12 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip(self))] pub fn request_count(&self) -> usize { self.request_count.load(Ordering::SeqCst) } + #[tracing::instrument(level = "trace", skip(self))] pub fn upcast(self: &Arc) -> Arc { self.clone() } @@ -1283,6 +1329,7 @@ mod tests { #[async_trait] impl ObjectStore for RequestCountingObjectStore { + #[tracing::instrument(level = "trace", skip(self, _location, _bytes, _opts))] async fn put_opts( &self, _location: &Path, @@ -1292,6 +1339,7 @@ mod tests { Err(object_store::Error::NotImplemented) } + #[tracing::instrument(level = "trace", skip(self, _location))] async fn put_multipart( &self, _location: &Path, @@ -1300,6 +1348,7 @@ mod tests { 
Err(object_store::Error::NotImplemented) } + #[tracing::instrument(level = "trace", skip(self, _location, _multipart_id))] async fn abort_multipart( &self, _location: &Path, @@ -1308,6 +1357,7 @@ mod tests { Err(object_store::Error::NotImplemented) } + #[tracing::instrument(level = "trace", skip(self, location, options))] async fn get_opts( &self, location: &Path, @@ -1317,14 +1367,17 @@ mod tests { self.inner.get_opts(location, options).await } + #[tracing::instrument(level = "trace", skip(self, _location))] async fn head(&self, _location: &Path) -> object_store::Result { Err(object_store::Error::NotImplemented) } + #[tracing::instrument(level = "trace", skip(self, _location))] async fn delete(&self, _location: &Path) -> object_store::Result<()> { Err(object_store::Error::NotImplemented) } + #[tracing::instrument(level = "trace", skip(self, _prefix))] fn list( &self, _prefix: Option<&Path>, @@ -1334,6 +1387,7 @@ mod tests { })) } + #[tracing::instrument(level = "trace", skip(self, _prefix))] async fn list_with_delimiter( &self, _prefix: Option<&Path>, @@ -1341,10 +1395,12 @@ mod tests { Err(object_store::Error::NotImplemented) } + #[tracing::instrument(level = "trace", skip(self, _from, _to))] async fn copy(&self, _from: &Path, _to: &Path) -> object_store::Result<()> { Err(object_store::Error::NotImplemented) } + #[tracing::instrument(level = "trace", skip(self, _from, _to))] async fn copy_if_not_exists( &self, _from: &Path, @@ -1788,6 +1844,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(page_index, offset_index))] fn check_page_index_validation( page_index: Option<&ParquetColumnIndex>, offset_index: Option<&ParquetOffsetIndex>, @@ -1829,6 +1886,7 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip(exec, expected))] fn assert_bytes_scanned(exec: Arc, expected: usize) { let actual = exec .metrics() @@ -1840,6 +1898,7 @@ mod tests { assert_eq!(actual, expected); } + #[tracing::instrument(level = "trace", skip(state, file_name, projection, limit))] async fn get_exec( state: &SessionState, file_name: &str, @@ -1851,6 +1910,7 @@ mod tests { scan_format(state, &format, &testdata, file_name, projection, limit).await } + #[tracing::instrument(level = "trace", skip(store_url))] fn build_ctx(store_url: &url::Url) -> Arc { let tmp_dir = tempfile::TempDir::new().unwrap(); let local = Arc::new( diff --git a/datafusion/core/src/datasource/file_format/write/demux.rs b/datafusion/core/src/datasource/file_format/write/demux.rs index d82c2471c5963..9e85fe49f688e 100644 --- a/datafusion/core/src/datasource/file_format/write/demux.rs +++ b/datafusion/core/src/datasource/file_format/write/demux.rs @@ -46,6 +46,7 @@ use tokio::sync::mpsc::{self, Receiver, Sender, UnboundedReceiver, UnboundedSend type RecordBatchReceiver = Receiver; type DemuxedStreamReceiver = UnboundedReceiver<(Path, RecordBatchReceiver)>; +#[tracing::instrument(level = "trace", skip(input, context, partition_by, base_output_path, file_extension))] /// Splits a single [SendableRecordBatchStream] into a dynamically determined /// number of partitions at execution time. 
The partitions are determined by /// factors known only at execution time, such as total number of rows and @@ -111,6 +112,7 @@ pub(crate) fn start_demuxer_task( (task, rx) } +#[tracing::instrument(level = "trace", skip(tx, input, context, base_output_path, file_extension, single_file_output))] /// Dynamically partitions input stream to acheive desired maximum rows per file async fn row_count_demuxer( mut tx: UnboundedSender<(Path, Receiver)>, @@ -189,6 +191,7 @@ async fn row_count_demuxer( Ok(()) } +#[tracing::instrument(level = "trace", skip(base_output_path, write_id, part_idx, file_extension, single_file_output))] /// Helper for row count demuxer fn generate_file_path( base_output_path: &ListingTableUrl, @@ -206,6 +209,7 @@ fn generate_file_path( } } +#[tracing::instrument(level = "trace", skip(base_output_path, write_id, part_idx, file_extension, single_file_output, max_buffered_batches, tx))] /// Helper for row count demuxer fn create_new_file_stream( base_output_path: &ListingTableUrl, @@ -230,6 +234,7 @@ fn create_new_file_stream( Ok(tx_file) } +#[tracing::instrument(level = "trace", skip(tx, input, context, partition_by, base_output_path, file_extension))] /// Splits an input stream based on the distinct values of a set of columns /// Assumes standard hive style partition paths such as /// /col1=val1/col2=val2/outputfile.parquet @@ -312,6 +317,7 @@ async fn hive_style_partitions_demuxer( Ok(()) } +#[tracing::instrument(level = "trace", skip(rb, partition_by))] fn compute_partition_keys_by_row<'a>( rb: &'a RecordBatch, partition_by: &'a [(String, DataType)], @@ -370,6 +376,7 @@ fn compute_partition_keys_by_row<'a>( Ok(all_partition_values) } +#[tracing::instrument(level = "trace", skip(rb, all_partition_values))] fn compute_take_arrays( rb: &RecordBatch, all_partition_values: Vec>, @@ -386,6 +393,7 @@ fn compute_take_arrays( take_map } +#[tracing::instrument(level = "trace", skip(parted_batch, partition_by))] fn remove_partition_by_columns( parted_batch: &RecordBatch, partition_by: &[(String, DataType)], @@ -411,6 +419,7 @@ fn remove_partition_by_columns( Ok(final_batch_to_send) } +#[tracing::instrument(level = "trace", skip(part_key, partition_by, write_id, file_extension, base_output_path))] fn compute_hive_style_file_path( part_key: &[String], partition_by: &[(String, DataType)], diff --git a/datafusion/core/src/datasource/file_format/write/mod.rs b/datafusion/core/src/datasource/file_format/write/mod.rs index 42115fc7b93fb..e130e088449a9 100644 --- a/datafusion/core/src/datasource/file_format/write/mod.rs +++ b/datafusion/core/src/datasource/file_format/write/mod.rs @@ -47,6 +47,7 @@ pub(crate) struct SharedBuffer { } impl SharedBuffer { + #[tracing::instrument(level = "trace", skip(capacity))] pub fn new(capacity: usize) -> Self { Self { buffer: Arc::new(futures::lock::Mutex::new(Vec::with_capacity(capacity))), @@ -55,11 +56,13 @@ impl SharedBuffer { } impl Write for SharedBuffer { + #[tracing::instrument(level = "trace", skip(self, buf))] fn write(&mut self, buf: &[u8]) -> std::io::Result { let mut buffer = self.buffer.try_lock().unwrap(); Write::write(&mut *buffer, buf) } + #[tracing::instrument(level = "trace", skip(self))] fn flush(&mut self) -> std::io::Result<()> { let mut buffer = self.buffer.try_lock().unwrap(); Write::flush(&mut *buffer) @@ -74,6 +77,7 @@ pub trait BatchSerializer: Sync + Send { fn serialize(&self, batch: RecordBatch, initial: bool) -> Result; } +#[tracing::instrument(level = "trace", skip(file_compression_type, location, object_store))] /// Returns 
an [`AsyncWrite`] which writes to the given object store location /// with the specified compression. /// We drop the `AbortableWrite` struct and the writer will not try to cleanup on failure. diff --git a/datafusion/core/src/datasource/file_format/write/orchestration.rs b/datafusion/core/src/datasource/file_format/write/orchestration.rs index 3ae2122de827a..5f67f554b6892 100644 --- a/datafusion/core/src/datasource/file_format/write/orchestration.rs +++ b/datafusion/core/src/datasource/file_format/write/orchestration.rs @@ -42,6 +42,7 @@ use tokio::task::JoinSet; type WriterType = Box; type SerializerType = Arc; +#[tracing::instrument(level = "trace", skip(data_rx, serializer, writer))] /// Serializes a single data stream in parallel and writes to an ObjectStore /// concurrently. Data order is preserved. In the event of an error, /// the ObjectStore writer is returned to the caller in addition to an error, @@ -121,6 +122,7 @@ pub(crate) async fn serialize_rb_stream_to_object_store( } type FileWriteBundle = (Receiver, SerializerType, WriterType); +#[tracing::instrument(level = "trace", skip(rx, tx))] /// Contains the common logic for serializing RecordBatches and /// writing the resulting bytes to an ObjectStore. /// Serialization is assumed to be stateless, i.e. @@ -196,6 +198,7 @@ pub(crate) async fn stateless_serialize_and_write_files( Ok(()) } +#[tracing::instrument(level = "trace", skip(data, context, file_extension, get_serializer, config, compression))] /// Orchestrates multipart put of a dynamic number of output files from a single input stream /// for any statelessly serialized file type. That is, any file type for which each [RecordBatch] /// can be serialized independently of all other [RecordBatch]s. diff --git a/datafusion/core/src/datasource/function.rs b/datafusion/core/src/datasource/function.rs index 2fd352ee4eb31..6f6432274ecdf 100644 --- a/datafusion/core/src/datasource/function.rs +++ b/datafusion/core/src/datasource/function.rs @@ -39,16 +39,19 @@ pub struct TableFunction { } impl TableFunction { + #[tracing::instrument(level = "trace", skip(name, fun))] /// Create a new table function pub fn new(name: String, fun: Arc) -> Self { Self { name, fun } } + #[tracing::instrument(level = "trace", skip(self))] /// Get the name of the table function pub fn name(&self) -> &str { &self.name } + #[tracing::instrument(level = "trace", skip(self, args))] /// Get the function implementation and generate a table pub fn create_table_provider(&self, args: &[Expr]) -> Result> { self.fun.call(args) diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index 5b87090096652..e66c70788aacf 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -43,6 +43,7 @@ use datafusion_physical_expr::create_physical_expr; use object_store::path::Path; use object_store::{ObjectMeta, ObjectStore}; +#[tracing::instrument(level = "trace", skip(col_names, expr))] /// Check whether the given expression can be resolved using only the columns `col_names`. 
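// ---------------------------------------------------------------------------
// Editor's sketch (assumption, not part of this patch): the demux and
// orchestration functions above hand work to spawned tasks. An attribute on
// the spawning function does not follow the work across `tokio::spawn`; the
// spawned future has to be tied to the current span explicitly, for example
// with `tracing::Instrument::in_current_span`. Names are illustrative, and a
// tokio dependency with the `rt` and `macros` features is assumed.

use tracing::Instrument;

#[tracing::instrument(level = "trace", skip(data))]
async fn serialize_all(data: Vec<u64>) -> u64 {
    // Without `.in_current_span()` the spawned work would not show up under
    // the `serialize_all` span.
    let task = tokio::spawn(async move { data.iter().sum::<u64>() }.in_current_span());
    task.await.unwrap()
}

#[tokio::main]
async fn main() {
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::TRACE)
        .init();
    serialize_all(vec![1, 2, 3]).await;
}
// ---------------------------------------------------------------------------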
/// This means that if this function returns true: /// - the table provider can filter the table partition values with this expression @@ -120,6 +121,7 @@ pub fn expr_applicable_for_cols(col_names: &[String], expr: &Expr) -> bool { /// The maximum number of concurrent listing requests const CONCURRENCY_LIMIT: usize = 100; +#[tracing::instrument(level = "trace", skip(partitioned_files, n))] /// Partition the list of files into `n` groups pub fn split_files( mut partitioned_files: Vec, @@ -153,6 +155,7 @@ struct Partition { } impl Partition { + #[tracing::instrument(level = "trace", skip(self, store))] /// List the direct children of this partition updating `self.files` with /// any child files, and returning a list of child "directories" async fn list(mut self, store: &dyn ObjectStore) -> Result<(Self, Vec)> { @@ -164,6 +167,7 @@ impl Partition { } } +#[tracing::instrument(level = "trace", skip(store, table_path, max_depth))] /// Returns a recursive list of the partitions in `table_path` up to `max_depth` async fn list_partitions( store: &dyn ObjectStore, @@ -210,6 +214,7 @@ async fn list_partitions( Ok(out) } +#[tracing::instrument(level = "trace", skip(table_path, partitions, filters, partition_cols))] async fn prune_partitions( table_path: &ListingTableUrl, partitions: Vec, @@ -305,6 +310,7 @@ async fn prune_partitions( Ok(filtered) } +#[tracing::instrument(level = "trace", skip(ctx, store, table_path, filters, file_extension, partition_cols))] /// Discover the partitions on the given path and prune out files /// that belong to irrelevant partitions using `filters` expressions. /// `filters` might contain expressions that can be resolved only at the @@ -381,6 +387,7 @@ pub async fn pruned_partition_list<'a>( Ok(stream) } +#[tracing::instrument(level = "trace", skip(table_path, file_path, table_partition_cols))] /// Extract the partition values for the given `file_path` (in the given `table_path`) /// associated to the partitions defined by `table_partition_cols` fn parse_partitions_for_path<'a, I>( diff --git a/datafusion/core/src/datasource/listing/mod.rs b/datafusion/core/src/datasource/listing/mod.rs index d0361d7b32c1b..29748ea16f351 100644 --- a/datafusion/core/src/datasource/listing/mod.rs +++ b/datafusion/core/src/datasource/listing/mod.rs @@ -76,6 +76,7 @@ pub struct PartitionedFile { pub extensions: Option>, } impl PartitionedFile { + #[tracing::instrument(level = "trace", skip(path, size))] /// Create a simple file without metadata or partition pub fn new(path: impl Into, size: u64) -> Self { Self { @@ -93,6 +94,7 @@ impl PartitionedFile { } } + #[tracing::instrument(level = "trace", skip(path, size, start, end))] /// Create a file range without metadata or partition pub fn new_with_range(path: String, size: u64, start: i64, end: i64) -> Self { Self { @@ -111,17 +113,20 @@ impl PartitionedFile { .with_range(start, end) } + #[tracing::instrument(level = "trace", skip(path))] /// Return a file reference from the given path pub fn from_path(path: String) -> Result { let size = std::fs::metadata(path.clone())?.len(); Ok(Self::new(path, size)) } + #[tracing::instrument(level = "trace", skip(self))] /// Return the path of this partitioned file pub fn path(&self) -> &Path { &self.object_meta.location } + #[tracing::instrument(level = "trace", skip(self, start, end))] /// Update the file to only scan the specified range (in bytes) pub fn with_range(mut self, start: i64, end: i64) -> Self { self.range = Some(FileRange { start, end }); @@ -130,6 +135,7 @@ impl PartitionedFile { } impl 
From for PartitionedFile { + #[tracing::instrument(level = "trace", skip(object_meta))] fn from(object_meta: ObjectMeta) -> Self { PartitionedFile { object_meta, diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 5af2c1ed0c789..e4223767b833c 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -77,6 +77,7 @@ pub struct ListingTableConfig { } impl ListingTableConfig { + #[tracing::instrument(level = "trace", skip(table_path))] /// Creates new [`ListingTableConfig`]. /// /// The [`SchemaRef`] and [`ListingOptions`] are inferred based on @@ -90,6 +91,7 @@ impl ListingTableConfig { } } + #[tracing::instrument(level = "trace", skip(table_paths))] /// Creates new [`ListingTableConfig`] with multiple table paths. /// /// The [`SchemaRef`] and [`ListingOptions`] are inferred based on @@ -101,6 +103,7 @@ impl ListingTableConfig { options: None, } } + #[tracing::instrument(level = "trace", skip(self, schema))] /// Add `schema` to [`ListingTableConfig`] pub fn with_schema(self, schema: SchemaRef) -> Self { Self { @@ -110,6 +113,7 @@ impl ListingTableConfig { } } + #[tracing::instrument(level = "trace", skip(self, listing_options))] /// Add `listing_options` to [`ListingTableConfig`] pub fn with_listing_options(self, listing_options: ListingOptions) -> Self { Self { @@ -119,6 +123,7 @@ impl ListingTableConfig { } } + #[tracing::instrument(level = "trace", skip(path))] fn infer_file_type(path: &str) -> Result<(FileType, String)> { let err_msg = format!("Unable to infer file type from path: {path}"); @@ -143,6 +148,7 @@ impl ListingTableConfig { Ok((file_type, ext)) } + #[tracing::instrument(level = "trace", skip(self, state))] /// Infer `ListingOptions` based on `table_path` suffix. pub async fn infer_options(self, state: &SessionState) -> Result { let store = if let Some(url) = self.table_paths.first() { @@ -192,6 +198,7 @@ impl ListingTableConfig { }) } + #[tracing::instrument(level = "trace", skip(self, state))] /// Infer the [`SchemaRef`] based on `table_path` suffix. Requires `self.options` to be set prior to using. pub async fn infer_schema(self, state: &SessionState) -> Result { match self.options { @@ -212,6 +219,7 @@ impl ListingTableConfig { } } + #[tracing::instrument(level = "trace", skip(self, state))] /// Convenience wrapper for calling `infer_options` and `infer_schema` pub async fn infer(self, state: &SessionState) -> Result { self.infer_options(state).await?.infer_schema(state).await @@ -256,6 +264,7 @@ pub struct ListingOptions { } impl ListingOptions { + #[tracing::instrument(level = "trace", skip(format))] /// Creates an options instance with the given format /// Default values: /// - no file extension filter @@ -274,6 +283,7 @@ impl ListingOptions { } } + #[tracing::instrument(level = "trace", skip(self, file_extension))] /// Set file extension on [`ListingOptions`] and returns self. /// /// ``` @@ -293,6 +303,7 @@ impl ListingOptions { self } + #[tracing::instrument(level = "trace", skip(self, table_partition_cols))] /// Set `table partition columns` on [`ListingOptions`] and returns self. /// /// "partition columns," used to support [Hive Partitioning], are @@ -361,6 +372,7 @@ impl ListingOptions { self } + #[tracing::instrument(level = "trace", skip(self, collect_stat))] /// Set stat collection on [`ListingOptions`] and returns self. 
/// /// ``` @@ -379,6 +391,7 @@ impl ListingOptions { self } + #[tracing::instrument(level = "trace", skip(self, target_partitions))] /// Set number of target partitions on [`ListingOptions`] and returns self. /// /// ``` @@ -397,6 +410,7 @@ impl ListingOptions { self } + #[tracing::instrument(level = "trace", skip(self, file_sort_order))] /// Set file sort order on [`ListingOptions`] and returns self. /// /// ``` @@ -421,6 +435,7 @@ impl ListingOptions { self } + #[tracing::instrument(level = "trace", skip(self, column_hints))] /// /// Set column_hints on [`ListingOptions`] and returns self pub fn with_column_hints(mut self, column_hints: Option>) -> Self { @@ -428,6 +443,7 @@ impl ListingOptions { self } + #[tracing::instrument(level = "trace", skip(self, state, table_path, column_hints))] /// Infer the schema of the files at the given path on the provided object store. /// The inferred schema does not include the partitioning columns. /// @@ -451,6 +467,7 @@ impl ListingOptions { self.format.infer_schema(state, &store, &files, column_hints).await } + #[tracing::instrument(level = "trace", skip(self, state, table_path))] /// Infers the partition columns stored in `LOCATION` and compares /// them with the columns provided in `PARTITIONED BY` to help prevent /// accidental corrupts of partitioned tables. @@ -507,6 +524,7 @@ impl ListingOptions { Ok(()) } + #[tracing::instrument(level = "trace", skip(self, state, table_path))] /// Infer the partitioning at the given path on the provided object store. /// For performance reasons, it doesn't read all the files on disk /// and therefore may fail to detect invalid partitioning. @@ -637,6 +655,7 @@ pub struct ListingTable { } impl ListingTable { + #[tracing::instrument(level = "trace", skip(config))] /// Create new [`ListingTable`] that lists the FS to get the files /// to scan. See [`ListingTable`] for and example. /// @@ -676,12 +695,14 @@ impl ListingTable { Ok(table) } + #[tracing::instrument(level = "trace", skip(self, constraints))] /// Assign constraints pub fn with_constraints(mut self, constraints: Constraints) -> Self { self.constraints = constraints; self } + #[tracing::instrument(level = "trace", skip(self, column_defaults))] /// Assign column defaults pub fn with_column_defaults( mut self, @@ -691,6 +712,7 @@ impl ListingTable { self } + #[tracing::instrument(level = "trace", skip(self, cache))] /// Set the [`FileStatisticsCache`] used to cache parquet file statistics. 
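For orientation, the builder-style options instrumented above are normally used roughly as sketched below. This is written against the upstream DataFusion API; the `column_hints` parameter added in this fork is not shown, and the table path is hypothetical.

```rust
// Sketch of the usual ListingOptions / ListingTable setup flow (upstream API;
// this fork's extra `column_hints` argument to infer_schema is omitted).
use std::sync::Arc;

use datafusion::datasource::file_format::parquet::ParquetFormat;
use datafusion::datasource::listing::{
    ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl,
};
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();

    // Hypothetical table location.
    let table_path = ListingTableUrl::parse("file:///tmp/my_table/")?;
    let options = ListingOptions::new(Arc::new(ParquetFormat::default()))
        .with_file_extension(".parquet")
        .with_target_partitions(8);

    // Infer the file schema from the objects under `table_path`.
    let schema = options.infer_schema(&ctx.state(), &table_path).await?;

    let config = ListingTableConfig::new(table_path)
        .with_listing_options(options)
        .with_schema(schema);
    let table = ListingTable::try_new(config)?;

    ctx.register_table("my_table", Arc::new(table))?;
    ctx.sql("SELECT count(*) FROM my_table").await?.show().await?;
    Ok(())
}
```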
/// /// Setting a statistics cache on the `SessionContext` can avoid refetching statistics @@ -703,22 +725,26 @@ impl ListingTable { self } + #[tracing::instrument(level = "trace", skip(self, defintion))] /// Specify the SQL definition for this table, if any pub fn with_definition(mut self, defintion: Option) -> Self { self.definition = defintion; self } + #[tracing::instrument(level = "trace", skip(self))] /// Get paths ref pub fn table_paths(&self) -> &Vec { &self.table_paths } + #[tracing::instrument(level = "trace", skip(self))] /// Get options ref pub fn options(&self) -> &ListingOptions { &self.options } + #[tracing::instrument(level = "trace", skip(self))] /// If file_sort_order is specified, creates the appropriate physical expressions fn try_create_output_ordering(&self) -> Result> { create_ordering(&self.table_schema, &self.options.file_sort_order) @@ -727,22 +753,27 @@ impl ListingTable { #[async_trait] impl TableProvider for ListingTable { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { Arc::clone(&self.table_schema) } + #[tracing::instrument(level = "trace", skip(self))] fn constraints(&self) -> Option<&Constraints> { Some(&self.constraints) } + #[tracing::instrument(level = "trace", skip(self))] fn table_type(&self) -> TableType { TableType::Base } + #[tracing::instrument(level = "trace", skip(self, state, projection, filters, limit))] async fn scan( &self, state: &SessionState, @@ -832,6 +863,7 @@ impl TableProvider for ListingTable { .await } + #[tracing::instrument(level = "trace", skip(self, filters))] fn supports_filters_pushdown( &self, filters: &[&Expr], @@ -860,10 +892,12 @@ impl TableProvider for ListingTable { Ok(support) } + #[tracing::instrument(level = "trace", skip(self))] fn get_table_definition(&self) -> Option<&str> { self.definition.as_deref() } + #[tracing::instrument(level = "trace", skip(self, state, input, overwrite))] async fn insert_into( &self, state: &SessionState, @@ -941,12 +975,14 @@ impl TableProvider for ListingTable { .await } + #[tracing::instrument(level = "trace", skip(self, column))] fn get_column_default(&self, column: &str) -> Option<&Expr> { self.column_defaults.get(column) } } impl ListingTable { + #[tracing::instrument(level = "trace", skip(self, ctx, filters, limit))] /// Get the list of files for a scan as well as the file level statistics. /// The list is grouped to let the execution plan know how the files should /// be distributed to different threads / executors. @@ -1005,6 +1041,7 @@ impl ListingTable { )) } + #[tracing::instrument(level = "trace", skip(self, ctx, store, part_file))] /// Collects statistics for a given partitioned file. /// /// This method first checks if the statistics for the given file are already cached. @@ -1409,6 +1446,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(ctx, name))] async fn load_table( ctx: &SessionContext, name: &str, @@ -1424,6 +1462,7 @@ mod tests { Ok(Arc::new(table)) } + #[tracing::instrument(level = "trace", skip(files, table_prefix, target_partitions, output_partitioning))] /// Check that the files listed by the table match the specified `output_partitioning` /// when the object store contains `files`. 
async fn assert_list_files_for_scan_grouping( @@ -1457,6 +1496,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(files, table_prefix, target_partitions, output_partitioning))] /// Check that the files listed by the table match the specified `output_partitioning` /// when the object store contains `files`. async fn assert_list_files_for_multi_paths( @@ -1772,6 +1812,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(file_type, file_compression_type, session_config_map, expected_n_files_per_insert))] async fn helper_test_append_new_files_to_table( file_type: FileType, file_compression_type: FileCompressionType, @@ -1966,6 +2007,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(file_type, _file_compression_type, external_table_options, session_config_map))] /// tests insert into with end to end sql /// create external table + insert into statements async fn helper_test_insert_into_sql( diff --git a/datafusion/core/src/datasource/listing/url.rs b/datafusion/core/src/datasource/listing/url.rs index 73fffd8abaeda..b1e8c2550ac3f 100644 --- a/datafusion/core/src/datasource/listing/url.rs +++ b/datafusion/core/src/datasource/listing/url.rs @@ -43,6 +43,7 @@ pub struct ListingTableUrl { } impl ListingTableUrl { + #[tracing::instrument(level = "trace", skip(s))] /// Parse a provided string as a `ListingTableUrl` /// /// A URL can either refer to a single object, or a collection of objects with a @@ -115,6 +116,7 @@ impl ListingTableUrl { } } + #[tracing::instrument(level = "trace", skip(s))] /// Creates a new [`ListingTableUrl`] interpreting `s` as a filesystem path #[cfg(not(target_arch = "wasm32"))] fn parse_path(s: &str) -> Result { @@ -136,17 +138,20 @@ impl ListingTableUrl { Self::try_new(url, glob) } + #[tracing::instrument(level = "trace", skip(url, glob))] /// Creates a new [`ListingTableUrl`] from a url and optional glob expression fn try_new(url: Url, glob: Option) -> Result { let prefix = Path::from_url_path(url.path())?; Ok(Self { url, prefix, glob }) } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the URL scheme pub fn scheme(&self) -> &str { self.url.scheme() } + #[tracing::instrument(level = "trace", skip(self))] /// Return the URL path not excluding any glob expression /// /// If [`Self::is_collection`], this is the listing prefix @@ -155,6 +160,7 @@ impl ListingTableUrl { &self.prefix } + #[tracing::instrument(level = "trace", skip(self, path, ignore_subdirectory))] /// Returns `true` if `path` matches this [`ListingTableUrl`] pub fn contains(&self, path: &Path, ignore_subdirectory: bool) -> bool { let Some(all_segments) = self.strip_prefix(path) else { @@ -185,11 +191,13 @@ impl ListingTableUrl { } } + #[tracing::instrument(level = "trace", skip(self))] /// Returns `true` if `path` refers to a collection of objects pub fn is_collection(&self) -> bool { self.url.path().ends_with(DELIMITER) } + #[tracing::instrument(level = "trace", skip(self, path))] /// Strips the prefix of this [`ListingTableUrl`] from the provided path, returning /// an iterator of the remaining path segments pub(crate) fn strip_prefix<'a, 'b: 'a>( @@ -203,6 +211,7 @@ impl ListingTableUrl { Some(stripped.split_terminator(DELIMITER)) } + #[tracing::instrument(level = "trace", skip(self, ctx, store, file_extension))] /// List all files identified by this [`ListingTableUrl`] for the provided `file_extension` pub async fn list_all_files<'a>( &'a self, @@ -242,11 +251,13 @@ impl ListingTableUrl { .boxed()) } + 
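A small sketch of how `ListingTableUrl` interprets directories and glob patterns, assuming the upstream DataFusion API; the paths are hypothetical and need not exist for parsing to succeed.

```rust
// Sketch (upstream DataFusion, hypothetical paths): ListingTableUrl parsing.
use datafusion::datasource::listing::ListingTableUrl;
use datafusion::error::Result;

fn main() -> Result<()> {
    // A trailing '/' marks a collection (directory) of objects.
    let dir = ListingTableUrl::parse("s3://bucket/data/")?;
    assert!(dir.is_collection());
    assert_eq!(dir.scheme(), "s3");

    // A plain filesystem path containing a glob is split into a listing
    // prefix plus a pattern applied to the listed objects.
    let glob = ListingTableUrl::parse("/tmp/data/*.parquet")?;
    println!("scheme = {}", glob.scheme()); // "file"
    Ok(())
}
```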
#[tracing::instrument(level = "trace", skip(self))] /// Returns this [`ListingTableUrl`] as a string pub fn as_str(&self) -> &str { self.as_ref() } + #[tracing::instrument(level = "trace", skip(self))] /// Return the [`ObjectStoreUrl`] for this [`ListingTableUrl`] pub fn object_store(&self) -> ObjectStoreUrl { let url = &self.url[url::Position::BeforeScheme..url::Position::BeforePath]; @@ -254,6 +265,7 @@ impl ListingTableUrl { } } +#[tracing::instrument(level = "trace", skip(s))] /// Creates a file URL from a potentially relative filesystem path #[cfg(not(target_arch = "wasm32"))] fn url_from_filesystem_path(s: &str) -> Option { @@ -284,18 +296,21 @@ fn url_from_filesystem_path(s: &str) -> Option { } impl AsRef for ListingTableUrl { + #[tracing::instrument(level = "trace", skip(self))] fn as_ref(&self) -> &str { self.url.as_ref() } } impl AsRef for ListingTableUrl { + #[tracing::instrument(level = "trace", skip(self))] fn as_ref(&self) -> &Url { &self.url } } impl std::fmt::Display for ListingTableUrl { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.as_str().fmt(f) } @@ -303,6 +318,7 @@ impl std::fmt::Display for ListingTableUrl { const GLOB_START_CHARS: [char; 3] = ['?', '*', '[']; +#[tracing::instrument(level = "trace", skip(path))] /// Splits `path` at the first path segment containing a glob expression, returning /// `None` if no glob expression found. /// @@ -431,6 +447,7 @@ mod tests { #[test] fn test_split_glob() { + #[tracing::instrument(level = "trace", skip(input, expected))] fn test(input: &str, expected: Option<(&str, &str)>) { assert_eq!( split_glob_expression(input), @@ -466,6 +483,7 @@ mod tests { #[test] fn test_is_collection() { + #[tracing::instrument(level = "trace", skip(input, expected, message))] fn test(input: &str, expected: bool, message: &str) { let url = ListingTableUrl::parse(input).unwrap(); assert_eq!(url.is_collection(), expected, "{message}"); diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index f4f3b0f222257..e9c1c7447114b 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -44,6 +44,7 @@ use async_trait::async_trait; pub struct ListingTableFactory {} impl ListingTableFactory { + #[tracing::instrument(level = "trace", skip())] /// Creates a new `ListingTableFactory` pub fn new() -> Self { Self::default() @@ -52,6 +53,7 @@ impl ListingTableFactory { #[async_trait] impl TableProviderFactory for ListingTableFactory { + #[tracing::instrument(level = "trace", skip(self, state, cmd))] async fn create( &self, state: &SessionState, @@ -151,6 +153,7 @@ impl TableProviderFactory for ListingTableFactory { } // Get file extension from path +#[tracing::instrument(level = "trace", skip(path))] fn get_extension(path: &str) -> String { let res = Path::new(path).extension().and_then(|ext| ext.to_str()); match res { diff --git a/datafusion/core/src/datasource/memory.rs b/datafusion/core/src/datasource/memory.rs index aab42285a0b2f..e87a328b3279c 100644 --- a/datafusion/core/src/datasource/memory.rs +++ b/datafusion/core/src/datasource/memory.rs @@ -67,6 +67,7 @@ pub struct MemTable { } impl MemTable { + #[tracing::instrument(level = "trace", skip(schema, partitions))] /// Create a new in-memory table from the provided schema and record batches pub fn try_new(schema: SchemaRef, partitions: Vec>) -> Result { for batches in 
partitions.iter().flatten() { @@ -92,12 +93,14 @@ impl MemTable { }) } + #[tracing::instrument(level = "trace", skip(self, constraints))] /// Assign constraints pub fn with_constraints(mut self, constraints: Constraints) -> Self { self.constraints = constraints; self } + #[tracing::instrument(level = "trace", skip(self, column_defaults))] /// Assign column defaults pub fn with_column_defaults( mut self, @@ -107,6 +110,7 @@ impl MemTable { self } + #[tracing::instrument(level = "trace", skip(self, sort_order))] /// Specify an optional pre-known sort order(s). Must be `SortExpr`s. /// /// If the data is not sorted by this order, DataFusion may produce @@ -122,6 +126,7 @@ impl MemTable { self } + #[tracing::instrument(level = "trace", skip(t, output_partitions, state))] /// Create a mem table by reading from another data source pub async fn load( t: Arc, @@ -188,22 +193,27 @@ impl MemTable { #[async_trait] impl TableProvider for MemTable { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.schema.clone() } + #[tracing::instrument(level = "trace", skip(self))] fn constraints(&self) -> Option<&Constraints> { Some(&self.constraints) } + #[tracing::instrument(level = "trace", skip(self))] fn table_type(&self) -> TableType { TableType::Base } + #[tracing::instrument(level = "trace", skip(self, state, projection, _filters, _limit))] async fn scan( &self, state: &SessionState, @@ -244,6 +254,7 @@ impl TableProvider for MemTable { Ok(Arc::new(exec)) } + #[tracing::instrument(level = "trace", skip(self, _state, input, overwrite))] /// Returns an ExecutionPlan that inserts the execution results of a given [`ExecutionPlan`] into this [`MemTable`]. /// /// The [`ExecutionPlan`] must have the same schema as this [`MemTable`]. @@ -287,6 +298,7 @@ impl TableProvider for MemTable { ))) } + #[tracing::instrument(level = "trace", skip(self, column))] fn get_column_default(&self, column: &str) -> Option<&Expr> { self.column_defaults.get(column) } @@ -299,6 +311,7 @@ struct MemSink { } impl Debug for MemSink { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("MemSink") .field("num_partitions", &self.batches.len()) @@ -307,6 +320,7 @@ impl Debug for MemSink { } impl DisplayAs for MemSink { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { @@ -318,6 +332,7 @@ impl DisplayAs for MemSink { } impl MemSink { + #[tracing::instrument(level = "trace", skip(batches))] fn new(batches: Vec) -> Self { Self { batches } } @@ -325,14 +340,17 @@ impl MemSink { #[async_trait] impl DataSink for MemSink { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { None } + #[tracing::instrument(level = "trace", skip(self, data, _context))] async fn write_all( &self, mut data: SendableRecordBatchStream, @@ -600,6 +618,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(schema, initial_data, inserted_data))] async fn experiment( schema: SchemaRef, initial_data: Vec>, @@ -644,6 +663,7 @@ mod tests { Ok(partitions) } + #[tracing::instrument(level = "trace", skip(res))] /// Returns the value of results. 
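For context on the `MemTable` methods instrumented above, a minimal usage sketch against the upstream DataFusion and Arrow APIs (the data is made up):

```rust
// Sketch: creating an in-memory table from record batches and querying it.
use std::sync::Arc;

use datafusion::arrow::array::Int32Array;
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::datasource::MemTable;
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
    let batch = RecordBatch::try_new(
        schema.clone(),
        vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
    )?;

    // One outer Vec entry per partition; each partition holds its batches.
    let table = MemTable::try_new(schema, vec![vec![batch]])?;

    let ctx = SessionContext::new();
    ctx.register_table("t", Arc::new(table))?;
    ctx.sql("SELECT sum(a) FROM t").await?.show().await?;
    Ok(())
}
```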
For example, returns 6 given the follwing /// /// ```text diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs index 351967d353245..ef69e1e388498 100644 --- a/datafusion/core/src/datasource/mod.rs +++ b/datafusion/core/src/datasource/mod.rs @@ -52,6 +52,7 @@ use datafusion_common::{plan_err, Result}; use datafusion_expr::Expr; use datafusion_physical_expr::{expressions, LexOrdering, PhysicalSortExpr}; +#[tracing::instrument(level = "trace", skip(schema, sort_order))] fn create_ordering( schema: &Schema, sort_order: &[Vec], diff --git a/datafusion/core/src/datasource/physical_plan/arrow_file.rs b/datafusion/core/src/datasource/physical_plan/arrow_file.rs index 1e87757310159..86673f190285b 100644 --- a/datafusion/core/src/datasource/physical_plan/arrow_file.rs +++ b/datafusion/core/src/datasource/physical_plan/arrow_file.rs @@ -57,6 +57,7 @@ pub struct ArrowExec { } impl ArrowExec { + #[tracing::instrument(level = "trace", skip(base_config))] /// Create a new Arrow reader execution plan provided base configurations pub fn new(base_config: FileScanConfig) -> Self { let (projected_schema, projected_statistics, projected_output_ordering) = @@ -75,15 +76,18 @@ impl ArrowExec { cache, } } + #[tracing::instrument(level = "trace", skip(self))] /// Ref to the base configs pub fn base_config(&self) -> &FileScanConfig { &self.base_config } + #[tracing::instrument(level = "trace", skip(file_scan_config))] fn output_partitioning_helper(file_scan_config: &FileScanConfig) -> Partitioning { Partitioning::UnknownPartitioning(file_scan_config.file_groups.len()) } + #[tracing::instrument(level = "trace", skip(schema, projected_output_ordering, file_scan_config))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties( schema: SchemaRef, @@ -101,6 +105,7 @@ impl ArrowExec { ) } + #[tracing::instrument(level = "trace", skip(self, file_groups))] fn with_file_groups(mut self, file_groups: Vec>) -> Self { self.base_config.file_groups = file_groups; // Changing file groups may invalidate output partitioning. Update it also @@ -111,6 +116,7 @@ impl ArrowExec { } impl DisplayAs for ArrowExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -122,22 +128,27 @@ impl DisplayAs for ArrowExec { } impl ExecutionPlan for ArrowExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "ArrowExec" } + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { Vec::new() } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -145,6 +156,7 @@ impl ExecutionPlan for ArrowExec { Ok(self) } + #[tracing::instrument(level = "trace", skip(self, target_partitions, config))] /// Redistribute files across partitions according to their size /// See comments on [`FileGroupPartitioner`] for more detail. 
fn repartitioned( @@ -169,6 +181,7 @@ impl ExecutionPlan for ArrowExec { Ok(None) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -188,10 +201,12 @@ impl ExecutionPlan for ArrowExec { Ok(Box::pin(stream)) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { Ok(self.projected_statistics.clone()) } @@ -203,6 +218,7 @@ pub struct ArrowOpener { } impl FileOpener for ArrowOpener { + #[tracing::instrument(level = "trace", skip(self, file_meta))] fn open(&self, file_meta: FileMeta) -> Result { let object_store = self.object_store.clone(); let projection = self.projection.clone(); diff --git a/datafusion/core/src/datasource/physical_plan/avro.rs b/datafusion/core/src/datasource/physical_plan/avro.rs index 763b60e409980..6ef39f6cbd961 100644 --- a/datafusion/core/src/datasource/physical_plan/avro.rs +++ b/datafusion/core/src/datasource/physical_plan/avro.rs @@ -46,6 +46,7 @@ pub struct AvroExec { } impl AvroExec { + #[tracing::instrument(level = "trace", skip(base_config))] /// Create a new Avro reader execution plan provided base configurations pub fn new(base_config: FileScanConfig) -> Self { let (projected_schema, projected_statistics, projected_output_ordering) = @@ -64,11 +65,13 @@ impl AvroExec { cache, } } + #[tracing::instrument(level = "trace", skip(self))] /// Ref to the base configs pub fn base_config(&self) -> &FileScanConfig { &self.base_config } + #[tracing::instrument(level = "trace", skip(schema, orderings, file_scan_config))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties( schema: SchemaRef, @@ -88,6 +91,7 @@ impl AvroExec { } impl DisplayAs for AvroExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -99,22 +103,27 @@ impl DisplayAs for AvroExec { } impl ExecutionPlan for AvroExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "AvroExec" } + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { Vec::new() } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -122,6 +131,7 @@ impl ExecutionPlan for AvroExec { Ok(self) } + #[tracing::instrument(level = "trace", skip(self, _partition, _context))] #[cfg(not(feature = "avro"))] fn execute( &self, @@ -133,6 +143,7 @@ impl ExecutionPlan for AvroExec { )) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] #[cfg(feature = "avro")] fn execute( &self, @@ -157,10 +168,12 @@ impl ExecutionPlan for AvroExec { Ok(Box::pin(stream)) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { Ok(self.projected_statistics.clone()) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } @@ -185,6 +198,7 @@ mod private { } impl AvroConfig { + #[tracing::instrument(level = "trace", skip(self, reader))] fn open(&self, reader: R) -> Result> { AvroReader::try_new( reader, @@ -200,6 +214,7 @@ mod private { } impl FileOpener for AvroOpener { + #[tracing::instrument(level = "trace", skip(self, file_meta))] fn open(&self, file_meta: FileMeta) -> Result { let config = self.config.clone(); Ok(Box::pin(async move { @@ -256,6 +271,7 @@ mod tests { .await } + #[tracing::instrument(level = "trace", skip(store))] async fn test_with_stores(store: Arc) -> Result<()> { let session_ctx = SessionContext::new(); let state = session_ctx.state(); diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index cc7c837e471e3..8af8b957d52a1 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -66,6 +66,7 @@ pub struct CsvExec { } impl CsvExec { + #[tracing::instrument(level = "trace", skip(base_config, has_header, delimiter, quote, escape, file_compression_type))] /// Create a new CSV reader execution plan provided base and specific configurations pub fn new( base_config: FileScanConfig, @@ -95,33 +96,40 @@ impl CsvExec { } } + #[tracing::instrument(level = "trace", skip(self))] /// Ref to the base configs pub fn base_config(&self) -> &FileScanConfig { &self.base_config } + #[tracing::instrument(level = "trace", skip(self))] /// true if the first line of each file is a header pub fn has_header(&self) -> bool { self.has_header } + #[tracing::instrument(level = "trace", skip(self))] /// A column delimiter pub fn delimiter(&self) -> u8 { self.delimiter } + #[tracing::instrument(level = "trace", skip(self))] /// The quote character pub fn quote(&self) -> u8 { self.quote } + #[tracing::instrument(level = "trace", skip(self))] /// The escape character pub fn escape(&self) -> Option { self.escape } + #[tracing::instrument(level = "trace", skip(file_scan_config))] fn output_partitioning_helper(file_scan_config: 
&FileScanConfig) -> Partitioning { Partitioning::UnknownPartitioning(file_scan_config.file_groups.len()) } + #[tracing::instrument(level = "trace", skip(schema, orderings, file_scan_config))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties( schema: SchemaRef, @@ -138,6 +146,7 @@ impl CsvExec { ) } + #[tracing::instrument(level = "trace", skip(self, file_groups))] fn with_file_groups(mut self, file_groups: Vec>) -> Self { self.base_config.file_groups = file_groups; // Changing file groups may invalidate output partitioning. Update it also @@ -148,6 +157,7 @@ impl CsvExec { } impl DisplayAs for CsvExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -160,24 +170,29 @@ impl DisplayAs for CsvExec { } impl ExecutionPlan for CsvExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "CsvExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { // this is a leaf node and has no children vec![] } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -185,6 +200,7 @@ impl ExecutionPlan for CsvExec { Ok(self) } + #[tracing::instrument(level = "trace", skip(self, target_partitions, config))] /// Redistribute files across partitions according to their size /// See comments on [`FileGroupPartitioner`] for more detail. /// @@ -216,6 +232,7 @@ impl ExecutionPlan for CsvExec { Ok(None) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -245,10 +262,12 @@ impl ExecutionPlan for CsvExec { Ok(Box::pin(stream) as SendableRecordBatchStream) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { Ok(self.projected_statistics.clone()) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } @@ -268,6 +287,7 @@ pub struct CsvConfig { } impl CsvConfig { + #[tracing::instrument(level = "trace", skip(batch_size, file_schema, file_projection, has_header, delimiter, quote, object_store))] /// Returns a [`CsvConfig`] pub fn new( batch_size: usize, @@ -292,10 +312,12 @@ impl CsvConfig { } impl CsvConfig { + #[tracing::instrument(level = "trace", skip(self, reader))] fn open(&self, reader: R) -> Result> { Ok(self.builder().build(reader)?) } + #[tracing::instrument(level = "trace", skip(self))] fn builder(&self) -> csv::ReaderBuilder { let mut builder = csv::ReaderBuilder::new(self.file_schema.clone()) .with_delimiter(self.delimiter) @@ -321,6 +343,7 @@ pub struct CsvOpener { } impl CsvOpener { + #[tracing::instrument(level = "trace", skip(config, file_compression_type))] /// Returns a [`CsvOpener`] pub fn new( config: Arc, @@ -334,6 +357,7 @@ impl CsvOpener { } impl FileOpener for CsvOpener { + #[tracing::instrument(level = "trace", skip(self, file_meta))] /// Open a partitioned CSV file. /// /// If `file_meta.range` is `None`, the entire file is opened. 
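The high-level CSV read path that ends up planning a `CsvExec` like the one instrumented above, sketched against the upstream DataFusion API; the file path is hypothetical.

```rust
// Sketch: reading CSV through SessionContext, which plans a CsvExec.
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // Header handling and delimiter mirror the CsvExec options above.
    let options = CsvReadOptions::new().has_header(true).delimiter(b',');
    // `data/example.csv` is a hypothetical file.
    let df = ctx.read_csv("data/example.csv", options).await?;
    df.filter(col("value").gt(lit(10)))?.show().await?;
    Ok(())
}
```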
@@ -458,6 +482,7 @@ impl FileOpener for CsvOpener { } } +#[tracing::instrument(level = "trace", skip(task_ctx, plan, path))] pub async fn plan_to_csv( task_ctx: Arc, plan: Arc, @@ -860,6 +885,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(tmp_dir, partition_count, file_extension))] /// Generate CSV partitions within the supplied directory fn populate_csv_partitions( tmp_dir: &TempDir, @@ -889,6 +915,7 @@ mod tests { Ok(schema) } + #[tracing::instrument(level = "trace", skip(file_compression_type, store))] async fn test_additional_stores( file_compression_type: FileCompressionType, store: Arc, @@ -1106,6 +1133,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(metrics, metric_name))] fn get_value(metrics: &MetricsSet, metric_name: &str) -> usize { match metrics.sum_by_name(metric_name) { Some(v) => v.as_usize(), @@ -1117,6 +1145,7 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip())] /// Get the schema for the aggregate_test_* csv files with an additional filed not present in the files. fn aggr_test_schema_with_missing_col() -> SchemaRef { let fields = diff --git a/datafusion/core/src/datasource/physical_plan/file_groups.rs b/datafusion/core/src/datasource/physical_plan/file_groups.rs index 6456bd5c72766..d310545136e8e 100644 --- a/datafusion/core/src/datasource/physical_plan/file_groups.rs +++ b/datafusion/core/src/datasource/physical_plan/file_groups.rs @@ -132,12 +132,14 @@ pub struct FileGroupPartitioner { } impl Default for FileGroupPartitioner { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self::new() } } impl FileGroupPartitioner { + #[tracing::instrument(level = "trace", skip())] /// Creates a new [`FileGroupPartitioner`] with default values: /// 1. `target_partitions = 1` /// 2. `repartition_file_min_size = 10MB` @@ -150,12 +152,14 @@ impl FileGroupPartitioner { } } + #[tracing::instrument(level = "trace", skip(self, target_partitions))] /// Set the target partitions pub fn with_target_partitions(mut self, target_partitions: usize) -> Self { self.target_partitions = target_partitions; self } + #[tracing::instrument(level = "trace", skip(self, repartition_file_min_size))] /// Set the minimum size at which to repartition a file pub fn with_repartition_file_min_size( mut self, @@ -165,6 +169,7 @@ impl FileGroupPartitioner { self } + #[tracing::instrument(level = "trace", skip(self, preserve_order_within_groups))] /// Set whether the order of tuples within a file must be preserved pub fn with_preserve_order_within_groups( mut self, @@ -174,6 +179,7 @@ impl FileGroupPartitioner { self } + #[tracing::instrument(level = "trace", skip(self, file_groups))] /// Repartition input files according to the settings on this [`FileGroupPartitioner`]. /// /// If no repartitioning is needed or possible, return `None`. @@ -199,6 +205,7 @@ impl FileGroupPartitioner { } } + #[tracing::instrument(level = "trace", skip(self, file_groups))] /// Evenly repartition files across partitions by size, ignoring any /// existing grouping / ordering fn repartition_evenly_by_size( @@ -264,6 +271,7 @@ impl FileGroupPartitioner { Some(repartitioned_files) } + #[tracing::instrument(level = "trace", skip(self, file_groups))] /// Redistribute file groups across size preserving order fn repartition_preserving_order( &self, @@ -368,12 +376,14 @@ struct ToRepartition { impl ToRepartition { // how big will each file range be when this file is read in its new groups? 
+ #[tracing::instrument(level = "trace", skip(self))] fn range_size(&self) -> usize { self.file_size / self.new_groups.len() } } impl PartialOrd for ToRepartition { + #[tracing::instrument(level = "trace", skip(self, other))] fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } @@ -381,6 +391,7 @@ impl PartialOrd for ToRepartition { /// Order based on individual range impl Ord for ToRepartition { + #[tracing::instrument(level = "trace", skip(self, other))] fn cmp(&self, other: &Self) -> std::cmp::Ordering { self.range_size().cmp(&other.range_size()) } @@ -782,6 +793,7 @@ mod test { assert_partitioned_files(expected, actual); } + #[tracing::instrument(level = "trace", skip(expected, actual))] /// Asserts that the two groups of `ParititonedFile` are the same /// (PartitionedFile doesn't implement PartialEq) fn assert_partitioned_files( @@ -800,11 +812,13 @@ mod test { } } + #[tracing::instrument(level = "trace", skip(path, file_size))] /// returns a partitioned file with the specified path and size fn pfile(path: impl Into, file_size: u64) -> PartitionedFile { PartitionedFile::new(path, file_size) } + #[tracing::instrument(level = "trace", skip(partitioner, file_groups))] /// repartition the file groups both with and without preserving order /// asserting they return the same value and returns that value fn repartition_test( diff --git a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs index 52a21edd842d8..3d8f5db64479e 100644 --- a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs +++ b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs @@ -39,6 +39,7 @@ use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr}; use log::warn; +#[tracing::instrument(level = "trace", skip(val_type))] /// Convert type to a type suitable for use as a [`ListingTable`] /// partition column. Returns `Dictionary(UInt16, val_type)`, which is /// a reasonable trade off between a reasonable number of partition @@ -55,6 +56,7 @@ pub fn wrap_partition_type_in_dict(val_type: DataType) -> DataType { DataType::Dictionary(Box::new(DataType::UInt16), Box::new(val_type)) } +#[tracing::instrument(level = "trace", skip(val))] /// Convert a [`ScalarValue`] of partition columns to a type, as /// decribed in the documentation of [`wrap_partition_type_in_dict`], /// which can wrap the types. 
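The even-size repartitioning instrumented above spreads the total bytes across the target partitions by assigning each partition contiguous byte ranges of the input files. A standalone sketch of that idea follows; it is illustrative only and is not DataFusion's implementation.

```rust
// Standalone sketch: split files into `n` groups of roughly equal bytes by
// handing out (file, byte-range) slices. Illustrative, not DataFusion code.
#[derive(Debug, Clone)]
struct FileRange {
    path: String,
    start: u64,
    end: u64,
}

fn repartition_evenly_by_size(files: &[(String, u64)], n: u64) -> Vec<Vec<FileRange>> {
    let total: u64 = files.iter().map(|(_, size)| size).sum();
    if total == 0 || n == 0 {
        return vec![];
    }
    let per_group = (total + n - 1) / n; // ceiling division
    let mut groups = vec![Vec::new(); n as usize];
    let mut group = 0usize;
    let mut remaining_in_group = per_group;

    for (path, size) in files {
        let mut offset = 0;
        while offset < *size {
            let take = remaining_in_group.min(size - offset);
            groups[group].push(FileRange {
                path: path.clone(),
                start: offset,
                end: offset + take,
            });
            offset += take;
            remaining_in_group -= take;
            if remaining_in_group == 0 && group + 1 < n as usize {
                group += 1;
                remaining_in_group = per_group;
            }
        }
    }
    groups
}

fn main() {
    let groups = repartition_evenly_by_size(
        &[("a.parquet".into(), 100), ("b.parquet".into(), 20)],
        3,
    );
    for (i, g) in groups.iter().enumerate() {
        println!("partition {i}: {g:?}");
    }
}
```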
@@ -103,6 +105,7 @@ pub struct FileScanConfig { } impl FileScanConfig { + #[tracing::instrument(level = "trace", skip(self))] /// Project the schema and the statistics on the given column indices pub fn project(&self) -> (SchemaRef, Statistics, Vec) { if self.projection.is_none() && self.table_partition_cols.is_empty() { @@ -152,6 +155,7 @@ impl FileScanConfig { (projected_schema, table_stats, projected_output_ordering) } + #[tracing::instrument(level = "trace", skip(self))] #[allow(unused)] // Only used by avro pub(crate) fn projected_file_column_names(&self) -> Option> { self.projection.as_ref().map(|p| { @@ -163,6 +167,7 @@ impl FileScanConfig { }) } + #[tracing::instrument(level = "trace", skip(self))] /// Projects only file schema, ignoring partition columns pub(crate) fn projected_file_schema(&self) -> SchemaRef { let fields = self.file_column_projection_indices().map(|indices| { @@ -179,6 +184,7 @@ impl FileScanConfig { ) } + #[tracing::instrument(level = "trace", skip(self))] pub(crate) fn file_column_projection_indices(&self) -> Option> { self.projection.as_ref().map(|p| { p.iter() @@ -188,6 +194,7 @@ impl FileScanConfig { }) } + #[tracing::instrument(level = "trace", skip(file_groups, target_partitions, repartition_file_min_size))] #[allow(missing_docs)] #[deprecated(since = "33.0.0", note = "Use SessionContext::new_with_config")] pub fn repartition_file_groups( @@ -201,6 +208,7 @@ impl FileScanConfig { .repartition_file_groups(&file_groups) } + #[tracing::instrument(level = "trace", skip(table_schema, file_groups, sort_order))] /// Attempts to do a bin-packing on files into file groups, such that any two files /// in a file group are ordered and non-overlapping with respect to their statistics. /// It will produce the smallest number of file groups possible. 
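`FileScanConfig::project` combines the projected file columns with the table's partition columns, whose types are dictionary-wrapped as described above. A rough standalone sketch of that schema arithmetic, using made-up column names:

```rust
// Rough sketch of projecting file columns and appending dictionary-wrapped
// partition columns. Illustrative only; not the FileScanConfig implementation.
use datafusion::arrow::datatypes::{DataType, Field, Schema};

fn wrap_partition_type_in_dict(val_type: DataType) -> DataType {
    // Partition values repeat heavily, so a UInt16-keyed dictionary is cheap.
    DataType::Dictionary(Box::new(DataType::UInt16), Box::new(val_type))
}

fn projected_schema(
    file_schema: &Schema,
    projection: &[usize],
    partition_cols: &[(&str, DataType)],
) -> Schema {
    let mut fields: Vec<Field> = projection
        .iter()
        .map(|i| file_schema.field(*i).clone())
        .collect();
    for (name, dt) in partition_cols {
        fields.push(Field::new(*name, wrap_partition_type_in_dict(dt.clone()), false));
    }
    Schema::new(fields)
}

fn main() {
    let file_schema = Schema::new(vec![
        Field::new("a", DataType::Int64, true),
        Field::new("b", DataType::Utf8, true),
    ]);
    let schema = projected_schema(&file_schema, &[1], &[("date", DataType::Utf8)]);
    println!("{schema:#?}"); // "b", then "date" as Dictionary(UInt16, Utf8)
}
```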
@@ -290,6 +298,7 @@ impl PartitionColumnProjector { // Create a projector to insert the partitioning columns into batches read from files // - `projected_schema`: the target schema with both file and partitioning columns // - `table_partition_cols`: all the partitioning column names + #[tracing::instrument(level = "trace", skip(projected_schema, table_partition_cols))] pub fn new(projected_schema: SchemaRef, table_partition_cols: &[String]) -> Self { let mut idx_map = HashMap::new(); for (partition_idx, partition_name) in table_partition_cols.iter().enumerate() { @@ -312,6 +321,7 @@ impl PartitionColumnProjector { // to the right positions as deduced from `projected_schema` // - `file_batch`: batch read from the file, with internal projection applied // - `partition_values`: the list of partition values, one for each partition column + #[tracing::instrument(level = "trace", skip(self, file_batch, partition_values))] pub fn project( &mut self, file_batch: RecordBatch, @@ -400,6 +410,7 @@ where { const SIZE: usize = std::mem::size_of::(); + #[tracing::instrument(level = "trace", skip(self, n_vals))] fn get_buffer(&mut self, n_vals: usize) -> Buffer { match &mut self.cache { Some(buf) if buf.len() >= n_vals * Self::SIZE => { @@ -414,6 +425,7 @@ where } } +#[tracing::instrument(level = "trace", skip(buffer_gen, dict_val, len, data_type))] fn create_dict_array( buffer_gen: &mut ZeroBufferGenerator, dict_val: &ScalarValue, @@ -437,6 +449,7 @@ where ))) } +#[tracing::instrument(level = "trace", skip(key_buffer_cache, val, len))] fn create_output_array( key_buffer_cache: &mut ZeroBufferGenerators, val: &ScalarValue, @@ -854,6 +867,7 @@ mod tests { statistics: Vec>, } impl File { + #[tracing::instrument(level = "trace", skip(name, date, statistics))] fn new( name: &'static str, date: &'static str, @@ -1071,6 +1085,7 @@ mod tests { return Ok(()); impl From for PartitionedFile { + #[tracing::instrument(level = "trace", skip(file))] fn from(file: File) -> Self { PartitionedFile { object_meta: ObjectMeta { @@ -1113,6 +1128,7 @@ mod tests { } // sets default for configs that play no role in projections + #[tracing::instrument(level = "trace", skip(file_schema, projection, statistics, table_partition_cols))] fn config_for_projection( file_schema: SchemaRef, projection: Option>, @@ -1132,6 +1148,7 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip(table_partition_cols))] /// Convert partition columns from Vec to Vec fn to_partition_cols(table_partition_cols: Vec<(String, DataType)>) -> Vec { table_partition_cols @@ -1140,6 +1157,7 @@ mod tests { .collect::>() } + #[tracing::instrument(level = "trace", skip(a, b, c))] /// returns record batch with 3 columns of i32 in memory pub fn build_table_i32( a: (&str, &Vec), diff --git a/datafusion/core/src/datasource/physical_plan/file_stream.rs b/datafusion/core/src/datasource/physical_plan/file_stream.rs index a11044775f7ee..59d28946f7200 100644 --- a/datafusion/core/src/datasource/physical_plan/file_stream.rs +++ b/datafusion/core/src/datasource/physical_plan/file_stream.rs @@ -58,6 +58,7 @@ pub enum OnError { } impl Default for OnError { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self::Fail } @@ -143,11 +144,13 @@ pub struct StartableTime { } impl StartableTime { + #[tracing::instrument(level = "trace", skip(self))] pub(crate) fn start(&mut self) { assert!(self.start.is_none()); self.start = Some(Instant::now()); } + #[tracing::instrument(level = "trace", skip(self))] pub(crate) fn stop(&mut self) { if let Some(start) 
= self.start.take() { self.metrics.add_elapsed(start); @@ -200,6 +203,7 @@ struct FileStreamMetrics { } impl FileStreamMetrics { + #[tracing::instrument(level = "trace", skip(metrics, partition))] fn new(metrics: &ExecutionPlanMetricsSet, partition: usize) -> Self { let time_opening = StartableTime { metrics: MetricBuilder::new(metrics) @@ -243,6 +247,7 @@ impl FileStreamMetrics { } impl FileStream { + #[tracing::instrument(level = "trace", skip(config, partition, file_opener, metrics))] /// Create a new `FileStream` using the give `FileOpener` to scan underlying files pub fn new( config: &FileScanConfig, @@ -275,6 +280,7 @@ impl FileStream { }) } + #[tracing::instrument(level = "trace", skip(self, on_error))] /// Specify the behavior when an error occurs opening or scanning a file /// /// If `OnError::Skip` the stream will skip files which encounter an error and continue @@ -284,6 +290,7 @@ impl FileStream { self } + #[tracing::instrument(level = "trace", skip(self))] /// Begin opening the next file in parallel while decoding the current file in FileStream. /// /// Since file opening is mostly IO (and may involve a @@ -304,6 +311,7 @@ impl FileStream { ) } + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_inner(&mut self, cx: &mut Context<'_>) -> Poll>> { loop { match &mut self.state { @@ -496,6 +504,7 @@ impl FileStream { impl Stream for FileStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -508,6 +517,7 @@ impl Stream for FileStream { } impl RecordBatchStream for FileStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.projected_schema.clone() } @@ -540,6 +550,7 @@ mod tests { } impl FileOpener for TestOpener { + #[tracing::instrument(level = "trace", skip(self, _file_meta))] fn open(&self, _file_meta: FileMeta) -> Result { let idx = self.current_idx.fetch_add(1, Ordering::SeqCst); @@ -572,22 +583,26 @@ mod tests { } impl FileStreamTest { + #[tracing::instrument(level = "trace", skip())] pub fn new() -> Self { Self::default() } + #[tracing::instrument(level = "trace", skip(self, num_files))] /// Specify the number of files in the stream pub fn with_num_files(mut self, num_files: usize) -> Self { self.num_files = num_files; self } + #[tracing::instrument(level = "trace", skip(self, limit))] /// Specify the limit pub fn with_limit(mut self, limit: Option) -> Self { self.limit = limit; self } + #[tracing::instrument(level = "trace", skip(self, idx))] /// Specify the index of files in the stream which should /// throw an error when opening pub fn with_open_errors(mut self, idx: Vec) -> Self { @@ -595,6 +610,7 @@ mod tests { self } + #[tracing::instrument(level = "trace", skip(self, idx))] /// Specify the index of files in the stream which should /// throw an error when scanning pub fn with_scan_errors(mut self, idx: Vec) -> Self { @@ -602,12 +618,14 @@ mod tests { self } + #[tracing::instrument(level = "trace", skip(self, on_error))] /// Specify the behavior of the stream when an error occurs pub fn with_on_error(mut self, on_error: OnError) -> Self { self.on_error = on_error; self } + #[tracing::instrument(level = "trace", skip(self, records))] /// Specify the record batches that should be returned from each /// file that is successfully scanned pub fn with_records(mut self, records: Vec) -> Self { @@ -615,6 +633,7 @@ mod tests { self } + #[tracing::instrument(level = "trace", skip(self))] /// Collect the results of the 
`FileStream` pub async fn result(self) -> Result> { let file_schema = self @@ -667,6 +686,7 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip(limit))] /// helper that creates a stream of 2 files with the same pair of batches in each ([0,1,2] and [0,1]) async fn create_and_collect(limit: Option) -> Vec { FileStreamTest::new() diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index e7be636bd7753..240df2a2d6fa0 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -60,6 +60,7 @@ pub struct NdJsonExec { } impl NdJsonExec { + #[tracing::instrument(level = "trace", skip(base_config, file_compression_type))] /// Create a new JSON reader execution plan provided base configurations pub fn new( base_config: FileScanConfig, @@ -81,15 +82,18 @@ impl NdJsonExec { } } + #[tracing::instrument(level = "trace", skip(self))] /// Ref to the base configs pub fn base_config(&self) -> &FileScanConfig { &self.base_config } + #[tracing::instrument(level = "trace", skip(file_scan_config))] fn output_partitioning_helper(file_scan_config: &FileScanConfig) -> Partitioning { Partitioning::UnknownPartitioning(file_scan_config.file_groups.len()) } + #[tracing::instrument(level = "trace", skip(schema, orderings, file_scan_config))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties( schema: SchemaRef, @@ -106,6 +110,7 @@ impl NdJsonExec { ) } + #[tracing::instrument(level = "trace", skip(self, file_groups))] fn with_file_groups(mut self, file_groups: Vec>) -> Self { self.base_config.file_groups = file_groups; // Changing file groups may invalidate output partitioning. 
Update it also @@ -116,6 +121,7 @@ impl NdJsonExec { } impl DisplayAs for NdJsonExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -127,21 +133,26 @@ impl DisplayAs for NdJsonExec { } impl ExecutionPlan for NdJsonExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "NdJsonExec" } + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { Vec::new() } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -149,6 +160,7 @@ impl ExecutionPlan for NdJsonExec { Ok(self) } + #[tracing::instrument(level = "trace", skip(self, target_partitions, config))] fn repartitioned( &self, target_partitions: usize, @@ -176,6 +188,7 @@ impl ExecutionPlan for NdJsonExec { Ok(None) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -199,10 +212,12 @@ impl ExecutionPlan for NdJsonExec { Ok(Box::pin(stream) as SendableRecordBatchStream) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { Ok(self.projected_statistics.clone()) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } @@ -217,6 +232,7 @@ pub struct JsonOpener { } impl JsonOpener { + #[tracing::instrument(level = "trace", skip(batch_size, projected_schema, file_compression_type, object_store))] /// Returns a [`JsonOpener`] pub fn new( batch_size: usize, @@ -234,6 +250,7 @@ impl JsonOpener { } impl FileOpener for JsonOpener { + #[tracing::instrument(level = "trace", skip(self, file_meta))] /// Open a partitioned NDJSON file. /// /// If `file_meta.range` is `None`, the entire file is opened. 
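The high-level newline-delimited JSON path that plans an `NdJsonExec` like the one instrumented above, sketched against the upstream DataFusion API; the file path is hypothetical.

```rust
// Sketch: reading NDJSON through SessionContext, which plans an NdJsonExec.
use datafusion::error::Result;
use datafusion::execution::options::NdJsonReadOptions;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // `data/example.json` is a hypothetical file: one JSON object per line.
    let df = ctx
        .read_json("data/example.json", NdJsonReadOptions::default())
        .await?;
    df.select_columns(&["id"])?.show().await?;
    Ok(())
}
```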
@@ -328,6 +345,7 @@ impl FileOpener for JsonOpener { } } +#[tracing::instrument(level = "trace", skip(task_ctx, plan, path))] pub async fn plan_to_json( task_ctx: Arc, plan: Arc, @@ -406,6 +424,7 @@ mod tests { const TEST_DATA_BASE: &str = "tests/data"; + #[tracing::instrument(level = "trace", skip(state, file_compression_type, work_dir))] async fn prepare_store( state: &SessionState, file_compression_type: FileCompressionType, @@ -440,6 +459,7 @@ mod tests { (store_url, file_groups, schema) } + #[tracing::instrument(level = "trace", skip(file_compression_type, store))] async fn test_additional_stores( file_compression_type: FileCompressionType, store: Arc, @@ -869,6 +889,7 @@ mod tests { #[tokio::test] async fn ndjson_schema_infer_max_records() -> Result<()> { + #[tracing::instrument(level = "trace", skip(schema_infer_max_records))] async fn read_test_data(schema_infer_max_records: usize) -> Result { let ctx = SessionContext::new(); diff --git a/datafusion/core/src/datasource/physical_plan/mod.rs b/datafusion/core/src/datasource/physical_plan/mod.rs index 6e19961f60284..d7a4fb4dcd3b3 100644 --- a/datafusion/core/src/datasource/physical_plan/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/mod.rs @@ -100,6 +100,7 @@ pub struct FileSinkConfig { } impl FileSinkConfig { + #[tracing::instrument(level = "trace", skip(self))] /// Get output schema pub fn output_schema(&self) -> &SchemaRef { &self.output_schema @@ -107,6 +108,7 @@ impl FileSinkConfig { } impl Debug for FileScanConfig { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { write!(f, "object_store_url={:?}, ", self.object_store_url)?; @@ -117,6 +119,7 @@ impl Debug for FileScanConfig { } impl DisplayAs for FileScanConfig { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> FmtResult { let (schema, _, orderings) = self.project(); @@ -147,6 +150,7 @@ impl DisplayAs for FileScanConfig { struct FileGroupsDisplay<'a>(&'a [Vec]); impl<'a> DisplayAs for FileGroupsDisplay<'a> { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> FmtResult { let n_groups = self.0.len(); let groups = if n_groups == 1 { "group" } else { "groups" }; @@ -179,6 +183,7 @@ impl<'a> DisplayAs for FileGroupsDisplay<'a> { pub(crate) struct FileGroupDisplay<'a>(pub &'a [PartitionedFile]); impl<'a> DisplayAs for FileGroupDisplay<'a> { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> FmtResult { write!(f, "[")?; match t { @@ -207,6 +212,7 @@ impl<'a> DisplayAs for FileGroupDisplay<'a> { } } +#[tracing::instrument(level = "trace", skip(elements, n, f, format_element))] /// helper to format an array of up to N elements fn fmt_up_to_n_elements( elements: &[E], @@ -228,6 +234,7 @@ where Ok(()) } +#[tracing::instrument(level = "trace", skip(iter, f, format_element))] /// helper formatting array elements with a comma and a space between them fn fmt_elements_split_by_commas( iter: I, @@ -253,6 +260,7 @@ pub(crate) struct DefaultSchemaAdapterFactory {} #[cfg(feature = "parquet")] impl SchemaAdapterFactory for DefaultSchemaAdapterFactory { + #[tracing::instrument(level = "trace", skip(self, table_schema))] fn create(&self, table_schema: SchemaRef) -> Box { Box::new(DefaultSchemaAdapter { table_schema }) } @@ -267,6 +275,7 @@ pub(crate) struct DefaultSchemaAdapter { #[cfg(feature = "parquet")] impl 
SchemaAdapter for DefaultSchemaAdapter { + #[tracing::instrument(level = "trace", skip(self, index, file_schema))] /// Map a column index in the table schema to a column index in a particular /// file schema /// @@ -276,6 +285,7 @@ impl SchemaAdapter for DefaultSchemaAdapter { Some(file_schema.fields.find(field.name())?.0) } + #[tracing::instrument(level = "trace", skip(self, file_schema))] /// Creates a `SchemaMapping` that can be used to cast or map the columns from the file schema to the table schema. /// /// If the provided `file_schema` contains columns of a different type to the expected @@ -335,6 +345,7 @@ pub struct SchemaMapping { #[cfg(feature = "parquet")] impl SchemaMapper for SchemaMapping { + #[tracing::instrument(level = "trace", skip(self, batch))] /// Adapts a `RecordBatch` to match the `table_schema` using the stored mapping and conversions. fn map_batch(&self, batch: RecordBatch) -> Result { let batch_rows = batch.num_rows(); @@ -371,6 +382,7 @@ pub struct FileMeta { } impl FileMeta { + #[tracing::instrument(level = "trace", skip(self))] /// The full path to the object pub fn location(&self) -> &Path { &self.object_meta.location @@ -378,6 +390,7 @@ impl FileMeta { } impl From for FileMeta { + #[tracing::instrument(level = "trace", skip(object_meta))] fn from(object_meta: ObjectMeta) -> Self { Self { object_meta, @@ -387,6 +400,7 @@ impl From for FileMeta { } } +#[tracing::instrument(level = "trace", skip(base_config, projected_schema))] /// The various listing tables does not attempt to read all files /// concurrently, instead they will read files in sequence within a /// partition. This is an important property as it allows plans to @@ -530,6 +544,7 @@ enum RangeCalculation { TerminateEarly, } +#[tracing::instrument(level = "trace", skip(file_meta, store))] /// Calculates an appropriate byte range for reading from an object based on the /// provided metadata. /// @@ -575,6 +590,7 @@ async fn calculate_range( } } +#[tracing::instrument(level = "trace", skip(object_store, location, start, end))] /// Asynchronously finds the position of the first newline character in a specified byte range /// within an object, such as a file, in an object store. /// @@ -877,6 +893,7 @@ mod tests { ); } + #[tracing::instrument(level = "trace", skip(path))] /// create a PartitionedFile for testing fn partitioned_file(path: &str) -> PartitionedFile { let object_meta = ObjectMeta { diff --git a/datafusion/core/src/datasource/physical_plan/parquet/metrics.rs b/datafusion/core/src/datasource/physical_plan/parquet/metrics.rs index c2a7e4345a5bc..b25182791a4e0 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/metrics.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/metrics.rs @@ -50,6 +50,7 @@ pub struct ParquetFileMetrics { } impl ParquetFileMetrics { + #[tracing::instrument(level = "trace", skip(partition, filename, metrics))] /// Create new metrics pub fn new( partition: usize, diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index d1b8dcc51bb9f..2d38f29dfc95c 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -103,6 +103,7 @@ pub struct ParquetExec { } impl ParquetExec { + #[tracing::instrument(level = "trace", skip(base_config, predicate, metadata_size_hint, table_parquet_options))] /// Create a new Parquet reader execution plan provided file list and schema. 
pub fn new( base_config: FileScanConfig, @@ -168,26 +169,31 @@ impl ParquetExec { } } + #[tracing::instrument(level = "trace", skip(self))] /// [`FileScanConfig`] that controls this scan (such as which files to read) pub fn base_config(&self) -> &FileScanConfig { &self.base_config } + #[tracing::instrument(level = "trace", skip(self))] /// Options passed to the parquet reader for this scan pub fn table_parquet_options(&self) -> &TableParquetOptions { &self.table_parquet_options } + #[tracing::instrument(level = "trace", skip(self))] /// Optional predicate. pub fn predicate(&self) -> Option<&Arc> { self.predicate.as_ref() } + #[tracing::instrument(level = "trace", skip(self))] /// Optional reference to this parquet scan's pruning predicate pub fn pruning_predicate(&self) -> Option<&Arc> { self.pruning_predicate.as_ref() } + #[tracing::instrument(level = "trace", skip(self, parquet_file_reader_factory))] /// Optional user defined parquet file reader factory. /// /// `ParquetFileReaderFactory` complements `TableProvider`, It enables users to provide custom @@ -203,6 +209,7 @@ impl ParquetExec { self } + #[tracing::instrument(level = "trace", skip(self, schema_adapter_factory))] /// Optional schema adapter factory. /// /// `SchemaAdapterFactory` allows user to specify how fields from the parquet file get mapped to @@ -216,6 +223,7 @@ impl ParquetExec { self } + #[tracing::instrument(level = "trace", skip(self, pushdown_filters))] /// If true, any filter [`Expr`]s on the scan will converted to a /// [`RowFilter`](parquet::arrow::arrow_reader::RowFilter) in the /// `ParquetRecordBatchStream`. These filters are applied by the @@ -228,11 +236,13 @@ impl ParquetExec { self } + #[tracing::instrument(level = "trace", skip(self))] /// Return the value described in [`Self::with_pushdown_filters`] fn pushdown_filters(&self) -> bool { self.table_parquet_options.global.pushdown_filters } + #[tracing::instrument(level = "trace", skip(self, reorder_filters))] /// If true, the `RowFilter` made by `pushdown_filters` may try to /// minimize the cost of filter evaluation by reordering the /// predicate [`Expr`]s. 
If false, the predicates are applied in @@ -244,11 +254,13 @@ impl ParquetExec { self } + #[tracing::instrument(level = "trace", skip(self))] /// Return the value described in [`Self::with_reorder_filters`] fn reorder_filters(&self) -> bool { self.table_parquet_options.global.reorder_filters } + #[tracing::instrument(level = "trace", skip(self, enable_page_index))] /// If enabled, the reader will read the page index /// This is used to optimise filter pushdown /// via `RowSelector` and `RowFilter` by @@ -258,17 +270,20 @@ impl ParquetExec { self } + #[tracing::instrument(level = "trace", skip(self))] /// Return the value described in [`Self::with_enable_page_index`] fn enable_page_index(&self) -> bool { self.table_parquet_options.global.enable_page_index } + #[tracing::instrument(level = "trace", skip(self, bloom_filter_on_read))] /// If enabled, the reader will read by the bloom filter pub fn with_bloom_filter_on_read(mut self, bloom_filter_on_read: bool) -> Self { self.table_parquet_options.global.bloom_filter_on_read = bloom_filter_on_read; self } + #[tracing::instrument(level = "trace", skip(self, enable_bloom_filter_on_write))] /// If enabled, the writer will write by the bloom filter pub fn with_bloom_filter_on_write( mut self, @@ -279,15 +294,18 @@ impl ParquetExec { self } + #[tracing::instrument(level = "trace", skip(self))] /// Return the value described in [`Self::with_bloom_filter_on_read`] fn bloom_filter_on_read(&self) -> bool { self.table_parquet_options.global.bloom_filter_on_read } + #[tracing::instrument(level = "trace", skip(file_config))] fn output_partitioning_helper(file_config: &FileScanConfig) -> Partitioning { Partitioning::UnknownPartitioning(file_config.file_groups.len()) } + #[tracing::instrument(level = "trace", skip(schema, orderings, file_config))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties( schema: SchemaRef, @@ -304,6 +322,7 @@ impl ParquetExec { ) } + #[tracing::instrument(level = "trace", skip(self, file_groups))] fn with_file_groups(mut self, file_groups: Vec>) -> Self { self.base_config.file_groups = file_groups; // Changing file groups may invalidate output partitioning. Update it also @@ -314,6 +333,7 @@ impl ParquetExec { } impl DisplayAs for ParquetExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -352,24 +372,29 @@ impl DisplayAs for ParquetExec { } impl ExecutionPlan for ParquetExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "ParquetExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { // this is a leaf node and has no children vec![] } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -377,6 +402,7 @@ impl ExecutionPlan for ParquetExec { Ok(self) } + #[tracing::instrument(level = "trace", skip(self, target_partitions, config))] /// Redistribute files across partitions according to their size /// See comments on [`FileGroupPartitioner`] for more detail. 
fn repartitioned( @@ -400,6 +426,7 @@ impl ExecutionPlan for ParquetExec { Ok(Some(Arc::new(new_plan))) } + #[tracing::instrument(level = "trace", skip(self, partition_index, ctx))] fn execute( &self, partition_index: usize, @@ -454,10 +481,12 @@ impl ExecutionPlan for ParquetExec { Ok(Box::pin(stream)) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { Ok(self.projected_statistics.clone()) } @@ -485,6 +514,7 @@ struct ParquetOpener { } impl FileOpener for ParquetOpener { + #[tracing::instrument(level = "trace", skip(self, file_meta))] fn open(&self, file_meta: FileMeta) -> Result { let file_range = file_meta.range.clone(); let file_metrics = ParquetFileMetrics::new( @@ -655,6 +685,7 @@ impl FileOpener for ParquetOpener { } } +#[tracing::instrument(level = "trace", skip(enable_page_index, page_pruning_predicate))] fn should_enable_page_index( enable_page_index: bool, page_pruning_predicate: &Option>, @@ -688,6 +719,7 @@ pub struct DefaultParquetFileReaderFactory { } impl DefaultParquetFileReaderFactory { + #[tracing::instrument(level = "trace", skip(store))] /// Create a factory. pub fn new(store: Arc) -> Self { Self { store } @@ -701,6 +733,7 @@ pub(crate) struct ParquetFileReader { } impl AsyncFileReader for ParquetFileReader { + #[tracing::instrument(level = "trace", skip(self, range))] fn get_bytes( &mut self, range: Range, @@ -709,6 +742,7 @@ impl AsyncFileReader for ParquetFileReader { self.inner.get_bytes(range) } + #[tracing::instrument(level = "trace", skip(self, ranges))] fn get_byte_ranges( &mut self, ranges: Vec>, @@ -721,6 +755,7 @@ impl AsyncFileReader for ParquetFileReader { self.inner.get_byte_ranges(ranges) } + #[tracing::instrument(level = "trace", skip(self))] fn get_metadata( &mut self, ) -> BoxFuture<'_, parquet::errors::Result>> { @@ -729,6 +764,7 @@ impl AsyncFileReader for ParquetFileReader { } impl ParquetFileReaderFactory for DefaultParquetFileReaderFactory { + #[tracing::instrument(level = "trace", skip(self, partition_index, file_meta, metadata_size_hint, metrics))] fn create_reader( &self, partition_index: usize, @@ -755,6 +791,7 @@ impl ParquetFileReaderFactory for DefaultParquetFileReaderFactory { } } +#[tracing::instrument(level = "trace", skip(task_ctx, plan, path, writer_properties))] /// Executes a query and writes the results to a partitioned Parquet file. pub async fn plan_to_parquet( task_ctx: Arc, @@ -809,6 +846,7 @@ pub async fn plan_to_parquet( // Convert parquet column schema to arrow data type, and just consider the // decimal data type. 
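None of these spans are visible unless a `tracing` subscriber is installed at `trace` level for the `datafusion` target. A minimal sketch using `tracing-subscriber` (its `env-filter` feature is assumed):

```rust
// Minimal sketch of a subscriber that actually surfaces these trace spans.
// Assumes the `tracing-subscriber` crate with its "env-filter" feature.
use tracing_subscriber::EnvFilter;

fn init_tracing() {
    tracing_subscriber::fmt()
        // DataFusion spans at trace level; everything else at warn.
        .with_env_filter(EnvFilter::new("warn,datafusion=trace"))
        .with_target(true)
        .init();
}
```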
+#[tracing::instrument(level = "trace", skip(parquet_column))] pub(crate) fn parquet_to_arrow_decimal_type( parquet_column: &ColumnDescriptor, ) -> Option { @@ -890,35 +928,42 @@ mod tests { } impl RoundTrip { + #[tracing::instrument(level = "trace", skip())] fn new() -> Self { Default::default() } + #[tracing::instrument(level = "trace", skip(self, projection))] fn with_projection(mut self, projection: Vec) -> Self { self.projection = Some(projection); self } + #[tracing::instrument(level = "trace", skip(self, schema))] fn with_schema(mut self, schema: SchemaRef) -> Self { self.schema = Some(schema); self } + #[tracing::instrument(level = "trace", skip(self, predicate))] fn with_predicate(mut self, predicate: Expr) -> Self { self.predicate = Some(predicate); self } + #[tracing::instrument(level = "trace", skip(self))] fn with_pushdown_predicate(mut self) -> Self { self.pushdown_predicate = true; self } + #[tracing::instrument(level = "trace", skip(self))] fn with_page_index_predicate(mut self) -> Self { self.page_index_predicate = true; self } + #[tracing::instrument(level = "trace", skip(self, batches))] /// run the test, returning only the resulting RecordBatches async fn round_trip_to_batches( self, @@ -927,6 +972,7 @@ mod tests { self.round_trip(batches).await.batches } + #[tracing::instrument(level = "trace", skip(self, batches))] /// run the test, returning the `RoundTripResult` async fn round_trip(self, batches: Vec) -> RoundTripResult { let Self { @@ -994,6 +1040,7 @@ mod tests { } // Add a new column with the specified field name to the RecordBatch + #[tracing::instrument(level = "trace", skip(batch, field_name, array))] fn add_to_batch( batch: &RecordBatch, field_name: &str, @@ -1008,6 +1055,7 @@ mod tests { RecordBatch::try_new(schema, columns).expect("error; creating record batch") } + #[tracing::instrument(level = "trace", skip(columns))] fn create_batch(columns: Vec<(&str, ArrayRef)>) -> RecordBatch { columns.into_iter().fold( RecordBatch::new_empty(Arc::new(Schema::empty())), @@ -1599,6 +1647,7 @@ mod tests { #[tokio::test] async fn parquet_exec_with_range() -> Result<()> { + #[tracing::instrument(level = "trace", skip(meta, start, end))] fn file_range(meta: &ObjectMeta, start: i64, end: i64) -> PartitionedFile { PartitionedFile { object_meta: meta.clone(), @@ -1609,6 +1658,7 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip(state, file_groups, expected_row_num, file_schema))] async fn assert_parquet_read( state: &SessionState, file_groups: Vec>, @@ -1873,6 +1923,7 @@ mod tests { ); } + #[tracing::instrument(level = "trace", skip())] /// Returns a string array with contents: /// "[Foo, null, bar, bar, bar, bar, zzz]" fn string_batch() -> RecordBatch { @@ -2015,6 +2066,7 @@ mod tests { assert!(pruning_predicate.is_some()); } + #[tracing::instrument(level = "trace", skip(metrics, metric_name))] /// returns the sum of all the metrics with the specified name /// the returned set. 
/// @@ -2033,6 +2085,7 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip(tmp_dir, partition_count, file_extension))] fn populate_csv_partitions( tmp_dir: &TempDir, partition_count: usize, @@ -2151,6 +2204,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(expr, schema))] fn logical2physical(expr: &Expr, schema: &Schema) -> Arc { let df_schema = schema.clone().to_dfschema().unwrap(); let execution_props = ExecutionProps::new(); @@ -2181,6 +2235,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(file))] fn write_file(file: &String) { let struct_fields = Fields::from(vec![ Field::new("id", DataType::Int64, false), diff --git a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs index 402cc106492e1..66ee34298bc1f 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs @@ -109,6 +109,7 @@ pub struct PagePruningPredicate { } impl PagePruningPredicate { + #[tracing::instrument(level = "trace", skip(expr, schema))] /// Create a new [`PagePruningPredicate`] pub fn try_new(expr: &Arc, schema: SchemaRef) -> Result { let predicates = split_conjunction(expr) @@ -128,6 +129,7 @@ impl PagePruningPredicate { Ok(Self { predicates }) } + #[tracing::instrument(level = "trace", skip(self, arrow_schema, parquet_schema, row_groups, file_metadata, file_metrics))] /// Returns a [`RowSelection`] for the given file pub fn prune( &self, @@ -229,12 +231,14 @@ impl PagePruningPredicate { Ok(Some(final_selection)) } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the number of filters in the [`PagePruningPredicate`] pub fn filter_number(&self) -> usize { self.predicates.len() } } +#[tracing::instrument(level = "trace", skip(predicate, arrow_schema, parquet_schema))] /// Returns the column index in the row parquet schema for the single /// column of a single column pruning predicate. /// @@ -281,6 +285,7 @@ fn find_column_index( parquet_column(parquet_schema, arrow_schema, column.name()).map(|x| x.0) } +#[tracing::instrument(level = "trace", skip(row_selections))] /// Intersects the [`RowSelector`]s /// /// For exampe, given: @@ -297,6 +302,7 @@ fn combine_multi_col_selection(row_selections: Vec>) -> RowSele .unwrap() } +#[tracing::instrument(level = "trace", skip(group, predicate, col_offset_indexes, col_page_indexes, col_desc, metrics))] fn prune_pages_in_one_row_group( group: &RowGroupMetaData, predicate: &PruningPredicate, @@ -363,6 +369,7 @@ fn prune_pages_in_one_row_group( ))) } +#[tracing::instrument(level = "trace", skip(location, num_rows))] fn create_row_count_in_each_page( location: &[PageLocation], num_rows: usize, @@ -507,18 +514,22 @@ macro_rules! 
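The `logical2physical` test helper above plans a logical `Expr` against a schema to obtain the `Arc<dyn PhysicalExpr>` that `PagePruningPredicate::try_new` and friends expect. A hedged sketch of the same pattern using the public API; the exact import paths and the `create_physical_expr` signature are assumed for the DataFusion version this patch targets:

```rust
// Hedged sketch of the `logical2physical` pattern: planning a logical `Expr`
// against a schema to obtain an executable `PhysicalExpr`.
use std::sync::Arc;
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::common::ToDFSchema;
use datafusion::execution::context::ExecutionProps;
use datafusion::physical_expr::create_physical_expr;
use datafusion::physical_plan::PhysicalExpr;
use datafusion::prelude::{col, lit, Expr};

fn logical2physical(expr: &Expr, schema: &Schema) -> Arc<dyn PhysicalExpr> {
    let df_schema = schema.clone().to_dfschema().unwrap();
    let execution_props = ExecutionProps::new();
    create_physical_expr(expr, &df_schema, &execution_props).unwrap()
}

fn example() -> Arc<dyn PhysicalExpr> {
    let schema = Schema::new(vec![Field::new("c1", DataType::Int64, false)]);
    logical2physical(&col("c1").gt(lit(5_i64)), &schema)
}
```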
get_min_max_values_for_page_index { } impl<'a> PruningStatistics for PagesPruningStatistics<'a> { + #[tracing::instrument(level = "trace", skip(self, _column))] fn min_values(&self, _column: &datafusion_common::Column) -> Option { get_min_max_values_for_page_index!(self, min) } + #[tracing::instrument(level = "trace", skip(self, _column))] fn max_values(&self, _column: &datafusion_common::Column) -> Option { get_min_max_values_for_page_index!(self, max) } + #[tracing::instrument(level = "trace", skip(self))] fn num_containers(&self) -> usize { self.col_offset_indexes.len() } + #[tracing::instrument(level = "trace", skip(self, _column))] fn null_counts(&self, _column: &datafusion_common::Column) -> Option { match self.col_page_indexes { Index::NONE => None, @@ -549,6 +560,7 @@ impl<'a> PruningStatistics for PagesPruningStatistics<'a> { } } + #[tracing::instrument(level = "trace", skip(self, _column))] fn row_counts(&self, _column: &datafusion_common::Column) -> Option { // see https://github.com/apache/arrow-rs/blob/91f0b1771308609ca27db0fb1d2d49571b3980d8/parquet/src/file/metadata.rs#L979-L982 @@ -565,6 +577,7 @@ impl<'a> PruningStatistics for PagesPruningStatistics<'a> { Some(Arc::new(Int64Array::from_iter(row_count_per_page))) } + #[tracing::instrument(level = "trace", skip(self, _column, _values))] fn contained( &self, _column: &datafusion_common::Column, diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs index 5f89ff087f707..0e41f99bbc1d9 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs @@ -81,6 +81,7 @@ pub(crate) struct DatafusionArrowPredicate { } impl DatafusionArrowPredicate { + #[tracing::instrument(level = "trace", skip(candidate, schema, metadata, rows_filtered, time))] pub fn try_new( candidate: FilterCandidate, schema: &Schema, @@ -113,10 +114,12 @@ impl DatafusionArrowPredicate { } impl ArrowPredicate for DatafusionArrowPredicate { + #[tracing::instrument(level = "trace", skip(self))] fn projection(&self) -> &ProjectionMask { &self.projection_mask } + #[tracing::instrument(level = "trace", skip(self, batch))] fn evaluate(&mut self, batch: RecordBatch) -> ArrowResult { let batch = match self.projection.is_empty() { true => batch, @@ -171,6 +174,7 @@ struct FilterCandidateBuilder<'a> { } impl<'a> FilterCandidateBuilder<'a> { + #[tracing::instrument(level = "trace", skip(expr, file_schema, table_schema))] pub fn new( expr: Arc, file_schema: &'a Schema, @@ -186,6 +190,7 @@ impl<'a> FilterCandidateBuilder<'a> { } } + #[tracing::instrument(level = "trace", skip(self, metadata))] pub fn build( mut self, metadata: &ParquetMetaData, @@ -212,6 +217,7 @@ impl<'a> FilterCandidateBuilder<'a> { impl<'a> TreeNodeRewriter for FilterCandidateBuilder<'a> { type Node = Arc; + #[tracing::instrument(level = "trace", skip(self, node))] fn f_down( &mut self, node: Arc, @@ -235,6 +241,7 @@ impl<'a> TreeNodeRewriter for FilterCandidateBuilder<'a> { Ok(Transformed::no(node)) } + #[tracing::instrument(level = "trace", skip(self, expr))] fn f_up( &mut self, expr: Arc, @@ -260,6 +267,7 @@ impl<'a> TreeNodeRewriter for FilterCandidateBuilder<'a> { } } +#[tracing::instrument(level = "trace", skip(src))] /// Computes the projection required to go from the file's schema order to the projected /// order expected by this filter /// @@ -286,6 +294,7 @@ fn remap_projection(src: &[usize]) -> Vec { projection } 
+#[tracing::instrument(level = "trace", skip(columns, metadata))] /// Calculate the total compressed size of all `Column's required for /// predicate `Expr`. This should represent the total amount of file IO /// required to evaluate the predicate. @@ -304,6 +313,7 @@ fn size_of_columns( Ok(total_size) } +#[tracing::instrument(level = "trace", skip(_columns, _metadata))] /// For a given set of `Column`s required for predicate `Expr` determine whether all /// columns are sorted. Sorted columns may be queried more efficiently in the presence of /// a PageIndex. @@ -315,6 +325,7 @@ fn columns_sorted( Ok(false) } +#[tracing::instrument(level = "trace", skip(expr, file_schema, table_schema, metadata, reorder_predicates, file_metrics))] /// Build a [`RowFilter`] from the given predicate `Expr` pub fn build_row_filter( expr: &Arc, @@ -491,6 +502,7 @@ mod test { } } + #[tracing::instrument(level = "trace", skip(expr, schema))] fn logical2physical(expr: &Expr, schema: &Schema) -> Arc { let df_schema = schema.clone().to_dfschema().unwrap(); let execution_props = ExecutionProps::new(); diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs index bcd9e1fa44792..7b2ab7239d982 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs @@ -38,6 +38,7 @@ use crate::physical_optimizer::pruning::{PruningPredicate, PruningStatistics}; use super::ParquetFileMetrics; +#[tracing::instrument(level = "trace", skip(arrow_schema, parquet_schema, groups, range, predicate, metrics))] /// Prune row groups based on statistics /// /// Returns a vector of indexes into `groups` which should be scanned. @@ -102,6 +103,7 @@ pub(crate) fn prune_row_groups_by_statistics( filtered } +#[tracing::instrument(level = "trace", skip(arrow_schema, builder, row_groups, groups, predicate, metrics))] /// Prune row groups by bloom filters /// /// Returns a vector of indexes into `groups` which should be scanned. 
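The row-group pruning entry points above boil down to evaluating a `PruningPredicate` against something that implements `PruningStatistics`, where each "container" is one row group. A self-contained, hedged sketch; the trait shape, import paths and `create_physical_expr` signature are assumed for this DataFusion version, and `InMemoryStats` is a made-up provider:

```rust
use std::collections::HashSet;
use std::sync::Arc;
use datafusion::arrow::array::{ArrayRef, BooleanArray, Int64Array};
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::common::{Column, ScalarValue, ToDFSchema};
use datafusion::error::Result;
use datafusion::execution::context::ExecutionProps;
use datafusion::physical_expr::create_physical_expr;
use datafusion::physical_optimizer::pruning::{PruningPredicate, PruningStatistics};
use datafusion::prelude::{col, lit};

/// One prunable "container" (think: row group) per index, with known i64
/// min/max values for a single column named "c1".
struct InMemoryStats {
    mins: Vec<i64>,
    maxes: Vec<i64>,
}

impl PruningStatistics for InMemoryStats {
    fn min_values(&self, column: &Column) -> Option<ArrayRef> {
        (column.name == "c1").then(|| Arc::new(Int64Array::from(self.mins.clone())) as ArrayRef)
    }
    fn max_values(&self, column: &Column) -> Option<ArrayRef> {
        (column.name == "c1").then(|| Arc::new(Int64Array::from(self.maxes.clone())) as ArrayRef)
    }
    fn num_containers(&self) -> usize {
        self.mins.len()
    }
    // No null-count, row-count or set-membership information available.
    fn null_counts(&self, _column: &Column) -> Option<ArrayRef> {
        None
    }
    fn row_counts(&self, _column: &Column) -> Option<ArrayRef> {
        None
    }
    fn contained(&self, _column: &Column, _values: &HashSet<ScalarValue>) -> Option<BooleanArray> {
        None
    }
}

fn prune_example() -> Result<Vec<bool>> {
    let schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int64, false)]));
    // Three containers covering [0,10], [20,30] and [40,50].
    let stats = InMemoryStats { mins: vec![0, 20, 40], maxes: vec![10, 30, 50] };

    // Plan `c1 > 35` into a physical expression and build a pruning predicate.
    let df_schema = schema.as_ref().clone().to_dfschema()?;
    let expr = create_physical_expr(&col("c1").gt(lit(35_i64)), &df_schema, &ExecutionProps::new())?;
    let pruning = PruningPredicate::try_new(expr, schema)?;

    // `true` means "may contain matches, must be scanned"; only [40, 50] can.
    pruning.prune(&stats)
}
```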
@@ -183,26 +185,32 @@ struct BloomFilterStatistics { } impl PruningStatistics for BloomFilterStatistics { + #[tracing::instrument(level = "trace", skip(self, _column))] fn min_values(&self, _column: &Column) -> Option { None } + #[tracing::instrument(level = "trace", skip(self, _column))] fn max_values(&self, _column: &Column) -> Option { None } + #[tracing::instrument(level = "trace", skip(self))] fn num_containers(&self) -> usize { 1 } + #[tracing::instrument(level = "trace", skip(self, _column))] fn null_counts(&self, _column: &Column) -> Option { None } + #[tracing::instrument(level = "trace", skip(self, _column))] fn row_counts(&self, _column: &Column) -> Option { None } + #[tracing::instrument(level = "trace", skip(self, column, values))] /// Use bloom filters to determine if we are sure this column can not /// possibly contain `values` /// @@ -310,6 +318,7 @@ struct RowGroupPruningStatistics<'a> { } impl<'a> RowGroupPruningStatistics<'a> { + #[tracing::instrument(level = "trace", skip(self, name))] /// Lookups up the parquet column by name fn column(&self, name: &str) -> Option<(&ColumnChunkMetaData, &FieldRef)> { let (idx, field) = parquet_column(self.parquet_schema, self.arrow_schema, name)?; @@ -318,32 +327,38 @@ impl<'a> RowGroupPruningStatistics<'a> { } impl<'a> PruningStatistics for RowGroupPruningStatistics<'a> { + #[tracing::instrument(level = "trace", skip(self, column))] fn min_values(&self, column: &Column) -> Option { let (column, field) = self.column(&column.name)?; min_statistics(field.data_type(), std::iter::once(column.statistics())).ok() } + #[tracing::instrument(level = "trace", skip(self, column))] fn max_values(&self, column: &Column) -> Option { let (column, field) = self.column(&column.name)?; max_statistics(field.data_type(), std::iter::once(column.statistics())).ok() } + #[tracing::instrument(level = "trace", skip(self))] fn num_containers(&self) -> usize { 1 } + #[tracing::instrument(level = "trace", skip(self, column))] fn null_counts(&self, column: &Column) -> Option { let (c, _) = self.column(&column.name)?; let scalar = ScalarValue::UInt64(Some(c.statistics()?.null_count())); scalar.to_array().ok() } + #[tracing::instrument(level = "trace", skip(self, column))] fn row_counts(&self, column: &Column) -> Option { let (c, _) = self.column(&column.name)?; let scalar = ScalarValue::UInt64(Some(c.num_values() as u64)); scalar.to_array().ok() } + #[tracing::instrument(level = "trace", skip(self, _column, _values))] fn contained( &self, _column: &Column, @@ -385,6 +400,7 @@ mod tests { } impl PrimitiveTypeField { + #[tracing::instrument(level = "trace", skip(name, physical_ty))] fn new(name: &'static str, physical_ty: PhysicalType) -> Self { Self { name, @@ -396,21 +412,25 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip(self, logical_type))] fn with_logical_type(mut self, logical_type: LogicalType) -> Self { self.logical_ty = Some(logical_type); self } + #[tracing::instrument(level = "trace", skip(self, precision))] fn with_precision(mut self, precision: i32) -> Self { self.precision = Some(precision); self } + #[tracing::instrument(level = "trace", skip(self, scale))] fn with_scale(mut self, scale: i32) -> Self { self.scale = Some(scale); self } + #[tracing::instrument(level = "trace", skip(self, byte_len))] fn with_byte_len(mut self, byte_len: i32) -> Self { self.byte_len = Some(byte_len); self @@ -615,6 +635,7 @@ mod tests { ); } + #[tracing::instrument(level = "trace", skip())] fn gen_row_group_meta_data_for_pruning_predicate() -> Vec { let 
schema_descr = get_test_schema_descr(vec![ PrimitiveTypeField::new("c1", PhysicalType::INT32), @@ -1017,6 +1038,7 @@ mod tests { ); } + #[tracing::instrument(level = "trace", skip(schema_descr, column_statistics))] fn get_row_group_meta_data( schema_descr: &SchemaDescPtr, column_statistics: Vec, @@ -1039,6 +1061,7 @@ mod tests { .unwrap() } + #[tracing::instrument(level = "trace", skip(fields))] fn get_test_schema_descr(fields: Vec) -> SchemaDescPtr { use parquet::schema::types::Type as SchemaType; let schema_fields = fields @@ -1070,11 +1093,13 @@ mod tests { Arc::new(SchemaDescriptor::new(Arc::new(schema))) } + #[tracing::instrument(level = "trace", skip())] fn parquet_file_metrics() -> ParquetFileMetrics { let metrics = Arc::new(ExecutionPlanMetricsSet::new()); ParquetFileMetrics::new(0, "file.parquet", &metrics) } + #[tracing::instrument(level = "trace", skip(expr, schema))] fn logical2physical(expr: &Expr, schema: &Schema) -> Arc { let df_schema = schema.clone().to_dfschema().unwrap(); let execution_props = ExecutionProps::new(); @@ -1207,6 +1232,7 @@ mod tests { } impl BloomFilterTest { + #[tracing::instrument(level = "trace", skip())] /// Return a test for data_index_bloom_encoding_stats.parquet /// Note the values in the `String` column are: /// ```sql @@ -1240,6 +1266,7 @@ mod tests { } // Return a test for alltypes_plain.parquet + #[tracing::instrument(level = "trace", skip())] fn new_all_types() -> Self { Self { file_name: String::from("alltypes_plain.parquet"), @@ -1253,18 +1280,21 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip(self))] /// Expect all row groups to be pruned pub fn with_expect_all_pruned(mut self) -> Self { self.post_pruning_row_groups = Some(vec![]); self } + #[tracing::instrument(level = "trace", skip(self))] /// Expect all row groups not to be pruned pub fn with_expect_none_pruned(mut self) -> Self { self.post_pruning_row_groups = Some(self.row_groups.clone()); self } + #[tracing::instrument(level = "trace", skip(self, expr))] /// Prune this file using the specified expression and check that the expected row groups are left async fn run(self, expr: Expr) { let Self { @@ -1297,6 +1327,7 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip(file_name, data, pruning_predicate, row_groups))] async fn test_row_group_bloom_filter_pruning_predicate( file_name: &str, data: bytes::Bytes, diff --git a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs index 0ebf7dfe23842..aee205184bd8a 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs @@ -32,6 +32,7 @@ use std::sync::Arc; // Convert the bytes array to i128. // The endian of the input bytes array must be big-endian. +#[tracing::instrument(level = "trace", skip(b))] pub(crate) fn from_bytes_to_i128(b: &[u8]) -> i128 { // The bytes array are from parquet file and must be the big-endian. 
// The endian is defined by parquet format, and the reference document @@ -42,6 +43,7 @@ pub(crate) fn from_bytes_to_i128(b: &[u8]) -> i128 { // Copy from arrow-rs // https://github.com/apache/arrow-rs/blob/733b7e7fd1e8c43a404c3ce40ecf741d493c21b4/parquet/src/arrow/buffer/bit_util.rs#L55 // Convert the byte slice to fixed length byte array with the length of 16 +#[tracing::instrument(level = "trace", skip(b))] fn sign_extend_be(b: &[u8]) -> [u8; 16] { assert!(b.len() <= 16, "Array too large, expected less than 16"); let is_negative = (b[0] & 128u8) == 128u8; @@ -154,6 +156,7 @@ macro_rules! get_statistic { }}; } +#[tracing::instrument(level = "trace", skip(parquet_schema, arrow_schema, name))] /// Lookups up the parquet column by name /// /// Returns the parquet column index and the corresponding arrow field @@ -179,6 +182,7 @@ pub(crate) fn parquet_column<'a>( Some((parquet_idx, field)) } +#[tracing::instrument(level = "trace", skip(data_type, iterator))] /// Extracts the min statistics from an iterator of [`ParquetStatistics`] to an [`ArrayRef`] pub(crate) fn min_statistics<'a, I: Iterator>>( data_type: &DataType, @@ -189,6 +193,7 @@ pub(crate) fn min_statistics<'a, I: Iterator>>( data_type: &DataType, @@ -199,6 +204,7 @@ pub(crate) fn max_statistics<'a, I: Iterator>>( data_type: &DataType, @@ -263,6 +269,7 @@ pub struct StatisticsConverter<'a> { } impl<'a> StatisticsConverter<'a> { + #[tracing::instrument(level = "trace", skip(metadata))] /// Returns a [`UInt64Array`] with counts for each row group /// /// The returned array has no nulls, and has one value for each row group. @@ -282,6 +289,7 @@ impl<'a> StatisticsConverter<'a> { Ok(builder.finish()) } + #[tracing::instrument(level = "trace", skip(column_name, statistics_type, arrow_schema))] /// create an new statistics converter pub fn try_new( column_name: &'a str, @@ -304,6 +312,7 @@ impl<'a> StatisticsConverter<'a> { }) } + #[tracing::instrument(level = "trace", skip(self, metadata))] /// extract the statistics from a parquet file, given the parquet file's metadata /// /// The returned array contains 1 value for each row group in the parquet @@ -877,6 +886,7 @@ mod test { } impl Test { + #[tracing::instrument(level = "trace", skip(self))] fn run(self) { let Self { input, @@ -924,6 +934,7 @@ mod test { } } + #[tracing::instrument(level = "trace", skip(schema, batch))] /// Write the specified batches out as parquet and return the metadata fn parquet_metadata(schema: SchemaRef, batch: RecordBatch) -> Arc { let props = WriterProperties::builder() @@ -943,6 +954,7 @@ mod test { /// Formats the statistics nicely for display struct DisplayStats<'a>(&'a [RowGroupMetaData]); impl<'a> std::fmt::Display for DisplayStats<'a> { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let row_groups = self.0; writeln!(f, " row_groups: {}", row_groups.len())?; @@ -970,6 +982,7 @@ mod test { } impl TestFile { + #[tracing::instrument(level = "trace", skip(file_name))] fn new(file_name: &'static str) -> Self { Self { file_name, @@ -977,11 +990,13 @@ mod test { } } + #[tracing::instrument(level = "trace", skip(self, column))] fn with_column(mut self, column: ExpectedColumn) -> Self { self.expected_columns.push(column); self } + #[tracing::instrument(level = "trace", skip(self))] /// Reads the specified parquet file and validates that the exepcted min/max /// values for the specified columns are as expected. 
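The decimal helpers earlier in this hunk (`from_bytes_to_i128` / `sign_extend_be`) take the big-endian, variable-length statistics bytes from the parquet footer and widen them to an `i128`. A standalone sketch completing that logic; the real implementation is noted as copied from arrow-rs and may differ in detail:

```rust
// The (at most 16) statistics bytes are sign-extended to a full 16-byte
// buffer and then interpreted as a big-endian i128.
fn sign_extend_be(b: &[u8]) -> [u8; 16] {
    assert!(b.len() <= 16, "Array too large, expected less than 16");
    let is_negative = (b[0] & 128u8) == 128u8;
    // Fill with the sign byte, then copy the payload into the low-order end.
    let mut result = if is_negative { [255u8; 16] } else { [0u8; 16] };
    for (d, s) in result.iter_mut().skip(16 - b.len()).zip(b) {
        *d = *s;
    }
    result
}

fn from_bytes_to_i128(b: &[u8]) -> i128 {
    // Parquet stores these statistics big-endian.
    i128::from_be_bytes(sign_extend_be(b))
}

#[test]
fn round_trips_short_encodings() {
    // -1 encoded as a single big-endian byte, 256 as two bytes.
    assert_eq!(from_bytes_to_i128(&[0xFF]), -1);
    assert_eq!(from_bytes_to_i128(&[0x01, 0x00]), 256);
}
```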
fn run(self) { @@ -1013,36 +1028,43 @@ mod test { } } + #[tracing::instrument(level = "trace", skip(input))] fn bool_array(input: impl IntoIterator>) -> ArrayRef { let array: BooleanArray = input.into_iter().collect(); Arc::new(array) } + #[tracing::instrument(level = "trace", skip(input))] fn i32_array(input: impl IntoIterator>) -> ArrayRef { let array: Int32Array = input.into_iter().collect(); Arc::new(array) } + #[tracing::instrument(level = "trace", skip(input))] fn i64_array(input: impl IntoIterator>) -> ArrayRef { let array: Int64Array = input.into_iter().collect(); Arc::new(array) } + #[tracing::instrument(level = "trace", skip(input))] fn f32_array(input: impl IntoIterator>) -> ArrayRef { let array: Float32Array = input.into_iter().collect(); Arc::new(array) } + #[tracing::instrument(level = "trace", skip(input))] fn f64_array(input: impl IntoIterator>) -> ArrayRef { let array: Float64Array = input.into_iter().collect(); Arc::new(array) } + #[tracing::instrument(level = "trace", skip(input))] fn timestamp_array(input: impl IntoIterator>) -> ArrayRef { let array: TimestampNanosecondArray = input.into_iter().collect(); Arc::new(array) } + #[tracing::instrument(level = "trace", skip(input))] fn utf8_array<'a>(input: impl IntoIterator>) -> ArrayRef { let array: StringArray = input .into_iter() @@ -1052,6 +1074,7 @@ mod test { } // returns a struct array with columns "bool_col" and "int_col" with the specified values + #[tracing::instrument(level = "trace", skip(input))] fn struct_array(input: Vec<(Option, Option)>) -> ArrayRef { let boolean: BooleanArray = input.iter().map(|(b, _i)| b).collect(); let int: Int32Array = input.iter().map(|(_b, i)| i).collect(); diff --git a/datafusion/core/src/datasource/physical_plan/statistics.rs b/datafusion/core/src/datasource/physical_plan/statistics.rs index e1c61ec1a7129..2ba38000a7e46 100644 --- a/datafusion/core/src/datasource/physical_plan/statistics.rs +++ b/datafusion/core/src/datasource/physical_plan/statistics.rs @@ -47,23 +47,27 @@ pub(crate) struct MinMaxStatistics { } impl MinMaxStatistics { + #[tracing::instrument(level = "trace", skip(self))] /// Sort order used to sort the statistics #[allow(unused)] pub fn sort_order(&self) -> &[PhysicalSortExpr] { &self.sort_order } + #[tracing::instrument(level = "trace", skip(self, idx))] /// Min value at index #[allow(unused)] pub fn min(&self, idx: usize) -> Row { self.min_by_sort_order.row(idx) } + #[tracing::instrument(level = "trace", skip(self, idx))] /// Max value at index pub fn max(&self, idx: usize) -> Row { self.max_by_sort_order.row(idx) } + #[tracing::instrument(level = "trace", skip(projected_sort_order, projected_schema, projection, files))] pub fn new_from_files<'a>( projected_sort_order: &[PhysicalSortExpr], // Sort order with respect to projected schema projected_schema: &SchemaRef, // Projected schema @@ -165,6 +169,7 @@ impl MinMaxStatistics { ) } + #[tracing::instrument(level = "trace", skip(sort_order, schema, min_values, max_values))] pub fn new( sort_order: &[PhysicalSortExpr], schema: &SchemaRef, @@ -260,6 +265,7 @@ impl MinMaxStatistics { }) } + #[tracing::instrument(level = "trace", skip(self))] /// Return a sorted list of the min statistics together with the original indices pub fn min_values_sorted(&self) -> Vec<(usize, Row<'_>)> { let mut sort: Vec<_> = self.min_by_sort_order.iter().enumerate().collect(); @@ -267,6 +273,7 @@ impl MinMaxStatistics { sort } + #[tracing::instrument(level = "trace", skip(self))] /// Check if the min/max statistics are in order and 
non-overlapping pub fn is_sorted(&self) -> bool { self.max_by_sort_order @@ -276,6 +283,7 @@ impl MinMaxStatistics { } } +#[tracing::instrument(level = "trace", skip(sort_order))] fn sort_columns_from_physical_sort_exprs( sort_order: &[PhysicalSortExpr], ) -> Option> { diff --git a/datafusion/core/src/datasource/provider.rs b/datafusion/core/src/datasource/provider.rs index 7c58aded31081..7d2eecbb189b9 100644 --- a/datafusion/core/src/datasource/provider.rs +++ b/datafusion/core/src/datasource/provider.rs @@ -308,6 +308,7 @@ pub struct DefaultTableFactory { } impl DefaultTableFactory { + #[tracing::instrument(level = "trace", skip())] /// Creates a new [`DefaultTableFactory`] pub fn new() -> Self { Self::default() @@ -316,6 +317,7 @@ impl DefaultTableFactory { #[async_trait] impl TableProviderFactory for DefaultTableFactory { + #[tracing::instrument(level = "trace", skip(self, state, cmd))] async fn create( &self, state: &SessionState, diff --git a/datafusion/core/src/datasource/statistics.rs b/datafusion/core/src/datasource/statistics.rs index c67227f966a2e..9c19d5ca3da83 100644 --- a/datafusion/core/src/datasource/statistics.rs +++ b/datafusion/core/src/datasource/statistics.rs @@ -28,6 +28,7 @@ use futures::{Stream, StreamExt}; use itertools::izip; use itertools::multiunzip; +#[tracing::instrument(level = "trace", skip(all_files, file_schema, limit, collect_stats))] /// Get all files as well as the file level summary statistics (no statistic for partition columns). /// If the optional `limit` is provided, includes only sufficient files. Needed to read up to /// `limit` number of rows. `collect_stats` is passed down from the configuration parameter on @@ -150,6 +151,7 @@ pub async fn get_statistics_with_limit( Ok((result_files, statistics)) } +#[tracing::instrument(level = "trace", skip(schema))] pub(crate) fn create_max_min_accs( schema: &Schema, ) -> (Vec>, Vec>) { @@ -166,6 +168,7 @@ pub(crate) fn create_max_min_accs( (max_values, min_values) } +#[tracing::instrument(level = "trace", skip(file_num_rows, num_rows))] fn add_row_stats( file_num_rows: Precision, num_rows: Precision, @@ -177,6 +180,7 @@ fn add_row_stats( } } +#[tracing::instrument(level = "trace", skip(null_counts, max_values, min_values))] pub(crate) fn get_col_stats_vec( null_counts: Vec>, max_values: Vec>, @@ -192,6 +196,7 @@ pub(crate) fn get_col_stats_vec( .collect() } +#[tracing::instrument(level = "trace", skip(schema, null_counts, max_values, min_values))] pub(crate) fn get_col_stats( schema: &Schema, null_counts: Vec>, @@ -218,6 +223,7 @@ pub(crate) fn get_col_stats( .collect() } +#[tracing::instrument(level = "trace", skip(max_nominee, max_values))] /// If the given value is numerically greater than the original maximum value, /// return the new maximum value with appropriate exactness information. fn set_max_if_greater( @@ -241,6 +247,7 @@ fn set_max_if_greater( } } +#[tracing::instrument(level = "trace", skip(min_nominee, min_values))] /// If the given value is numerically lesser than the original minimum value, /// return the new minimum value with appropriate exactness information. 
fn set_min_if_lesser( diff --git a/datafusion/core/src/datasource/stream.rs b/datafusion/core/src/datasource/stream.rs index bcce3c1b64226..718995bff9d02 100644 --- a/datafusion/core/src/datasource/stream.rs +++ b/datafusion/core/src/datasource/stream.rs @@ -50,6 +50,7 @@ pub struct StreamTableFactory {} #[async_trait] impl TableProviderFactory for StreamTableFactory { + #[tracing::instrument(level = "trace", skip(self, state, cmd))] async fn create( &self, state: &SessionState, @@ -94,6 +95,7 @@ pub enum StreamEncoding { impl FromStr for StreamEncoding { type Err = DataFusionError; + #[tracing::instrument(level = "trace", skip(s))] fn from_str(s: &str) -> std::result::Result { match s.to_ascii_lowercase().as_str() { "csv" => Ok(Self::Csv), @@ -116,6 +118,7 @@ pub struct StreamConfig { } impl StreamConfig { + #[tracing::instrument(level = "trace", skip(schema, location))] /// Stream data from the file at `location` /// /// * Data will be read sequentially from the provided `location` @@ -135,36 +138,42 @@ impl StreamConfig { } } + #[tracing::instrument(level = "trace", skip(self, order))] /// Specify a sort order for the stream pub fn with_order(mut self, order: Vec>) -> Self { self.order = order; self } + #[tracing::instrument(level = "trace", skip(self, batch_size))] /// Specify the batch size pub fn with_batch_size(mut self, batch_size: usize) -> Self { self.batch_size = batch_size; self } + #[tracing::instrument(level = "trace", skip(self, header))] /// Specify whether the file has a header (only applicable for [`StreamEncoding::Csv`]) pub fn with_header(mut self, header: bool) -> Self { self.header = header; self } + #[tracing::instrument(level = "trace", skip(self, encoding))] /// Specify an encoding for the stream pub fn with_encoding(mut self, encoding: StreamEncoding) -> Self { self.encoding = encoding; self } + #[tracing::instrument(level = "trace", skip(self, constraints))] /// Assign constraints pub fn with_constraints(mut self, constraints: Constraints) -> Self { self.constraints = constraints; self } + #[tracing::instrument(level = "trace", skip(self))] fn reader(&self) -> Result> { let file = File::open(&self.location)?; let schema = self.schema.clone(); @@ -187,6 +196,7 @@ impl StreamConfig { } } + #[tracing::instrument(level = "trace", skip(self))] fn writer(&self) -> Result> { match &self.encoding { StreamEncoding::Csv => { @@ -225,6 +235,7 @@ impl StreamConfig { pub struct StreamTable(Arc); impl StreamTable { + #[tracing::instrument(level = "trace", skip(config))] /// Create a new [`StreamTable`] for the given [`StreamConfig`] pub fn new(config: Arc) -> Self { Self(config) @@ -233,22 +244,27 @@ impl StreamTable { #[async_trait] impl TableProvider for StreamTable { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.0.schema.clone() } + #[tracing::instrument(level = "trace", skip(self))] fn constraints(&self) -> Option<&Constraints> { Some(&self.0.constraints) } + #[tracing::instrument(level = "trace", skip(self))] fn table_type(&self) -> TableType { TableType::Base } + #[tracing::instrument(level = "trace", skip(self, _state, projection, _filters, limit))] async fn scan( &self, _state: &SessionState, @@ -274,6 +290,7 @@ impl TableProvider for StreamTable { )?)) } + #[tracing::instrument(level = "trace", skip(self, _state, input, _overwrite))] async fn insert_into( &self, _state: &SessionState, @@ -302,10 +319,12 @@ impl TableProvider for 
StreamTable { struct StreamRead(Arc); impl PartitionStream for StreamRead { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> &SchemaRef { &self.0.schema } + #[tracing::instrument(level = "trace", skip(self, _ctx))] fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { let config = self.0.clone(); let schema = self.0.schema.clone(); @@ -328,6 +347,7 @@ impl PartitionStream for StreamRead { struct StreamWrite(Arc); impl DisplayAs for StreamWrite { + #[tracing::instrument(level = "trace", skip(self, _t, f))] fn fmt_as(&self, _t: DisplayFormatType, f: &mut Formatter) -> std::fmt::Result { f.debug_struct("StreamWrite") .field("location", &self.0.location) @@ -340,14 +360,17 @@ impl DisplayAs for StreamWrite { #[async_trait] impl DataSink for StreamWrite { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { None } + #[tracing::instrument(level = "trace", skip(self, data, _context))] async fn write_all( &self, mut data: SendableRecordBatchStream, diff --git a/datafusion/core/src/datasource/streaming.rs b/datafusion/core/src/datasource/streaming.rs index 0ba6f85ec3e2b..5250b6c95d93c 100644 --- a/datafusion/core/src/datasource/streaming.rs +++ b/datafusion/core/src/datasource/streaming.rs @@ -40,6 +40,7 @@ pub struct StreamingTable { } impl StreamingTable { + #[tracing::instrument(level = "trace", skip(schema, partitions))] /// Try to create a new [`StreamingTable`] returning an error if the schema is incorrect pub fn try_new( schema: SchemaRef, @@ -62,6 +63,7 @@ impl StreamingTable { infinite: false, }) } + #[tracing::instrument(level = "trace", skip(self, infinite))] /// Sets streaming table can be infinite. pub fn with_infinite_table(mut self, infinite: bool) -> Self { self.infinite = infinite; @@ -71,18 +73,22 @@ impl StreamingTable { #[async_trait] impl TableProvider for StreamingTable { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.schema.clone() } + #[tracing::instrument(level = "trace", skip(self))] fn table_type(&self) -> TableType { TableType::View } + #[tracing::instrument(level = "trace", skip(self, _state, projection, _filters, limit))] async fn scan( &self, _state: &SessionState, diff --git a/datafusion/core/src/datasource/view.rs b/datafusion/core/src/datasource/view.rs index 3f024a6b4cb71..ffa6770e52c39 100644 --- a/datafusion/core/src/datasource/view.rs +++ b/datafusion/core/src/datasource/view.rs @@ -44,6 +44,7 @@ pub struct ViewTable { } impl ViewTable { + #[tracing::instrument(level = "trace", skip(logical_plan, definition))] /// Create new view that is executed at query runtime. /// Takes a `LogicalPlan` and an optional create statement as input. 
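Putting the stream pieces above together: a `StreamConfig` is built with the `with_*` methods, wrapped in a `StreamTable`, and registered like any other provider. A hedged sketch; the `new_file` constructor name and argument types are assumptions for this DataFusion version, and the paths are illustrative:

```rust
use std::path::PathBuf;
use std::sync::Arc;
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::datasource::stream::{StreamConfig, StreamEncoding, StreamTable};
use datafusion::prelude::*;

fn register_stream(ctx: &SessionContext) -> datafusion::error::Result<()> {
    let schema = Arc::new(Schema::new(vec![
        Field::new("ts", DataType::Int64, false),
        Field::new("value", DataType::Float64, true),
    ]));
    // `new_file` is an assumed constructor name; the builder methods below are
    // the ones instrumented in this hunk.
    let config = StreamConfig::new_file(schema, PathBuf::from("/tmp/events.csv"))
        .with_encoding(StreamEncoding::Csv)
        .with_header(true)
        .with_batch_size(1024);
    let _ = ctx.register_table("events", Arc::new(StreamTable::new(Arc::new(config))))?;
    Ok(())
}
```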
pub fn try_new( @@ -61,11 +62,13 @@ impl ViewTable { Ok(view) } + #[tracing::instrument(level = "trace", skip(self))] /// Get definition ref pub fn definition(&self) -> Option<&String> { self.definition.as_ref() } + #[tracing::instrument(level = "trace", skip(self))] /// Get logical_plan ref pub fn logical_plan(&self) -> &LogicalPlan { &self.logical_plan @@ -74,25 +77,31 @@ impl ViewTable { #[async_trait] impl TableProvider for ViewTable { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn get_logical_plan(&self) -> Option<&LogicalPlan> { Some(&self.logical_plan) } + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { Arc::clone(&self.table_schema) } + #[tracing::instrument(level = "trace", skip(self))] fn table_type(&self) -> TableType { TableType::View } + #[tracing::instrument(level = "trace", skip(self))] fn get_table_definition(&self) -> Option<&str> { self.definition.as_deref() } + #[tracing::instrument(level = "trace", skip(self, filters))] fn supports_filters_pushdown( &self, filters: &[&Expr], @@ -101,6 +110,7 @@ impl TableProvider for ViewTable { Ok(vec![TableProviderFilterPushDown::Exact; filters.len()]) } + #[tracing::instrument(level = "trace", skip(self, state, projection, filters, limit))] async fn scan( &self, state: &SessionState, diff --git a/datafusion/core/src/execution/context/avro.rs b/datafusion/core/src/execution/context/avro.rs index e829f6123eab4..257614a522079 100644 --- a/datafusion/core/src/execution/context/avro.rs +++ b/datafusion/core/src/execution/context/avro.rs @@ -21,6 +21,7 @@ use super::super::options::{AvroReadOptions, ReadOptions}; use super::{DataFilePaths, DataFrame, Result, SessionContext}; impl SessionContext { + #[tracing::instrument(level = "trace", skip(self, table_paths, options))] /// Creates a [`DataFrame`] for reading an Avro data source. /// /// For more control such as reading multiple files, you can use @@ -35,6 +36,7 @@ impl SessionContext { self._read_type(table_paths, options).await } + #[tracing::instrument(level = "trace", skip(self, name, table_path, options))] /// Registers an Avro file as a table that can be referenced from /// SQL statements executed against this context. pub async fn register_avro( diff --git a/datafusion/core/src/execution/context/csv.rs b/datafusion/core/src/execution/context/csv.rs index 6ba1a14600cb0..5039f02f96271 100644 --- a/datafusion/core/src/execution/context/csv.rs +++ b/datafusion/core/src/execution/context/csv.rs @@ -23,6 +23,7 @@ use super::super::options::{CsvReadOptions, ReadOptions}; use super::{DataFilePaths, DataFrame, ExecutionPlan, Result, SessionContext}; impl SessionContext { + #[tracing::instrument(level = "trace", skip(self, table_paths, options))] /// Creates a [`DataFrame`] for reading a CSV data source. /// /// For more control such as reading multiple files, you can use @@ -51,6 +52,7 @@ impl SessionContext { self._read_type(table_paths, options).await } + #[tracing::instrument(level = "trace", skip(self, name, table_path, options))] /// Registers a CSV file as a table which can referenced from SQL /// statements executed against this context. pub async fn register_csv( @@ -74,6 +76,7 @@ impl SessionContext { Ok(()) } + #[tracing::instrument(level = "trace", skip(self, plan, path))] /// Executes a query and writes the results to a partitioned CSV file. 
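Typical usage of the CSV entry points instrumented above; table names and file paths are illustrative:

```rust
use datafusion::error::Result;
use datafusion::prelude::*;

async fn csv_example() -> Result<()> {
    let ctx = SessionContext::new();

    // Register a CSV file as a table and query it with SQL.
    ctx.register_csv("sales", "data/sales.csv", CsvReadOptions::new()).await?;
    let df = ctx.sql("SELECT region, SUM(amount) FROM sales GROUP BY region").await?;
    df.show().await?;

    // The same file can also be read directly into a DataFrame.
    let _df = ctx.read_csv("data/sales.csv", CsvReadOptions::new()).await?;
    Ok(())
}
```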
pub async fn write_csv( &self, diff --git a/datafusion/core/src/execution/context/json.rs b/datafusion/core/src/execution/context/json.rs index c21e32cfdefbf..525dbf2c34340 100644 --- a/datafusion/core/src/execution/context/json.rs +++ b/datafusion/core/src/execution/context/json.rs @@ -23,6 +23,7 @@ use super::super::options::{NdJsonReadOptions, ReadOptions}; use super::{DataFilePaths, DataFrame, ExecutionPlan, Result, SessionContext}; impl SessionContext { + #[tracing::instrument(level = "trace", skip(self, table_paths, options))] /// Creates a [`DataFrame`] for reading an JSON data source. /// /// For more control such as reading multiple files, you can use @@ -37,6 +38,7 @@ impl SessionContext { self._read_type(table_paths, options).await } + #[tracing::instrument(level = "trace", skip(self, name, table_path, options))] /// Registers a JSON file as a table that it can be referenced /// from SQL statements executed against this context. pub async fn register_json( @@ -59,6 +61,7 @@ impl SessionContext { Ok(()) } + #[tracing::instrument(level = "trace", skip(self, plan, path))] /// Executes a query and writes the results to a partitioned JSON file. pub async fn write_json( &self, diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index abd704f5335e6..9def498ed4987 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -114,18 +114,21 @@ pub trait DataFilePaths { } impl DataFilePaths for &str { + #[tracing::instrument(level = "trace", skip(self))] fn to_urls(self) -> Result> { Ok(vec![ListingTableUrl::parse(self)?]) } } impl DataFilePaths for String { + #[tracing::instrument(level = "trace", skip(self))] fn to_urls(self) -> Result> { Ok(vec![ListingTableUrl::parse(self)?]) } } impl DataFilePaths for &String { + #[tracing::instrument(level = "trace", skip(self))] fn to_urls(self) -> Result> { Ok(vec![ListingTableUrl::parse(self)?]) } @@ -135,6 +138,7 @@ impl
<P> DataFilePaths for Vec<P>
where P: AsRef, { + #[tracing::instrument(level = "trace", skip(self))] fn to_urls(self) -> Result> { self.iter() .map(ListingTableUrl::parse) @@ -253,17 +257,20 @@ pub struct SessionContext { } impl Default for SessionContext { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self::new() } } impl SessionContext { + #[tracing::instrument(level = "trace", skip())] /// Creates a new `SessionContext` using the default [`SessionConfig`]. pub fn new() -> Self { Self::new_with_config(SessionConfig::new()) } + #[tracing::instrument(level = "trace", skip(self))] /// Finds any [`ListingSchemaProvider`]s and instructs them to reload tables from "disk" pub async fn refresh_catalogs(&self) -> Result<()> { let cat_names = self.catalog_names().clone(); @@ -284,6 +291,7 @@ impl SessionContext { Ok(()) } + #[tracing::instrument(level = "trace", skip(config))] /// Creates a new `SessionContext` using the provided /// [`SessionConfig`] and a new [`RuntimeEnv`]. /// @@ -294,6 +302,7 @@ impl SessionContext { Self::new_with_config_rt(config, runtime) } + #[tracing::instrument(level = "trace", skip(config))] /// Creates a new `SessionContext` using the provided /// [`SessionConfig`] and a new [`RuntimeEnv`]. #[deprecated(since = "32.0.0", note = "Use SessionContext::new_with_config")] @@ -301,6 +310,7 @@ impl SessionContext { Self::new_with_config(config) } + #[tracing::instrument(level = "trace", skip(config, runtime))] /// Creates a new `SessionContext` using the provided /// [`SessionConfig`] and a [`RuntimeEnv`]. /// @@ -319,6 +329,7 @@ impl SessionContext { Self::new_with_state(state) } + #[tracing::instrument(level = "trace", skip(config, runtime))] /// Creates a new `SessionContext` using the provided /// [`SessionConfig`] and a [`RuntimeEnv`]. #[deprecated(since = "32.0.0", note = "Use SessionState::new_with_config_rt")] @@ -326,6 +337,7 @@ impl SessionContext { Self::new_with_config_rt(config, runtime) } + #[tracing::instrument(level = "trace", skip(state))] /// Creates a new `SessionContext` using the provided [`SessionState`] pub fn new_with_state(state: SessionState) -> Self { Self { @@ -335,16 +347,19 @@ impl SessionContext { } } + #[tracing::instrument(level = "trace", skip(state))] /// Creates a new `SessionContext` using the provided [`SessionState`] #[deprecated(since = "32.0.0", note = "Use SessionState::new_with_state")] pub fn with_state(state: SessionState) -> Self { Self::new_with_state(state) } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the time this `SessionContext` was created pub fn session_start_time(&self) -> DateTime { self.session_start_time } + #[tracing::instrument(level = "trace", skip(self, function_factory))] /// Registers a [`FunctionFactory`] to handle `CREATE FUNCTION` statements pub fn with_function_factory( self, @@ -354,6 +369,7 @@ impl SessionContext { self } + #[tracing::instrument(level = "trace", skip(self, table_name, batch))] /// Registers the [`RecordBatch`] as the specified table name pub fn register_batch( &self, @@ -369,16 +385,19 @@ impl SessionContext { ) } + #[tracing::instrument(level = "trace", skip(self))] /// Return the [RuntimeEnv] used to run queries with this `SessionContext` pub fn runtime_env(&self) -> Arc { self.state.read().runtime_env.clone() } + #[tracing::instrument(level = "trace", skip(self))] /// Returns an id that uniquely identifies this `SessionContext`. 
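A hedged sketch tying the constructors above together: build a context from a `SessionConfig`, register an in-memory batch, and query it. Only APIs that appear in this hunk plus well-known arrow constructors are used:

```rust
use std::sync::Arc;
use datafusion::arrow::array::Int64Array;
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::error::Result;
use datafusion::prelude::*;

async fn batch_example() -> Result<()> {
    let config = SessionConfig::new().with_batch_size(4096);
    let ctx = SessionContext::new_with_config(config);

    // Register a single in-memory RecordBatch under the name "t".
    let schema = Arc::new(Schema::new(vec![Field::new("v", DataType::Int64, false)]));
    let batch = RecordBatch::try_new(schema, vec![Arc::new(Int64Array::from(vec![1, 2, 3]))])?;
    let _ = ctx.register_batch("t", batch)?;

    let results = ctx.sql("SELECT SUM(v) FROM t").await?.collect().await?;
    assert_eq!(results.len(), 1);
    Ok(())
}
```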
pub fn session_id(&self) -> String { self.session_id.clone() } + #[tracing::instrument(level = "trace", skip(self, file_type))] /// Return the [`TableProviderFactory`] that is registered for the /// specified file type, if any. pub fn table_factory( @@ -388,6 +407,7 @@ impl SessionContext { self.state.read().table_factories().get(file_type).cloned() } + #[tracing::instrument(level = "trace", skip(self))] /// Return the `enable_ident_normalization` of this Session pub fn enable_ident_normalization(&self) -> bool { self.state @@ -398,16 +418,19 @@ impl SessionContext { .enable_ident_normalization } + #[tracing::instrument(level = "trace", skip(self))] /// Return a copied version of config for this Session pub fn copied_config(&self) -> SessionConfig { self.state.read().config.clone() } + #[tracing::instrument(level = "trace", skip(self))] /// Return a copied version of table options for this Session pub fn copied_table_options(&self) -> TableOptions { self.state.read().default_table_options() } + #[tracing::instrument(level = "trace", skip(self, sql))] /// Creates a [`DataFrame`] from SQL query text. /// /// Note: This API implements DDL statements such as `CREATE TABLE` and @@ -439,6 +462,7 @@ impl SessionContext { self.sql_with_options(sql, SQLOptions::new()).await } + #[tracing::instrument(level = "trace", skip(self, sql, options))] /// Creates a [`DataFrame`] from SQL query text, first validating /// that the queries are allowed by `options` /// @@ -476,6 +500,7 @@ impl SessionContext { self.execute_logical_plan(plan).await } + #[tracing::instrument(level = "trace", skip(self, plan))] /// Execute the [`LogicalPlan`], return a [`DataFrame`]. This API /// is not featured limited (so all SQL such as `CREATE TABLE` and /// `COPY` will be run). @@ -525,6 +550,7 @@ impl SessionContext { } } + #[tracing::instrument(level = "trace", skip(self, expr, df_schema))] /// Create a [`PhysicalExpr`] from an [`Expr`] after applying type /// coercion and function rewrites. /// @@ -561,11 +587,13 @@ impl SessionContext { } // return an empty dataframe + #[tracing::instrument(level = "trace", skip(self))] fn return_empty_dataframe(&self) -> Result { let plan = LogicalPlanBuilder::empty(false).build()?; Ok(DataFrame::new(self.state(), plan)) } + #[tracing::instrument(level = "trace", skip(self, cmd))] async fn create_external_table( &self, cmd: &CreateExternalTable, @@ -586,6 +614,7 @@ impl SessionContext { self.return_empty_dataframe() } + #[tracing::instrument(level = "trace", skip(self, cmd))] async fn create_memory_table(&self, cmd: CreateMemoryTable) -> Result { let CreateMemoryTable { name, @@ -640,6 +669,7 @@ impl SessionContext { } } + #[tracing::instrument(level = "trace", skip(self, cmd))] async fn create_view(&self, cmd: CreateView) -> Result { let CreateView { name, @@ -668,6 +698,7 @@ impl SessionContext { } } + #[tracing::instrument(level = "trace", skip(self, cmd))] async fn create_catalog_schema(&self, cmd: CreateCatalogSchema) -> Result { let CreateCatalogSchema { schema_name, @@ -711,6 +742,7 @@ impl SessionContext { } } + #[tracing::instrument(level = "trace", skip(self, cmd))] async fn create_catalog(&self, cmd: CreateCatalog) -> Result { let CreateCatalog { catalog_name, @@ -733,6 +765,7 @@ impl SessionContext { } } + #[tracing::instrument(level = "trace", skip(self, cmd))] async fn drop_table(&self, cmd: DropTable) -> Result { let DropTable { name, if_exists, .. 
@@ -747,6 +780,7 @@ impl SessionContext { } } + #[tracing::instrument(level = "trace", skip(self, cmd))] async fn drop_view(&self, cmd: DropView) -> Result { let DropView { name, if_exists, .. @@ -761,6 +795,7 @@ impl SessionContext { } } + #[tracing::instrument(level = "trace", skip(self, cmd))] async fn drop_schema(&self, cmd: DropCatalogSchema) -> Result { let DropCatalogSchema { name, @@ -792,10 +827,12 @@ impl SessionContext { } } + #[tracing::instrument(level = "trace", skip(self, schemaref))] fn schema_doesnt_exist_err(&self, schemaref: SchemaReference) -> Result { exec_err!("Schema '{schemaref}' doesn't exist.") } + #[tracing::instrument(level = "trace", skip(self, stmt))] async fn set_variable(&self, stmt: SetVariable) -> Result { let SetVariable { variable, value, .. @@ -808,6 +845,7 @@ impl SessionContext { self.return_empty_dataframe() } + #[tracing::instrument(level = "trace", skip(self, cmd))] async fn create_custom_table( &self, cmd: &CreateExternalTable, @@ -828,6 +866,7 @@ impl SessionContext { Ok(table) } + #[tracing::instrument(level = "trace", skip(self, table_ref, table_type))] async fn find_and_deregister<'a>( &self, table_ref: impl Into, @@ -856,6 +895,7 @@ impl SessionContext { Ok(false) } + #[tracing::instrument(level = "trace", skip(self, stmt))] async fn create_function(&self, stmt: CreateFunction) -> Result { let function = { let state = self.state.read().clone(); @@ -885,6 +925,7 @@ impl SessionContext { self.return_empty_dataframe() } + #[tracing::instrument(level = "trace", skip(self, stmt))] async fn drop_function(&self, stmt: DropFunction) -> Result { // we don't know function type at this point // decision has been made to drop all functions @@ -905,6 +946,7 @@ impl SessionContext { } } + #[tracing::instrument(level = "trace", skip(self, variable_type, provider))] /// Registers a variable provider within this context. pub fn register_variable( &self, @@ -917,6 +959,7 @@ impl SessionContext { .add_var_provider(variable_type, provider); } + #[tracing::instrument(level = "trace", skip(self, name, fun))] /// Register a table UDF with this context pub fn register_udtf(&self, name: &str, fun: Arc) { self.state.write().table_functions.insert( @@ -925,6 +968,7 @@ impl SessionContext { ); } + #[tracing::instrument(level = "trace", skip(self, f))] /// Registers a scalar UDF within this context. /// /// Note in SQL queries, function names are looked up using @@ -938,6 +982,7 @@ impl SessionContext { state.register_udf(Arc::new(f)).ok(); } + #[tracing::instrument(level = "trace", skip(self, f))] /// Registers an aggregate UDF within this context. /// /// Note in SQL queries, aggregate names are looked up using @@ -949,6 +994,7 @@ impl SessionContext { self.state.write().register_udaf(Arc::new(f)).ok(); } + #[tracing::instrument(level = "trace", skip(self, f))] /// Registers a window UDF within this context. /// /// Note in SQL queries, window function names are looked up using @@ -960,21 +1006,25 @@ impl SessionContext { self.state.write().register_udwf(Arc::new(f)).ok(); } + #[tracing::instrument(level = "trace", skip(self, name))] /// Deregisters a UDF within this context. pub fn deregister_udf(&self, name: &str) { self.state.write().deregister_udf(name).ok(); } + #[tracing::instrument(level = "trace", skip(self, name))] /// Deregisters a UDAF within this context. pub fn deregister_udaf(&self, name: &str) { self.state.write().deregister_udaf(name).ok(); } + #[tracing::instrument(level = "trace", skip(self, name))] /// Deregisters a UDWF within this context. 
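A hedged sketch of the `register_udf` path above. The `create_udf` signature (in particular `Arc<DataType>` for the return type) is assumed for the DataFusion version this patch targets; the function name and logic are purely illustrative:

```rust
use std::sync::Arc;
use datafusion::arrow::array::Float64Array;
use datafusion::arrow::datatypes::DataType;
use datafusion::error::DataFusionError;
use datafusion::logical_expr::{create_udf, ColumnarValue, Volatility};
use datafusion::prelude::*;

fn register_double_udf(ctx: &SessionContext) {
    let double_it = create_udf(
        "double_it",
        vec![DataType::Float64],
        Arc::new(DataType::Float64),
        Volatility::Immutable,
        Arc::new(|args: &[ColumnarValue]| {
            // Illustrative only: handle the array case and double every value.
            let ColumnarValue::Array(array) = &args[0] else {
                return Err(DataFusionError::Execution(
                    "double_it expects an array argument".to_string(),
                ));
            };
            let input = array
                .as_any()
                .downcast_ref::<Float64Array>()
                .expect("double_it expects Float64 input");
            let doubled: Float64Array = input.iter().map(|v| v.map(|x| x * 2.0)).collect();
            Ok(ColumnarValue::Array(Arc::new(doubled)))
        }),
    );
    // After registration the function is resolvable from SQL, e.g.
    // `SELECT double_it(price) FROM quotes`.
    ctx.register_udf(double_it);
}
```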
pub fn deregister_udwf(&self, name: &str) { self.state.write().deregister_udwf(name).ok(); } + #[tracing::instrument(level = "trace", skip(self, table_paths, options))] /// Creates a [`DataFrame`] for reading a data source. /// /// For more control such as reading multiple files, you can use @@ -1017,6 +1067,7 @@ impl SessionContext { self.read_table(Arc::new(provider)) } + #[tracing::instrument(level = "trace", skip(self, table_paths, options))] /// Creates a [`DataFrame`] for reading an Arrow data source. /// /// For more control such as reading multiple files, you can use @@ -1031,6 +1082,7 @@ impl SessionContext { self._read_type(table_paths, options).await } + #[tracing::instrument(level = "trace", skip(self))] /// Creates an empty DataFrame. pub fn read_empty(&self) -> Result { Ok(DataFrame::new( @@ -1039,6 +1091,7 @@ impl SessionContext { )) } + #[tracing::instrument(level = "trace", skip(self, provider))] /// Creates a [`DataFrame`] for a [`TableProvider`] such as a /// [`ListingTable`] or a custom user defined provider. pub fn read_table(&self, provider: Arc) -> Result { @@ -1049,6 +1102,7 @@ impl SessionContext { )) } + #[tracing::instrument(level = "trace", skip(self, batch))] /// Creates a [`DataFrame`] for reading a [`RecordBatch`] pub fn read_batch(&self, batch: RecordBatch) -> Result { let provider = MemTable::try_new(batch.schema(), vec![vec![batch]])?; @@ -1062,6 +1116,7 @@ impl SessionContext { .build()?, )) } + #[tracing::instrument(level = "trace", skip(self, batches))] /// Create a [`DataFrame`] for reading a [`Vec[`RecordBatch`]`] pub fn read_batches( &self, @@ -1085,6 +1140,7 @@ impl SessionContext { .build()?, )) } + #[tracing::instrument(level = "trace", skip(self, name, table_path, options, provided_schema, sql_definition))] /// Registers a [`ListingTable`] that can assemble multiple files /// from locations in an [`ObjectStore`] instance into a single /// table. @@ -1116,6 +1172,7 @@ impl SessionContext { Ok(()) } + #[tracing::instrument(level = "trace", skip(self, name, table_path, options))] /// Registers an Arrow file as a table that can be referenced from /// SQL statements executed against this context. pub async fn register_arrow( @@ -1138,6 +1195,7 @@ impl SessionContext { Ok(()) } + #[tracing::instrument(level = "trace", skip(self, name, catalog))] /// Registers a named catalog using a custom `CatalogProvider` so that /// it can be referenced from SQL statements executed against this /// context. @@ -1156,16 +1214,19 @@ impl SessionContext { .register_catalog(name, catalog) } + #[tracing::instrument(level = "trace", skip(self))] /// Retrieves the list of available catalog names. pub fn catalog_names(&self) -> Vec { self.state.read().catalog_list.catalog_names() } + #[tracing::instrument(level = "trace", skip(self, name))] /// Retrieves a [`CatalogProvider`] instance by name pub fn catalog(&self, name: &str) -> Option> { self.state.read().catalog_list.catalog(name) } + #[tracing::instrument(level = "trace", skip(self, table_ref, provider))] /// Registers a [`TableProvider`] as a table that can be /// referenced from SQL statements executed against this context. /// @@ -1184,6 +1245,7 @@ impl SessionContext { .register_table(table, provider) } + #[tracing::instrument(level = "trace", skip(self, table_ref))] /// Deregisters the given table. 
/// /// Returns the registered provider, if any @@ -1199,6 +1261,7 @@ impl SessionContext { .deregister_table(&table) } + #[tracing::instrument(level = "trace", skip(self, table_ref))] /// Return `true` if the specified table exists in the schema provider. pub fn table_exist(&self, table_ref: impl Into) -> Result { let table_ref: TableReference = table_ref.into(); @@ -1211,6 +1274,7 @@ impl SessionContext { .table_exist(table)) } + #[tracing::instrument(level = "trace", skip(self, table_ref))] /// Retrieves a [`DataFrame`] representing a table previously /// registered by calling the [`register_table`] function. /// @@ -1233,6 +1297,7 @@ impl SessionContext { Ok(DataFrame::new(self.state(), plan)) } + #[tracing::instrument(level = "trace", skip(self, table_ref))] /// Return a [`TableProvider`] for the specified table. pub async fn table_provider<'a>( &self, @@ -1247,11 +1312,13 @@ impl SessionContext { } } + #[tracing::instrument(level = "trace", skip(self))] /// Get a new TaskContext to run in this session pub fn task_ctx(&self) -> Arc { Arc::new(TaskContext::from(self)) } + #[tracing::instrument(level = "trace", skip(self))] /// Snapshots the [`SessionState`] of this [`SessionContext`] setting the /// `query_execution_start_time` to the current time pub fn state(&self) -> SessionState { @@ -1260,16 +1327,19 @@ impl SessionContext { state } + #[tracing::instrument(level = "trace", skip(self))] /// Get weak reference to [`SessionState`] pub fn state_weak_ref(&self) -> Weak> { Arc::downgrade(&self.state) } + #[tracing::instrument(level = "trace", skip(self, catalog_list))] /// Register [`CatalogProviderList`] in [`SessionState`] pub fn register_catalog_list(&mut self, catalog_list: Arc) { self.state.write().catalog_list = catalog_list; } + #[tracing::instrument(level = "trace", skip(self, extension))] /// Registers a [`ConfigExtension`] as a table option extention that can be /// referenced from SQL statements executed against this context. 
pub fn register_table_options_extension(&self, extension: T) { @@ -1282,34 +1352,42 @@ impl SessionContext { } impl FunctionRegistry for SessionContext { + #[tracing::instrument(level = "trace", skip(self))] fn udfs(&self) -> HashSet { self.state.read().udfs() } + #[tracing::instrument(level = "trace", skip(self, name))] fn udf(&self, name: &str) -> Result> { self.state.read().udf(name) } + #[tracing::instrument(level = "trace", skip(self, name))] fn udaf(&self, name: &str) -> Result> { self.state.read().udaf(name) } + #[tracing::instrument(level = "trace", skip(self, name))] fn udwf(&self, name: &str) -> Result> { self.state.read().udwf(name) } + #[tracing::instrument(level = "trace", skip(self, udf))] fn register_udf(&mut self, udf: Arc) -> Result>> { self.state.write().register_udf(udf) } + #[tracing::instrument(level = "trace", skip(self, udaf))] fn register_udaf( &mut self, udaf: Arc, ) -> Result>> { self.state.write().register_udaf(udaf) } + #[tracing::instrument(level = "trace", skip(self, udwf))] fn register_udwf(&mut self, udwf: Arc) -> Result>> { self.state.write().register_udwf(udwf) } + #[tracing::instrument(level = "trace", skip(self, rewrite))] fn register_function_rewrite( &mut self, rewrite: Arc, @@ -1334,6 +1412,7 @@ struct DefaultQueryPlanner {} #[async_trait] impl QueryPlanner for DefaultQueryPlanner { + #[tracing::instrument(level = "trace", skip(self, logical_plan, session_state))] /// Given a `LogicalPlan`, create an [`ExecutionPlan`] suitable for execution async fn create_physical_plan( &self, @@ -1426,6 +1505,7 @@ pub struct SessionState { } impl Debug for SessionState { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("SessionState") .field("session_id", &self.session_id) @@ -1435,6 +1515,7 @@ impl Debug for SessionState { } impl SessionState { + #[tracing::instrument(level = "trace", skip(config, runtime))] /// Returns new [`SessionState`] using the provided /// [`SessionConfig`] and [`RuntimeEnv`]. pub fn new_with_config_rt(config: SessionConfig, runtime: Arc) -> Self { @@ -1443,6 +1524,7 @@ impl SessionState { Self::new_with_config_rt_and_catalog_list(config, runtime, catalog_list) } + #[tracing::instrument(level = "trace", skip(config, runtime))] /// Returns new [`SessionState`] using the provided /// [`SessionConfig`] and [`RuntimeEnv`]. #[deprecated(since = "32.0.0", note = "Use SessionState::new_with_config_rt")] @@ -1450,6 +1532,7 @@ impl SessionState { Self::new_with_config_rt(config, runtime) } + #[tracing::instrument(level = "trace", skip(config, runtime, catalog_list))] /// Returns new [`SessionState`] using the provided /// [`SessionConfig`], [`RuntimeEnv`], and [`CatalogProviderList`] pub fn new_with_config_rt_and_catalog_list( @@ -1529,6 +1612,7 @@ impl SessionState { new_self } + #[tracing::instrument(level = "trace", skip(config, runtime, catalog_list))] /// Returns new [`SessionState`] using the provided /// [`SessionConfig`] and [`RuntimeEnv`]. 
#[deprecated( @@ -1542,6 +1626,7 @@ impl SessionState { ) -> Self { Self::new_with_config_rt_and_catalog_list(config, runtime, catalog_list) } + #[tracing::instrument(level = "trace", skip(config, table_factories, runtime, default_catalog))] fn register_default_schema( config: &SessionConfig, table_factories: &HashMap>, @@ -1581,6 +1666,7 @@ impl SessionState { .expect("Failed to register default schema"); } + #[tracing::instrument(level = "trace", skip(self, table_ref))] fn resolve_table_ref( &self, table_ref: impl Into, @@ -1591,6 +1677,7 @@ impl SessionState { .resolve(&catalog.default_catalog, &catalog.default_schema) } + #[tracing::instrument(level = "trace", skip(self, table_ref))] pub(crate) fn schema_for_ref( &self, table_ref: impl Into, @@ -1617,12 +1704,14 @@ impl SessionState { }) } + #[tracing::instrument(level = "trace", skip(self, session_id))] /// Replace the random session id. pub fn with_session_id(mut self, session_id: String) -> Self { self.session_id = session_id; self } + #[tracing::instrument(level = "trace", skip(self, query_planner))] /// override default query planner with `query_planner` pub fn with_query_planner( mut self, @@ -1632,6 +1721,7 @@ impl SessionState { self } + #[tracing::instrument(level = "trace", skip(self, rules))] /// Override the [`AnalyzerRule`]s optimizer plan rules. pub fn with_analyzer_rules( mut self, @@ -1641,6 +1731,7 @@ impl SessionState { self } + #[tracing::instrument(level = "trace", skip(self, rules))] /// Replace the entire list of [`OptimizerRule`]s used to optimize plans pub fn with_optimizer_rules( mut self, @@ -1650,6 +1741,7 @@ impl SessionState { self } + #[tracing::instrument(level = "trace", skip(self, physical_optimizers))] /// Replace the entire list of [`PhysicalOptimizerRule`]s used to optimize plans pub fn with_physical_optimizer_rules( mut self, @@ -1659,6 +1751,7 @@ impl SessionState { self } + #[tracing::instrument(level = "trace", skip(self, analyzer_rule))] /// Add `analyzer_rule` to the end of the list of /// [`AnalyzerRule`]s used to rewrite queries. pub fn add_analyzer_rule( @@ -1669,6 +1762,7 @@ impl SessionState { self } + #[tracing::instrument(level = "trace", skip(self, optimizer_rule))] /// Add `optimizer_rule` to the end of the list of /// [`OptimizerRule`]s used to rewrite queries. pub fn add_optimizer_rule( @@ -1679,6 +1773,7 @@ impl SessionState { self } + #[tracing::instrument(level = "trace", skip(self, physical_optimizer_rule))] /// Add `physical_optimizer_rule` to the end of the list of /// [`PhysicalOptimizerRule`]s used to rewrite queries. 
pub fn add_physical_optimizer_rule( @@ -1689,6 +1784,7 @@ impl SessionState { self } + #[tracing::instrument(level = "trace", skip(self, extension))] /// Adds a new [`ConfigExtension`] to TableOptions pub fn add_table_options_extension( mut self, @@ -1698,6 +1794,7 @@ impl SessionState { self } + #[tracing::instrument(level = "trace", skip(self, function_factory))] /// Registers a [`FunctionFactory`] to handle `CREATE FUNCTION` statements pub fn with_function_factory( mut self, @@ -1707,11 +1804,13 @@ impl SessionState { self } + #[tracing::instrument(level = "trace", skip(self, function_factory))] /// Registers a [`FunctionFactory`] to handle `CREATE FUNCTION` statements pub fn set_function_factory(&mut self, function_factory: Arc) { self.function_factory = Some(function_factory); } + #[tracing::instrument(level = "trace", skip(self, registry))] /// Replace the extension [`SerializerRegistry`] pub fn with_serializer_registry( mut self, @@ -1721,11 +1820,13 @@ impl SessionState { self } + #[tracing::instrument(level = "trace", skip(self))] /// Get the table factories pub fn table_factories(&self) -> &HashMap> { &self.table_factories } + #[tracing::instrument(level = "trace", skip(self))] /// Get the table factories pub fn table_factories_mut( &mut self, @@ -1733,6 +1834,7 @@ impl SessionState { &mut self.table_factories } + #[tracing::instrument(level = "trace", skip(self, sql, dialect))] /// Parse an SQL string into an DataFusion specific AST /// [`Statement`]. See [`SessionContext::sql`] for running queries. pub fn sql_to_statement( @@ -1761,6 +1863,7 @@ impl SessionState { Ok(statement) } + #[tracing::instrument(level = "trace", skip(self, statement))] /// Resolve all table references in the SQL statement. pub fn resolve_table_references( &self, @@ -1777,6 +1880,7 @@ impl SessionState { struct RelationVisitor<'a>(&'a mut hashbrown::HashSet); impl<'a> RelationVisitor<'a> { + #[tracing::instrument(level = "trace", skip(self, relation))] /// Record that `relation` was used in this statement fn insert(&mut self, relation: &ObjectName) { self.0.get_or_insert_with(relation, |_| relation.clone()); @@ -1786,11 +1890,13 @@ impl SessionState { impl<'a> Visitor for RelationVisitor<'a> { type Break = (); + #[tracing::instrument(level = "trace", skip(self, relation))] fn pre_visit_relation(&mut self, relation: &ObjectName) -> ControlFlow<()> { self.insert(relation); ControlFlow::Continue(()) } + #[tracing::instrument(level = "trace", skip(self, statement))] fn pre_visit_statement(&mut self, statement: &Statement) -> ControlFlow<()> { if let Statement::ShowCreate { obj_type: ShowCreateObject::Table | ShowCreateObject::View, @@ -1804,6 +1910,7 @@ impl SessionState { } let mut visitor = RelationVisitor(&mut relations); + #[tracing::instrument(level = "trace", skip(statement, visitor))] fn visit_statement(statement: &DFStatement, visitor: &mut RelationVisitor<'_>) { match statement { DFStatement::Statement(s) => { @@ -1848,6 +1955,7 @@ impl SessionState { .collect::>() } + #[tracing::instrument(level = "trace", skip(self, statement))] /// Convert an AST Statement into a LogicalPlan pub async fn statement_to_plan( &self, @@ -1885,6 +1993,7 @@ impl SessionState { query.statement_to_plan(statement) } + #[tracing::instrument(level = "trace", skip(self, sql))] /// Creates a [`LogicalPlan`] from the provided SQL string. 
This /// interface will plan any SQL DataFusion supports, including DML /// like `CREATE TABLE`, and `COPY` (which can write to local @@ -1901,6 +2010,7 @@ impl SessionState { Ok(plan) } + #[tracing::instrument(level = "trace", skip(self, plan))] /// Optimizes the logical plan by applying optimizer rules. pub fn optimize(&self, plan: &LogicalPlan) -> Result { if let LogicalPlan::Explain(e) = plan { @@ -1976,6 +2086,7 @@ impl SessionState { } } + #[tracing::instrument(level = "trace", skip(self, logical_plan))] /// Creates a physical [`ExecutionPlan`] plan from a [`LogicalPlan`]. /// /// Note: this first calls [`Self::optimize`] on the provided @@ -1994,6 +2105,7 @@ impl SessionState { .await } + #[tracing::instrument(level = "trace", skip(self, expr, df_schema))] /// Create a [`PhysicalExpr`] from an [`Expr`] after applying type /// coercion, and function rewrites. /// @@ -2027,77 +2139,92 @@ impl SessionState { create_physical_expr(&expr, df_schema, self.execution_props()) } + #[tracing::instrument(level = "trace", skip(self))] /// Return the session ID pub fn session_id(&self) -> &str { &self.session_id } + #[tracing::instrument(level = "trace", skip(self))] /// Return the runtime env pub fn runtime_env(&self) -> &Arc { &self.runtime_env } + #[tracing::instrument(level = "trace", skip(self))] /// Return the execution properties pub fn execution_props(&self) -> &ExecutionProps { &self.execution_props } + #[tracing::instrument(level = "trace", skip(self))] /// Return the [`SessionConfig`] pub fn config(&self) -> &SessionConfig { &self.config } + #[tracing::instrument(level = "trace", skip(self))] /// Return the mutable [`SessionConfig`]. pub fn config_mut(&mut self) -> &mut SessionConfig { &mut self.config } + #[tracing::instrument(level = "trace", skip(self))] /// Return the physical optimizers pub fn physical_optimizers(&self) -> &[Arc] { &self.physical_optimizers.rules } + #[tracing::instrument(level = "trace", skip(self))] /// return the configuration options pub fn config_options(&self) -> &ConfigOptions { self.config.options() } + #[tracing::instrument(level = "trace", skip(self))] /// return the TableOptions options with its extensions pub fn default_table_options(&self) -> TableOptions { self.table_option_namespace .combine_with_session_config(self.config_options()) } + #[tracing::instrument(level = "trace", skip(self))] /// Get a new TaskContext to run in this session pub fn task_ctx(&self) -> Arc { Arc::new(TaskContext::from(self)) } + #[tracing::instrument(level = "trace", skip(self))] /// Return catalog list pub fn catalog_list(&self) -> Arc { self.catalog_list.clone() } + #[tracing::instrument(level = "trace", skip(self))] /// Return reference to scalar_functions pub fn scalar_functions(&self) -> &HashMap> { &self.scalar_functions } + #[tracing::instrument(level = "trace", skip(self))] /// Return reference to aggregate_functions pub fn aggregate_functions(&self) -> &HashMap> { &self.aggregate_functions } + #[tracing::instrument(level = "trace", skip(self))] /// Return reference to window functions pub fn window_functions(&self) -> &HashMap> { &self.window_functions } + #[tracing::instrument(level = "trace", skip(self))] /// Return [SerializerRegistry] for extensions pub fn serializer_registry(&self) -> Arc { self.serializer_registry.clone() } + #[tracing::instrument(level = "trace", skip(self))] /// Return version of the cargo package that produced this query pub fn version(&self) -> &str { env!("CARGO_PKG_VERSION") @@ -2110,24 +2237,29 @@ struct SessionSimplifyProvider<'a> { 
} impl<'a> SessionSimplifyProvider<'a> { + #[tracing::instrument(level = "trace", skip(state, df_schema))] fn new(state: &'a SessionState, df_schema: &'a DFSchema) -> Self { Self { state, df_schema } } } impl<'a> SimplifyInfo for SessionSimplifyProvider<'a> { + #[tracing::instrument(level = "trace", skip(self, expr))] fn is_boolean_type(&self, expr: &Expr) -> Result { Ok(expr.get_type(self.df_schema)? == DataType::Boolean) } + #[tracing::instrument(level = "trace", skip(self, expr))] fn nullable(&self, expr: &Expr) -> Result { expr.nullable(self.df_schema) } + #[tracing::instrument(level = "trace", skip(self))] fn execution_props(&self) -> &ExecutionProps { self.state.execution_props() } + #[tracing::instrument(level = "trace", skip(self, expr))] fn get_data_type(&self, expr: &Expr) -> Result { expr.get_type(self.df_schema) } @@ -2139,6 +2271,7 @@ struct SessionContextProvider<'a> { } impl<'a> ContextProvider for SessionContextProvider<'a> { + #[tracing::instrument(level = "trace", skip(self, name))] fn get_table_source(&self, name: TableReference) -> Result> { let name = self.state.resolve_table_ref(name).to_string(); self.tables @@ -2147,6 +2280,7 @@ impl<'a> ContextProvider for SessionContextProvider<'a> { .ok_or_else(|| plan_datafusion_err!("table '{name}' not found")) } + #[tracing::instrument(level = "trace", skip(self, name, args))] fn get_table_function_source( &self, name: &str, @@ -2163,6 +2297,7 @@ impl<'a> ContextProvider for SessionContextProvider<'a> { Ok(provider_as_source(provider)) } + #[tracing::instrument(level = "trace", skip(self, name, schema))] /// Create a new CTE work table for a recursive CTE logical plan /// This table will be used in conjunction with a Worktable physical plan /// to read and write each iteration of a recursive CTE @@ -2175,18 +2310,22 @@ impl<'a> ContextProvider for SessionContextProvider<'a> { Ok(provider_as_source(table)) } + #[tracing::instrument(level = "trace", skip(self, name))] fn get_function_meta(&self, name: &str) -> Option> { self.state.scalar_functions().get(name).cloned() } + #[tracing::instrument(level = "trace", skip(self, name))] fn get_aggregate_meta(&self, name: &str) -> Option> { self.state.aggregate_functions().get(name).cloned() } + #[tracing::instrument(level = "trace", skip(self, name))] fn get_window_meta(&self, name: &str) -> Option> { self.state.window_functions().get(name).cloned() } + #[tracing::instrument(level = "trace", skip(self, variable_names))] fn get_variable_type(&self, variable_names: &[String]) -> Option { if variable_names.is_empty() { return None; @@ -2205,28 +2344,34 @@ impl<'a> ContextProvider for SessionContextProvider<'a> { .and_then(|provider| provider.get(&provider_type)?.get_type(variable_names)) } + #[tracing::instrument(level = "trace", skip(self))] fn options(&self) -> &ConfigOptions { self.state.config_options() } + #[tracing::instrument(level = "trace", skip(self))] fn udfs_names(&self) -> Vec { self.state.scalar_functions().keys().cloned().collect() } + #[tracing::instrument(level = "trace", skip(self))] fn udafs_names(&self) -> Vec { self.state.aggregate_functions().keys().cloned().collect() } + #[tracing::instrument(level = "trace", skip(self))] fn udwfs_names(&self) -> Vec { self.state.window_functions().keys().cloned().collect() } } impl FunctionRegistry for SessionState { + #[tracing::instrument(level = "trace", skip(self))] fn udfs(&self) -> HashSet { self.scalar_functions.keys().cloned().collect() } + #[tracing::instrument(level = "trace", skip(self, name))] fn udf(&self, name: &str) 
-> Result> { let result = self.scalar_functions.get(name); @@ -2235,6 +2380,7 @@ impl FunctionRegistry for SessionState { }) } + #[tracing::instrument(level = "trace", skip(self, name))] fn udaf(&self, name: &str) -> Result> { let result = self.aggregate_functions.get(name); @@ -2243,6 +2389,7 @@ impl FunctionRegistry for SessionState { }) } + #[tracing::instrument(level = "trace", skip(self, name))] fn udwf(&self, name: &str) -> Result> { let result = self.window_functions.get(name); @@ -2251,6 +2398,7 @@ impl FunctionRegistry for SessionState { }) } + #[tracing::instrument(level = "trace", skip(self, udf))] fn register_udf(&mut self, udf: Arc) -> Result>> { udf.aliases().iter().for_each(|alias| { self.scalar_functions.insert(alias.clone(), udf.clone()); @@ -2258,6 +2406,7 @@ impl FunctionRegistry for SessionState { Ok(self.scalar_functions.insert(udf.name().into(), udf)) } + #[tracing::instrument(level = "trace", skip(self, udaf))] fn register_udaf( &mut self, udaf: Arc, @@ -2268,6 +2417,7 @@ impl FunctionRegistry for SessionState { Ok(self.aggregate_functions.insert(udaf.name().into(), udaf)) } + #[tracing::instrument(level = "trace", skip(self, udwf))] fn register_udwf(&mut self, udwf: Arc) -> Result>> { udwf.aliases().iter().for_each(|alias| { self.window_functions.insert(alias.clone(), udwf.clone()); @@ -2275,6 +2425,7 @@ impl FunctionRegistry for SessionState { Ok(self.window_functions.insert(udwf.name().into(), udwf)) } + #[tracing::instrument(level = "trace", skip(self, name))] fn deregister_udf(&mut self, name: &str) -> Result>> { let udf = self.scalar_functions.remove(name); if let Some(udf) = &udf { @@ -2285,6 +2436,7 @@ impl FunctionRegistry for SessionState { Ok(udf) } + #[tracing::instrument(level = "trace", skip(self, name))] fn deregister_udaf(&mut self, name: &str) -> Result>> { let udaf = self.aggregate_functions.remove(name); if let Some(udaf) = &udaf { @@ -2295,6 +2447,7 @@ impl FunctionRegistry for SessionState { Ok(udaf) } + #[tracing::instrument(level = "trace", skip(self, name))] fn deregister_udwf(&mut self, name: &str) -> Result>> { let udwf = self.window_functions.remove(name); if let Some(udwf) = &udwf { @@ -2305,6 +2458,7 @@ impl FunctionRegistry for SessionState { Ok(udwf) } + #[tracing::instrument(level = "trace", skip(self, rewrite))] fn register_function_rewrite( &mut self, rewrite: Arc, @@ -2315,14 +2469,17 @@ impl FunctionRegistry for SessionState { } impl OptimizerConfig for SessionState { + #[tracing::instrument(level = "trace", skip(self))] fn query_execution_start_time(&self) -> DateTime { self.execution_props.query_execution_start_time } + #[tracing::instrument(level = "trace", skip(self))] fn alias_generator(&self) -> Arc { self.execution_props.alias_generator.clone() } + #[tracing::instrument(level = "trace", skip(self))] fn options(&self) -> &ConfigOptions { self.config_options() } @@ -2330,6 +2487,7 @@ impl OptimizerConfig for SessionState { /// Create a new task context instance from SessionContext impl From<&SessionContext> for TaskContext { + #[tracing::instrument(level = "trace", skip(session))] fn from(session: &SessionContext) -> Self { TaskContext::from(&*session.state.read()) } @@ -2337,6 +2495,7 @@ impl From<&SessionContext> for TaskContext { /// Create a new task context instance from SessionState impl From<&SessionState> for TaskContext { + #[tracing::instrument(level = "trace", skip(state))] fn from(state: &SessionState) -> Self { let task_id = None; TaskContext::new( @@ -2356,6 +2515,7 @@ impl From<&SessionState> for TaskContext { 
pub struct EmptySerializerRegistry; impl SerializerRegistry for EmptySerializerRegistry { + #[tracing::instrument(level = "trace", skip(self, node))] fn serialize_logical_plan( &self, node: &dyn UserDefinedLogicalNode, @@ -2366,6 +2526,7 @@ impl SerializerRegistry for EmptySerializerRegistry { ) } + #[tracing::instrument(level = "trace", skip(self, name, _bytes))] fn deserialize_logical_plan( &self, name: &str, @@ -2391,6 +2552,7 @@ pub struct SQLOptions { } impl Default for SQLOptions { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self { allow_ddl: true, @@ -2401,29 +2563,34 @@ impl Default for SQLOptions { } impl SQLOptions { + #[tracing::instrument(level = "trace", skip())] /// Create a new `SQLOptions` with default values pub fn new() -> Self { Default::default() } + #[tracing::instrument(level = "trace", skip(self, allow))] /// Should DDL data definition commands (e.g. `CREATE TABLE`) be run? Defaults to `true`. pub fn with_allow_ddl(mut self, allow: bool) -> Self { self.allow_ddl = allow; self } + #[tracing::instrument(level = "trace", skip(self, allow))] /// Should DML data modification commands (e.g. `INSERT and COPY`) be run? Defaults to `true` pub fn with_allow_dml(mut self, allow: bool) -> Self { self.allow_dml = allow; self } + #[tracing::instrument(level = "trace", skip(self, allow))] /// Should statements such as `SET VARIABLE` and `BEGIN TRANSACTION` be run? Defaults to `true` pub fn with_allow_statements(mut self, allow: bool) -> Self { self.allow_statements = allow; self } + #[tracing::instrument(level = "trace", skip(self, plan))] /// Return an error if the [`LogicalPlan`] has any nodes that are /// incompatible with this [`SQLOptions`]. pub fn verify_plan(&self, plan: &LogicalPlan) -> Result<()> { @@ -2436,6 +2603,7 @@ struct BadPlanVisitor<'a> { options: &'a SQLOptions, } impl<'a> BadPlanVisitor<'a> { + #[tracing::instrument(level = "trace", skip(options))] fn new(options: &'a SQLOptions) -> Self { Self { options } } @@ -2444,6 +2612,7 @@ impl<'a> BadPlanVisitor<'a> { impl<'a> TreeNodeVisitor for BadPlanVisitor<'a> { type Node = LogicalPlan; + #[tracing::instrument(level = "trace", skip(self, node))] fn f_down(&mut self, node: &Self::Node) -> Result { match node { LogicalPlan::Ddl(ddl) if !self.options.allow_ddl => { @@ -2689,6 +2858,7 @@ mod tests { catalog_and_schema_test(config).await; } + #[tracing::instrument(level = "trace", skip(config))] async fn catalog_and_schema_test(config: SessionConfig) { let ctx = SessionContext::new_with_config(config); let catalog = MemoryCatalogProvider::new(); @@ -2840,6 +3010,7 @@ mod tests { #[async_trait] impl PhysicalPlanner for MyPhysicalPlanner { + #[tracing::instrument(level = "trace", skip(self, _logical_plan, _session_state))] async fn create_physical_plan( &self, _logical_plan: &LogicalPlan, @@ -2848,6 +3019,7 @@ mod tests { not_impl_err!("query not supported") } + #[tracing::instrument(level = "trace", skip(self, _expr, _input_dfschema, _session_state))] fn create_physical_expr( &self, _expr: &Expr, @@ -2862,6 +3034,7 @@ mod tests { #[async_trait] impl QueryPlanner for MyQueryPlanner { + #[tracing::instrument(level = "trace", skip(self, logical_plan, session_state))] async fn create_physical_plan( &self, logical_plan: &LogicalPlan, @@ -2874,6 +3047,7 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip(tmp_dir, partition_count))] /// Generate a partitioned CSV file and register it with an execution context async fn create_ctx( tmp_dir: &TempDir, diff --git
a/datafusion/core/src/execution/context/parquet.rs b/datafusion/core/src/execution/context/parquet.rs index fef20df6e69dc..aa1c5cafc9fd5 100644 --- a/datafusion/core/src/execution/context/parquet.rs +++ b/datafusion/core/src/execution/context/parquet.rs @@ -24,6 +24,7 @@ use crate::datasource::physical_plan::parquet::plan_to_parquet; use parquet::file::properties::WriterProperties; impl SessionContext { + #[tracing::instrument(level = "trace", skip(self, table_paths, options))] /// Creates a [`DataFrame`] for reading a Parquet data source. /// /// For more control such as reading multiple files, you can use @@ -38,6 +39,7 @@ impl SessionContext { self._read_type(table_paths, options).await } + #[tracing::instrument(level = "trace", skip(self, name, table_path, options))] /// Registers a Parquet file as a table that can be referenced from SQL /// statements executed against this context. pub async fn register_parquet( @@ -60,6 +62,7 @@ impl SessionContext { Ok(()) } + #[tracing::instrument(level = "trace", skip(self, plan, path, writer_properties))] /// Executes a query and writes the results to a partitioned Parquet file. pub async fn write_parquet( &self, diff --git a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs index 1a82dac4658ca..78c9a060465c5 100644 --- a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs +++ b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs @@ -37,6 +37,7 @@ use datafusion_physical_plan::udaf::AggregateFunctionExpr; pub struct AggregateStatistics {} impl AggregateStatistics { + #[tracing::instrument(level = "trace", skip())] #[allow(missing_docs)] pub fn new() -> Self { Self {} @@ -44,6 +45,7 @@ impl AggregateStatistics { } impl PhysicalOptimizerRule for AggregateStatistics { + #[tracing::instrument(level = "trace", skip(self, plan, _config))] fn optimize( &self, plan: Arc, @@ -90,16 +92,19 @@ impl PhysicalOptimizerRule for AggregateStatistics { } } + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &str { "aggregate_statistics" } + #[tracing::instrument(level = "trace", skip(self))] /// This rule will change the nullable properties of the schema, disable the schema check. fn schema_check(&self) -> bool { false } } +#[tracing::instrument(level = "trace", skip(node))] /// assert if the node passed as argument is a final `AggregateExec` node that can be optimized: /// - its child (with possible intermediate layers) is a partial `AggregateExec` node /// - they both have no grouping expression @@ -134,6 +139,7 @@ fn take_optimizable(node: &dyn ExecutionPlan) -> Option> None } +#[tracing::instrument(level = "trace", skip(agg_expr, stats))] /// If this agg_expr is a count that can be exactly derived from the statistics, return it. fn take_optimizable_column_and_table_count( agg_expr: &dyn AggregateExpr, @@ -205,6 +211,7 @@ fn take_optimizable_column_and_table_count( None } +#[tracing::instrument(level = "trace", skip(agg_expr, stats))] /// If this agg_expr is a min that is exactly defined in the statistics, return it. fn take_optimizable_min( agg_expr: &dyn AggregateExpr, @@ -255,6 +262,7 @@ fn take_optimizable_min( None } +#[tracing::instrument(level = "trace", skip(agg_expr, stats))] /// If this agg_expr is a max that is exactly defined in the statistics, return it. 
fn take_optimizable_max( agg_expr: &dyn AggregateExpr, @@ -326,6 +334,7 @@ pub(crate) mod tests { use datafusion_physical_expr::PhysicalExpr; use datafusion_physical_plan::aggregates::AggregateMode; + #[tracing::instrument(level = "trace", skip())] /// Mock data using a MemoryExec which has an exact count statistic fn mock_data() -> Result> { let schema = Arc::new(Schema::new(vec![ @@ -348,6 +357,7 @@ pub(crate) mod tests { )?)) } + #[tracing::instrument(level = "trace", skip(plan, agg))] /// Checks that the count optimization was applied and we still get the right result async fn assert_count_optim_success( plan: AggregateExec, @@ -380,6 +390,7 @@ pub(crate) mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(batch, agg))] fn check_batch(batch: RecordBatch, agg: &TestAggregate) { let schema = batch.schema(); let fields = schema.fields(); @@ -406,14 +417,17 @@ pub(crate) mod tests { } impl TestAggregate { + #[tracing::instrument(level = "trace", skip())] pub(crate) fn new_count_star() -> Self { Self::CountStar } + #[tracing::instrument(level = "trace", skip(schema))] fn new_count_column(schema: &Arc) -> Self { Self::ColumnA(schema.clone()) } + #[tracing::instrument(level = "trace", skip(self))] /// Return appropriate expr depending if COUNT is for col or table (*) pub(crate) fn count_expr(&self) -> Arc { Arc::new(Count::new( @@ -423,6 +437,7 @@ pub(crate) mod tests { )) } + #[tracing::instrument(level = "trace", skip(self))] /// what argument would this aggregate need in the plan? fn column(&self) -> Arc { match self { @@ -431,6 +446,7 @@ pub(crate) mod tests { } } + #[tracing::instrument(level = "trace", skip(self))] /// What name would this aggregate produce in a plan? fn column_name(&self) -> &'static str { match self { @@ -439,6 +455,7 @@ pub(crate) mod tests { } } + #[tracing::instrument(level = "trace", skip(self))] /// What is the expected count? 
fn expected_count(&self) -> i64 { match self { diff --git a/datafusion/core/src/physical_optimizer/coalesce_batches.rs b/datafusion/core/src/physical_optimizer/coalesce_batches.rs index 42b7463600dcb..1a4b1f22fe7bc 100644 --- a/datafusion/core/src/physical_optimizer/coalesce_batches.rs +++ b/datafusion/core/src/physical_optimizer/coalesce_batches.rs @@ -38,12 +38,14 @@ use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; pub struct CoalesceBatches {} impl CoalesceBatches { + #[tracing::instrument(level = "trace", skip())] #[allow(missing_docs)] pub fn new() -> Self { Self::default() } } impl PhysicalOptimizerRule for CoalesceBatches { + #[tracing::instrument(level = "trace", skip(self, plan, config))] fn optimize( &self, plan: Arc, @@ -84,10 +86,12 @@ impl PhysicalOptimizerRule for CoalesceBatches { .data() } + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &str { "coalesce_batches" } + #[tracing::instrument(level = "trace", skip(self))] fn schema_check(&self) -> bool { true } diff --git a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs index 29adc71d5c5a8..c21857c390cef 100644 --- a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs +++ b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs @@ -39,6 +39,7 @@ use datafusion_physical_expr::{AggregateExpr, PhysicalExpr}; pub struct CombinePartialFinalAggregate {} impl CombinePartialFinalAggregate { + #[tracing::instrument(level = "trace", skip())] #[allow(missing_docs)] pub fn new() -> Self { Self {} @@ -46,6 +47,7 @@ impl CombinePartialFinalAggregate { } impl PhysicalOptimizerRule for CombinePartialFinalAggregate { + #[tracing::instrument(level = "trace", skip(self, plan, _config))] fn optimize( &self, plan: Arc, @@ -117,10 +119,12 @@ impl PhysicalOptimizerRule for CombinePartialFinalAggregate { .data() } + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &str { "CombinePartialFinalAggregate" } + #[tracing::instrument(level = "trace", skip(self))] fn schema_check(&self) -> bool { true } @@ -138,6 +142,7 @@ type GroupExprs = ( Vec>>, ); +#[tracing::instrument(level = "trace", skip(final_agg, partial_agg))] fn can_combine(final_agg: GroupExprsRef, partial_agg: GroupExprsRef) -> bool { let (final_group_by, final_aggr_expr, final_filter_expr) = normalize_group_exprs(final_agg); @@ -161,6 +166,7 @@ fn can_combine(final_agg: GroupExprsRef, partial_agg: GroupExprsRef) -> bool { } // To compare the group expressions between the final and partial aggregations, need to discard all the column indexes and compare +#[tracing::instrument(level = "trace", skip(group_exprs))] fn normalize_group_exprs(group_exprs: GroupExprsRef) -> GroupExprs { let (group, agg, filter) = group_exprs; let new_group_expr = group @@ -176,6 +182,7 @@ fn normalize_group_exprs(group_exprs: GroupExprsRef) -> GroupExprs { (new_group, agg.to_vec(), filter.to_vec()) } +#[tracing::instrument(level = "trace", skip(group_expr))] fn discard_column_index(group_expr: Arc) -> Arc { group_expr .clone() @@ -229,6 +236,7 @@ mod tests { }; } + #[tracing::instrument(level = "trace", skip(plan))] fn trim_plan_display(plan: &str) -> Vec<&str> { plan.split('\n') .map(|s| s.trim()) @@ -236,6 +244,7 @@ mod tests { .collect() } + #[tracing::instrument(level = "trace", skip())] fn schema() -> SchemaRef { Arc::new(Schema::new(vec![ Field::new("a", DataType::Int64, true), @@ -244,6 +253,7 @@ mod tests { ])) } + 
#[tracing::instrument(level = "trace", skip(schema))] fn parquet_exec(schema: &SchemaRef) -> Arc { Arc::new(ParquetExec::new( FileScanConfig { @@ -263,6 +273,7 @@ mod tests { )) } + #[tracing::instrument(level = "trace", skip(input, group_by, aggr_expr))] fn partial_aggregate_exec( input: Arc, group_by: PhysicalGroupBy, @@ -283,6 +294,7 @@ mod tests { ) } + #[tracing::instrument(level = "trace", skip(input, group_by, aggr_expr))] fn final_aggregate_exec( input: Arc, group_by: PhysicalGroupBy, @@ -303,6 +315,7 @@ mod tests { ) } + #[tracing::instrument(level = "trace", skip(input))] fn repartition_exec(input: Arc) -> Arc { Arc::new( RepartitionExec::try_new(input, Partitioning::RoundRobinBatch(10)).unwrap(), diff --git a/datafusion/core/src/physical_optimizer/convert_first_last.rs b/datafusion/core/src/physical_optimizer/convert_first_last.rs index 62537169cfc6e..8c7a1b28ca007 100644 --- a/datafusion/core/src/physical_optimizer/convert_first_last.rs +++ b/datafusion/core/src/physical_optimizer/convert_first_last.rs @@ -49,12 +49,14 @@ use super::PhysicalOptimizerRule; pub struct OptimizeAggregateOrder {} impl OptimizeAggregateOrder { + #[tracing::instrument(level = "trace", skip())] pub fn new() -> Self { Self::default() } } impl PhysicalOptimizerRule for OptimizeAggregateOrder { + #[tracing::instrument(level = "trace", skip(self, plan, _config))] fn optimize( &self, plan: Arc, @@ -64,15 +66,18 @@ impl PhysicalOptimizerRule for OptimizeAggregateOrder { .data() } + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &str { "OptimizeAggregateOrder" } + #[tracing::instrument(level = "trace", skip(self))] fn schema_check(&self) -> bool { true } } +#[tracing::instrument(level = "trace", skip(plan))] fn get_common_requirement_of_aggregate_input( plan: Arc, ) -> Result>> { @@ -138,6 +143,7 @@ fn get_common_requirement_of_aggregate_input( } } +#[tracing::instrument(level = "trace", skip(aggr_exec))] /// In `create_initial_plan` for LogicalPlan::Aggregate, we have a nested AggregateExec where the first layer /// is in Partial mode and the second layer is in Final or Finalpartitioned mode. /// If the first layer of aggregate plan is transformed, we need to update the child of the layer with final mode. @@ -173,6 +179,7 @@ fn try_get_updated_aggr_expr_from_child( aggr_exec.aggr_expr().to_vec() } +#[tracing::instrument(level = "trace", skip(prefix_requirement, aggr_exprs, eq_properties))] /// Get the common requirement that satisfies all the aggregate expressions. 
/// /// # Parameters diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index c4db08c1cb679..e8746af93fc70 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -179,6 +179,7 @@ use itertools::izip; pub struct EnforceDistribution {} impl EnforceDistribution { + #[tracing::instrument(level = "trace", skip())] #[allow(missing_docs)] pub fn new() -> Self { Self {} @@ -186,6 +187,7 @@ impl EnforceDistribution { } impl PhysicalOptimizerRule for EnforceDistribution { + #[tracing::instrument(level = "trace", skip(self, plan, config))] fn optimize( &self, plan: Arc, @@ -218,15 +220,18 @@ impl PhysicalOptimizerRule for EnforceDistribution { Ok(distribution_context.plan) } + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &str { "EnforceDistribution" } + #[tracing::instrument(level = "trace", skip(self))] fn schema_check(&self) -> bool { true } } +#[tracing::instrument(level = "trace", skip(requirements))] /// When the physical planner creates the Joins, the ordering of join keys is from the original query. /// That might not match with the output partitioning of the join node's children /// A Top-Down process will use this method to adjust children's output partitioning based on the parent key reordering requirements: @@ -417,6 +422,7 @@ fn adjust_input_keys_ordering( Ok(Transformed::yes(requirements)) } +#[tracing::instrument(level = "trace", skip(join_plan, on, sort_options, join_constructor))] fn reorder_partitioned_join_keys( mut join_plan: PlanWithKeyRequirements, on: &[(PhysicalExprRef, PhysicalExprRef)], @@ -455,6 +461,7 @@ where Ok(join_plan) } +#[tracing::instrument(level = "trace", skip(agg_node, agg_exec))] fn reorder_aggregate_keys( mut agg_node: PlanWithKeyRequirements, agg_exec: &AggregateExec, @@ -551,6 +558,7 @@ fn reorder_aggregate_keys( Ok(agg_node) } +#[tracing::instrument(level = "trace", skip(parent_required, left_columns_len))] fn shift_right_required( parent_required: &[Arc], left_columns_len: usize, @@ -570,6 +578,7 @@ fn shift_right_required( (new_right_required.len() == parent_required.len()).then_some(new_right_required) } +#[tracing::instrument(level = "trace", skip(plan))] /// When the physical planner creates the Joins, the ordering of join keys is from the original query. /// That might not match with the output partitioning of the join node's children /// This method will try to change the ordering of the join keys to match with the @@ -676,6 +685,7 @@ pub(crate) fn reorder_join_keys_to_inputs( Ok(plan) } +#[tracing::instrument(level = "trace", skip(join_keys, left_partition, right_partition, left_equivalence_properties, right_equivalence_properties))] /// Reorder the current join keys ordering based on either left partition or right partition fn reorder_current_join_keys( join_keys: JoinKeyPairs, @@ -704,6 +714,7 @@ fn reorder_current_join_keys( } } +#[tracing::instrument(level = "trace", skip(join_keys, expected, equivalence_properties))] fn try_reorder( join_keys: JoinKeyPairs, expected: &[Arc], @@ -769,6 +780,7 @@ fn try_reorder( (pairs, Some(positions)) } +#[tracing::instrument(level = "trace", skip(current, expected))] /// Return the expected expressions positions. 
/// For example, the current expressions are ['c', 'a', 'a', b'], the expected expressions are ['b', 'c', 'a', 'a'], /// @@ -794,6 +806,7 @@ fn expected_expr_positions( Some(indexes) } +#[tracing::instrument(level = "trace", skip(on))] fn extract_join_keys(on: &[(PhysicalExprRef, PhysicalExprRef)]) -> JoinKeyPairs { let (left_keys, right_keys) = on .iter() @@ -805,6 +818,7 @@ fn extract_join_keys(on: &[(PhysicalExprRef, PhysicalExprRef)]) -> JoinKeyPairs } } +#[tracing::instrument(level = "trace", skip(new_left_keys, new_right_keys))] fn new_join_conditions( new_left_keys: &[Arc], new_right_keys: &[Arc], @@ -816,6 +830,7 @@ fn new_join_conditions( .collect() } +#[tracing::instrument(level = "trace", skip(input, n_target))] /// Adds RoundRobin repartition operator to the plan increase parallelism. /// /// # Arguments @@ -853,6 +868,7 @@ fn add_roundrobin_on_top( } } +#[tracing::instrument(level = "trace", skip(input, hash_exprs, n_target))] /// Adds a hash repartition operator: /// - to increase parallelism, and/or /// - to satisfy requirements of the subsequent operators. @@ -908,6 +924,7 @@ fn add_hash_on_top( Ok(input) } +#[tracing::instrument(level = "trace", skip(input))] /// Adds a [`SortPreservingMergeExec`] operator on top of input executor /// to satisfy single distribution requirement. /// @@ -945,6 +962,7 @@ fn add_spm_on_top(input: DistributionContext) -> DistributionContext { } } +#[tracing::instrument(level = "trace", skip(distribution_context))] /// Updates the physical plan inside [`DistributionContext`] so that distribution /// changing operators are removed from the top. If they are necessary, they will /// be added in subsequent stages. @@ -978,6 +996,7 @@ fn remove_dist_changing_operators( Ok(distribution_context) } +#[tracing::instrument(level = "trace", skip(context))] /// Updates the [`DistributionContext`] if preserving ordering while changing partitioning is not helpful or desirable. /// /// Assume that following plan is given: @@ -1030,6 +1049,7 @@ fn replace_order_preserving_variants( context.update_plan_from_children() } +#[tracing::instrument(level = "trace", skip(dist_context, config))] /// This function checks whether we need to add additional data exchange /// operators to satisfy distribution requirements. Since this function /// takes care of such requirements, we should avoid manually adding data @@ -1236,6 +1256,7 @@ fn ensure_distribution( /// necessary. type DistributionContext = PlanContext; +#[tracing::instrument(level = "trace", skip(dist_context))] fn update_children(mut dist_context: DistributionContext) -> Result { for child_context in dist_context.children.iter_mut() { let child_plan_any = child_context.plan.as_any(); @@ -1322,6 +1343,7 @@ pub(crate) mod tests { } impl SortRequiredExec { + #[tracing::instrument(level = "trace", skip(input, requirement))] fn new_with_requirement( input: Arc, requirement: Vec, @@ -1334,6 +1356,7 @@ pub(crate) mod tests { } } + #[tracing::instrument(level = "trace", skip(input))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties(input: &Arc) -> PlanProperties { PlanProperties::new( @@ -1345,6 +1368,7 @@ pub(crate) mod tests { } impl DisplayAs for SortRequiredExec { + #[tracing::instrument(level = "trace", skip(self, _t, f))] fn fmt_as( &self, _t: DisplayFormatType, @@ -1359,27 +1383,33 @@ pub(crate) mod tests { } impl ExecutionPlan for SortRequiredExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "SortRequiredExec" } + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn std::any::Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn benefits_from_input_partitioning(&self) -> Vec { vec![false] } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.input.clone()] } // model that it requires the output ordering of its input + #[tracing::instrument(level = "trace", skip(self))] fn required_input_ordering(&self) -> Vec>> { if self.expr.is_empty() { vec![None] @@ -1388,6 +1418,7 @@ pub(crate) mod tests { } } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, mut children: Vec>, @@ -1400,6 +1431,7 @@ pub(crate) mod tests { ))) } + #[tracing::instrument(level = "trace", skip(self, _partition, _context))] fn execute( &self, _partition: usize, @@ -1408,11 +1440,13 @@ pub(crate) mod tests { unreachable!(); } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { self.input.statistics() } } + #[tracing::instrument(level = "trace", skip())] pub(crate) fn schema() -> SchemaRef { Arc::new(Schema::new(vec![ Field::new("a", DataType::Int64, true), @@ -1423,10 +1457,12 @@ pub(crate) mod tests { ])) } + #[tracing::instrument(level = "trace", skip())] fn parquet_exec() -> Arc { parquet_exec_with_sort(vec![]) } + #[tracing::instrument(level = "trace", skip(output_ordering))] /// create a single parquet file that is sorted pub(crate) fn parquet_exec_with_sort( output_ordering: Vec>, @@ -1449,10 +1485,12 @@ pub(crate) mod tests { )) } + #[tracing::instrument(level = "trace", skip())] fn parquet_exec_multiple() -> Arc { parquet_exec_multiple_sorted(vec![]) } + #[tracing::instrument(level = "trace", skip(output_ordering))] /// Created a sorted parquet exec with multiple files fn parquet_exec_multiple_sorted( output_ordering: Vec>, @@ -1478,10 +1516,12 @@ pub(crate) mod tests { )) } + #[tracing::instrument(level = "trace", skip())] fn csv_exec() -> Arc { csv_exec_with_sort(vec![]) } + #[tracing::instrument(level = "trace", skip(output_ordering))] fn csv_exec_with_sort(output_ordering: Vec>) -> Arc { Arc::new(CsvExec::new( FileScanConfig { @@ -1503,11 +1543,13 @@ pub(crate) mod tests { )) } + #[tracing::instrument(level = "trace", skip())] fn csv_exec_multiple() -> Arc { csv_exec_multiple_sorted(vec![]) } // Created a sorted parquet exec with multiple files + #[tracing::instrument(level = "trace", skip(output_ordering))] fn csv_exec_multiple_sorted( output_ordering: Vec>, ) -> Arc { @@ -1534,6 +1576,7 @@ pub(crate) mod tests { )) } + #[tracing::instrument(level = "trace", skip(input, alias_pairs))] fn projection_exec_with_alias( input: Arc, alias_pairs: Vec<(String, String)>, @@ -1545,6 +1588,7 @@ pub(crate) mod tests { Arc::new(ProjectionExec::try_new(exprs, input).unwrap()) } + #[tracing::instrument(level = "trace", skip(input, alias_pairs))] fn aggregate_exec_with_alias( input: Arc, alias_pairs: Vec<(String, 
String)>, @@ -1593,6 +1637,7 @@ pub(crate) mod tests { ) } + #[tracing::instrument(level = "trace", skip(left, right, join_on, join_type))] fn hash_join_exec( left: Arc, right: Arc, @@ -1614,6 +1659,7 @@ pub(crate) mod tests { ) } + #[tracing::instrument(level = "trace", skip(left, right, join_on, join_type))] fn sort_merge_join_exec( left: Arc, right: Arc, @@ -1634,6 +1680,7 @@ pub(crate) mod tests { ) } + #[tracing::instrument(level = "trace", skip(input))] fn filter_exec(input: Arc) -> Arc { let predicate = Arc::new(BinaryExpr::new( col("c", &schema()).unwrap(), @@ -1643,6 +1690,7 @@ pub(crate) mod tests { Arc::new(FilterExec::try_new(predicate, input).unwrap()) } + #[tracing::instrument(level = "trace", skip(sort_exprs, input, preserve_partitioning))] fn sort_exec( sort_exprs: Vec, input: Arc, @@ -1653,6 +1701,7 @@ pub(crate) mod tests { Arc::new(new_sort) } + #[tracing::instrument(level = "trace", skip(sort_exprs, input))] fn sort_preserving_merge_exec( sort_exprs: Vec, input: Arc, @@ -1660,6 +1709,7 @@ pub(crate) mod tests { Arc::new(SortPreservingMergeExec::new(sort_exprs, input)) } + #[tracing::instrument(level = "trace", skip(input))] fn limit_exec(input: Arc) -> Arc { Arc::new(GlobalLimitExec::new( Arc::new(LocalLimitExec::new(input, 100)), @@ -1668,10 +1718,12 @@ pub(crate) mod tests { )) } + #[tracing::instrument(level = "trace", skip(input))] fn union_exec(input: Vec>) -> Arc { Arc::new(UnionExec::new(input)) } + #[tracing::instrument(level = "trace", skip(input, sort_exprs))] fn sort_required_exec_with_req( input: Arc, sort_exprs: LexOrdering, @@ -1679,6 +1731,7 @@ pub(crate) mod tests { Arc::new(SortRequiredExec::new_with_requirement(input, sort_exprs)) } + #[tracing::instrument(level = "trace", skip(plan))] pub(crate) fn trim_plan_display(plan: &str) -> Vec<&str> { plan.split('\n') .map(|s| s.trim()) @@ -1686,6 +1739,7 @@ pub(crate) mod tests { .collect() } + #[tracing::instrument(level = "trace", skip(plan, target_partitions, prefer_existing_sort))] fn ensure_distribution_helper( plan: Arc, target_partitions: usize, diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index bc435626c6a99..1fe5a3950f382 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -74,6 +74,7 @@ use itertools::izip; pub struct EnforceSorting {} impl EnforceSorting { + #[tracing::instrument(level = "trace", skip())] #[allow(missing_docs)] pub fn new() -> Self { Self {} @@ -86,6 +87,7 @@ impl EnforceSorting { /// via its children. type PlanWithCorrespondingSort = PlanContext; +#[tracing::instrument(level = "trace", skip(node, data))] fn update_sort_ctx_children( mut node: PlanWithCorrespondingSort, data: bool, @@ -124,6 +126,7 @@ fn update_sort_ctx_children( /// connected to a `CoalescePartitionsExec` via its children. type PlanWithCorrespondingCoalescePartitions = PlanContext; +#[tracing::instrument(level = "trace", skip(coalesce_context))] fn update_coalesce_ctx_children( coalesce_context: &mut PlanWithCorrespondingCoalescePartitions, ) { @@ -152,6 +155,7 @@ fn update_coalesce_ctx_children( /// into [`SortExec`] + [`SortPreservingMergeExec`] cascades, which enables us to /// perform sorting in parallel. 
impl PhysicalOptimizerRule for EnforceSorting { + #[tracing::instrument(level = "trace", skip(self, plan, config))] fn optimize( &self, plan: Arc, @@ -196,15 +200,18 @@ impl PhysicalOptimizerRule for EnforceSorting { .data() } + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &str { "EnforceSorting" } + #[tracing::instrument(level = "trace", skip(self))] fn schema_check(&self) -> bool { true } } +#[tracing::instrument(level = "trace", skip(plan))] fn replace_with_partial_sort( plan: Arc, ) -> Result> { @@ -241,6 +248,7 @@ fn replace_with_partial_sort( Ok(plan) } +#[tracing::instrument(level = "trace", skip(requirements))] /// This function turns plans of the form /// ```text /// "SortExec: expr=\[a@0 ASC\]", @@ -314,6 +322,7 @@ fn parallelize_sorts( } } +#[tracing::instrument(level = "trace", skip(requirements))] /// This function enforces sorting requirements and makes optimizations without /// violating these requirements whenever possible. fn ensure_sorting( @@ -380,6 +389,7 @@ fn ensure_sorting( update_sort_ctx_children(requirements, false).map(Transformed::yes) } +#[tracing::instrument(level = "trace", skip(node))] /// Analyzes a given [`SortExec`] (`plan`) to determine whether its input /// already has a finer ordering than it enforces. fn analyze_immediate_sort_removal( @@ -413,6 +423,7 @@ fn analyze_immediate_sort_removal( Transformed::no(node) } +#[tracing::instrument(level = "trace", skip(window_tree))] /// Adjusts a [`WindowAggExec`] or a [`BoundedWindowAggExec`] to determine /// whether it may allow removing a sort. fn adjust_window_sort_removal( @@ -483,6 +494,7 @@ fn adjust_window_sort_removal( Ok(window_tree) } +#[tracing::instrument(level = "trace", skip(requirements))] /// Removes the [`CoalescePartitionsExec`] from the plan in `node`. fn remove_corresponding_coalesce_in_sub_plan( mut requirements: PlanWithCorrespondingCoalescePartitions, @@ -516,6 +528,7 @@ fn remove_corresponding_coalesce_in_sub_plan( requirements.update_plan_from_children() } +#[tracing::instrument(level = "trace", skip(child_idx, node, parent))] /// Updates child to remove the unnecessary sort below it. fn update_child_to_remove_unnecessary_sort( child_idx: usize, @@ -533,6 +546,7 @@ fn update_child_to_remove_unnecessary_sort( Ok(node) } +#[tracing::instrument(level = "trace", skip(node, requires_single_partition))] /// Removes the sort from the plan in `node`. fn remove_corresponding_sort_from_sub_plan( mut node: PlanWithCorrespondingSort, @@ -595,6 +609,7 @@ fn remove_corresponding_sort_from_sub_plan( Ok(node) } +#[tracing::instrument(level = "trace", skip(sort_any))] /// Converts an [ExecutionPlan] trait object to a [PhysicalSortExpr] slice when possible. 
fn get_sort_exprs( sort_any: &Arc, @@ -633,6 +648,7 @@ mod tests { use rstest::rstest; + #[tracing::instrument(level = "trace", skip())] fn create_test_schema() -> Result { let nullable_column = Field::new("nullable_col", DataType::Int32, true); let non_nullable_column = Field::new("non_nullable_col", DataType::Int32, false); @@ -640,6 +656,7 @@ mod tests { Ok(schema) } + #[tracing::instrument(level = "trace", skip())] fn create_test_schema2() -> Result { let col_a = Field::new("col_a", DataType::Int32, true); let col_b = Field::new("col_b", DataType::Int32, true); @@ -648,6 +665,7 @@ mod tests { } // Generate a schema which consists of 5 columns (a, b, c, d, e) + #[tracing::instrument(level = "trace", skip())] fn create_test_schema3() -> Result { let a = Field::new("a", DataType::Int32, true); let b = Field::new("b", DataType::Int32, false); diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs index 135a59aa03530..1b78bfe3d6a15 100644 --- a/datafusion/core/src/physical_optimizer/join_selection.rs +++ b/datafusion/core/src/physical_optimizer/join_selection.rs @@ -50,6 +50,7 @@ use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; pub struct JoinSelection {} impl JoinSelection { + #[tracing::instrument(level = "trace", skip())] #[allow(missing_docs)] pub fn new() -> Self { Self {} @@ -58,6 +59,7 @@ impl JoinSelection { // TODO: We need some performance test for Right Semi/Right Join swap to Left Semi/Left Join in case that the right side is smaller but not much smaller. // TODO: In PrestoSQL, the optimizer flips join sides only if one side is much smaller than the other by more than SIZE_DIFFERENCE_THRESHOLD times, by default is 8 times. +#[tracing::instrument(level = "trace", skip(left, right))] /// Checks statistics for join swap. fn should_swap_join_order( left: &dyn ExecutionPlan, @@ -85,6 +87,7 @@ fn should_swap_join_order( } } +#[tracing::instrument(level = "trace", skip(plan, threshold_byte_size, threshold_num_rows))] fn supports_collect_by_thresholds( plan: &dyn ExecutionPlan, threshold_byte_size: usize, @@ -105,6 +108,7 @@ fn supports_collect_by_thresholds( } } +#[tracing::instrument(level = "trace", skip(join_type))] /// Predicate that checks whether the given join type supports input swapping. fn supports_swap(join_type: JoinType) -> bool { matches!( @@ -120,6 +124,7 @@ fn supports_swap(join_type: JoinType) -> bool { ) } +#[tracing::instrument(level = "trace", skip(join_type))] /// This function returns the new join type we get after swapping the given /// join's inputs. fn swap_join_type(join_type: JoinType) -> JoinType { @@ -135,6 +140,7 @@ fn swap_join_type(join_type: JoinType) -> JoinType { } } +#[tracing::instrument(level = "trace", skip(left_schema_len, right_schema_len, projection))] /// This function swaps the given join's projection. fn swap_join_projection( left_schema_len: usize, @@ -156,6 +162,7 @@ fn swap_join_projection( }) } +#[tracing::instrument(level = "trace", skip(hash_join, partition_mode))] /// This function swaps the inputs of the given join operator. 
fn swap_hash_join( hash_join: &HashJoinExec, @@ -199,6 +206,7 @@ fn swap_hash_join( } } +#[tracing::instrument(level = "trace", skip(join))] /// Swaps inputs of `NestedLoopJoinExec` and wraps it into `ProjectionExec` is required fn swap_nl_join(join: &NestedLoopJoinExec) -> Result> { let new_filter = swap_join_filter(join.filter()); @@ -231,6 +239,7 @@ fn swap_nl_join(join: &NestedLoopJoinExec) -> Result> { Ok(plan) } +#[tracing::instrument(level = "trace", skip(left_schema, right_schema))] /// When the order of the join is changed by the optimizer, the columns in /// the output should not be impacted. This function creates the expressions /// that will allow to swap back the values from the original left as the first @@ -256,6 +265,7 @@ fn swap_reverting_projection( left_cols.chain(right_cols).collect() } +#[tracing::instrument(level = "trace", skip(filter))] /// Swaps join sides for filter column indices and produces new JoinFilter fn swap_filter(filter: &JoinFilter) -> JoinFilter { let column_indices = filter @@ -274,12 +284,14 @@ fn swap_filter(filter: &JoinFilter) -> JoinFilter { ) } +#[tracing::instrument(level = "trace", skip(filter))] /// Swaps join sides for filter column indices and produces new `JoinFilter` (if exists). fn swap_join_filter(filter: Option<&JoinFilter>) -> Option { filter.map(swap_filter) } impl PhysicalOptimizerRule for JoinSelection { + #[tracing::instrument(level = "trace", skip(self, plan, config))] fn optimize( &self, plan: Arc, @@ -322,15 +334,18 @@ impl PhysicalOptimizerRule for JoinSelection { .data() } + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &str { "join_selection" } + #[tracing::instrument(level = "trace", skip(self))] fn schema_check(&self) -> bool { true } } +#[tracing::instrument(level = "trace", skip(hash_join, ignore_threshold, threshold_byte_size, threshold_num_rows))] /// Tries to create a [`HashJoinExec`] in [`PartitionMode::CollectLeft`] when possible. /// /// This function will first consider the given join type and check whether the @@ -399,6 +414,7 @@ fn try_collect_left( } } +#[tracing::instrument(level = "trace", skip(hash_join))] fn partitioned_hash_join(hash_join: &HashJoinExec) -> Result> { let left = hash_join.left(); let right = hash_join.right(); @@ -419,6 +435,7 @@ fn partitioned_hash_join(hash_join: &HashJoinExec) -> Result, &ConfigOptions) -> Result>; +#[tracing::instrument(level = "trace", skip(input, config_options))] /// Converts a hash join to a symmetric hash join in the case of infinite inputs on both sides. /// /// This subrule checks if a hash join can be replaced with a symmetric hash join when dealing @@ -598,6 +616,7 @@ fn hash_join_convert_symmetric_subrule( Ok(input) } +#[tracing::instrument(level = "trace", skip(input, _config_options))] /// This subrule will swap build/probe sides of a hash join depending on whether /// one of its inputs may produce an infinite stream of records. The rule ensures /// that the left (build) side of the hash join always operates on an input stream @@ -660,6 +679,7 @@ fn hash_join_swap_subrule( Ok(input) } +#[tracing::instrument(level = "trace", skip(hash_join))] /// This function swaps sides of a hash join to make it runnable even if one of /// its inputs are infinite. Note that this is not always possible; i.e. 
/// [`JoinType::Full`], [`JoinType::Right`], [`JoinType::RightAnti`] and @@ -687,6 +707,7 @@ fn swap_join_according_to_unboundedness( } } +#[tracing::instrument(level = "trace", skip(input, subrules, config_options))] /// Apply given `PipelineFixerSubrule`s to a given plan. This plan, along with /// auxiliary boundedness information, is in the `PipelineStatePropagator` object. fn apply_subrules( @@ -717,6 +738,7 @@ mod tests_statistical { use rstest::rstest; + #[tracing::instrument(level = "trace", skip())] /// Return statistics for empty table fn empty_statistics() -> Statistics { Statistics { @@ -726,6 +748,7 @@ mod tests_statistical { } } + #[tracing::instrument(level = "trace", skip())] /// Get table thresholds: (num_rows, byte_size) fn get_thresholds() -> (usize, usize) { let optimizer_options = ConfigOptions::new().optimizer; @@ -735,6 +758,7 @@ mod tests_statistical { ) } + #[tracing::instrument(level = "trace", skip())] /// Return statistics for small table fn small_statistics() -> Statistics { let (threshold_num_rows, threshold_byte_size) = get_thresholds(); @@ -745,6 +769,7 @@ mod tests_statistical { } } + #[tracing::instrument(level = "trace", skip())] /// Return statistics for big table fn big_statistics() -> Statistics { let (threshold_num_rows, threshold_byte_size) = get_thresholds(); @@ -755,6 +780,7 @@ mod tests_statistical { } } + #[tracing::instrument(level = "trace", skip())] /// Return statistics for bigger table fn bigger_statistics() -> Statistics { let (threshold_num_rows, threshold_byte_size) = get_thresholds(); @@ -765,6 +791,7 @@ mod tests_statistical { } } + #[tracing::instrument(level = "trace", skip())] fn create_big_and_small() -> (Arc, Arc) { let big = Arc::new(StatisticsExec::new( big_statistics(), @@ -778,6 +805,7 @@ mod tests_statistical { (big, small) } + #[tracing::instrument(level = "trace", skip(min, max, distinct_count))] /// Create a column statistics vector for a single column /// that has the given min/max/distinct_count properties.
/// @@ -802,6 +830,7 @@ mod tests_statistical { }] } + #[tracing::instrument(level = "trace", skip())] /// Create join filter for NLJoinExec with expression `big_col > small_col` /// where both columns are 0-indexed and come from left and right inputs respectively fn nl_join_filter() -> Option { @@ -831,6 +860,7 @@ mod tests_statistical { )) } + #[tracing::instrument(level = "trace", skip())] /// Returns three plans with statistics of (min, max, distinct_count) /// * big 100K rows @ (0, 50k, 50k) /// * medium 10K rows @ (1k, 5k, 1k) @@ -1311,6 +1341,7 @@ mod tests_statistical { assert_col_expr(col, "c", 0); } + #[tracing::instrument(level = "trace", skip(expr, name, index))] fn assert_col_expr(expr: &Arc, name: &str, index: usize) { let col = expr .as_any() @@ -1448,6 +1479,7 @@ mod tests_statistical { check_join_partition_mode(big, empty, join_on, false, PartitionMode::Partitioned); } + #[tracing::instrument(level = "trace", skip(left, right, on, is_swapped, expected_mode))] fn check_join_partition_mode( left: Arc, right: Arc, @@ -1875,6 +1907,7 @@ mod hash_join_tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(t))] async fn test_join_with_maybe_swap_unbounded_case(t: TestCase) -> Result<()> { let left_unbounded = t.initial_sources_unbounded.0 == SourceType::Unbounded; let right_unbounded = t.initial_sources_unbounded.1 == SourceType::Unbounded; diff --git a/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs index 950bb3c8eeb22..b33481a1a428e 100644 --- a/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs +++ b/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs @@ -38,11 +38,13 @@ use itertools::Itertools; pub struct LimitedDistinctAggregation {} impl LimitedDistinctAggregation { + #[tracing::instrument(level = "trace", skip())] /// Create a new `LimitedDistinctAggregation` pub fn new() -> Self { Self {} } + #[tracing::instrument(level = "trace", skip(aggr, limit))] fn transform_agg( aggr: &AggregateExec, limit: usize, @@ -66,6 +68,7 @@ impl LimitedDistinctAggregation { Some(Arc::new(new_aggr)) } + #[tracing::instrument(level = "trace", skip(plan))] /// transform_limit matches an `AggregateExec` as the child of a `LocalLimitExec` /// or `GlobalLimitExec` and pushes the limit into the aggregation as a soft limit when /// there is a group by, but no sorting, no aggregate expressions, and no filters in the @@ -151,12 +154,14 @@ impl LimitedDistinctAggregation { } impl Default for LimitedDistinctAggregation { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self::new() } } impl PhysicalOptimizerRule for LimitedDistinctAggregation { + #[tracing::instrument(level = "trace", skip(self, plan, config))] fn optimize( &self, plan: Arc, @@ -180,10 +185,12 @@ impl PhysicalOptimizerRule for LimitedDistinctAggregation { } } + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &str { "LimitedDistinctAggregation" } + #[tracing::instrument(level = "trace", skip(self))] fn schema_check(&self) -> bool { true } @@ -215,6 +222,7 @@ mod tests { use datafusion_physical_plan::aggregates::AggregateMode; use datafusion_physical_plan::displayable; + #[tracing::instrument(level = "trace", skip())] fn mock_data() -> Result> { let schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Int32, true), @@ -250,6 +258,7 @@ mod tests { )?)) } + #[tracing::instrument(level = "trace", skip(plan, expected))] fn 
assert_plan_matches_expected( plan: &Arc, expected: &[&str], @@ -273,6 +282,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(plan, expected))] async fn assert_results_match_expected( plan: Arc, expected: &str, @@ -285,6 +295,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(input_schema, columns))] pub fn build_group_by( input_schema: &SchemaRef, columns: Vec, diff --git a/datafusion/core/src/physical_optimizer/optimizer.rs b/datafusion/core/src/physical_optimizer/optimizer.rs index 416985983dfef..f5b5b94199511 100644 --- a/datafusion/core/src/physical_optimizer/optimizer.rs +++ b/datafusion/core/src/physical_optimizer/optimizer.rs @@ -67,12 +67,14 @@ pub struct PhysicalOptimizer { } impl Default for PhysicalOptimizer { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self::new() } } impl PhysicalOptimizer { + #[tracing::instrument(level = "trace", skip())] /// Create a new optimizer using the recommended list of rules pub fn new() -> Self { let rules: Vec> = vec![ @@ -134,6 +136,7 @@ impl PhysicalOptimizer { Self::with_rules(rules) } + #[tracing::instrument(level = "trace", skip(rules))] /// Create a new optimizer with the given rules pub fn with_rules(rules: Vec>) -> Self { Self { rules } diff --git a/datafusion/core/src/physical_optimizer/output_requirements.rs b/datafusion/core/src/physical_optimizer/output_requirements.rs index 5bf86e88d6464..dfec34da1293b 100644 --- a/datafusion/core/src/physical_optimizer/output_requirements.rs +++ b/datafusion/core/src/physical_optimizer/output_requirements.rs @@ -50,6 +50,7 @@ pub struct OutputRequirements { } impl OutputRequirements { + #[tracing::instrument(level = "trace", skip())] /// Create a new rule which works in `Add` mode; i.e. it simply adds a /// top-level [`OutputRequirementExec`] into the physical plan to keep track /// of global ordering and distribution requirements if there are any. @@ -60,6 +61,7 @@ impl OutputRequirements { } } + #[tracing::instrument(level = "trace", skip())] /// Create a new rule which works in `Remove` mode; i.e. it simply removes /// the top-level [`OutputRequirementExec`] from the physical plan if there is /// any. We do this because a `OutputRequirementExec` is an ancillary, @@ -94,6 +96,7 @@ pub(crate) struct OutputRequirementExec { } impl OutputRequirementExec { + #[tracing::instrument(level = "trace", skip(input, requirements, dist_requirement))] pub(crate) fn new( input: Arc, requirements: Option, @@ -108,10 +111,12 @@ impl OutputRequirementExec { } } + #[tracing::instrument(level = "trace", skip(self))] pub(crate) fn input(&self) -> Arc { self.input.clone() } + #[tracing::instrument(level = "trace", skip(input))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties(input: &Arc) -> PlanProperties { PlanProperties::new( @@ -123,6 +128,7 @@ impl OutputRequirementExec { } impl DisplayAs for OutputRequirementExec { + #[tracing::instrument(level = "trace", skip(self, _t, f))] fn fmt_as( &self, _t: DisplayFormatType, @@ -133,38 +139,47 @@ impl DisplayAs for OutputRequirementExec { } impl ExecutionPlan for OutputRequirementExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "OutputRequirementExec" } + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn std::any::Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn benefits_from_input_partitioning(&self) -> Vec { vec![false] } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_distribution(&self) -> Vec { vec![self.dist_requirement.clone()] } + #[tracing::instrument(level = "trace", skip(self))] fn maintains_input_order(&self) -> Vec { vec![true] } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.input.clone()] } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_ordering(&self) -> Vec>> { vec![self.order_requirement.clone()] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, mut children: Vec>, @@ -176,6 +191,7 @@ impl ExecutionPlan for OutputRequirementExec { ))) } + #[tracing::instrument(level = "trace", skip(self, _partition, _context))] fn execute( &self, _partition: usize, @@ -184,12 +200,14 @@ impl ExecutionPlan for OutputRequirementExec { unreachable!(); } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { self.input.statistics() } } impl PhysicalOptimizerRule for OutputRequirements { + #[tracing::instrument(level = "trace", skip(self, plan, _config))] fn optimize( &self, plan: Arc, @@ -211,15 +229,18 @@ impl PhysicalOptimizerRule for OutputRequirements { } } + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &str { "OutputRequirements" } + #[tracing::instrument(level = "trace", skip(self))] fn schema_check(&self) -> bool { true } } +#[tracing::instrument(level = "trace", skip(plan))] /// This functions adds ancillary `OutputRequirementExec` to the physical plan, so that /// global requirements are not lost during optimization. fn require_top_ordering(plan: Arc) -> Result> { @@ -237,6 +258,7 @@ fn require_top_ordering(plan: Arc) -> Result Self { Self {} @@ -45,6 +46,7 @@ impl PipelineChecker { } impl PhysicalOptimizerRule for PipelineChecker { + #[tracing::instrument(level = "trace", skip(self, plan, config))] fn optimize( &self, plan: Arc, @@ -54,15 +56,18 @@ impl PhysicalOptimizerRule for PipelineChecker { .data() } + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &str { "PipelineChecker" } + #[tracing::instrument(level = "trace", skip(self))] fn schema_check(&self) -> bool { true } } +#[tracing::instrument(level = "trace", skip(input, optimizer_options))] /// This function propagates finiteness information and rejects any plan with /// pipeline-breaking operators acting on infinite inputs. pub fn check_finiteness_requirements( @@ -87,6 +92,7 @@ pub fn check_finiteness_requirements( } } +#[tracing::instrument(level = "trace", skip(join))] /// This function returns whether a given symmetric hash join is amenable to /// data pruning. 
For this to be possible, it needs to have a filter where /// all involved [`PhysicalExpr`]s, [`Operator`]s and data types support diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index e94bc985dee45..a9c2b10701835 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -63,6 +63,7 @@ use itertools::Itertools; pub struct ProjectionPushdown {} impl ProjectionPushdown { + #[tracing::instrument(level = "trace", skip())] #[allow(missing_docs)] pub fn new() -> Self { Self {} @@ -70,6 +71,7 @@ impl ProjectionPushdown { } impl PhysicalOptimizerRule for ProjectionPushdown { + #[tracing::instrument(level = "trace", skip(self, plan, _config))] fn optimize( &self, plan: Arc, @@ -78,15 +80,18 @@ impl PhysicalOptimizerRule for ProjectionPushdown { plan.transform_down(remove_unnecessary_projections).data() } + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &str { "ProjectionPushdown" } + #[tracing::instrument(level = "trace", skip(self))] fn schema_check(&self) -> bool { true } } +#[tracing::instrument(level = "trace", skip(plan))] /// This function checks if `plan` is a [`ProjectionExec`], and inspects its /// input(s) to test whether it can push `plan` under its input(s). This function /// will operate on the entire tree and may ultimately remove `plan` entirely @@ -161,6 +166,7 @@ pub fn remove_unnecessary_projections( Ok(maybe_modified.map_or(Transformed::no(plan), Transformed::yes)) } +#[tracing::instrument(level = "trace", skip(projection, csv))] /// Tries to embed `projection` to its input (`csv`). If possible, returns /// [`CsvExec`] as the top plan. Otherwise, returns `None`. fn try_swapping_with_csv( @@ -190,6 +196,7 @@ fn try_swapping_with_csv( }) } +#[tracing::instrument(level = "trace", skip(projection, memory))] /// Tries to embed `projection` to its input (`memory`). If possible, returns /// [`MemoryExec`] as the top plan. Otherwise, returns `None`. fn try_swapping_with_memory( @@ -216,6 +223,7 @@ fn try_swapping_with_memory( .transpose() } +#[tracing::instrument(level = "trace", skip(projection, streaming_table))] /// Tries to embed `projection` to its input (`streaming table`). /// If possible, returns [`StreamingTableExec`] as the top plan. Otherwise, /// returns `None`. @@ -264,6 +272,7 @@ fn try_swapping_with_streaming_table( .map(|e| Some(Arc::new(e) as _)) } +#[tracing::instrument(level = "trace", skip(projection, child))] /// Unifies `projection` with its input (which is also a [`ProjectionExec`]). fn try_unifying_projections( projection: &ProjectionExec, @@ -309,6 +318,7 @@ fn try_unifying_projections( .map(|e| Some(Arc::new(e) as _)) } +#[tracing::instrument(level = "trace", skip(expr))] /// Checks if the given expression is trivial. /// An expression is considered trivial if it is either a `Column` or a `Literal`. fn is_expr_trivial(expr: &Arc) -> bool { @@ -316,6 +326,7 @@ fn is_expr_trivial(expr: &Arc) -> bool { || expr.as_any().downcast_ref::().is_some() } +#[tracing::instrument(level = "trace", skip(projection, output_req))] /// Tries to swap `projection` with its input (`output_req`). If possible, /// performs the swap and returns [`OutputRequirementExec`] as the top plan. /// Otherwise, returns `None`. 
@@ -367,6 +378,7 @@ fn try_swapping_with_output_req( .map(|e| Some(Arc::new(e) as _)) } +#[tracing::instrument(level = "trace", skip(projection))] /// Tries to swap `projection` with its input, which is known to be a /// [`CoalescePartitionsExec`]. If possible, performs the swap and returns /// [`CoalescePartitionsExec`] as the top plan. Otherwise, returns `None`. @@ -382,6 +394,7 @@ fn try_swapping_with_coalesce_partitions( .map(|e| Some(Arc::new(CoalescePartitionsExec::new(e)) as _)) } +#[tracing::instrument(level = "trace", skip(projection, filter))] /// Tries to swap `projection` with its input (`filter`). If possible, performs /// the swap and returns [`FilterExec`] as the top plan. Otherwise, returns `None`. fn try_swapping_with_filter( @@ -406,6 +419,7 @@ fn try_swapping_with_filter( .map(|e| Some(Arc::new(e) as _)) } +#[tracing::instrument(level = "trace", skip(projection, repartition))] /// Tries to swap the projection with its input [`RepartitionExec`]. If it can be done, /// it returns the new swapped version having the [`RepartitionExec`] as the top plan. /// Otherwise, it returns None. @@ -448,6 +462,7 @@ fn try_swapping_with_repartition( )?))) } +#[tracing::instrument(level = "trace", skip(projection, sort))] /// Tries to swap the projection with its input [`SortExec`]. If it can be done, /// it returns the new swapped version having the [`SortExec`] as the top plan. /// Otherwise, it returns None. @@ -478,6 +493,7 @@ fn try_swapping_with_sort( ))) } +#[tracing::instrument(level = "trace", skip(projection, spm))] /// Tries to swap the projection with its input [`SortPreservingMergeExec`]. /// If this is possible, it returns the new [`SortPreservingMergeExec`] whose /// child is a projection. Otherwise, it returns None. @@ -511,6 +527,7 @@ fn try_swapping_with_sort_preserving_merge( ))) } +#[tracing::instrument(level = "trace", skip(projection, union))] /// Tries to push `projection` down through `union`. If possible, performs the /// pushdown and returns a new [`UnionExec`] as the top plan which has projections /// as its children. Otherwise, returns `None`. @@ -532,6 +549,7 @@ fn try_pushdown_through_union( Ok(Some(Arc::new(UnionExec::new(new_children)))) } +#[tracing::instrument(level = "trace", skip(projection, hash_join))] /// Some projections cannot be pushed down to the left or right input of a hash join, because the join's filter or `on` conditions may require columns that are not used later in the plan. /// By embedding such projections into the hash join, we can reduce the cost of build_batch_from_indices in the hash join (which must call compute::take() for each column) and avoid creating unnecessary output. fn try_embed_to_hash_join( @@ -589,6 +607,7 @@ fn try_embed_to_hash_join( } } +#[tracing::instrument(level = "trace", skip(exprs))] /// Collect all column indices from the given projection expressions. fn collect_column_indices(exprs: &[(Arc, String)]) -> Vec { // Collect indices and remove duplicates. @@ -603,6 +622,7 @@ fn collect_column_indices(exprs: &[(Arc, String)]) -> Vec bool { }) && exprs.len() == projection.input().schema().fields().len() } +#[tracing::instrument(level = "trace", skip(exprs))] /// Given the expression set of a projection, checks if the projection causes /// any renaming or constructs a non-`Column` physical expression.
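As a rough illustration of the alias-free check described above: a projection can only be folded into its source when every output expression is a bare column kept under its original name. A self-contained sketch with a simplified expression enum (not DataFusion's physical `Column` expressions); the real function operates on physical expressions and aliases, this only captures the naming aspect.

/// Simplified stand-in expression: a named column or anything else.
enum Expr {
    Column(String),
    Other,
}

/// True only if every output is a column that keeps its original name.
fn all_alias_free_columns(exprs: &[(Expr, String)]) -> bool {
    exprs
        .iter()
        .all(|(expr, alias)| matches!(expr, Expr::Column(name) if name == alias))
}

fn main() {
    let plain = vec![(Expr::Column("a".into()), "a".to_string())];
    let renamed = vec![(Expr::Column("a".into()), "a_renamed".to_string())];
    let computed = vec![(Expr::Other, "a_plus_b".to_string())];
    assert!(all_alias_free_columns(&plain));
    assert!(!all_alias_free_columns(&renamed));
    assert!(!all_alias_free_columns(&computed));
}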
fn all_alias_free_columns(exprs: &[(Arc, String)]) -> bool { @@ -923,6 +949,7 @@ fn all_alias_free_columns(exprs: &[(Arc, String)]) -> bool { }) } +#[tracing::instrument(level = "trace", skip(projection, source))] /// Updates a source provider's projected columns according to the given /// projection operator's expressions. To use this function safely, one must /// ensure that all expressions are `Column` expressions without aliases. @@ -941,6 +968,7 @@ fn new_projections_for_columns( .collect() } +#[tracing::instrument(level = "trace", skip(expr, projected_exprs, sync_with_child))] /// The function operates in two modes: /// /// 1) When `sync_with_child` is `true`: @@ -1020,6 +1048,7 @@ fn update_expr( new_expr.map(|e| (state == RewriteState::RewrittenValid).then_some(e)) } +#[tracing::instrument(level = "trace", skip(projection, child))] /// Creates a new [`ProjectionExec`] instance with the given child plan and /// projected expressions. fn make_with_child( @@ -1030,11 +1059,13 @@ fn make_with_child( .map(|e| Arc::new(e) as _) } +#[tracing::instrument(level = "trace", skip(exprs))] /// Returns `true` if all the expressions in the argument are `Column`s. fn all_columns(exprs: &[(Arc, String)]) -> bool { exprs.iter().all(|(expr, _)| expr.as_any().is::()) } +#[tracing::instrument(level = "trace", skip(exprs))] /// Downcasts all the expressions in `exprs` to `Column`s. If any of the given /// expressions is not a `Column`, returns `None`. fn physical_to_column_exprs( @@ -1050,6 +1081,7 @@ fn physical_to_column_exprs( .collect() } +#[tracing::instrument(level = "trace", skip(left_table_column_count, projection_as_columns))] /// Returns the last index before encountering a column coming from the right table when traveling /// through the projection from left to right, and the last index before encountering a column /// coming from the left table when traveling through the projection from right to left. @@ -1083,6 +1115,7 @@ fn join_table_borders( (far_right_left_col_ind, far_left_right_col_ind) } +#[tracing::instrument(level = "trace", skip(proj_left_exprs, proj_right_exprs, hash_join_on))] /// Tries to update the equi-join `Column`'s of a join as if the input of /// the join was replaced by a projection. fn update_join_on( @@ -1108,6 +1141,7 @@ fn update_join_on( } } +#[tracing::instrument(level = "trace", skip(hash_join_on, projection_exprs))] /// This function generates a new set of columns to be used in a hash join /// operation based on a set of equi-join conditions (`hash_join_on`) and a /// list of projection expressions (`projection_exprs`). @@ -1153,6 +1187,7 @@ fn new_columns_for_join_on( (new_columns.len() == hash_join_on.len()).then_some(new_columns) } +#[tracing::instrument(level = "trace", skip(projection_left_exprs, projection_right_exprs, join_filter, join_left, join_right))] /// Tries to update the column indices of a [`JoinFilter`] as if the input of /// the join was replaced by a projection. fn update_join_filter( @@ -1200,6 +1235,7 @@ fn update_join_filter( }) } +#[tracing::instrument(level = "trace", skip(join_filter, join_side, projection_exprs, join_child_schema))] /// This function determines and returns a vector of indices representing the /// positions of columns in `projection_exprs` that are involved in `join_filter`, /// and correspond to a particular side (`join_side`) of the join operation. 
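For the index-remapping step that `new_indices_for_join_filter` performs, the core idea is: given which source columns the projection keeps (and in what order), translate each filter column's old index into its new position. A minimal sketch with plain `usize` indices instead of DataFusion's `Column`/`ColumnIndex` types:

/// Map old (pre-projection) column indices to their positions in the projection.
fn new_indices(filter_columns: &[usize], projection: &[usize]) -> Vec<usize> {
    filter_columns
        .iter()
        .filter_map(|old| projection.iter().position(|kept| kept == old))
        .collect()
}

fn main() {
    // The projection keeps source columns 2, 0 and 5, in that order.
    let projection = vec![2usize, 0, 5];
    // A join filter that referred to source columns 0 and 5 now refers to
    // projected positions 1 and 2.
    assert_eq!(new_indices(&[0usize, 5], &projection), vec![1, 2]);
}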
@@ -1221,6 +1257,7 @@ fn new_indices_for_join_filter( .collect() } +#[tracing::instrument(level = "trace", skip(projection_as_columns, join_schema, far_right_left_col_ind, far_left_right_col_ind))] /// Checks three conditions for pushing a projection down through a join: /// - Projection must narrow the join output schema. /// - Columns coming from left/right tables must be collected at the left/right @@ -1241,6 +1278,7 @@ fn join_allows_pushdown( && far_left_right_col_ind < projection_as_columns.len() as i32 } +#[tracing::instrument(level = "trace", skip(projection_as_columns, far_right_left_col_ind, far_left_right_col_ind, left_child, right_child))] /// If pushing down the projection over this join's children seems possible, /// this function constructs the new [`ProjectionExec`]s that will come on top /// of the original children of the join. @@ -1317,6 +1355,7 @@ mod tests { } impl DummyUDF { + #[tracing::instrument(level = "trace", skip())] fn new() -> Self { Self { signature: Signature::variadic_any(Volatility::Immutable), @@ -1325,22 +1364,27 @@ mod tests { } impl ScalarUDFImpl for DummyUDF { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &str { "dummy_udf" } + #[tracing::instrument(level = "trace", skip(self))] fn signature(&self) -> &Signature { &self.signature } + #[tracing::instrument(level = "trace", skip(self, _arg_types))] fn return_type(&self, _arg_types: &[DataType]) -> Result { Ok(DataType::Int32) } + #[tracing::instrument(level = "trace", skip(self, _args))] fn invoke(&self, _args: &[ColumnarValue]) -> Result { unimplemented!("DummyUDF::invoke") } @@ -1667,6 +1711,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip())] fn create_simple_csv_exec() -> Arc { let schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Int32, true), @@ -1695,6 +1740,7 @@ mod tests { )) } + #[tracing::instrument(level = "trace", skip())] fn create_projecting_csv_exec() -> Arc { let schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Int32, true), @@ -1722,6 +1768,7 @@ mod tests { )) } + #[tracing::instrument(level = "trace", skip())] fn create_projecting_memory_exec() -> Arc { let schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Int32, true), @@ -1806,9 +1853,11 @@ mod tests { schema: SchemaRef, } impl PartitionStream for DummyStreamPartition { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> &SchemaRef { &self.schema } + #[tracing::instrument(level = "trace", skip(self, _ctx))] fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { unreachable!() } diff --git a/datafusion/core/src/physical_optimizer/pruning.rs b/datafusion/core/src/physical_optimizer/pruning.rs index 605ef9f9023fb..27bc298a14d59 100644 --- a/datafusion/core/src/physical_optimizer/pruning.rs +++ b/datafusion/core/src/physical_optimizer/pruning.rs @@ -477,6 +477,7 @@ pub struct PruningPredicate { } impl PruningPredicate { + #[tracing::instrument(level = "trace", skip(expr, schema))] /// Try to create a new instance of [`PruningPredicate`] /// /// This will translate the provided `expr` filter expression into @@ -516,6 +517,7 @@ impl PruningPredicate { }) } + #[tracing::instrument(level = "trace", skip(self, statistics))] /// For each set of statistics, evaluates the pruning predicate /// and returns a `bool` with the following meaning for a /// all rows whose values match the statistics: @@ -579,26 +581,31 @@ impl PruningPredicate { 
Ok(builder.build()) } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to the input schema pub fn schema(&self) -> &SchemaRef { &self.schema } + #[tracing::instrument(level = "trace", skip(self))] /// Returns a reference to the physical expr used to construct this pruning predicate pub fn orig_expr(&self) -> &Arc { &self.orig_expr } + #[tracing::instrument(level = "trace", skip(self))] /// Returns a reference to the predicate expr pub fn predicate_expr(&self) -> &Arc { &self.predicate_expr } + #[tracing::instrument(level = "trace", skip(self))] /// Returns a reference to the literal guarantees pub fn literal_guarantees(&self) -> &[LiteralGuarantee] { &self.literal_guarantees } + #[tracing::instrument(level = "trace", skip(self))] /// Returns true if this pruning predicate can not prune anything. /// /// This happens if the predicate is a literal `true` and @@ -607,10 +614,12 @@ impl PruningPredicate { is_always_true(&self.predicate_expr) && self.literal_guarantees.is_empty() } + #[tracing::instrument(level = "trace", skip(self))] pub(crate) fn required_columns(&self) -> &RequiredColumns { &self.required_columns } + #[tracing::instrument(level = "trace", skip(self))] /// Names of the columns that are known to be / not be in a set /// of literals (constants). These are the columns the that may be passed to /// [`PruningStatistics::contained`] during pruning. @@ -640,6 +649,7 @@ struct BoolVecBuilder { } impl BoolVecBuilder { + #[tracing::instrument(level = "trace", skip(num_containers))] /// Create a new `BoolVecBuilder` with `num_containers` elements fn new(num_containers: usize) -> Self { Self { @@ -648,6 +658,7 @@ impl BoolVecBuilder { } } + #[tracing::instrument(level = "trace", skip(self, array))] /// Combines result `array` for a conjunct (e.g. `AND` clause) of a /// predicate into the currently in progress array. /// @@ -667,6 +678,7 @@ impl BoolVecBuilder { } } + #[tracing::instrument(level = "trace", skip(self, value))] /// Combines the results in the [`ColumnarValue`] to the currently in /// progress array, following the same rules as [`Self::combine_array`]. 
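One plausible reading of the combining rule documented above, as a stand-alone sketch (hypothetical type, not the `BoolVecBuilder` in this file): the builder starts from "every container might pass" and folds in each conjunct's result, where only a definite false can prune a container and unknown results stay permissive.

struct BoolVec(Vec<bool>);

impl BoolVec {
    /// Start optimistically: every container may contain matching rows.
    fn new(num_containers: usize) -> Self {
        Self(vec![true; num_containers])
    }

    /// `Some(false)` means the container definitely fails this conjunct;
    /// `Some(true)` and `None` (unknown) leave it as a candidate.
    fn combine(&mut self, conjunct: &[Option<bool>]) {
        for (keep, res) in self.0.iter_mut().zip(conjunct) {
            if matches!(res, Some(false)) {
                *keep = false;
            }
        }
    }
}

fn main() {
    let mut b = BoolVec::new(3);
    b.combine(&[Some(true), Some(false), None]);
    b.combine(&[None, None, Some(false)]);
    assert_eq!(b.0, vec![true, false, false]);
}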
/// @@ -688,17 +700,20 @@ } } + #[tracing::instrument(level = "trace", skip(self))] /// Convert this builder into a Vec of bools fn build(self) -> Vec { self.inner } + #[tracing::instrument(level = "trace", skip(self))] /// Check that all containers have rows that DEFINITELY DO NOT pass the predicate fn check_all_pruned(&self) -> bool { self.inner.iter().all(|&x| !x) } } +#[tracing::instrument(level = "trace", skip(expr))] fn is_always_true(expr: &Arc) -> bool { expr.as_any() .downcast_ref::() @@ -726,10 +741,12 @@ pub(crate) struct RequiredColumns { } impl RequiredColumns { + #[tracing::instrument(level = "trace", skip())] fn new() -> Self { Self::default() } + #[tracing::instrument(level = "trace", skip(self))] /// Returns number of unique columns pub(crate) fn n_columns(&self) -> usize { self.iter() @@ -738,6 +755,7 @@ impl RequiredColumns { .len() } + #[tracing::instrument(level = "trace", skip(self))] /// Returns an iterator over items in columns (see doc on /// `self.columns` for details) pub(crate) fn iter( @@ -746,6 +764,7 @@ impl RequiredColumns { self.columns.iter() } + #[tracing::instrument(level = "trace", skip(self, column, statistics_type))] fn find_stat_column( &self, column: &phys_expr::Column, @@ -758,6 +777,7 @@ impl RequiredColumns { .map(|(i, (_c, _t, _f))| i) } + #[tracing::instrument(level = "trace", skip(self, column, column_expr, field, stat_type, suffix))] /// Rewrites column_expr so that all appearances of column /// are replaced with a reference to either the min or max /// statistics column, while keeping track that a reference to the statistics @@ -793,6 +813,7 @@ impl RequiredColumns { rewrite_column_expr(column_expr.clone(), column, &stat_column) } + #[tracing::instrument(level = "trace", skip(self, column, column_expr, field))] /// rewrite col --> col_min fn min_column_expr( &mut self, @@ -803,6 +824,7 @@ impl RequiredColumns { self.stat_column_expr(column, column_expr, field, StatisticsType::Min, "min") } + #[tracing::instrument(level = "trace", skip(self, column, column_expr, field))] /// rewrite col --> col_max fn max_column_expr( &mut self, @@ -813,6 +835,7 @@ impl RequiredColumns { self.stat_column_expr(column, column_expr, field, StatisticsType::Max, "max") } + #[tracing::instrument(level = "trace", skip(self, column, column_expr, field))] /// rewrite col --> col_null_count fn null_count_column_expr( &mut self, @@ -829,6 +852,7 @@ impl RequiredColumns { ) } + #[tracing::instrument(level = "trace", skip(self, column, column_expr, field))] /// rewrite col --> col_row_count fn row_count_column_expr( &mut self, @@ -847,11 +871,13 @@ impl RequiredColumns { } } impl From> for RequiredColumns { + #[tracing::instrument(level = "trace", skip(columns))] fn from(columns: Vec<(phys_expr::Column, StatisticsType, Field)>) -> Self { Self { columns } } } +#[tracing::instrument(level = "trace", skip(statistics, required_columns))] /// Build a RecordBatch from a list of statistics, creating arrays, /// with one row for each PruningStatistics and columns specified in /// the required_columns parameter.
@@ -940,6 +966,7 @@ struct PruningExpressionBuilder<'a> { } impl<'a> PruningExpressionBuilder<'a> { + #[tracing::instrument(level = "trace", skip(left, right, op, schema, required_columns))] fn try_new( left: &'a Arc, right: &'a Arc, @@ -987,24 +1014,29 @@ impl<'a> PruningExpressionBuilder<'a> { }) } + #[tracing::instrument(level = "trace", skip(self))] fn op(&self) -> Operator { self.op } + #[tracing::instrument(level = "trace", skip(self))] fn scalar_expr(&self) -> &Arc { &self.scalar_expr } + #[tracing::instrument(level = "trace", skip(self))] fn min_column_expr(&mut self) -> Result> { self.required_columns .min_column_expr(&self.column, &self.column_expr, self.field) } + #[tracing::instrument(level = "trace", skip(self))] fn max_column_expr(&mut self) -> Result> { self.required_columns .max_column_expr(&self.column, &self.column_expr, self.field) } + #[tracing::instrument(level = "trace", skip(self))] /// This function simply returns the `null_count` physical expression, no matter what the /// predicate expression is /// @@ -1025,6 +1057,7 @@ impl<'a> PruningExpressionBuilder<'a> { ) } + #[tracing::instrument(level = "trace", skip(self))] /// This function simply returns the `row_count` physical expression, no matter what the /// predicate expression is /// @@ -1046,6 +1079,7 @@ impl<'a> PruningExpressionBuilder<'a> { } } +#[tracing::instrument(level = "trace", skip(column_expr, op, scalar_expr, schema))] /// This function is designed to rewrite the column_expr to /// ensure the column_expr is monotonically increasing. /// @@ -1131,6 +1165,7 @@ fn rewrite_expr_to_prunable( } } +#[tracing::instrument(level = "trace", skip(op))] fn is_compare_op(op: Operator) -> bool { matches!( op, @@ -1147,6 +1182,7 @@ fn is_compare_op(op: Operator) -> bool { // Must make sure the two types have a defined order. // For example, casts from string to numbers are not correct, // because "13" is less than "3" under UTF8 comparison order.
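To make the min/max rewrite concrete: a predicate like `i > 6` is answered per container from statistics alone, keeping a container only if its maximum could still satisfy the predicate. A minimal sketch with a hypothetical `ContainerStats` struct; DataFusion instead builds a physical expression over the min/max statistics columns rather than interpreting them directly like this.

/// Per-container (e.g. per row group) min/max for one column.
struct ContainerStats {
    min: i64,
    max: i64,
}

/// `col > value`: the container may match only if its max exceeds `value`.
fn may_contain_gt(stats: &ContainerStats, value: i64) -> bool {
    stats.max > value
}

/// `col < value`: the container may match only if its min is below `value`.
fn may_contain_lt(stats: &ContainerStats, value: i64) -> bool {
    stats.min < value
}

fn main() {
    let containers = [
        ContainerStats { min: 0, max: 4 },
        ContainerStats { min: 5, max: 9 },
    ];
    let keep_gt: Vec<bool> = containers.iter().map(|s| may_contain_gt(s, 6)).collect();
    let keep_lt: Vec<bool> = containers.iter().map(|s| may_contain_lt(s, 3)).collect();
    assert_eq!(keep_gt, vec![false, true]); // first container pruned for i > 6
    assert_eq!(keep_lt, vec![true, false]); // second container pruned for i < 3
}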
+#[tracing::instrument(level = "trace", skip(from_type, to_type))] fn verify_support_type_for_prune(from_type: &DataType, to_type: &DataType) -> Result<()> { // TODO: support other data type for prunable cast or try cast if matches!( @@ -1168,6 +1204,7 @@ fn verify_support_type_for_prune(from_type: &DataType, to_type: &DataType) -> Re } } +#[tracing::instrument(level = "trace", skip(e, column_old, column_new))] /// replaces a column with an old name with a new name in an expression fn rewrite_column_expr( e: Arc, @@ -1186,6 +1223,7 @@ fn rewrite_column_expr( .data() } +#[tracing::instrument(level = "trace", skip(op))] fn reverse_operator(op: Operator) -> Result { op.swap().ok_or_else(|| { DataFusionError::Internal(format!( @@ -1194,6 +1232,7 @@ fn reverse_operator(op: Operator) -> Result { }) } +#[tracing::instrument(level = "trace", skip(column, schema, required_columns, is_not))] /// Given a column reference to `column`, returns a pruning /// expression in terms of the min and max that will evaluate to true /// if the column may contain values, and false if definitely does not @@ -1235,6 +1274,7 @@ fn build_single_column_expr( } } +#[tracing::instrument(level = "trace", skip(expr, schema, required_columns, with_not))] /// Given an expression reference to `expr`, if `expr` is a column expression, /// returns a pruning expression in terms of IsNull that will evaluate to true /// if the column may contain null, and false if definitely does not @@ -1293,6 +1333,7 @@ fn build_is_null_column_expr( /// an OR chain const MAX_LIST_VALUE_SIZE_REWRITE: usize = 20; +#[tracing::instrument(level = "trace", skip(expr, schema, required_columns))] /// Translate logical filter expression into pruning predicate /// expression that will evaluate to FALSE if it can be determined no /// rows between the min/max values could pass the predicates. @@ -1413,6 +1454,7 @@ fn build_predicate_expression( build_statistics_expr(&mut expr_builder).unwrap_or(unhandled) } +#[tracing::instrument(level = "trace", skip(expr_builder))] fn build_statistics_expr( expr_builder: &mut PruningExpressionBuilder, ) -> Result> { @@ -1499,6 +1541,7 @@ fn build_statistics_expr( Ok(statistics_expr) } +#[tracing::instrument(level = "trace", skip(statistics_expr, expr_builder))] /// Wrap the statistics expression in a case expression. /// This is necessary to handle the case where the column is known /// to be all nulls. 
@@ -1587,9 +1630,11 @@ mod tests { } impl ContainerStats { + #[tracing::instrument(level = "trace", skip())] fn new() -> Self { Default::default() } + #[tracing::instrument(level = "trace", skip(min, max, precision, scale))] fn new_decimal128( min: impl IntoIterator>, max: impl IntoIterator>, @@ -1611,6 +1656,7 @@ mod tests { )) } + #[tracing::instrument(level = "trace", skip(min, max))] fn new_i64( min: impl IntoIterator>, max: impl IntoIterator>, @@ -1620,6 +1666,7 @@ mod tests { .with_max(Arc::new(max.into_iter().collect::())) } + #[tracing::instrument(level = "trace", skip(min, max))] fn new_i32( min: impl IntoIterator>, max: impl IntoIterator>, @@ -1629,6 +1676,7 @@ mod tests { .with_max(Arc::new(max.into_iter().collect::())) } + #[tracing::instrument(level = "trace", skip(min, max))] fn new_utf8<'a>( min: impl IntoIterator>, max: impl IntoIterator>, @@ -1638,6 +1686,7 @@ mod tests { .with_max(Arc::new(max.into_iter().collect::())) } + #[tracing::instrument(level = "trace", skip(min, max))] fn new_bool( min: impl IntoIterator>, max: impl IntoIterator>, @@ -1647,22 +1696,27 @@ mod tests { .with_max(Arc::new(max.into_iter().collect::())) } + #[tracing::instrument(level = "trace", skip(self))] fn min(&self) -> Option { self.min.clone() } + #[tracing::instrument(level = "trace", skip(self))] fn max(&self) -> Option { self.max.clone() } + #[tracing::instrument(level = "trace", skip(self))] fn null_counts(&self) -> Option { self.null_counts.clone() } + #[tracing::instrument(level = "trace", skip(self))] fn row_counts(&self) -> Option { self.row_counts.clone() } + #[tracing::instrument(level = "trace", skip(self))] /// return an iterator over all arrays in this statistics fn arrays(&self) -> Vec { let contained_arrays = self @@ -1682,6 +1736,7 @@ mod tests { .collect() } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the number of containers represented by this statistics This /// picks the length of the first array as all arrays must have the same /// length (which is verified by `assert_invariants`). @@ -1690,6 +1745,7 @@ mod tests { self.arrays().iter().map(|a| a.len()).next().unwrap_or(0) } + #[tracing::instrument(level = "trace", skip(self))] /// Ensure that the lengths of all arrays are consistent fn assert_invariants(&self) { let mut prev_len = None; @@ -1707,18 +1763,21 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip(self, min))] /// Add min values fn with_min(mut self, min: ArrayRef) -> Self { self.min = Some(min); self } + #[tracing::instrument(level = "trace", skip(self, max))] /// Add max values fn with_max(mut self, max: ArrayRef) -> Self { self.max = Some(max); self } + #[tracing::instrument(level = "trace", skip(self, counts))] /// Add null counts. There must be the same number of null counts as /// there are containers fn with_null_counts( @@ -1733,6 +1792,7 @@ mod tests { self } + #[tracing::instrument(level = "trace", skip(self, counts))] /// Add row counts. There must be the same number of row counts as /// there are containers fn with_row_counts( @@ -1747,6 +1807,7 @@ mod tests { self } + #[tracing::instrument(level = "trace", skip(self, values, contained))] /// Add contained information. 
pub fn with_contained( mut self, @@ -1761,6 +1822,7 @@ mod tests { self } + #[tracing::instrument(level = "trace", skip(self, find_values))] /// get any contained information for the specified values fn contained(&self, find_values: &HashSet) -> Option { // find the one with the matching values @@ -1778,10 +1840,12 @@ mod tests { } impl TestStatistics { + #[tracing::instrument(level = "trace", skip())] fn new() -> Self { Self::default() } + #[tracing::instrument(level = "trace", skip(self, name, container_stats))] fn with( mut self, name: impl Into, @@ -1792,6 +1856,7 @@ mod tests { self } + #[tracing::instrument(level = "trace", skip(self, name, counts))] /// Add null counts for the specified column. /// There must be the same number of null counts as /// there are containers @@ -1814,6 +1879,7 @@ mod tests { self } + #[tracing::instrument(level = "trace", skip(self, name, counts))] /// Add row counts for the specified column. /// There must be the same number of row counts as /// there are containers @@ -1836,6 +1902,7 @@ mod tests { self } + #[tracing::instrument(level = "trace", skip(self, name, values, contained))] /// Add contained information for the specified column. fn with_contained( mut self, @@ -1859,6 +1926,7 @@ mod tests { } impl PruningStatistics for TestStatistics { + #[tracing::instrument(level = "trace", skip(self, column))] fn min_values(&self, column: &Column) -> Option { self.stats .get(column) @@ -1866,6 +1934,7 @@ mod tests { .unwrap_or(None) } + #[tracing::instrument(level = "trace", skip(self, column))] fn max_values(&self, column: &Column) -> Option { self.stats .get(column) @@ -1873,6 +1942,7 @@ mod tests { .unwrap_or(None) } + #[tracing::instrument(level = "trace", skip(self))] fn num_containers(&self) -> usize { self.stats .values() @@ -1881,6 +1951,7 @@ mod tests { .unwrap_or(0) } + #[tracing::instrument(level = "trace", skip(self, column))] fn null_counts(&self, column: &Column) -> Option { self.stats .get(column) @@ -1888,6 +1959,7 @@ mod tests { .unwrap_or(None) } + #[tracing::instrument(level = "trace", skip(self, column))] fn row_counts(&self, column: &Column) -> Option { self.stats .get(column) @@ -1895,6 +1967,7 @@ mod tests { .unwrap_or(None) } + #[tracing::instrument(level = "trace", skip(self, column, values))] fn contained( &self, column: &Column, @@ -1914,26 +1987,32 @@ mod tests { } impl PruningStatistics for OneContainerStats { + #[tracing::instrument(level = "trace", skip(self, _column))] fn min_values(&self, _column: &Column) -> Option { self.min_values.clone() } + #[tracing::instrument(level = "trace", skip(self, _column))] fn max_values(&self, _column: &Column) -> Option { self.max_values.clone() } + #[tracing::instrument(level = "trace", skip(self))] fn num_containers(&self) -> usize { self.num_containers } + #[tracing::instrument(level = "trace", skip(self, _column))] fn null_counts(&self, _column: &Column) -> Option { None } + #[tracing::instrument(level = "trace", skip(self, _column))] fn row_counts(&self, _column: &Column) -> Option { None } + #[tracing::instrument(level = "trace", skip(self, _column, _values))] fn contained( &self, _column: &Column, @@ -2852,6 +2931,7 @@ mod tests { ); } + #[tracing::instrument(level = "trace", skip())] /// Creates setup for boolean chunk pruning /// /// For predicate "b1" (boolean expr) @@ -2959,6 +3039,7 @@ mod tests { ); } + #[tracing::instrument(level = "trace", skip())] /// Creates a setup for chunk pruning, modeling a int32 column "i" /// with 5 different containers (e.g. RowGroups). 
They have [min, /// max]: @@ -3842,6 +3923,7 @@ mod tests { ); } + #[tracing::instrument(level = "trace", skip(expr, schema, statistics, expected))] /// prunes the specified expr with the specified schema and statistics, and /// ensures it returns expected. /// @@ -3862,6 +3944,7 @@ mod tests { assert_eq!(result, expected); } + #[tracing::instrument(level = "trace", skip(expr, schema, required_columns))] fn test_build_predicate_expression( expr: &Expr, schema: &Schema, @@ -3871,6 +3954,7 @@ mod tests { build_predicate_expression(&expr, schema, required_columns) } + #[tracing::instrument(level = "trace", skip(expr, schema))] fn logical2physical(expr: &Expr, schema: &Schema) -> Arc { let df_schema = schema.clone().to_dfschema().unwrap(); let execution_props = ExecutionProps::new(); diff --git a/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs index 9168f7fc281c9..881f99effa3b5 100644 --- a/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs +++ b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs @@ -41,6 +41,7 @@ use itertools::izip; /// (but somewhat slower) cousins. pub type OrderPreservationContext = PlanContext; +#[tracing::instrument(level = "trace", skip(opc))] /// Updates order-preservation data for all children of the given node. pub fn update_children(opc: &mut OrderPreservationContext) { for PlanContext { @@ -86,6 +87,7 @@ pub fn update_children(opc: &mut OrderPreservationContext) { opc.data = false; } +#[tracing::instrument(level = "trace", skip(sort_input, is_spr_better, is_spm_better))] /// Calculates the updated plan by replacing operators that lose ordering /// inside `sort_input` with their order-preserving variants. This will /// generate an alternative plan, which will be accepted or rejected later on @@ -141,6 +143,7 @@ fn plan_with_order_preserving_variants( sort_input.update_plan_from_children() } +#[tracing::instrument(level = "trace", skip(sort_input))] /// Calculates the updated plan by replacing operators that preserve ordering /// inside `sort_input` with their order-breaking variants. This will restore /// the original plan modified by [`plan_with_order_preserving_variants`]. @@ -191,6 +194,7 @@ fn plan_with_order_breaking_variants( Ok(sort_input) } +#[tracing::instrument(level = "trace", skip(requirements, is_spr_better, is_spm_better, config))] /// The `replace_with_order_preserving_variants` optimizer sub-rule tries to /// remove `SortExec`s from the physical plan by replacing operators that do /// not preserve ordering with their order-preserving variants; i.e. 
by replacing @@ -1342,6 +1346,7 @@ mod tests { // End test cases // Start test helpers + #[tracing::instrument(level = "trace", skip(name, schema))] fn sort_expr(name: &str, schema: &Schema) -> PhysicalSortExpr { let sort_opts = SortOptions { nulls_first: false, @@ -1350,11 +1355,13 @@ mod tests { sort_expr_options(name, schema, sort_opts) } + #[tracing::instrument(level = "trace", skip(name, schema))] fn sort_expr_default(name: &str, schema: &Schema) -> PhysicalSortExpr { let sort_opts = SortOptions::default(); sort_expr_options(name, schema, sort_opts) } + #[tracing::instrument(level = "trace", skip(name, schema, options))] fn sort_expr_options( name: &str, schema: &Schema, @@ -1366,6 +1373,7 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip(sort_exprs, input, preserve_partitioning))] fn sort_exec( sort_exprs: impl IntoIterator, input: Arc, @@ -1378,6 +1386,7 @@ mod tests { ) } + #[tracing::instrument(level = "trace", skip(sort_exprs, input))] fn sort_preserving_merge_exec( sort_exprs: impl IntoIterator, input: Arc, @@ -1386,6 +1395,7 @@ mod tests { Arc::new(SortPreservingMergeExec::new(sort_exprs, input)) } + #[tracing::instrument(level = "trace", skip(input))] fn repartition_exec_round_robin( input: Arc, ) -> Arc { @@ -1394,6 +1404,7 @@ mod tests { ) } + #[tracing::instrument(level = "trace", skip(input))] fn repartition_exec_hash(input: Arc) -> Arc { let input_schema = input.schema(); Arc::new( @@ -1405,6 +1416,7 @@ mod tests { ) } + #[tracing::instrument(level = "trace", skip(input))] fn filter_exec(input: Arc) -> Arc { let input_schema = input.schema(); let predicate = expressions::binary( @@ -1417,14 +1429,17 @@ mod tests { Arc::new(FilterExec::try_new(predicate, input).unwrap()) } + #[tracing::instrument(level = "trace", skip(input))] fn coalesce_batches_exec(input: Arc) -> Arc { Arc::new(CoalesceBatchesExec::new(input, 8192)) } + #[tracing::instrument(level = "trace", skip(input))] fn coalesce_partitions_exec(input: Arc) -> Arc { Arc::new(CoalescePartitionsExec::new(input)) } + #[tracing::instrument(level = "trace", skip(left, right))] fn hash_join_exec( left: Arc, right: Arc, @@ -1448,6 +1463,7 @@ mod tests { ) } + #[tracing::instrument(level = "trace", skip())] fn create_test_schema() -> Result { let column_a = Field::new("a", DataType::Int32, false); let column_b = Field::new("b", DataType::Int32, false); @@ -1459,6 +1475,7 @@ mod tests { } // creates a stream exec source for the test purposes + #[tracing::instrument(level = "trace", skip(schema, sort_exprs))] fn stream_exec_ordered( schema: &SchemaRef, sort_exprs: impl IntoIterator, @@ -1483,6 +1500,7 @@ mod tests { // creates a csv exec source for the test purposes // projection and has_header parameters are given static due to testing needs + #[tracing::instrument(level = "trace", skip(schema, sort_exprs))] fn csv_exec_sorted( schema: &SchemaRef, sort_exprs: impl IntoIterator, diff --git a/datafusion/core/src/physical_optimizer/sort_pushdown.rs b/datafusion/core/src/physical_optimizer/sort_pushdown.rs index c527819e77464..f849967770872 100644 --- a/datafusion/core/src/physical_optimizer/sort_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/sort_pushdown.rs @@ -46,6 +46,7 @@ use datafusion_physical_expr::{ /// [`EnforceSorting`]: crate::physical_optimizer::enforce_sorting::EnforceSorting pub type SortPushDown = PlanContext>>; +#[tracing::instrument(level = "trace", skip(node))] /// Assigns the ordering requirement of the root node to the its children. 
pub fn assign_initial_requirements(node: &mut SortPushDown) { let reqs = node.plan.required_input_ordering(); @@ -54,6 +55,7 @@ pub fn assign_initial_requirements(node: &mut SortPushDown) { } } +#[tracing::instrument(level = "trace", skip(requirements))] pub(crate) fn pushdown_sorts( mut requirements: SortPushDown, ) -> Result> { @@ -115,6 +117,7 @@ pub(crate) fn pushdown_sorts( Ok(Transformed::yes(requirements)) } +#[tracing::instrument(level = "trace", skip(plan, parent_required))] fn pushdown_requirement_to_children( plan: &Arc, parent_required: LexRequirementRef, @@ -211,6 +214,7 @@ fn pushdown_requirement_to_children( // TODO: Add support for Projection push down } +#[tracing::instrument(level = "trace", skip(parent_required, request_child, child_plan))] /// Determine children requirements: /// - If children requirements are more specific, do not push down parent /// requirements. @@ -239,6 +243,7 @@ fn determine_children_requirement( RequirementsCompatibility::NonCompatible } } +#[tracing::instrument(level = "trace", skip(smj, parent_required, sort_expr, push_side))] fn try_pushdown_requirements_to_join( smj: &SortMergeJoinExec, parent_required: LexRequirementRef, @@ -281,6 +286,7 @@ fn try_pushdown_requirements_to_join( })) } +#[tracing::instrument(level = "trace", skip(required_exprs, join_type, left_columns_len))] fn expr_source_side( required_exprs: &[PhysicalSortExpr], join_type: JoinType, @@ -329,6 +335,7 @@ fn expr_source_side( } } +#[tracing::instrument(level = "trace", skip(parent_required, left_columns_len))] fn shift_right_required( parent_required: LexRequirementRef, left_columns_len: usize, diff --git a/datafusion/core/src/physical_optimizer/test_utils.rs b/datafusion/core/src/physical_optimizer/test_utils.rs index fdb3e46fd8012..2e6077b132f70 100644 --- a/datafusion/core/src/physical_optimizer/test_utils.rs +++ b/datafusion/core/src/physical_optimizer/test_utils.rs @@ -51,6 +51,7 @@ use datafusion_physical_plan::tree_node::PlanContext; use async_trait::async_trait; +#[tracing::instrument(level = "trace", skip(ctx, table_name, infinite))] async fn register_current_csv( ctx: &SessionContext, table_name: &str, @@ -94,12 +95,14 @@ pub struct UnaryTestCase { #[async_trait] impl SqlTestCase for UnaryTestCase { + #[tracing::instrument(level = "trace", skip(self, ctx))] async fn register_table(&self, ctx: &SessionContext) -> Result<()> { let table_is_infinite = self.source_type == SourceType::Unbounded; register_current_csv(ctx, "test", table_is_infinite).await?; Ok(()) } + #[tracing::instrument(level = "trace", skip(self))] fn expect_fail(&self) -> bool { self.expect_fail } @@ -112,6 +115,7 @@ pub struct BinaryTestCase { #[async_trait] impl SqlTestCase for BinaryTestCase { + #[tracing::instrument(level = "trace", skip(self, ctx))] async fn register_table(&self, ctx: &SessionContext) -> Result<()> { let left_table_is_infinite = self.source_types.0 == SourceType::Unbounded; let right_table_is_infinite = self.source_types.1 == SourceType::Unbounded; @@ -120,6 +124,7 @@ impl SqlTestCase for BinaryTestCase { Ok(()) } + #[tracing::instrument(level = "trace", skip(self))] fn expect_fail(&self) -> bool { self.expect_fail } @@ -132,6 +137,7 @@ pub struct QueryCase { } impl QueryCase { + #[tracing::instrument(level = "trace", skip(self))] /// Run the test cases pub(crate) async fn run(&self) -> Result<()> { for case in &self.cases { @@ -146,6 +152,7 @@ impl QueryCase { } Ok(()) } + #[tracing::instrument(level = "trace", skip(self, ctx, error))] async fn run_case(&self, ctx: 
SessionContext, error: Option<&String>) -> Result<()> { let dataframe = ctx.sql(self.sql.as_str()).await?; let plan = dataframe.create_physical_plan().await; @@ -164,6 +171,7 @@ impl QueryCase { } } +#[tracing::instrument(level = "trace", skip(left, right, join_on, join_type))] pub fn sort_merge_join_exec( left: Arc, right: Arc, @@ -184,11 +192,13 @@ pub fn sort_merge_join_exec( ) } +#[tracing::instrument(level = "trace", skip(name, schema))] /// make PhysicalSortExpr with default options pub fn sort_expr(name: &str, schema: &Schema) -> PhysicalSortExpr { sort_expr_options(name, schema, SortOptions::default()) } +#[tracing::instrument(level = "trace", skip(name, schema, options))] /// PhysicalSortExpr with specified options pub fn sort_expr_options( name: &str, @@ -201,14 +211,17 @@ pub fn sort_expr_options( } } +#[tracing::instrument(level = "trace", skip(input))] pub fn coalesce_partitions_exec(input: Arc) -> Arc { Arc::new(CoalescePartitionsExec::new(input)) } +#[tracing::instrument(level = "trace", skip(schema))] pub(crate) fn memory_exec(schema: &SchemaRef) -> Arc { Arc::new(MemoryExec::try_new(&[vec![]], schema.clone(), None).unwrap()) } +#[tracing::instrument(level = "trace", skip(left, right, on, filter, join_type))] pub fn hash_join_exec( left: Arc, right: Arc, @@ -228,6 +241,7 @@ pub fn hash_join_exec( )?)) } +#[tracing::instrument(level = "trace", skip(col_name, sort_exprs, input))] pub fn bounded_window_exec( col_name: &str, sort_exprs: impl IntoIterator, @@ -257,6 +271,7 @@ pub fn bounded_window_exec( ) } +#[tracing::instrument(level = "trace", skip(predicate, input))] pub fn filter_exec( predicate: Arc, input: Arc, @@ -264,6 +279,7 @@ pub fn filter_exec( Arc::new(FilterExec::try_new(predicate, input).unwrap()) } +#[tracing::instrument(level = "trace", skip(sort_exprs, input))] pub fn sort_preserving_merge_exec( sort_exprs: impl IntoIterator, input: Arc, @@ -272,6 +288,7 @@ pub fn sort_preserving_merge_exec( Arc::new(SortPreservingMergeExec::new(sort_exprs, input)) } +#[tracing::instrument(level = "trace", skip(schema))] /// Create a non sorted parquet exec pub fn parquet_exec(schema: &SchemaRef) -> Arc { Arc::new(ParquetExec::new( @@ -293,6 +310,7 @@ pub fn parquet_exec(schema: &SchemaRef) -> Arc { } // Created a sorted parquet exec +#[tracing::instrument(level = "trace", skip(schema, sort_exprs))] pub fn parquet_exec_sorted( schema: &SchemaRef, sort_exprs: impl IntoIterator, @@ -317,26 +335,32 @@ pub fn parquet_exec_sorted( )) } +#[tracing::instrument(level = "trace", skip(input))] pub fn union_exec(input: Vec>) -> Arc { Arc::new(UnionExec::new(input)) } +#[tracing::instrument(level = "trace", skip(input))] pub fn limit_exec(input: Arc) -> Arc { global_limit_exec(local_limit_exec(input)) } +#[tracing::instrument(level = "trace", skip(input))] pub fn local_limit_exec(input: Arc) -> Arc { Arc::new(LocalLimitExec::new(input, 100)) } +#[tracing::instrument(level = "trace", skip(input))] pub fn global_limit_exec(input: Arc) -> Arc { Arc::new(GlobalLimitExec::new(input, 0, Some(100))) } +#[tracing::instrument(level = "trace", skip(input))] pub fn repartition_exec(input: Arc) -> Arc { Arc::new(RepartitionExec::try_new(input, Partitioning::RoundRobinBatch(10)).unwrap()) } +#[tracing::instrument(level = "trace", skip(input))] pub fn spr_repartition_exec(input: Arc) -> Arc { Arc::new( RepartitionExec::try_new(input, Partitioning::RoundRobinBatch(10)) @@ -345,6 +369,7 @@ pub fn spr_repartition_exec(input: Arc) -> Arc) -> Arc { let schema = input.schema(); Arc::new( @@ -360,10 +385,12 
@@ pub fn aggregate_exec(input: Arc) -> Arc { ) } +#[tracing::instrument(level = "trace", skip(input))] pub fn coalesce_batches_exec(input: Arc) -> Arc { Arc::new(CoalesceBatchesExec::new(input, 128)) } +#[tracing::instrument(level = "trace", skip(sort_exprs, input))] pub fn sort_exec( sort_exprs: impl IntoIterator, input: Arc, @@ -372,6 +399,7 @@ pub fn sort_exec( Arc::new(SortExec::new(sort_exprs, input)) } +#[tracing::instrument(level = "trace", skip(context))] /// A [`PlanContext`] object is susceptible to being left in an inconsistent state after /// untested mutable operations. It is crucial that there be no discrepancies between a plan /// associated with the root node and the plan generated after traversing all nodes diff --git a/datafusion/core/src/physical_optimizer/topk_aggregation.rs b/datafusion/core/src/physical_optimizer/topk_aggregation.rs index 7c0519eda3b3b..aec581a79a39f 100644 --- a/datafusion/core/src/physical_optimizer/topk_aggregation.rs +++ b/datafusion/core/src/physical_optimizer/topk_aggregation.rs @@ -40,11 +40,13 @@ use itertools::Itertools; pub struct TopKAggregation {} impl TopKAggregation { + #[tracing::instrument(level = "trace", skip())] /// Create a new `LimitAggregation` pub fn new() -> Self { Self {} } + #[tracing::instrument(level = "trace", skip(aggr, order, limit))] fn transform_agg( aggr: &AggregateExec, order: &PhysicalSortExpr, @@ -84,6 +86,7 @@ impl TopKAggregation { Some(Arc::new(new_aggr)) } + #[tracing::instrument(level = "trace", skip(plan))] fn transform_sort(plan: Arc) -> Option> { let sort = plan.as_any().downcast_ref::()?; @@ -129,12 +132,14 @@ impl TopKAggregation { } impl Default for TopKAggregation { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self::new() } } impl PhysicalOptimizerRule for TopKAggregation { + #[tracing::instrument(level = "trace", skip(self, plan, config))] fn optimize( &self, plan: Arc, @@ -156,10 +161,12 @@ impl PhysicalOptimizerRule for TopKAggregation { } } + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &str { "LimitAggregation" } + #[tracing::instrument(level = "trace", skip(self))] fn schema_check(&self) -> bool { true } diff --git a/datafusion/core/src/physical_optimizer/utils.rs b/datafusion/core/src/physical_optimizer/utils.rs index 2c0d042281e6f..b6a3d93108870 100644 --- a/datafusion/core/src/physical_optimizer/utils.rs +++ b/datafusion/core/src/physical_optimizer/utils.rs @@ -31,6 +31,7 @@ use datafusion_physical_expr::{LexRequirement, PhysicalSortRequirement}; use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; use datafusion_physical_plan::tree_node::PlanContext; +#[tracing::instrument(level = "trace", skip(node, sort_requirements, fetch))] /// This utility function adds a `SortExec` above an operator according to the /// given ordering requirements while preserving the original partitioning. pub fn add_sort_above( @@ -52,6 +53,7 @@ pub fn add_sort_above( PlanContext::new(Arc::new(new_sort), T::default(), vec![node]) } +#[tracing::instrument(level = "trace", skip(node, sort_requirements, fetch))] /// This utility function adds a `SortExec` above an operator according to the /// given ordering requirements while preserving the original partitioning. If /// requirement is already satisfied no `SortExec` is added. @@ -71,38 +73,45 @@ pub fn add_sort_above_with_check( } } +#[tracing::instrument(level = "trace", skip(plan))] /// Checks whether the given operator is a limit; /// i.e. either a [`LocalLimitExec`] or a [`GlobalLimitExec`]. 
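One pattern worth calling out from the `is_limit`, `is_window`, `is_sort` and related helpers in this file: they all expose the operator as `&dyn Any` and ask whether it is a specific concrete type. A self-contained sketch of that downcast pattern with a simplified trait (not DataFusion's `ExecutionPlan`):

use std::any::Any;
use std::sync::Arc;

trait Plan: Any {
    fn as_any(&self) -> &dyn Any;
}

struct SortNode;
struct LimitNode;

impl Plan for SortNode {
    fn as_any(&self) -> &dyn Any { self }
}
impl Plan for LimitNode {
    fn as_any(&self) -> &dyn Any { self }
}

/// Analogue of `is_sort`: check the dynamic type of the plan node.
fn is_sort(plan: &Arc<dyn Plan>) -> bool {
    plan.as_any().is::<SortNode>()
}

fn main() {
    let sort: Arc<dyn Plan> = Arc::new(SortNode);
    let limit: Arc<dyn Plan> = Arc::new(LimitNode);
    assert!(is_sort(&sort));
    assert!(!is_sort(&limit));
}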
pub fn is_limit(plan: &Arc) -> bool { plan.as_any().is::() || plan.as_any().is::() } +#[tracing::instrument(level = "trace", skip(plan))] /// Checks whether the given operator is a window; /// i.e. either a [`WindowAggExec`] or a [`BoundedWindowAggExec`]. pub fn is_window(plan: &Arc) -> bool { plan.as_any().is::() || plan.as_any().is::() } +#[tracing::instrument(level = "trace", skip(plan))] /// Checks whether the given operator is a [`SortExec`]. pub fn is_sort(plan: &Arc) -> bool { plan.as_any().is::() } +#[tracing::instrument(level = "trace", skip(plan))] /// Checks whether the given operator is a [`SortPreservingMergeExec`]. pub fn is_sort_preserving_merge(plan: &Arc) -> bool { plan.as_any().is::() } +#[tracing::instrument(level = "trace", skip(plan))] /// Checks whether the given operator is a [`CoalescePartitionsExec`]. pub fn is_coalesce_partitions(plan: &Arc) -> bool { plan.as_any().is::() } +#[tracing::instrument(level = "trace", skip(plan))] /// Checks whether the given operator is a [`UnionExec`]. pub fn is_union(plan: &Arc) -> bool { plan.as_any().is::() } +#[tracing::instrument(level = "trace", skip(plan))] /// Checks whether the given operator is a [`RepartitionExec`]. pub fn is_repartition(plan: &Arc) -> bool { plan.as_any().is::() diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 597a03a52f218..1a2c5fd0e0458 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -104,6 +104,7 @@ use log::{debug, trace}; use sqlparser::ast::NullTreatment; use tokio::sync::Mutex; +#[tracing::instrument(level = "trace", skip(fun, distinct, args, order_by))] fn create_function_physical_name( fun: &str, distinct: bool, @@ -127,10 +128,12 @@ fn create_function_physical_name( .unwrap_or(phys_name)) } +#[tracing::instrument(level = "trace", skip(e))] fn physical_name(e: &Expr) -> Result { create_physical_name(e, true) } +#[tracing::instrument(level = "trace", skip(e, is_first_expr))] fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { match e { Expr::Unnest(_) => { @@ -434,6 +437,7 @@ pub struct DefaultPhysicalPlanner { #[async_trait] impl PhysicalPlanner for DefaultPhysicalPlanner { + #[tracing::instrument(level = "trace", skip(self, logical_plan, session_state))] /// Create a physical plan from a logical plan async fn create_physical_plan( &self, @@ -452,6 +456,7 @@ impl PhysicalPlanner for DefaultPhysicalPlanner { } } + #[tracing::instrument(level = "trace", skip(self, expr, input_dfschema, session_state))] /// Create a physical expression from a logical expression /// suitable for evaluation /// @@ -494,6 +499,7 @@ enum ChildrenContainer { } impl ChildrenContainer { + #[tracing::instrument(level = "trace", skip(self))] fn one(self) -> Result> { match self { Self::One(p) => Ok(p), @@ -501,6 +507,7 @@ impl ChildrenContainer { } } + #[tracing::instrument(level = "trace", skip(self))] fn two(self) -> Result<[Arc; 2]> { match self { Self::Multiple(v) if v.len() == 2 => Ok(v.try_into().unwrap()), @@ -508,6 +515,7 @@ impl ChildrenContainer { } } + #[tracing::instrument(level = "trace", skip(self))] fn vec(self) -> Vec> { match self { Self::None => vec![], @@ -526,6 +534,7 @@ struct LogicalNode<'a> { } impl DefaultPhysicalPlanner { + #[tracing::instrument(level = "trace", skip(extension_planners))] /// Create a physical planner that uses `extension_planners` to /// plan user-defined logical nodes [`LogicalPlan::Extension`]. 
/// The planner uses the first [`ExtensionPlanner`] to return a non-`None` @@ -536,6 +545,7 @@ impl DefaultPhysicalPlanner { Self { extension_planners } } + #[tracing::instrument(level = "trace", skip(self, logical_plan, session_state))] /// Create a physical plan from a logical plan async fn create_initial_plan( &self, @@ -611,6 +621,7 @@ impl DefaultPhysicalPlanner { Ok(plan) } + #[tracing::instrument(level = "trace", skip(self, leaf_starter_index, flat_tree, session_state))] /// These tasks start at a leaf and traverse up the tree towards the root, building /// an ExecutionPlan as they go. When they reach a node with two or more children, /// they append their current result (a child of the parent node) to the children @@ -695,6 +706,7 @@ impl DefaultPhysicalPlanner { Ok(Some(plan)) } + #[tracing::instrument(level = "trace", skip(self, node, session_state, children))] /// Given a single LogicalPlan node, map it to it's physical ExecutionPlan counterpart. async fn map_logical_node_to_physical( &self, @@ -1508,6 +1520,7 @@ impl DefaultPhysicalPlanner { Ok(exec_node) } + #[tracing::instrument(level = "trace", skip(self, group_expr, input_dfschema, input_schema, session_state))] fn create_grouping_physical_expr( &self, group_expr: &[Expr], @@ -1560,6 +1573,7 @@ impl DefaultPhysicalPlanner { } } +#[tracing::instrument(level = "trace", skip(grouping_sets, input_dfschema, input_schema, session_state))] /// Expand and align a GROUPING SET expression. /// (see ) /// @@ -1618,6 +1632,7 @@ fn merge_grouping_set_physical_expr( )) } +#[tracing::instrument(level = "trace", skip(exprs, input_dfschema, input_schema, session_state))] /// Expand and align a CUBE expression. This is a special case of GROUPING SETS /// (see ) fn create_cube_physical_expr( @@ -1660,6 +1675,7 @@ fn create_cube_physical_expr( Ok(PhysicalGroupBy::new(all_exprs, null_exprs, groups)) } +#[tracing::instrument(level = "trace", skip(exprs, input_dfschema, input_schema, session_state))] /// Expand and align a ROLLUP expression. This is a special case of GROUPING SETS /// (see ) fn create_rollup_physical_expr( @@ -1705,6 +1721,7 @@ fn create_rollup_physical_expr( Ok(PhysicalGroupBy::new(all_exprs, null_exprs, groups)) } +#[tracing::instrument(level = "trace", skip(expr, input_dfschema, input_schema, session_state))] /// For a given logical expr, get a properly typed NULL ScalarValue physical expression fn get_null_physical_expr_pair( expr: &Expr, @@ -1723,6 +1740,7 @@ fn get_null_physical_expr_pair( Ok((Arc::new(null_value), physical_name)) } +#[tracing::instrument(level = "trace", skip(expr, input_dfschema, session_state))] fn get_physical_expr_pair( expr: &Expr, input_dfschema: &DFSchema, @@ -1734,6 +1752,7 @@ fn get_physical_expr_pair( Ok((physical_expr, physical_name)) } +#[tracing::instrument(level = "trace", skip(window_frame))] /// Check if window bounds are valid after schema information is available, and /// window_frame bounds are casted to the corresponding column type. 
/// queries like: @@ -1754,6 +1773,7 @@ pub fn is_window_frame_bound_valid(window_frame: &WindowFrame) -> bool { } } +#[tracing::instrument(level = "trace", skip(e, name, logical_schema, execution_props))] /// Create a window expression with a name from a logical expression pub fn create_window_expr_with_name( e: &Expr, @@ -1804,6 +1824,7 @@ pub fn create_window_expr_with_name( } } +#[tracing::instrument(level = "trace", skip(e, logical_schema, execution_props))] /// Create a window expression from a logical expression or an alias pub fn create_window_expr( e: &Expr, @@ -1826,6 +1847,7 @@ type AggregateExprWithOptionalArgs = ( Option>, ); +#[tracing::instrument(level = "trace", skip(e, name, logical_input_schema, physical_input_schema, execution_props))] /// Create an aggregate expression with a name from a logical expression pub fn create_aggregate_expr_with_name_and_maybe_filter( e: &Expr, @@ -1912,6 +1934,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( } } +#[tracing::instrument(level = "trace", skip(e, logical_input_schema, physical_input_schema, execution_props))] /// Create an aggregate expression from a logical expression or an alias pub fn create_aggregate_expr_and_maybe_filter( e: &Expr, @@ -1934,6 +1957,7 @@ pub fn create_aggregate_expr_and_maybe_filter( ) } +#[tracing::instrument(level = "trace", skip(e, input_dfschema, execution_props))] /// Create a physical sort expression from a logical expression pub fn create_physical_sort_expr( e: &Expr, @@ -1958,6 +1982,7 @@ pub fn create_physical_sort_expr( } } +#[tracing::instrument(level = "trace", skip(exprs, input_dfschema, execution_props))] /// Create vector of physical sort expression from a vector of logical expression pub fn create_physical_sort_exprs( exprs: &[Expr], @@ -1971,6 +1996,7 @@ pub fn create_physical_sort_exprs( } impl DefaultPhysicalPlanner { + #[tracing::instrument(level = "trace", skip(self, logical_plan, session_state))] /// Handles capturing the various plans for EXPLAIN queries /// /// Returns @@ -2086,6 +2112,7 @@ impl DefaultPhysicalPlanner { } } + #[tracing::instrument(level = "trace", skip(self, plan, session_state, observer))] /// Optimize a physical plan by applying each physical optimizer, /// calling observer(plan, optimizer after each one) fn optimize_internal( @@ -2143,6 +2170,7 @@ impl DefaultPhysicalPlanner { } // return an record_batch which describes a table's schema. 
+ #[tracing::instrument(level = "trace", skip(self, table_schema, output_schema))] fn plan_describe( &self, table_schema: Arc, @@ -2179,6 +2207,7 @@ impl DefaultPhysicalPlanner { Ok(Arc::new(mem_exec)) } + #[tracing::instrument(level = "trace", skip(self, session_state, input_exec, input, expr))] fn create_project_physical_exec( &self, session_state: &SessionState, @@ -2233,6 +2262,7 @@ impl DefaultPhysicalPlanner { } } +#[tracing::instrument(level = "trace", skip(value))] fn tuple_err(value: (Result, Result)) -> Result<(T, R)> { match value { (Ok(e), Ok(e1)) => Ok((e, e1)), @@ -2268,6 +2298,7 @@ mod tests { }; use datafusion_physical_expr::EquivalenceProperties; + #[tracing::instrument(level = "trace", skip())] fn make_session_state() -> SessionState { let runtime = Arc::new(RuntimeEnv::default()); let config = SessionConfig::new().with_target_partitions(4); @@ -2275,6 +2306,7 @@ mod tests { SessionState::new_with_config_rt(config, runtime) } + #[tracing::instrument(level = "trace", skip(logical_plan))] async fn plan(logical_plan: &LogicalPlan) -> Result> { let session_state = make_session_state(); // optimize the logical plan @@ -2570,6 +2602,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip())] /// Return a `null` literal representing a struct type like: `{ a: bool }` fn struct_literal() -> Expr { let struct_literal = ScalarValue::try_from(DataType::Struct( @@ -2760,6 +2793,7 @@ mod tests { #[async_trait] impl ExtensionPlanner for ErrorExtensionPlanner { + #[tracing::instrument(level = "trace", skip(self, _planner, _node, _logical_inputs, _physical_inputs, _session_state))] /// Create a physical plan for an extension node async fn plan_extension( &self, @@ -2779,6 +2813,7 @@ mod tests { } impl Default for NoOpExtensionNode { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self { schema: DFSchemaRef::new( @@ -2793,32 +2828,39 @@ mod tests { } impl Debug for NoOpExtensionNode { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "NoOp") } } impl UserDefinedLogicalNodeCore for NoOpExtensionNode { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &str { "NoOp" } + #[tracing::instrument(level = "trace", skip(self))] fn inputs(&self) -> Vec<&LogicalPlan> { vec![] } + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> &DFSchemaRef { &self.schema } + #[tracing::instrument(level = "trace", skip(self))] fn expressions(&self) -> Vec { vec![] } + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "NoOp") } + #[tracing::instrument(level = "trace", skip(self, _exprs, _inputs))] fn from_template(&self, _exprs: &[Expr], _inputs: &[LogicalPlan]) -> Self { unimplemented!("NoOp"); } @@ -2830,11 +2872,13 @@ mod tests { } impl NoOpExecutionPlan { + #[tracing::instrument(level = "trace", skip(schema))] fn new(schema: SchemaRef) -> Self { let cache = Self::compute_properties(schema.clone()); Self { cache } } + #[tracing::instrument(level = "trace", skip(schema))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); @@ -2849,6 +2893,7 @@ mod tests { } impl DisplayAs for NoOpExecutionPlan { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { @@ -2859,23 +2904,28 @@ mod tests { } impl ExecutionPlan for NoOpExecutionPlan { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "NoOpExecutionPlan" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![] } + #[tracing::instrument(level = "trace", skip(self, _children))] fn with_new_children( self: Arc, _children: Vec>, @@ -2883,6 +2933,7 @@ mod tests { unimplemented!("NoOpExecutionPlan::with_new_children"); } + #[tracing::instrument(level = "trace", skip(self, _partition, _context))] fn execute( &self, _partition: usize, @@ -2898,6 +2949,7 @@ mod tests { #[async_trait] impl ExtensionPlanner for BadExtensionPlanner { + #[tracing::instrument(level = "trace", skip(self, _planner, _node, _logical_inputs, _physical_inputs, _session_state))] /// Create a physical plan for an extension node async fn plan_extension( &self, @@ -2913,6 +2965,7 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip(name))] async fn test_csv_scan_with_name(name: &str) -> Result { let ctx = SessionContext::new(); let testdata = crate::test_util::arrow_test_data(); @@ -2937,6 +2990,7 @@ mod tests { Ok(LogicalPlanBuilder::from(logical_plan)) } + #[tracing::instrument(level = "trace", skip())] async fn test_csv_scan() -> Result { let ctx = SessionContext::new(); let testdata = crate::test_util::arrow_test_data(); diff --git a/datafusion/core/src/test/mod.rs b/datafusion/core/src/test/mod.rs index 4658b22074fa4..3df737dee2643 100644 --- a/datafusion/core/src/test/mod.rs +++ b/datafusion/core/src/test/mod.rs @@ -61,6 +61,7 @@ use xz2::write::XzEncoder; #[cfg(feature = "compression")] use zstd::Encoder as ZstdEncoder; +#[tracing::instrument(level = "trace", skip())] pub fn create_table_dual() -> Arc { let dual_schema = Arc::new(Schema::new(vec![ Field::new("id", DataType::Int32, false), @@ -78,6 +79,7 @@ pub fn create_table_dual() -> Arc { Arc::new(provider) } +#[tracing::instrument(level = "trace", skip(partitions, work_dir))] /// Returns a [`CsvExec`] that scans "aggregate_test_100.csv" with `partitions` partitions pub fn scan_partitioned_csv(partitions: usize, work_dir: &Path) -> Result> { let schema = aggr_test_schema(); @@ -102,6 +104,7 @@ pub fn scan_partitioned_csv(partitions: usize, work_dir: &Path) -> Result>`] for scanning `partitions` of `filename` pub fn partitioned_file_groups( path: &str, @@ -192,6 +195,7 @@ pub fn partitioned_file_groups( .collect::>()) } +#[tracing::instrument(level = "trace", skip(schema, file_groups))] /// Returns a [`FileScanConfig`] for given `file_groups` pub fn partitioned_csv_config( schema: SchemaRef, @@ -210,6 +214,7 @@ pub fn partitioned_csv_config( }) } +#[tracing::instrument(level = "trace", skip(plan, expected))] pub fn assert_fields_eq(plan: &LogicalPlan, expected: Vec<&str>) { let actual: Vec = plan .schema() @@ -220,11 +225,13 @@ pub fn 
assert_fields_eq(plan: &LogicalPlan, expected: Vec<&str>) { assert_eq!(actual, expected); } +#[tracing::instrument(level = "trace", skip(schema))] /// Returns the column names on the schema pub fn columns(schema: &Schema) -> Vec { schema.fields().iter().map(|f| f.name().clone()).collect() } +#[tracing::instrument(level = "trace", skip(seq_start, seq_end))] /// Return a new table provider that has a single Int32 column with /// values between `seq_start` and `seq_end` pub fn table_with_sequence( @@ -240,6 +247,7 @@ pub fn table_with_sequence( Ok(Arc::new(MemTable::try_new(schema, partitions)?)) } +#[tracing::instrument(level = "trace", skip(sz))] /// Return a RecordBatch with a single Int32 array with values (0..sz) pub fn make_partition(sz: i32) -> RecordBatch { let seq_start = 0; @@ -252,6 +260,7 @@ pub fn make_partition(sz: i32) -> RecordBatch { RecordBatch::try_new(schema, vec![arr]).unwrap() } +#[tracing::instrument(level = "trace", skip())] /// Return a new table which provide this decimal column pub fn table_with_decimal() -> Arc { let batch_decimal = make_decimal(); @@ -260,6 +269,7 @@ pub fn table_with_decimal() -> Arc { Arc::new(MemTable::try_new(schema, partitions).unwrap()) } +#[tracing::instrument(level = "trace", skip())] fn make_decimal() -> RecordBatch { let mut decimal_builder = Decimal128Builder::with_capacity(20); for i in 110000..110010 { @@ -276,6 +286,7 @@ fn make_decimal() -> RecordBatch { RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array)]).unwrap() } +#[tracing::instrument(level = "trace", skip(schema, sort_exprs))] /// Created a sorted Csv exec pub fn csv_exec_sorted( schema: &SchemaRef, @@ -309,14 +320,17 @@ pub(crate) struct TestStreamPartition { } impl PartitionStream for TestStreamPartition { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> &SchemaRef { &self.schema } + #[tracing::instrument(level = "trace", skip(self, _ctx))] fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { unreachable!() } } +#[tracing::instrument(level = "trace", skip(schema, sort_exprs))] /// Create an unbounded stream exec pub fn stream_exec_ordered( schema: &SchemaRef, @@ -339,6 +353,7 @@ pub fn stream_exec_ordered( ) } +#[tracing::instrument(level = "trace", skip(schema, sort_exprs))] /// Create a csv exec for tests pub fn csv_exec_ordered( schema: &SchemaRef, @@ -375,6 +390,7 @@ pub struct StatisticsExec { } impl StatisticsExec { + #[tracing::instrument(level = "trace", skip(stats, schema))] pub fn new(stats: Statistics, schema: Schema) -> Self { assert_eq!( stats.column_statistics.len(), schema.fields().len(), @@ -388,6 +404,7 @@ impl StatisticsExec { } } + #[tracing::instrument(level = "trace", skip(schema))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); @@ -402,6 +419,7 @@ impl StatisticsExec { } impl DisplayAs for StatisticsExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -421,18 +439,22 @@ impl DisplayAs for StatisticsExec { } impl ExecutionPlan for StatisticsExec { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![] } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -440,6 +462,7 @@ impl ExecutionPlan for StatisticsExec { Ok(self) } + #[tracing::instrument(level = "trace", skip(self, _partition, _context))] fn execute( &self, _partition: usize, @@ -448,6 +471,7 @@ impl ExecutionPlan for StatisticsExec { unimplemented!("This plan only serves for testing statistics") } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { Ok(self.stats.clone()) } diff --git a/datafusion/core/src/test/object_store.rs b/datafusion/core/src/test/object_store.rs index d6f324a7f1f95..eca59a20ff795 100644 --- a/datafusion/core/src/test/object_store.rs +++ b/datafusion/core/src/test/object_store.rs @@ -24,6 +24,7 @@ use object_store::{memory::InMemory, path::Path, ObjectMeta, ObjectStore}; use std::sync::Arc; use url::Url; +#[tracing::instrument(level = "trace", skip(ctx, files))] /// Returns a test object store with the provided `ctx` pub fn register_test_store(ctx: &SessionContext, files: &[(&str, u64)]) { let url = Url::parse("test://").unwrap(); @@ -31,6 +32,7 @@ pub fn register_test_store(ctx: &SessionContext, files: &[(&str, u64)]) { .register_object_store(&url, make_test_store_and_state(files).0); } +#[tracing::instrument(level = "trace", skip(files))] /// Create a test object store with the provided files pub fn make_test_store_and_state(files: &[(&str, u64)]) -> (Arc, SessionState) { let memory = InMemory::new(); @@ -52,6 +54,7 @@ pub fn make_test_store_and_state(files: &[(&str, u64)]) -> (Arc, Sessi ) } +#[tracing::instrument(level = "trace", skip(path))] /// Helper method to fetch the file size and date at given path and create a `ObjectMeta` pub fn local_unpartitioned_file(path: impl AsRef) -> ObjectMeta { let location = Path::from_filesystem_path(path.as_ref()).unwrap(); diff --git a/datafusion/core/src/test/variable.rs b/datafusion/core/src/test/variable.rs index 38207b42cb7b8..d9fb259498122 100644 --- a/datafusion/core/src/test/variable.rs +++ b/datafusion/core/src/test/variable.rs @@ -27,6 +27,7 @@ use arrow::datatypes::DataType; pub struct SystemVar {} impl SystemVar { + #[tracing::instrument(level = "trace", skip())] /// new system variable pub fn new() -> Self { Self {} @@ -34,12 +35,14 @@ impl SystemVar { } impl VarProvider for SystemVar { + #[tracing::instrument(level = "trace", skip(self, var_names))] /// get system variable value fn get_value(&self, var_names: Vec) -> Result { let s = format!("{}-{}", "system-var", var_names.concat()); Ok(ScalarValue::from(s)) } + #[tracing::instrument(level = "trace", skip(self))] fn get_type(&self, _: &[String]) -> Option { Some(DataType::Utf8) } @@ -50,6 +53,7 @@ impl VarProvider for SystemVar { pub struct UserDefinedVar {} impl UserDefinedVar { + #[tracing::instrument(level = "trace", skip())] /// new user 
defined variable pub fn new() -> Self { Self {} @@ -57,6 +61,7 @@ impl UserDefinedVar { } impl VarProvider for UserDefinedVar { + #[tracing::instrument(level = "trace", skip(self, var_names))] /// Get user defined variable value fn get_value(&self, var_names: Vec) -> Result { if var_names[0] != "@integer" { @@ -67,6 +72,7 @@ impl VarProvider for UserDefinedVar { } } + #[tracing::instrument(level = "trace", skip(self, var_names))] fn get_type(&self, var_names: &[String]) -> Option { if var_names[0] != "@integer" { Some(DataType::Utf8) diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index 75ef364d01fda..2bb34a0d8c669 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -57,6 +57,7 @@ use tempfile::TempDir; pub use datafusion_common::test_util::parquet_test_data; pub use datafusion_common::test_util::{arrow_test_data, get_data_dir}; +#[tracing::instrument(level = "trace", skip(name, table_schema, projection))] /// Scan an empty data source, mainly used in tests pub fn scan_empty( name: Option<&str>, @@ -69,6 +70,7 @@ pub fn scan_empty( LogicalPlanBuilder::scan(name, provider_as_source(provider), projection) } +#[tracing::instrument(level = "trace", skip(name, table_schema, projection, partitions))] /// Scan an empty data source with configured partition, mainly used in tests. pub fn scan_empty_with_partitions( name: Option<&str>, @@ -82,6 +84,7 @@ pub fn scan_empty_with_partitions( LogicalPlanBuilder::scan(name, provider_as_source(provider), projection) } +#[tracing::instrument(level = "trace", skip())] /// Get the schema for the aggregate_test_* csv files pub fn aggr_test_schema() -> SchemaRef { let mut f1 = Field::new("c1", DataType::Utf8, false); @@ -105,6 +108,7 @@ pub fn aggr_test_schema() -> SchemaRef { Arc::new(schema) } +#[tracing::instrument(level = "trace", skip(ctx, table_name))] /// Register session context for the aggregate_test_100.csv file pub async fn register_aggregate_csv( ctx: &mut SessionContext, @@ -121,6 +125,7 @@ pub async fn register_aggregate_csv( Ok(()) } +#[tracing::instrument(level = "trace", skip(name))] /// Create a table from the aggregate_test_100.csv file with the specified name pub async fn test_table_with_name(name: &str) -> Result { let mut ctx = SessionContext::new(); @@ -128,11 +133,13 @@ pub async fn test_table_with_name(name: &str) -> Result { ctx.table(name).await } +#[tracing::instrument(level = "trace", skip())] /// Create a table from the aggregate_test_100.csv file with the name "aggregate_test_100" pub async fn test_table() -> Result { test_table_with_name("aggregate_test_100").await } +#[tracing::instrument(level = "trace", skip(ctx, sql))] /// Execute SQL and return results pub async fn plan_and_collect( ctx: &SessionContext, @@ -141,6 +148,7 @@ pub async fn plan_and_collect( ctx.sql(sql).await?.collect().await } +#[tracing::instrument(level = "trace", skip(tmp_dir, partition_count, file_extension))] /// Generate CSV partitions within the supplied directory pub fn populate_csv_partitions( tmp_dir: &TempDir, @@ -175,6 +183,7 @@ pub struct TestTableFactory {} #[async_trait] impl TableProviderFactory for TestTableFactory { + #[tracing::instrument(level = "trace", skip(self, cmd))] async fn create( &self, _: &SessionState, @@ -199,18 +208,22 @@ impl TestTableProvider {} #[async_trait] impl TableProvider for TestTableProvider { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", 
skip(self))] fn schema(&self) -> SchemaRef { self.schema.clone() } + #[tracing::instrument(level = "trace", skip(self))] fn table_type(&self) -> TableType { unimplemented!("TestTableProvider is a stub for testing.") } + #[tracing::instrument(level = "trace", skip(self, _state, _projection, _filters, _limit))] async fn scan( &self, _state: &SessionState, @@ -230,6 +243,7 @@ pub struct UnboundedExec { cache: PlanProperties, } impl UnboundedExec { + #[tracing::instrument(level = "trace", skip(batch_produce, batch, partitions))] /// Create new exec that clones the given record batch to its output. /// /// Set `batch_produce` to `Some(n)` to emit exactly `n` batches per partition. @@ -246,6 +260,7 @@ impl UnboundedExec { } } + #[tracing::instrument(level = "trace", skip(schema, batch_produce, n_partitions))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties( schema: SchemaRef, @@ -267,6 +282,7 @@ impl UnboundedExec { } impl DisplayAs for UnboundedExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -285,18 +301,22 @@ impl DisplayAs for UnboundedExec { } impl ExecutionPlan for UnboundedExec { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![] } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -304,6 +324,7 @@ impl ExecutionPlan for UnboundedExec { Ok(self) } + #[tracing::instrument(level = "trace", skip(self, _partition, _context))] fn execute( &self, _partition: usize, @@ -327,6 +348,7 @@ struct UnboundedStream { impl Stream for UnboundedStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, _cx))] fn poll_next( mut self: Pin<&mut Self>, _cx: &mut Context<'_>, @@ -342,11 +364,13 @@ impl Stream for UnboundedStream { } impl RecordBatchStream for UnboundedStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.batch.schema() } } +#[tracing::instrument(level = "trace", skip(ctx, schema, file_path, table_name, file_sort_order))] /// This function creates an unbounded sorted file for testing purposes. pub fn register_unbounded_file_with_ordering( ctx: &SessionContext, diff --git a/datafusion/core/src/test_util/parquet.rs b/datafusion/core/src/test_util/parquet.rs index 0a044aa9da008..8b1fd324b13ae 100644 --- a/datafusion/core/src/test_util/parquet.rs +++ b/datafusion/core/src/test_util/parquet.rs @@ -64,6 +64,7 @@ pub struct ParquetScanOptions { } impl ParquetScanOptions { + #[tracing::instrument(level = "trace", skip(self))] /// Returns a [`SessionConfig`] with the given options pub fn config(&self) -> SessionConfig { let mut config = ConfigOptions::new(); @@ -75,6 +76,7 @@ impl ParquetScanOptions { } impl TestParquetFile { + #[tracing::instrument(level = "trace", skip(path, props, batches))] /// Creates a new parquet file at the specified location with the /// given properties pub fn try_new( @@ -127,6 +129,7 @@ impl TestParquetFile { } impl TestParquetFile { + #[tracing::instrument(level = "trace", skip(self, ctx, maybe_filter))] /// Return a `ParquetExec` with the specified options. 
/// /// If `maybe_filter` is non-None, the ParquetExec will be filtered using @@ -192,6 +195,7 @@ impl TestParquetFile { } } + #[tracing::instrument(level = "trace", skip(plan))] /// Retrieve metrics from the parquet exec returned from `create_scan` /// /// Recursively searches for ParquetExec and returns the metrics @@ -209,11 +213,13 @@ impl TestParquetFile { None } + #[tracing::instrument(level = "trace", skip(self))] /// The schema of this parquet file pub fn schema(&self) -> SchemaRef { self.schema.clone() } + #[tracing::instrument(level = "trace", skip(self))] /// The path to the parquet file pub fn path(&self) -> &std::path::Path { self.path.as_path() diff --git a/datafusion/physical-plan/src/aggregates/group_values/bytes.rs b/datafusion/physical-plan/src/aggregates/group_values/bytes.rs index d073c8995a9bf..d56398d16f5ef 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/bytes.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/bytes.rs @@ -32,6 +32,7 @@ pub struct GroupValuesByes { } impl GroupValuesByes { + #[tracing::instrument(level = "trace", skip(output_type))] pub fn new(output_type: OutputType) -> Self { Self { map: ArrowBytesMap::new(output_type), @@ -41,6 +42,7 @@ impl GroupValuesByes { } impl GroupValues for GroupValuesByes { + #[tracing::instrument(level = "trace", skip(self, cols, groups))] fn intern( &mut self, cols: &[ArrayRef], @@ -72,18 +74,22 @@ impl GroupValues for GroupValuesByes { Ok(()) } + #[tracing::instrument(level = "trace", skip(self))] fn size(&self) -> usize { self.map.size() + std::mem::size_of::() } + #[tracing::instrument(level = "trace", skip(self))] fn is_empty(&self) -> bool { self.num_groups == 0 } + #[tracing::instrument(level = "trace", skip(self))] fn len(&self) -> usize { self.num_groups } + #[tracing::instrument(level = "trace", skip(self, emit_to))] fn emit(&mut self, emit_to: EmitTo) -> datafusion_common::Result> { // Reset the map to default, and convert it into a single array let map_contents = self.map.take().into_state(); @@ -120,6 +126,7 @@ impl GroupValues for GroupValuesByes { Ok(vec![group_values]) } + #[tracing::instrument(level = "trace", skip(self, _batch))] fn clear_shrink(&mut self, _batch: &RecordBatch) { // in theory we could potentially avoid this reallocation and clear the // contents of the maps, but for now we just reset the map from the beginning diff --git a/datafusion/physical-plan/src/aggregates/group_values/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/mod.rs index b5bc923b467d2..cc1aacf71f71f 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/mod.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/mod.rs @@ -52,6 +52,7 @@ pub trait GroupValues: Send { fn clear_shrink(&mut self, batch: &RecordBatch); } +#[tracing::instrument(level = "trace", skip(schema))] pub fn new_group_values(schema: SchemaRef) -> Result> { if schema.fields.len() == 1 { let d = schema.fields[0].data_type(); diff --git a/datafusion/physical-plan/src/aggregates/group_values/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/primitive.rs index 18d20f3c47e68..aef510b47d3d0 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/primitive.rs @@ -93,6 +93,7 @@ pub struct GroupValuesPrimitive { } impl GroupValuesPrimitive { + #[tracing::instrument(level = "trace", skip(data_type))] pub fn new(data_type: DataType) -> Self { assert!(PrimitiveArray::::is_compatible(&data_type)); Self { @@ 
-109,6 +110,7 @@ impl GroupValues for GroupValuesPrimitive where T::Native: HashValue, { + #[tracing::instrument(level = "trace", skip(self, cols, groups))] fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> Result<()> { assert_eq!(cols.len(), 1); groups.clear(); @@ -148,19 +150,24 @@ where Ok(()) } + #[tracing::instrument(level = "trace", skip(self))] fn size(&self) -> usize { self.map.capacity() * std::mem::size_of::() + self.values.allocated_size() } + #[tracing::instrument(level = "trace", skip(self))] fn is_empty(&self) -> bool { self.values.is_empty() } + #[tracing::instrument(level = "trace", skip(self))] fn len(&self) -> usize { self.values.len() } + #[tracing::instrument(level = "trace", skip(self, emit_to))] fn emit(&mut self, emit_to: EmitTo) -> Result> { + #[tracing::instrument(level = "trace", skip(values, null_idx))] fn build_primitive( values: Vec, null_idx: Option, @@ -208,6 +215,7 @@ where Ok(vec![Arc::new(array.with_data_type(self.data_type.clone()))]) } + #[tracing::instrument(level = "trace", skip(self, batch))] fn clear_shrink(&mut self, batch: &RecordBatch) { let count = batch.num_rows(); self.values.clear(); diff --git a/datafusion/physical-plan/src/aggregates/group_values/row.rs b/datafusion/physical-plan/src/aggregates/group_values/row.rs index 3b7480cd292a9..94268ee67ccbf 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/row.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/row.rs @@ -67,6 +67,7 @@ pub struct GroupValuesRows { } impl GroupValuesRows { + #[tracing::instrument(level = "trace", skip(schema))] pub fn try_new(schema: SchemaRef) -> Result { let row_converter = RowConverter::new( schema @@ -91,6 +92,7 @@ impl GroupValuesRows { } impl GroupValues for GroupValuesRows { + #[tracing::instrument(level = "trace", skip(self, cols, groups))] fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec) -> Result<()> { // Convert the group keys into the row format // Avoid reallocation when https://github.com/apache/arrow-rs/issues/4479 is available @@ -145,6 +147,7 @@ impl GroupValues for GroupValuesRows { Ok(()) } + #[tracing::instrument(level = "trace", skip(self))] fn size(&self) -> usize { let group_values_size = self.group_values.as_ref().map(|v| v.size()).unwrap_or(0); self.row_converter.size() @@ -153,10 +156,12 @@ impl GroupValues for GroupValuesRows { + self.hashes_buffer.allocated_size() } + #[tracing::instrument(level = "trace", skip(self))] fn is_empty(&self) -> bool { self.len() == 0 } + #[tracing::instrument(level = "trace", skip(self))] fn len(&self) -> usize { self.group_values .as_ref() @@ -164,6 +169,7 @@ impl GroupValues for GroupValuesRows { .unwrap_or(0) } + #[tracing::instrument(level = "trace", skip(self, emit_to))] fn emit(&mut self, emit_to: EmitTo) -> Result> { let mut group_values = self .group_values @@ -221,6 +227,7 @@ impl GroupValues for GroupValuesRows { Ok(output) } + #[tracing::instrument(level = "trace", skip(self, batch))] fn clear_shrink(&mut self, batch: &RecordBatch) { let count = batch.num_rows(); self.group_values = self.group_values.take().map(|mut rows| { diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 21608db40d566..b11369629c305 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -89,6 +89,7 @@ pub enum AggregateMode { } impl AggregateMode { + #[tracing::instrument(level = "trace", skip(self))] /// Checks whether this aggregation step describes a "first 
stage" calculation. /// In other words, its input is not another aggregation result and the /// `merge_batch` method will not be called for these modes. @@ -134,6 +135,7 @@ pub struct PhysicalGroupBy { } impl PhysicalGroupBy { + #[tracing::instrument(level = "trace", skip(expr, null_expr, groups))] /// Create a new `PhysicalGroupBy` pub fn new( expr: Vec<(Arc, String)>, @@ -147,6 +149,7 @@ impl PhysicalGroupBy { } } + #[tracing::instrument(level = "trace", skip(expr))] /// Create a GROUPING SET with only a single group. This is the "standard" /// case when building a plan from an expression such as `GROUP BY a,b,c` pub fn new_single(expr: Vec<(Arc, String)>) -> Self { @@ -158,36 +161,43 @@ impl PhysicalGroupBy { } } + #[tracing::instrument(level = "trace", skip(self))] /// Returns true if this GROUP BY contains NULL expressions pub fn contains_null(&self) -> bool { self.groups.iter().flatten().any(|is_null| *is_null) } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the group expressions pub fn expr(&self) -> &[(Arc, String)] { &self.expr } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the null expressions pub fn null_expr(&self) -> &[(Arc, String)] { &self.null_expr } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the group null masks pub fn groups(&self) -> &[Vec] { &self.groups } + #[tracing::instrument(level = "trace", skip(self))] /// Returns true if this `PhysicalGroupBy` has no group expressions pub fn is_empty(&self) -> bool { self.expr.is_empty() } + #[tracing::instrument(level = "trace", skip(self))] /// Check whether grouping set is single group pub fn is_single(&self) -> bool { self.null_expr.is_empty() } + #[tracing::instrument(level = "trace", skip(self))] /// Calculate GROUP BY expressions according to input schema. pub fn input_exprs(&self) -> Vec> { self.expr @@ -196,6 +206,7 @@ impl PhysicalGroupBy { .collect() } + #[tracing::instrument(level = "trace", skip(self))] /// Return grouping expressions as they occur in the output schema. pub fn output_exprs(&self) -> Vec> { self.expr @@ -207,6 +218,7 @@ impl PhysicalGroupBy { } impl PartialEq for PhysicalGroupBy { + #[tracing::instrument(level = "trace", skip(self, other))] fn eq(&self, other: &PhysicalGroupBy) -> bool { self.expr.len() == other.expr.len() && self @@ -231,6 +243,7 @@ enum StreamType { } impl From for SendableRecordBatchStream { + #[tracing::instrument(level = "trace", skip(stream))] fn from(stream: StreamType) -> Self { match stream { StreamType::AggregateStream(stream) => Box::pin(stream), @@ -272,6 +285,7 @@ pub struct AggregateExec { } impl AggregateExec { + #[tracing::instrument(level = "trace", skip(self, required_input_ordering, aggr_expr, cache, input_order_mode))] /// Function used in `ConvertFirstLast` optimizer rule, /// where we need parts of the new value, others cloned from the old one pub fn new_with_aggr_expr_and_ordering_info( @@ -298,10 +312,12 @@ impl AggregateExec { } } + #[tracing::instrument(level = "trace", skip(self))] pub fn cache(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(mode, group_by, aggr_expr, filter_expr, input, input_schema))] /// Create a new hash aggregate execution plan pub fn try_new( mode: AggregateMode, @@ -331,6 +347,7 @@ impl AggregateExec { ) } + #[tracing::instrument(level = "trace", skip(mode, group_by, aggr_expr, filter_expr, input, input_schema, schema))] /// Create a new hash aggregate execution plan with the given schema. 
/// This constructor isn't part of the public API, it is used internally /// by Datafusion to enforce schema consistency during when re-creating @@ -418,51 +435,61 @@ impl AggregateExec { }) } + #[tracing::instrument(level = "trace", skip(self))] /// Aggregation mode (full, partial) pub fn mode(&self) -> &AggregateMode { &self.mode } + #[tracing::instrument(level = "trace", skip(self, limit))] /// Set the `limit` of this AggExec pub fn with_limit(mut self, limit: Option) -> Self { self.limit = limit; self } + #[tracing::instrument(level = "trace", skip(self))] /// Grouping expressions pub fn group_expr(&self) -> &PhysicalGroupBy { &self.group_by } + #[tracing::instrument(level = "trace", skip(self))] /// Grouping expressions as they occur in the output schema pub fn output_group_expr(&self) -> Vec> { self.group_by.output_exprs() } + #[tracing::instrument(level = "trace", skip(self))] /// Aggregate expressions pub fn aggr_expr(&self) -> &[Arc] { &self.aggr_expr } + #[tracing::instrument(level = "trace", skip(self))] /// FILTER (WHERE clause) expression for each aggregate expression pub fn filter_expr(&self) -> &[Option>] { &self.filter_expr } + #[tracing::instrument(level = "trace", skip(self))] /// Input plan pub fn input(&self) -> &Arc { &self.input } + #[tracing::instrument(level = "trace", skip(self))] /// Get the input schema before any aggregates are applied pub fn input_schema(&self) -> SchemaRef { self.input_schema.clone() } + #[tracing::instrument(level = "trace", skip(self))] /// number of rows soft limit of the AggregateExec pub fn limit(&self) -> Option { self.limit } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute_typed( &self, partition: usize, @@ -490,6 +517,7 @@ impl AggregateExec { )?)) } + #[tracing::instrument(level = "trace", skip(self))] /// Finds the DataType and SortDirection for this Aggregate, if there is one pub fn get_minmax_desc(&self) -> Option<(Field, bool)> { let agg_expr = self.aggr_expr.iter().exactly_one().ok()?; @@ -502,6 +530,7 @@ impl AggregateExec { } } + #[tracing::instrument(level = "trace", skip(self))] /// true, if this Aggregate has a group-by with no required or explicit ordering, /// no filtering and no aggregate expressions /// This method qualifies the use of the LimitedDistinctAggregation rewrite rule @@ -535,6 +564,7 @@ impl AggregateExec { true } + #[tracing::instrument(level = "trace", skip(input, schema, projection_mapping, mode, input_order_mode))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
pub fn compute_properties( input: &Arc, @@ -582,12 +612,14 @@ impl AggregateExec { PlanProperties::new(eq_properties, output_partitioning, exec_mode) } + #[tracing::instrument(level = "trace", skip(self))] pub fn input_order_mode(&self) -> &InputOrderMode { &self.input_order_mode } } impl DisplayAs for AggregateExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -665,19 +697,23 @@ impl DisplayAs for AggregateExec { } impl ExecutionPlan for AggregateExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "AggregateExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for down-casting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_distribution(&self) -> Vec { match &self.mode { AggregateMode::Partial => { @@ -692,14 +728,17 @@ impl ExecutionPlan for AggregateExec { } } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_ordering(&self) -> Vec> { vec![self.required_input_ordering.clone()] } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.input.clone()] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -718,6 +757,7 @@ impl ExecutionPlan for AggregateExec { Ok(Arc::new(me)) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -727,10 +767,12 @@ impl ExecutionPlan for AggregateExec { .map(|stream| stream.into()) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { // TODO stats: group expressions: // - once expressions will be able to compute their own stats, use it here @@ -779,6 +821,7 @@ impl ExecutionPlan for AggregateExec { } } +#[tracing::instrument(level = "trace", skip(input_schema, group_expr, aggr_expr, contains_null_expr, mode))] fn create_schema( input_schema: &Schema, group_expr: &[(Arc, String)], @@ -819,11 +862,13 @@ fn create_schema( Ok(Schema::new(fields)) } +#[tracing::instrument(level = "trace", skip(schema, group_count))] fn group_schema(schema: &Schema, group_count: usize) -> SchemaRef { let group_fields = schema.fields()[0..group_count].to_vec(); Arc::new(Schema::new(group_fields)) } +#[tracing::instrument(level = "trace", skip(aggr_expr, group_by, agg_mode))] /// Determines the lexical ordering requirement for an aggregate expression. /// /// # Parameters @@ -871,6 +916,7 @@ fn get_aggregate_expr_req( req } +#[tracing::instrument(level = "trace", skip(existing_req, aggr_expr, group_by, eq_properties, agg_mode))] /// Computes the finer ordering for between given existing ordering requirement /// of aggregate expression. /// @@ -898,11 +944,13 @@ fn finer_ordering( eq_properties.get_finer_ordering(existing_req, &aggr_req) } +#[tracing::instrument(level = "trace", skip(lhs, rhs))] /// Concatenates the given slices. pub fn concat_slices(lhs: &[T], rhs: &[T]) -> Vec { [lhs, rhs].concat() } +#[tracing::instrument(level = "trace", skip(aggr_exprs, group_by, eq_properties, agg_mode))] /// Get the common requirement that satisfies all the aggregate expressions. 
/// /// # Parameters @@ -989,6 +1037,7 @@ fn get_finer_aggregate_exprs_requirement( Ok(PhysicalSortRequirement::from_sort_exprs(&requirement)) } +#[tracing::instrument(level = "trace", skip(aggr_expr, mode, col_idx_base))] /// returns physical expressions for arguments to evaluate against a batch /// The expressions are different depending on `mode`: /// * Partial: AggregateExpr::expressions @@ -1029,6 +1078,7 @@ fn aggregate_expressions( } } +#[tracing::instrument(level = "trace", skip(index_base, expr))] /// uses `state_fields` to build a vec of physical column expressions required to merge the /// AggregateExpr' accumulator's state. /// @@ -1048,6 +1098,7 @@ fn merge_expressions( pub(crate) type AccumulatorItem = Box; +#[tracing::instrument(level = "trace", skip(aggr_expr))] fn create_accumulators( aggr_expr: &[Arc], ) -> Result> { @@ -1057,6 +1108,7 @@ fn create_accumulators( .collect() } +#[tracing::instrument(level = "trace", skip(accumulators, mode))] /// returns a vector of ArrayRefs, where each entry corresponds to either the /// final value (mode = Final, FinalPartitioned and Single) or states (mode = Partial) fn finalize_aggregation( @@ -1091,6 +1143,7 @@ fn finalize_aggregation( } } +#[tracing::instrument(level = "trace", skip(expr, batch))] /// Evaluates expressions against a record batch. fn evaluate( expr: &[Arc], @@ -1104,6 +1157,7 @@ fn evaluate( .collect() } +#[tracing::instrument(level = "trace", skip(expr, batch))] /// Evaluates expressions against a record batch. pub(crate) fn evaluate_many( expr: &[Vec>], @@ -1112,6 +1166,7 @@ pub(crate) fn evaluate_many( expr.iter().map(|expr| evaluate(expr, batch)).collect() } +#[tracing::instrument(level = "trace", skip(expr, batch))] fn evaluate_optional( expr: &[Option>], batch: &RecordBatch, @@ -1128,6 +1183,7 @@ fn evaluate_optional( .collect() } +#[tracing::instrument(level = "trace", skip(group_by, batch))] /// Evaluate a group by expression against a `RecordBatch` /// /// Arguments: @@ -1211,6 +1267,7 @@ mod tests { use futures::{FutureExt, Stream}; // Generate a schema which consists of 5 columns (a, b, c, d, e) + #[tracing::instrument(level = "trace", skip())] fn create_test_schema() -> Result { let a = Field::new("a", DataType::Int32, true); let b = Field::new("b", DataType::Int32, true); @@ -1222,6 +1279,7 @@ mod tests { Ok(schema) } + #[tracing::instrument(level = "trace", skip())] /// some mock data to aggregates fn some_data() -> (Arc, Vec) { // define a schema. @@ -1254,6 +1312,7 @@ mod tests { ) } + #[tracing::instrument(level = "trace", skip())] /// Generates some mock data for aggregate tests. 
fn some_data_v2() -> (Arc, Vec) { // Define a schema: @@ -1305,6 +1364,7 @@ mod tests { ) } + #[tracing::instrument(level = "trace", skip(batch_size, max_memory))] fn new_spill_ctx(batch_size: usize, max_memory: usize) -> Arc { let session_config = SessionConfig::new().with_batch_size(batch_size); let runtime = Arc::new( @@ -1320,6 +1380,7 @@ mod tests { Arc::new(task_ctx) } + #[tracing::instrument(level = "trace", skip(input, spill))] async fn check_grouping_sets( input: Arc, spill: bool, @@ -1475,6 +1536,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(input, spill))] /// build the aggregates on the data from some_data() and check the results async fn check_aggregates(input: Arc, spill: bool) -> Result<()> { let input_schema = input.schema(); @@ -1594,12 +1656,14 @@ mod tests { } impl TestYieldingExec { + #[tracing::instrument(level = "trace", skip(yield_first))] fn new(yield_first: bool) -> Self { let schema = some_data().0; let cache = Self::compute_properties(schema); Self { yield_first, cache } } + #[tracing::instrument(level = "trace", skip(schema))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); @@ -1614,6 +1678,7 @@ mod tests { } impl DisplayAs for TestYieldingExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -1628,22 +1693,27 @@ mod tests { } impl ExecutionPlan for TestYieldingExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "TestYieldingExec" } + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![] } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -1651,6 +1721,7 @@ mod tests { internal_err!("Children cannot be replaced in {self:?}") } + #[tracing::instrument(level = "trace", skip(self, _partition, _context))] fn execute( &self, _partition: usize, @@ -1665,6 +1736,7 @@ mod tests { Ok(Box::pin(stream)) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { let (_, batches) = some_data(); Ok(common::compute_record_batch_statistics( @@ -1686,6 +1758,7 @@ mod tests { impl Stream for TestYieldingStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: std::pin::Pin<&mut Self>, cx: &mut Context<'_>, @@ -1710,6 +1783,7 @@ mod tests { } impl RecordBatchStream for TestYieldingStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { some_data().0 } @@ -1962,6 +2036,7 @@ mod tests { // // and checks whether the function `merge_batch` works correctly for // FIRST_VALUE and LAST_VALUE functions. 
+ #[tracing::instrument(level = "trace", skip(use_coalesce_batches, is_first_acc, spill))] async fn first_last_multi_partitions( use_coalesce_batches: bool, is_first_acc: bool, diff --git a/datafusion/physical-plan/src/aggregates/no_grouping.rs b/datafusion/physical-plan/src/aggregates/no_grouping.rs index 5ec95bd799424..904b19d4a145a 100644 --- a/datafusion/physical-plan/src/aggregates/no_grouping.rs +++ b/datafusion/physical-plan/src/aggregates/no_grouping.rs @@ -65,6 +65,7 @@ struct AggregateStreamInner { } impl AggregateStream { + #[tracing::instrument(level = "trace", skip(agg, context, partition))] /// Create a new AggregateStream pub fn new( agg: &AggregateExec, @@ -170,6 +171,7 @@ impl AggregateStream { impl Stream for AggregateStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: std::pin::Pin<&mut Self>, cx: &mut Context<'_>, @@ -180,11 +182,13 @@ impl Stream for AggregateStream { } impl RecordBatchStream for AggregateStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.schema.clone() } } +#[tracing::instrument(level = "trace", skip(mode, batch, accumulators, expressions, filters))] /// Perform group-by aggregation for the given [`RecordBatch`]. /// /// If successful, this returns the additional number of bytes that were allocated during this process. diff --git a/datafusion/physical-plan/src/aggregates/order/full.rs b/datafusion/physical-plan/src/aggregates/order/full.rs index c15538e8ab8ef..f793056ebfbc6 100644 --- a/datafusion/physical-plan/src/aggregates/order/full.rs +++ b/datafusion/physical-plan/src/aggregates/order/full.rs @@ -72,6 +72,7 @@ enum State { } impl GroupOrderingFull { + #[tracing::instrument(level = "trace", skip())] pub fn new() -> Self { Self { state: State::Start, @@ -79,6 +80,7 @@ impl GroupOrderingFull { } // How many groups be emitted, or None if no data can be emitted + #[tracing::instrument(level = "trace", skip(self))] pub fn emit_to(&self) -> Option { match &self.state { State::Start => None, @@ -95,6 +97,7 @@ impl GroupOrderingFull { } } + #[tracing::instrument(level = "trace", skip(self, n))] /// remove the first n groups from the internal state, shifting /// all existing indexes down by `n` pub fn remove_groups(&mut self, n: usize) { @@ -109,11 +112,13 @@ impl GroupOrderingFull { } } + #[tracing::instrument(level = "trace", skip(self))] /// Note that the input is complete so any outstanding groups are done as well pub fn input_done(&mut self) { self.state = State::Complete; } + #[tracing::instrument(level = "trace", skip(self, total_num_groups))] /// Called when new groups are added in a batch. 
See documentation /// on [`super::GroupOrdering::new_groups`] pub fn new_groups(&mut self, total_num_groups: usize) { @@ -138,6 +143,7 @@ impl GroupOrderingFull { }; } + #[tracing::instrument(level = "trace", skip(self))] pub(crate) fn size(&self) -> usize { std::mem::size_of::() } diff --git a/datafusion/physical-plan/src/aggregates/order/mod.rs b/datafusion/physical-plan/src/aggregates/order/mod.rs index 556103e1e2228..21d449844804a 100644 --- a/datafusion/physical-plan/src/aggregates/order/mod.rs +++ b/datafusion/physical-plan/src/aggregates/order/mod.rs @@ -40,6 +40,7 @@ pub(crate) enum GroupOrdering { } impl GroupOrdering { + #[tracing::instrument(level = "trace", skip(input_schema, mode, ordering))] /// Create a `GroupOrdering` for the specified ordering pub fn try_new( input_schema: &Schema, @@ -57,6 +58,7 @@ impl GroupOrdering { } // How many groups be emitted, or None if no data can be emitted + #[tracing::instrument(level = "trace", skip(self))] pub fn emit_to(&self) -> Option { match self { GroupOrdering::None => None, @@ -65,6 +67,7 @@ impl GroupOrdering { } } + #[tracing::instrument(level = "trace", skip(self))] /// Updates the state the input is done pub fn input_done(&mut self) { match self { @@ -74,6 +77,7 @@ impl GroupOrdering { } } + #[tracing::instrument(level = "trace", skip(self, n))] /// remove the first n groups from the internal state, shifting /// all existing indexes down by `n` pub fn remove_groups(&mut self, n: usize) { @@ -84,6 +88,7 @@ impl GroupOrdering { } } + #[tracing::instrument(level = "trace", skip(self, batch_group_values, group_indices, total_num_groups))] /// Called when new groups are added in a batch /// /// * `total_num_groups`: total number of groups (so max @@ -116,6 +121,7 @@ impl GroupOrdering { Ok(()) } + #[tracing::instrument(level = "trace", skip(self))] /// Return the size of memory used by the ordering state, in bytes pub(crate) fn size(&self) -> usize { std::mem::size_of::() diff --git a/datafusion/physical-plan/src/aggregates/order/partial.rs b/datafusion/physical-plan/src/aggregates/order/partial.rs index ecd37c913e980..34d2b4d3f7f0e 100644 --- a/datafusion/physical-plan/src/aggregates/order/partial.rs +++ b/datafusion/physical-plan/src/aggregates/order/partial.rs @@ -102,6 +102,7 @@ enum State { } impl GroupOrderingPartial { + #[tracing::instrument(level = "trace", skip(input_schema, order_indices, ordering))] pub fn try_new( input_schema: &Schema, order_indices: &[usize], @@ -128,6 +129,7 @@ impl GroupOrderingPartial { }) } + #[tracing::instrument(level = "trace", skip(self, group_values))] /// Creates sort keys from the group values /// /// For example, if group_values had `A, B, C` but the input was @@ -144,6 +146,7 @@ impl GroupOrderingPartial { Ok(self.row_converter.convert_columns(&sort_values)?) 
} + #[tracing::instrument(level = "trace", skip(self))] /// How many groups be emitted, or None if no data can be emitted pub fn emit_to(&self) -> Option { match &self.state { @@ -163,6 +166,7 @@ impl GroupOrderingPartial { } } + #[tracing::instrument(level = "trace", skip(self, n))] /// remove the first n groups from the internal state, shifting /// all existing indexes down by `n` pub fn remove_groups(&mut self, n: usize) { @@ -184,6 +188,7 @@ impl GroupOrderingPartial { } } + #[tracing::instrument(level = "trace", skip(self))] /// Note that the input is complete so any outstanding groups are done as well pub fn input_done(&mut self) { self.state = match self.state { @@ -192,6 +197,7 @@ impl GroupOrderingPartial { }; } + #[tracing::instrument(level = "trace", skip(self, batch_group_values, group_indices, total_num_groups))] /// Called when new groups are added in a batch. See documentation /// on [`super::GroupOrdering::new_groups`] pub fn new_groups( @@ -241,6 +247,7 @@ impl GroupOrderingPartial { Ok(()) } + #[tracing::instrument(level = "trace", skip(self))] /// Return the size of memory allocated by this structure pub(crate) fn size(&self) -> usize { std::mem::size_of::() diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index ad0860b93a3a5..5ffd239f2f0af 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -277,6 +277,7 @@ pub(crate) struct GroupedHashAggregateStream { } impl GroupedHashAggregateStream { + #[tracing::instrument(level = "trace", skip(agg, context, partition))] /// Create a new GroupedHashAggregateStream pub fn new( agg: &AggregateExec, @@ -387,6 +388,7 @@ impl GroupedHashAggregateStream { } } +#[tracing::instrument(level = "trace", skip(agg_expr))] /// Create an accumulator for `agg_expr` -- a [`GroupsAccumulator`] if /// that is supported by the aggregate, or a /// [`GroupsAccumulatorAdapter`] if not. @@ -420,6 +422,7 @@ macro_rules! extract_ok { impl Stream for GroupedHashAggregateStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: std::pin::Pin<&mut Self>, cx: &mut Context<'_>, @@ -514,12 +517,14 @@ impl Stream for GroupedHashAggregateStream { } impl RecordBatchStream for GroupedHashAggregateStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.schema.clone() } } impl GroupedHashAggregateStream { + #[tracing::instrument(level = "trace", skip(self, batch))] /// Perform group-by aggregation for the given [`RecordBatch`]. 
fn group_aggregate_batch(&mut self, batch: RecordBatch) -> Result<()> { // Evaluate the grouping expressions @@ -612,6 +617,7 @@ impl GroupedHashAggregateStream { } } + #[tracing::instrument(level = "trace", skip(self))] fn update_memory_reservation(&mut self) -> Result<()> { let acc = self.accumulators.iter().map(|x| x.size()).sum::(); self.reservation.try_resize( @@ -621,6 +627,7 @@ impl GroupedHashAggregateStream { ) } + #[tracing::instrument(level = "trace", skip(self, emit_to, spilling))] /// Create an output RecordBatch with the group keys and /// accumulator states/values specified in emit_to fn emit(&mut self, emit_to: EmitTo, spilling: bool) -> Result { @@ -661,6 +668,7 @@ impl GroupedHashAggregateStream { Ok(batch) } + #[tracing::instrument(level = "trace", skip(self, batch))] /// Optimistically, [`Self::group_aggregate_batch`] allows to exceed the memory target slightly /// (~ 1 [`RecordBatch`]) for simplicity. In such cases, spill the data to disk and clear the /// memory. Currently only [`GroupOrdering::None`] is supported for spilling. @@ -682,6 +690,7 @@ impl GroupedHashAggregateStream { Ok(()) } + #[tracing::instrument(level = "trace", skip(self))] /// Emit all rows, sort them, and store them on disk. fn spill(&mut self) -> Result<()> { let emit = self.emit(EmitTo::All, true)?; @@ -704,6 +713,7 @@ impl GroupedHashAggregateStream { Ok(()) } + #[tracing::instrument(level = "trace", skip(self, batch))] /// Clear memory and shirk capacities to the size of the batch. fn clear_shrink(&mut self, batch: &RecordBatch) { self.group_values.clear_shrink(batch); @@ -711,12 +721,14 @@ impl GroupedHashAggregateStream { self.current_group_indices.shrink_to(batch.num_rows()); } + #[tracing::instrument(level = "trace", skip(self))] /// Clear memory and shirk capacities to zero. fn clear_all(&mut self) { let s = self.schema(); self.clear_shrink(&RecordBatch::new_empty(s)); } + #[tracing::instrument(level = "trace", skip(self))] /// Emit if the used memory exceeds the target for partial aggregation. /// Currently only [`GroupOrdering::None`] is supported for early emitting. /// TODO: support group_ordering for early emitting @@ -733,6 +745,7 @@ impl GroupedHashAggregateStream { Ok(()) } + #[tracing::instrument(level = "trace", skip(self))] /// At this point, all the inputs are read and there are some spills. /// Emit the remaining rows and create a batch. /// Conduct a streaming merge sort between the batch and spilled data. 
Since the stream is fully @@ -770,6 +783,7 @@ impl GroupedHashAggregateStream { Ok(()) } + #[tracing::instrument(level = "trace", skip(self))] /// returns true if there is a soft groups limit and the number of distinct /// groups we have seen is over that limit fn hit_soft_group_limit(&self) -> bool { @@ -779,6 +793,7 @@ impl GroupedHashAggregateStream { group_values_soft_limit <= self.group_values.len() } + #[tracing::instrument(level = "trace", skip(self))] /// common function for signalling end of processing of the input stream fn set_input_done_and_produce_output(&mut self) -> Result<()> { self.input_done = true; diff --git a/datafusion/physical-plan/src/aggregates/topk/hash_table.rs b/datafusion/physical-plan/src/aggregates/topk/hash_table.rs index bae4c6133b9f7..f0d37d7c4584c 100644 --- a/datafusion/physical-plan/src/aggregates/topk/hash_table.rs +++ b/datafusion/physical-plan/src/aggregates/topk/hash_table.rs @@ -102,6 +102,7 @@ where } impl StringHashTable { + #[tracing::instrument(level = "trace", skip(limit))] pub fn new(limit: usize) -> Self { let vals: Vec<&str> = Vec::new(); let owned = Arc::new(StringArray::from(vals)); @@ -114,27 +115,33 @@ impl StringHashTable { } impl ArrowHashTable for StringHashTable { + #[tracing::instrument(level = "trace", skip(self, ids))] fn set_batch(&mut self, ids: ArrayRef) { self.owned = ids; } + #[tracing::instrument(level = "trace", skip(self))] fn len(&self) -> usize { self.map.len() } + #[tracing::instrument(level = "trace", skip(self, mapper))] unsafe fn update_heap_idx(&mut self, mapper: &[(usize, usize)]) { self.map.update_heap_idx(mapper); } + #[tracing::instrument(level = "trace", skip(self, map_idx))] unsafe fn heap_idx_at(&self, map_idx: usize) -> usize { self.map.heap_idx_at(map_idx) } + #[tracing::instrument(level = "trace", skip(self, indexes))] unsafe fn take_all(&mut self, indexes: Vec) -> ArrayRef { let ids = self.map.take_all(indexes); Arc::new(StringArray::from(ids)) } + #[tracing::instrument(level = "trace", skip(self, row_idx, replace_idx, mapper))] unsafe fn find_or_insert( &mut self, row_idx: usize, @@ -175,6 +182,7 @@ where Option<::Native>: Comparable, Option<::Native>: HashValue, { + #[tracing::instrument(level = "trace", skip(limit))] pub fn new(limit: usize) -> Self { let owned = Arc::new(PrimitiveArray::::builder(0).finish()); Self { @@ -190,22 +198,27 @@ where Option<::Native>: Comparable, Option<::Native>: HashValue, { + #[tracing::instrument(level = "trace", skip(self, ids))] fn set_batch(&mut self, ids: ArrayRef) { self.owned = ids; } + #[tracing::instrument(level = "trace", skip(self))] fn len(&self) -> usize { self.map.len() } + #[tracing::instrument(level = "trace", skip(self, mapper))] unsafe fn update_heap_idx(&mut self, mapper: &[(usize, usize)]) { self.map.update_heap_idx(mapper); } + #[tracing::instrument(level = "trace", skip(self, map_idx))] unsafe fn heap_idx_at(&self, map_idx: usize) -> usize { self.map.heap_idx_at(map_idx) } + #[tracing::instrument(level = "trace", skip(self, indexes))] unsafe fn take_all(&mut self, indexes: Vec) -> ArrayRef { let ids = self.map.take_all(indexes); let mut builder: PrimitiveBuilder = PrimitiveArray::builder(ids.len()); @@ -219,6 +232,7 @@ where Arc::new(ids) } + #[tracing::instrument(level = "trace", skip(self, row_idx, replace_idx, mapper))] unsafe fn find_or_insert( &mut self, row_idx: usize, @@ -247,6 +261,7 @@ where } impl TopKHashTable { + #[tracing::instrument(level = "trace", skip(limit, capacity))] pub fn new(limit: usize, capacity: usize) -> Self { Self { 
map: RawTable::with_capacity(capacity), @@ -254,6 +269,7 @@ impl TopKHashTable { } } + #[tracing::instrument(level = "trace", skip(self, hash, eq))] pub fn find(&self, hash: u64, mut eq: impl FnMut(&ID) -> bool) -> Option { let bucket = self.map.find(hash, |mi| eq(&mi.id))?; // JUSTIFICATION @@ -263,11 +279,13 @@ impl TopKHashTable { Some(idx) } + #[tracing::instrument(level = "trace", skip(self, map_idx))] pub unsafe fn heap_idx_at(&self, map_idx: usize) -> usize { let bucket = unsafe { self.map.bucket(map_idx) }; bucket.as_ref().heap_idx } + #[tracing::instrument(level = "trace", skip(self, replace_idx))] pub unsafe fn remove_if_full(&mut self, replace_idx: usize) -> usize { if self.map.len() >= self.limit { self.map.erase(self.map.bucket(replace_idx)); @@ -277,12 +295,14 @@ impl TopKHashTable { } } + #[tracing::instrument(level = "trace", skip(self, mapper))] unsafe fn update_heap_idx(&mut self, mapper: &[(usize, usize)]) { for (m, h) in mapper { self.map.bucket(*m).as_mut().heap_idx = *h } } + #[tracing::instrument(level = "trace", skip(self, hash, id, heap_idx, mapper))] pub fn insert( &mut self, hash: u64, @@ -315,10 +335,12 @@ impl TopKHashTable { unsafe { self.map.bucket_index(&bucket) } } + #[tracing::instrument(level = "trace", skip(self))] pub fn len(&self) -> usize { self.map.len() } + #[tracing::instrument(level = "trace", skip(self, idxs))] pub unsafe fn take_all(&mut self, idxs: Vec) -> Vec { let ids = idxs .into_iter() @@ -330,12 +352,14 @@ impl TopKHashTable { } impl HashTableItem { + #[tracing::instrument(level = "trace", skip(hash, id, heap_idx))] pub fn new(hash: u64, id: ID, heap_idx: usize) -> Self { Self { hash, id, heap_idx } } } impl HashValue for Option { + #[tracing::instrument(level = "trace", skip(self, state))] fn hash(&self, state: &RandomState) -> u64 { state.hash_one(self) } @@ -365,6 +389,7 @@ has_integer!(i8, i16, i32, i64, i128, i256); has_integer!(u8, u16, u32, u64); hash_float!(f16, f32, f64); +#[tracing::instrument(level = "trace", skip(limit, kt))] pub fn new_hash_table(limit: usize, kt: DataType) -> Result> { macro_rules! 
downcast_helper { ($kt:ty, $d:ident) => { diff --git a/datafusion/physical-plan/src/aggregates/topk/heap.rs b/datafusion/physical-plan/src/aggregates/topk/heap.rs index 41826ed728539..334a7c342c640 100644 --- a/datafusion/physical-plan/src/aggregates/topk/heap.rs +++ b/datafusion/physical-plan/src/aggregates/topk/heap.rs @@ -35,6 +35,7 @@ pub trait Comparable { } impl Comparable for Option { + #[tracing::instrument(level = "trace", skip(self, other))] fn comp(&self, other: &Self) -> Ordering { self.cmp(other) } @@ -95,6 +96,7 @@ impl PrimitiveHeap where ::Native: Comparable, { + #[tracing::instrument(level = "trace", skip(limit, desc, data_type))] pub fn new(limit: usize, desc: bool, data_type: DataType) -> Self { let owned: ArrayRef = Arc::new(PrimitiveArray::::builder(0).finish()); Self { @@ -110,10 +112,12 @@ impl ArrowHeap for PrimitiveHeap where ::Native: Comparable, { + #[tracing::instrument(level = "trace", skip(self, vals))] fn set_batch(&mut self, vals: ArrayRef) { self.batch = vals; } + #[tracing::instrument(level = "trace", skip(self, row_idx))] fn is_worse(&self, row_idx: usize) -> bool { if !self.heap.is_full() { return false; @@ -124,20 +128,24 @@ where (!self.desc && new_val > *worst_val) || (self.desc && new_val < *worst_val) } + #[tracing::instrument(level = "trace", skip(self))] fn worst_map_idx(&self) -> usize { self.heap.worst_map_idx() } + #[tracing::instrument(level = "trace", skip(self, heap_to_map))] fn renumber(&mut self, heap_to_map: &[(usize, usize)]) { self.heap.renumber(heap_to_map); } + #[tracing::instrument(level = "trace", skip(self, row_idx, map_idx, map))] fn insert(&mut self, row_idx: usize, map_idx: usize, map: &mut Vec<(usize, usize)>) { let vals = self.batch.as_primitive::(); let new_val = vals.value(row_idx); self.heap.append_or_replace(new_val, map_idx, map); } + #[tracing::instrument(level = "trace", skip(self, heap_idx, row_idx, map))] fn replace_if_better( &mut self, heap_idx: usize, @@ -149,6 +157,7 @@ where self.heap.replace_if_better(heap_idx, new_val, map); } + #[tracing::instrument(level = "trace", skip(self))] fn drain(&mut self) -> (ArrayRef, Vec) { let (vals, map_idxs) = self.heap.drain(); let vals = Arc::new(PrimitiveArray::::from_iter_values(vals)); @@ -158,6 +167,7 @@ where } impl TopKHeap { + #[tracing::instrument(level = "trace", skip(limit, desc))] pub fn new(limit: usize, desc: bool) -> Self { Self { desc, @@ -167,6 +177,7 @@ impl TopKHeap { } } + #[tracing::instrument(level = "trace", skip(self))] pub fn worst_val(&self) -> Option<&VAL> { let root = self.heap.first()?; let hi = match root { @@ -176,18 +187,22 @@ impl TopKHeap { Some(&hi.val) } + #[tracing::instrument(level = "trace", skip(self))] pub fn worst_map_idx(&self) -> usize { self.heap[0].as_ref().map(|hi| hi.map_idx).unwrap_or(0) } + #[tracing::instrument(level = "trace", skip(self))] pub fn is_full(&self) -> bool { self.len >= self.capacity } + #[tracing::instrument(level = "trace", skip(self))] pub fn len(&self) -> usize { self.len } + #[tracing::instrument(level = "trace", skip(self, new_val, map_idx, map))] pub fn append_or_replace( &mut self, new_val: VAL, @@ -201,6 +216,7 @@ impl TopKHeap { } } + #[tracing::instrument(level = "trace", skip(self, new_val, map_idx, mapper))] fn append(&mut self, new_val: VAL, map_idx: usize, mapper: &mut Vec<(usize, usize)>) { let hi = HeapItem::new(new_val, map_idx); self.heap[self.len] = Some(hi); @@ -208,6 +224,7 @@ impl TopKHeap { self.len += 1; } + #[tracing::instrument(level = "trace", skip(self, map))] fn pop(&mut self, map: 
&mut Vec<(usize, usize)>) -> Option> { if self.len() == 0 { return None; @@ -223,6 +240,7 @@ impl TopKHeap { former_root } + #[tracing::instrument(level = "trace", skip(self))] pub fn drain(&mut self) -> (Vec, Vec) { let mut map = Vec::with_capacity(self.len); let mut vals = Vec::with_capacity(self.len); @@ -236,6 +254,7 @@ impl TopKHeap { (vals, map_idxs) } + #[tracing::instrument(level = "trace", skip(self, new_val, map_idx, mapper))] fn replace_root( &mut self, new_val: VAL, @@ -248,6 +267,7 @@ impl TopKHeap { self.heapify_down(0, mapper); } + #[tracing::instrument(level = "trace", skip(self, heap_idx, new_val, mapper))] pub fn replace_if_better( &mut self, heap_idx: usize, @@ -264,6 +284,7 @@ impl TopKHeap { self.heapify_down(heap_idx, mapper); } + #[tracing::instrument(level = "trace", skip(self, heap_to_map))] pub fn renumber(&mut self, heap_to_map: &[(usize, usize)]) { for (heap_idx, map_idx) in heap_to_map.iter() { if let Some(Some(hi)) = self.heap.get_mut(*heap_idx) { @@ -272,6 +293,7 @@ impl TopKHeap { } } + #[tracing::instrument(level = "trace", skip(self, idx, mapper))] fn heapify_up(&mut self, mut idx: usize, mapper: &mut Vec<(usize, usize)>) { let desc = self.desc; while idx != 0 { @@ -288,6 +310,7 @@ impl TopKHeap { } } + #[tracing::instrument(level = "trace", skip(self, a_idx, b_idx, mapper))] fn swap(&mut self, a_idx: usize, b_idx: usize, mapper: &mut Vec<(usize, usize)>) { let a_hi = self.heap[a_idx].take().expect("Missing heap entry"); let b_hi = self.heap[b_idx].take().expect("Missing heap entry"); @@ -299,6 +322,7 @@ impl TopKHeap { self.heap[b_idx] = Some(a_hi); } + #[tracing::instrument(level = "trace", skip(self, node_idx, mapper))] fn heapify_down(&mut self, node_idx: usize, mapper: &mut Vec<(usize, usize)>) { let left_child = node_idx * 2 + 1; let desc = self.desc; @@ -322,6 +346,7 @@ impl TopKHeap { } } + #[tracing::instrument(level = "trace", skip(self, idx))] #[cfg(test)] fn _tree_print(&self, idx: usize) -> Option> { let hi = self.heap.get(idx)?; @@ -339,6 +364,7 @@ impl TopKHeap { } } + #[tracing::instrument(level = "trace", skip(self))] #[cfg(test)] fn tree_print(&self) -> String { match self._tree_print(0) { @@ -349,6 +375,7 @@ impl TopKHeap { } impl HeapItem { + #[tracing::instrument(level = "trace", skip(val, buk_idx))] pub fn new(val: VAL, buk_idx: usize) -> Self { Self { val, @@ -358,6 +385,7 @@ impl HeapItem { } impl Debug for HeapItem { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.write_str("bucket=")?; self.map_idx.fmt(f)?; @@ -371,18 +399,21 @@ impl Debug for HeapItem { impl Eq for HeapItem {} impl PartialEq for HeapItem { + #[tracing::instrument(level = "trace", skip(self, other))] fn eq(&self, other: &Self) -> bool { self.cmp(other) == Ordering::Equal } } impl PartialOrd for HeapItem { + #[tracing::instrument(level = "trace", skip(self, other))] fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for HeapItem { + #[tracing::instrument(level = "trace", skip(self, other))] fn cmp(&self, other: &Self) -> Ordering { let res = self.val.comp(&other.val); if res != Ordering::Equal { @@ -433,6 +464,7 @@ compare_integer!(i8, i16, i32, i64, i128, i256); compare_integer!(u8, u16, u32, u64); compare_float!(f16, f32, f64); +#[tracing::instrument(level = "trace", skip(limit, desc, vt))] pub fn new_heap(limit: usize, desc: bool, vt: DataType) -> Result> { macro_rules! 
downcast_helper { ($vt:ty, $d:ident) => { diff --git a/datafusion/physical-plan/src/aggregates/topk/priority_map.rs b/datafusion/physical-plan/src/aggregates/topk/priority_map.rs index 668018b9c24ca..f17a766131c8d 100644 --- a/datafusion/physical-plan/src/aggregates/topk/priority_map.rs +++ b/datafusion/physical-plan/src/aggregates/topk/priority_map.rs @@ -37,6 +37,7 @@ pub struct PriorityMap { unsafe impl Send for PriorityMap {} impl PriorityMap { + #[tracing::instrument(level = "trace", skip(key_type, val_type, capacity, descending))] pub fn new( key_type: DataType, val_type: DataType, @@ -51,11 +52,13 @@ impl PriorityMap { }) } + #[tracing::instrument(level = "trace", skip(self, ids, vals))] pub fn set_batch(&mut self, ids: ArrayRef, vals: ArrayRef) { self.map.set_batch(ids); self.heap.set_batch(vals); } + #[tracing::instrument(level = "trace", skip(self, row_idx))] pub fn insert(&mut self, row_idx: usize) -> Result<()> { assert!(self.map.len() <= self.capacity, "Overflow"); @@ -99,12 +102,14 @@ impl PriorityMap { Ok(()) } + #[tracing::instrument(level = "trace", skip(self))] pub fn emit(&mut self) -> Result> { let (vals, map_idxs) = self.heap.drain(); let ids = unsafe { self.map.take_all(map_idxs) }; Ok(vec![ids, vals]) } + #[tracing::instrument(level = "trace", skip(self))] pub fn is_empty(&self) -> bool { self.map.len() == 0 } @@ -371,6 +376,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip())] fn test_schema() -> SchemaRef { Arc::new(Schema::new(vec![ Field::new("trace_id", DataType::Utf8, true), diff --git a/datafusion/physical-plan/src/aggregates/topk_stream.rs b/datafusion/physical-plan/src/aggregates/topk_stream.rs index 9f25473cb9b42..1b4f5747a6207 100644 --- a/datafusion/physical-plan/src/aggregates/topk_stream.rs +++ b/datafusion/physical-plan/src/aggregates/topk_stream.rs @@ -48,6 +48,7 @@ pub struct GroupedTopKAggregateStream { } impl GroupedTopKAggregateStream { + #[tracing::instrument(level = "trace", skip(aggr, context, partition, limit))] pub fn new( aggr: &AggregateExec, context: Arc, @@ -83,12 +84,14 @@ impl GroupedTopKAggregateStream { } impl RecordBatchStream for GroupedTopKAggregateStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.schema.clone() } } impl GroupedTopKAggregateStream { + #[tracing::instrument(level = "trace", skip(self, ids, vals))] fn intern(&mut self, ids: ArrayRef, vals: ArrayRef) -> Result<()> { let len = ids.len(); self.priority_map.set_batch(ids, vals.clone()); @@ -107,6 +110,7 @@ impl GroupedTopKAggregateStream { impl Stream for GroupedTopKAggregateStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: Pin<&mut Self>, cx: &mut Context<'_>, diff --git a/datafusion/physical-plan/src/analyze.rs b/datafusion/physical-plan/src/analyze.rs index c420581c4323e..b3c80a05c2309 100644 --- a/datafusion/physical-plan/src/analyze.rs +++ b/datafusion/physical-plan/src/analyze.rs @@ -52,6 +52,7 @@ pub struct AnalyzeExec { } impl AnalyzeExec { + #[tracing::instrument(level = "trace", skip(verbose, show_statistics, input, schema))] /// Create a new AnalyzeExec pub fn new( verbose: bool, @@ -69,21 +70,25 @@ impl AnalyzeExec { } } + #[tracing::instrument(level = "trace", skip(self))] /// access to verbose pub fn verbose(&self) -> bool { self.verbose } + #[tracing::instrument(level = "trace", skip(self))] /// access to show_statistics pub fn show_statistics(&self) -> bool { self.show_statistics } + #[tracing::instrument(level = "trace", 
skip(self))] /// The input plan pub fn input(&self) -> &Arc { &self.input } + #[tracing::instrument(level = "trace", skip(input, schema))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties( input: &Arc, @@ -97,6 +102,7 @@ impl AnalyzeExec { } impl DisplayAs for AnalyzeExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -111,28 +117,34 @@ impl DisplayAs for AnalyzeExec { } impl ExecutionPlan for AnalyzeExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "AnalyzeExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.input.clone()] } + #[tracing::instrument(level = "trace", skip(self))] /// AnalyzeExec is handled specially so this value is ignored fn required_input_distribution(&self) -> Vec { vec![] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, mut children: Vec>, @@ -145,6 +157,7 @@ impl ExecutionPlan for AnalyzeExec { ))) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -202,6 +215,7 @@ impl ExecutionPlan for AnalyzeExec { } } +#[tracing::instrument(level = "trace", skip(verbose, show_statistics, total_rows, duration, input, schema))] /// Creates the ouput of AnalyzeExec as a RecordBatch fn create_output_batch( verbose: bool, diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index bc7c4a3d06730..c1dc99f92cec0 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -52,6 +52,7 @@ pub struct CoalesceBatchesExec { } impl CoalesceBatchesExec { + #[tracing::instrument(level = "trace", skip(input, target_batch_size))] /// Create a new CoalesceBatchesExec pub fn new(input: Arc, target_batch_size: usize) -> Self { let cache = Self::compute_properties(&input); @@ -63,16 +64,19 @@ impl CoalesceBatchesExec { } } + #[tracing::instrument(level = "trace", skip(self))] /// The input plan pub fn input(&self) -> &Arc { &self.input } + #[tracing::instrument(level = "trace", skip(self))] /// Minimum number of rows for coalesces batches pub fn target_batch_size(&self) -> usize { self.target_batch_size } + #[tracing::instrument(level = "trace", skip(input))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties(input: &Arc) -> PlanProperties { // The coalesce batches operator does not make any changes to the @@ -86,6 +90,7 @@ impl CoalesceBatchesExec { } impl DisplayAs for CoalesceBatchesExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -104,31 +109,38 @@ impl DisplayAs for CoalesceBatchesExec { } impl ExecutionPlan for CoalesceBatchesExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "CoalesceBatchesExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.input.clone()] } + #[tracing::instrument(level = "trace", skip(self))] fn maintains_input_order(&self) -> Vec { vec![true] } + #[tracing::instrument(level = "trace", skip(self))] fn benefits_from_input_partitioning(&self) -> Vec { vec![false] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -139,6 +151,7 @@ impl ExecutionPlan for CoalesceBatchesExec { ))) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -155,10 +168,12 @@ impl ExecutionPlan for CoalesceBatchesExec { })) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { self.input.statistics() } @@ -184,6 +199,7 @@ struct CoalesceBatchesStream { impl Stream for CoalesceBatchesStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -192,6 +208,7 @@ impl Stream for CoalesceBatchesStream { self.baseline_metrics.record_poll(poll) } + #[tracing::instrument(level = "trace", skip(self))] fn size_hint(&self) -> (usize, Option) { // we can't predict the size of incoming batches so re-use the size hint from the input self.input.size_hint() @@ -199,6 +216,7 @@ impl Stream for CoalesceBatchesStream { } impl CoalesceBatchesStream { + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next_inner( self: &mut Pin<&mut Self>, cx: &mut Context<'_>, @@ -271,11 +289,13 @@ impl CoalesceBatchesStream { } impl RecordBatchStream for CoalesceBatchesStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.schema.clone() } } +#[tracing::instrument(level = "trace", skip(schema, batches, row_count))] /// Concatenates an array of `RecordBatch` into one batch pub fn concat_batches( schema: &SchemaRef, @@ -298,6 +318,7 @@ mod tests { use arrow::datatypes::{DataType, Field, Schema}; use arrow_array::UInt32Array; + #[tracing::instrument(level = "trace", skip())] #[tokio::test(flavor = "multi_thread")] async fn test_concat_batches() -> Result<()> { let schema = test_schema(); @@ -319,10 +340,12 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip())] fn test_schema() -> Arc { Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt32, false)])) } + #[tracing::instrument(level = "trace", skip(schema, input_partitions, target_batch_size))] async fn coalesce_batches( schema: &SchemaRef, input_partitions: Vec>, @@ -351,6 +374,7 @@ mod tests { 
Ok(output_partitions) } + #[tracing::instrument(level = "trace", skip(schema, n))] /// Create vector batches fn create_vec_batches(schema: &Schema, n: usize) -> Vec { let batch = create_batch(schema); @@ -361,6 +385,7 @@ mod tests { vec } + #[tracing::instrument(level = "trace", skip(schema))] /// Create batch fn create_batch(schema: &Schema) -> RecordBatch { RecordBatch::try_new( diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index 1c725ce31f146..23b334e82051d 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -45,6 +45,7 @@ pub struct CoalescePartitionsExec { } impl CoalescePartitionsExec { + #[tracing::instrument(level = "trace", skip(input))] /// Create a new CoalescePartitionsExec pub fn new(input: Arc) -> Self { let cache = Self::compute_properties(&input); @@ -55,11 +56,13 @@ impl CoalescePartitionsExec { } } + #[tracing::instrument(level = "trace", skip(self))] /// Input execution plan pub fn input(&self) -> &Arc { &self.input } + #[tracing::instrument(level = "trace", skip(input))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(input: &Arc) -> PlanProperties { // Coalescing partitions loses existing orderings: @@ -75,6 +78,7 @@ impl CoalescePartitionsExec { } impl DisplayAs for CoalescePartitionsExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -89,27 +93,33 @@ impl DisplayAs for CoalescePartitionsExec { } impl ExecutionPlan for CoalescePartitionsExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "CoalescePartitionsExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.input.clone()] } + #[tracing::instrument(level = "trace", skip(self))] fn benefits_from_input_partitioning(&self) -> Vec { vec![false] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -117,6 +127,7 @@ impl ExecutionPlan for CoalescePartitionsExec { Ok(Arc::new(CoalescePartitionsExec::new(children[0].clone()))) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -161,10 +172,12 @@ impl ExecutionPlan for CoalescePartitionsExec { } } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { self.input.statistics() } diff --git a/datafusion/physical-plan/src/common.rs b/datafusion/physical-plan/src/common.rs index c61e9a05bfa6e..1ecce4fcb675b 100644 --- a/datafusion/physical-plan/src/common.rs +++ b/datafusion/physical-plan/src/common.rs @@ -42,11 +42,13 @@ use parking_lot::Mutex; /// [`MemoryReservation`] used across query execution streams pub(crate) type SharedMemoryReservation = Arc>; +#[tracing::instrument(level = "trace", skip(stream))] /// Create a vector of record batches from a stream pub async fn collect(stream: SendableRecordBatchStream) -> Result> { 
stream.try_collect::>().await } +#[tracing::instrument(level = "trace", skip(dir, ext))] /// Recursively builds a list of files in a directory with a given extension pub fn build_checked_file_list(dir: &str, ext: &str) -> Result> { let mut filenames: Vec = Vec::new(); @@ -57,6 +59,7 @@ pub fn build_checked_file_list(dir: &str, ext: &str) -> Result> { Ok(filenames) } +#[tracing::instrument(level = "trace", skip(dir, ext))] /// Recursively builds a list of files in a directory with a given extension pub fn build_file_list(dir: &str, ext: &str) -> Result> { let mut filenames: Vec = Vec::new(); @@ -64,6 +67,7 @@ pub fn build_file_list(dir: &str, ext: &str) -> Result> { Ok(filenames) } +#[tracing::instrument(level = "trace", skip(dir, filenames, ext))] /// Recursively build a list of files in a directory with a given extension with an accumulator list fn build_file_list_recurse( dir: &str, @@ -93,6 +97,7 @@ fn build_file_list_recurse( Ok(()) } +#[tracing::instrument(level = "trace", skip(input, buffer))] /// If running in a tokio context spawns the execution of `stream` to a separate task /// allowing it to execute in parallel with an intermediate buffer of size `buffer` pub(crate) fn spawn_buffered( @@ -126,6 +131,7 @@ pub(crate) fn spawn_buffered( } } +#[tracing::instrument(level = "trace", skip(batches, schema, projection))] /// Computes the statistics for an in-memory RecordBatch /// /// Only computes statistics that are in arrows metadata (num rows, byte size and nulls) @@ -178,6 +184,7 @@ pub fn compute_record_batch_statistics( } } +#[tracing::instrument(level = "trace", skip(given))] /// Calculates the "meet" of given orderings. /// The meet is the finest ordering that satisfied by all the given /// orderings, see . @@ -191,6 +198,7 @@ pub fn get_meet_of_orderings( .and_then(get_meet_of_orderings_helper) } +#[tracing::instrument(level = "trace", skip(orderings))] fn get_meet_of_orderings_helper( orderings: Vec<&[PhysicalSortExpr]>, ) -> Option<&[PhysicalSortExpr]> { @@ -215,6 +223,7 @@ fn get_meet_of_orderings_helper( idx += 1; } + #[tracing::instrument(level = "trace", skip(first, second))] fn check_expr_alignment(first: &dyn PhysicalExpr, second: &dyn PhysicalExpr) -> bool { match ( first.as_any().downcast_ref::(), @@ -258,6 +267,7 @@ pub struct IPCWriter { } impl IPCWriter { + #[tracing::instrument(level = "trace", skip(path, schema))] /// Create new writer pub fn new(path: &Path, schema: &Schema) -> Result { let file = File::create(path).map_err(|e| { @@ -274,6 +284,7 @@ impl IPCWriter { }) } + #[tracing::instrument(level = "trace", skip(path, schema, write_options))] /// Create new writer with IPC write options pub fn new_with_options( path: &Path, @@ -293,6 +304,7 @@ impl IPCWriter { writer: FileWriter::try_new_with_options(file, schema, write_options)?, }) } + #[tracing::instrument(level = "trace", skip(self, batch))] /// Write one single batch pub fn write(&mut self, batch: &RecordBatch) -> Result<()> { self.writer.write(batch)?; @@ -303,17 +315,20 @@ impl IPCWriter { Ok(()) } + #[tracing::instrument(level = "trace", skip(self))] /// Finish the writer pub fn finish(&mut self) -> Result<()> { self.writer.finish().map_err(Into::into) } + #[tracing::instrument(level = "trace", skip(self))] /// Path write to pub fn path(&self) -> &Path { &self.path } } +#[tracing::instrument(level = "trace", skip(schema, projection))] /// Checks if the given projection is valid for the given schema. 
pub fn can_project( schema: &arrow_schema::SchemaRef, diff --git a/datafusion/physical-plan/src/display.rs b/datafusion/physical-plan/src/display.rs index ca93ce5e7b833..9579208f9759e 100644 --- a/datafusion/physical-plan/src/display.rs +++ b/datafusion/physical-plan/src/display.rs @@ -46,6 +46,7 @@ pub struct DisplayableExecutionPlan<'a> { } impl<'a> DisplayableExecutionPlan<'a> { + #[tracing::instrument(level = "trace", skip(inner))] /// Create a wrapper around an [`ExecutionPlan`] which can be /// pretty printed in a variety of ways pub fn new(inner: &'a dyn ExecutionPlan) -> Self { @@ -56,6 +57,7 @@ impl<'a> DisplayableExecutionPlan<'a> { } } + #[tracing::instrument(level = "trace", skip(inner))] /// Create a wrapper around an [`ExecutionPlan`] which can be /// pretty printed in a variety of ways that also shows aggregated /// metrics @@ -67,6 +69,7 @@ impl<'a> DisplayableExecutionPlan<'a> { } } + #[tracing::instrument(level = "trace", skip(inner))] /// Create a wrapper around an [`ExecutionPlan`] which can be /// pretty printed in a variety of ways that also shows all low /// level metrics @@ -78,12 +81,14 @@ impl<'a> DisplayableExecutionPlan<'a> { } } + #[tracing::instrument(level = "trace", skip(self, show_statistics))] /// Enable display of statistics pub fn set_show_statistics(mut self, show_statistics: bool) -> Self { self.show_statistics = show_statistics; self } + #[tracing::instrument(level = "trace", skip(self, verbose))] /// Return a `format`able structure that produces a single line /// per node. /// @@ -107,6 +112,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_statistics: bool, } impl<'a> fmt::Display for Wrapper<'a> { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let mut visitor = IndentVisitor { t: self.format_type, @@ -126,6 +132,7 @@ impl<'a> DisplayableExecutionPlan<'a> { } } + #[tracing::instrument(level = "trace", skip(self))] /// Returns a `format`able structure that produces graphviz format for execution plan, which can /// be directly visualized [here](https://dreampuf.github.io/GraphvizOnline). /// @@ -144,6 +151,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_statistics: bool, } impl<'a> fmt::Display for Wrapper<'a> { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let t = DisplayFormatType::Default; @@ -172,6 +180,7 @@ impl<'a> DisplayableExecutionPlan<'a> { } } + #[tracing::instrument(level = "trace", skip(self))] /// Return a single-line summary of the root of the plan /// Example: `ProjectionExec: expr=[a@0 as a]`. 
pub fn one_line(&self) -> impl fmt::Display + 'a { @@ -182,6 +191,7 @@ impl<'a> DisplayableExecutionPlan<'a> { } impl<'a> fmt::Display for Wrapper<'a> { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let mut visitor = IndentVisitor { f, @@ -202,6 +212,7 @@ impl<'a> DisplayableExecutionPlan<'a> { } } + #[tracing::instrument(level = "trace", skip(self, verbose, plan_type))] /// format as a `StringifiedPlan` pub fn to_stringified(&self, verbose: bool, plan_type: PlanType) -> StringifiedPlan { StringifiedPlan::new(plan_type, self.indent(verbose).to_string()) @@ -236,6 +247,7 @@ struct IndentVisitor<'a, 'b> { impl<'a, 'b> ExecutionPlanVisitor for IndentVisitor<'a, 'b> { type Error = fmt::Error; + #[tracing::instrument(level = "trace", skip(self, plan))] fn pre_visit(&mut self, plan: &dyn ExecutionPlan) -> Result { write!(self.f, "{:indent$}", "", indent = self.indent * 2)?; plan.fmt_as(self.t, self.f)?; @@ -270,6 +282,7 @@ impl<'a, 'b> ExecutionPlanVisitor for IndentVisitor<'a, 'b> { Ok(true) } + #[tracing::instrument(level = "trace", skip(self, _plan))] fn post_visit(&mut self, _plan: &dyn ExecutionPlan) -> Result { self.indent -= 1; Ok(true) @@ -291,10 +304,12 @@ struct GraphvizVisitor<'a, 'b> { } impl GraphvizVisitor<'_, '_> { + #[tracing::instrument(level = "trace", skip(self))] fn start_graph(&mut self) -> fmt::Result { self.graphviz_builder.start_graph(self.f) } + #[tracing::instrument(level = "trace", skip(self))] fn end_graph(&mut self) -> fmt::Result { self.graphviz_builder.end_graph(self.f) } @@ -303,12 +318,14 @@ impl GraphvizVisitor<'_, '_> { impl ExecutionPlanVisitor for GraphvizVisitor<'_, '_> { type Error = fmt::Error; + #[tracing::instrument(level = "trace", skip(self, plan))] fn pre_visit(&mut self, plan: &dyn ExecutionPlan) -> Result { let id = self.graphviz_builder.next_id(); struct Wrapper<'a>(&'a dyn ExecutionPlan, DisplayFormatType); impl<'a> std::fmt::Display for Wrapper<'a> { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.0.fmt_as(self.1, f) } @@ -369,6 +386,7 @@ impl ExecutionPlanVisitor for GraphvizVisitor<'_, '_> { Ok(true) } + #[tracing::instrument(level = "trace", skip(self, _plan))] fn post_visit(&mut self, _plan: &dyn ExecutionPlan) -> Result { self.parents.pop(); Ok(true) @@ -388,6 +406,7 @@ pub trait DisplayAs { pub struct DefaultDisplay(pub T); impl fmt::Display for DefaultDisplay { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.0.fmt_as(DisplayFormatType::Default, f) } @@ -397,6 +416,7 @@ impl fmt::Display for DefaultDisplay { pub struct VerboseDisplay(pub T); impl fmt::Display for VerboseDisplay { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.0.fmt_as(DisplayFormatType::Verbose, f) } @@ -407,6 +427,7 @@ impl fmt::Display for VerboseDisplay { pub struct ProjectSchemaDisplay<'a>(pub &'a SchemaRef); impl<'a> fmt::Display for ProjectSchemaDisplay<'a> { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let parts: Vec<_> = self .0 @@ -423,6 +444,7 @@ impl<'a> fmt::Display for ProjectSchemaDisplay<'a> { pub struct OutputOrderingDisplay<'a>(pub &'a [PhysicalSortExpr]); impl<'a> fmt::Display for OutputOrderingDisplay<'a> { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut fmt::Formatter) -> 
fmt::Result { write!(f, "[")?; for (i, e) in self.0.iter().enumerate() { @@ -435,6 +457,7 @@ impl<'a> fmt::Display for OutputOrderingDisplay<'a> { } } +#[tracing::instrument(level = "trace", skip(f, orderings))] pub fn display_orderings(f: &mut Formatter, orderings: &[LexOrdering]) -> fmt::Result { if let Some(ordering) = orderings.first() { if !ordering.is_empty() { @@ -479,6 +502,7 @@ mod tests { } impl DisplayAs for TestStatsExecPlan { + #[tracing::instrument(level = "trace", skip(self, _t, f))] fn fmt_as( &self, _t: crate::DisplayFormatType, @@ -489,22 +513,27 @@ mod tests { } impl ExecutionPlan for TestStatsExecPlan { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "TestStatsExecPlan" } + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn std::any::Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![] } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -512,6 +541,7 @@ mod tests { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self))] fn execute( &self, _: usize, @@ -520,6 +550,7 @@ mod tests { todo!() } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { match self { Self::Panic => panic!("expected panic"), @@ -531,6 +562,7 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip(exec, show_stats))] fn test_stats_display(exec: TestStatsExecPlan, show_stats: bool) { let display = DisplayableExecutionPlan::new(&exec).set_show_statistics(show_stats); diff --git a/datafusion/physical-plan/src/empty.rs b/datafusion/physical-plan/src/empty.rs index 33bf1668b3c91..70cdbd2c8f888 100644 --- a/datafusion/physical-plan/src/empty.rs +++ b/datafusion/physical-plan/src/empty.rs @@ -45,6 +45,7 @@ pub struct EmptyExec { } impl EmptyExec { + #[tracing::instrument(level = "trace", skip(schema))] /// Create a new EmptyExec pub fn new(schema: SchemaRef) -> Self { let cache = Self::compute_properties(schema.clone(), 1); @@ -55,6 +56,7 @@ impl EmptyExec { } } + #[tracing::instrument(level = "trace", skip(self, partitions))] /// Create a new EmptyExec with specified partition number pub fn with_partitions(mut self, partitions: usize) -> Self { self.partitions = partitions; @@ -64,14 +66,17 @@ impl EmptyExec { self } + #[tracing::instrument(level = "trace", skip(self))] fn data(&self) -> Result> { Ok(vec![]) } + #[tracing::instrument(level = "trace", skip(n_partitions))] fn output_partitioning_helper(n_partitions: usize) -> Partitioning { Partitioning::UnknownPartitioning(n_partitions) } + #[tracing::instrument(level = "trace", skip(schema, n_partitions))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties(schema: SchemaRef, n_partitions: usize) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); @@ -87,6 +92,7 @@ impl EmptyExec { } impl DisplayAs for EmptyExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -101,23 +107,28 @@ impl DisplayAs for EmptyExec { } impl ExecutionPlan for EmptyExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "EmptyExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![] } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -125,6 +136,7 @@ impl ExecutionPlan for EmptyExec { Ok(self) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -147,6 +159,7 @@ impl ExecutionPlan for EmptyExec { )?)) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { let batch = self .data() diff --git a/datafusion/physical-plan/src/explain.rs b/datafusion/physical-plan/src/explain.rs index 649946993229b..1a946632aaf28 100644 --- a/datafusion/physical-plan/src/explain.rs +++ b/datafusion/physical-plan/src/explain.rs @@ -47,6 +47,7 @@ pub struct ExplainExec { } impl ExplainExec { + #[tracing::instrument(level = "trace", skip(schema, stringified_plans, verbose))] /// Create a new ExplainExec pub fn new( schema: SchemaRef, @@ -62,16 +63,19 @@ impl ExplainExec { } } + #[tracing::instrument(level = "trace", skip(self))] /// The strings to be printed pub fn stringified_plans(&self) -> &[StringifiedPlan] { &self.stringified_plans } + #[tracing::instrument(level = "trace", skip(self))] /// access to verbose pub fn verbose(&self) -> bool { self.verbose } + #[tracing::instrument(level = "trace", skip(schema))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); @@ -84,6 +88,7 @@ impl ExplainExec { } impl DisplayAs for ExplainExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -98,24 +103,29 @@ impl DisplayAs for ExplainExec { } impl ExecutionPlan for ExplainExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "ExplainExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { // this is a leaf node and has no children vec![] } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -123,6 +133,7 @@ impl ExecutionPlan for ExplainExec { Ok(self) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -177,6 +188,7 @@ impl ExecutionPlan for ExplainExec { } } +#[tracing::instrument(level = "trace", skip(previous_plan, this_plan))] /// If this plan should be shown, given the previous plan that was /// displayed. /// diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 6729e3b9e6030..bca7f686660d6 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -66,6 +66,7 @@ pub struct FilterExec { } impl FilterExec { + #[tracing::instrument(level = "trace", skip(predicate, input))] /// Create a FilterExec on an input pub fn try_new( predicate: Arc, @@ -90,6 +91,7 @@ impl FilterExec { } } + #[tracing::instrument(level = "trace", skip(self, default_selectivity))] pub fn with_default_selectivity( mut self, default_selectivity: u8, @@ -101,21 +103,25 @@ impl FilterExec { Ok(self) } + #[tracing::instrument(level = "trace", skip(self))] /// The expression to filter on. This expression must evaluate to a boolean value. pub fn predicate(&self) -> &Arc { &self.predicate } + #[tracing::instrument(level = "trace", skip(self))] /// The input plan pub fn input(&self) -> &Arc { &self.input } + #[tracing::instrument(level = "trace", skip(self))] /// The default selectivity pub fn default_selectivity(&self) -> u8 { self.default_selectivity } + #[tracing::instrument(level = "trace", skip(input, predicate, default_selectivity))] /// Calculates `Statistics` for `FilterExec`, by applying selectivity (either default, or estimated) to input statistics. fn statistics_helper( input: &Arc, @@ -159,6 +165,7 @@ impl FilterExec { }) } + #[tracing::instrument(level = "trace", skip(input, predicate))] fn extend_constants( input: &Arc, predicate: &Arc, @@ -180,6 +187,7 @@ impl FilterExec { } res_constants } + #[tracing::instrument(level = "trace", skip(input, predicate, default_selectivity))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties( input: &Arc, @@ -215,6 +223,7 @@ impl FilterExec { } impl DisplayAs for FilterExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -229,28 +238,34 @@ impl DisplayAs for FilterExec { } impl ExecutionPlan for FilterExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "FilterExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.input.clone()] } + #[tracing::instrument(level = "trace", skip(self))] fn maintains_input_order(&self) -> Vec { // tell optimizer this operator doesn't reorder its input vec![true] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, mut children: Vec>, @@ -263,6 +278,7 @@ impl ExecutionPlan for FilterExec { .map(|e| Arc::new(e) as _) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -278,10 +294,12 @@ impl ExecutionPlan for FilterExec { })) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] /// The output statistics of a filtering operation can be estimated if the /// predicate's selectivity value can be determined for the incoming data. fn statistics(&self) -> Result { @@ -289,6 +307,7 @@ impl ExecutionPlan for FilterExec { } } +#[tracing::instrument(level = "trace", skip(input_column_stats, analysis_boundaries))] /// This function ensures that all bounds in the `ExprBoundaries` vector are /// converted to closed bounds. 
If a lower/upper bound is initially open, it /// is adjusted by using the next/previous value for its data type to convert @@ -339,6 +358,7 @@ struct FilterExecStream { baseline_metrics: BaselineMetrics, } +#[tracing::instrument(level = "trace", skip(batch, predicate))] pub(crate) fn batch_filter( batch: &RecordBatch, predicate: &Arc, @@ -356,6 +376,7 @@ pub(crate) fn batch_filter( impl Stream for FilterExecStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -389,6 +410,7 @@ impl Stream for FilterExecStream { self.baseline_metrics.record_poll(poll) } + #[tracing::instrument(level = "trace", skip(self))] fn size_hint(&self) -> (usize, Option) { // same number of record batches self.input.size_hint() @@ -396,11 +418,13 @@ impl Stream for FilterExecStream { } impl RecordBatchStream for FilterExecStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.schema.clone() } } +#[tracing::instrument(level = "trace", skip(predicate))] /// Return the equals Column-Pairs and Non-equals Column-Pairs fn collect_columns_from_predicate(predicate: &Arc) -> EqualAndNonEqual { let mut eq_predicate_columns = Vec::::new(); diff --git a/datafusion/physical-plan/src/insert.rs b/datafusion/physical-plan/src/insert.rs index 259db644ae0a3..733feea722692 100644 --- a/datafusion/physical-plan/src/insert.rs +++ b/datafusion/physical-plan/src/insert.rs @@ -95,12 +95,14 @@ pub struct DataSinkExec { } impl fmt::Debug for DataSinkExec { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "DataSinkExec schema: {:?}", self.count_schema) } } impl DataSinkExec { + #[tracing::instrument(level = "trace", skip(input, sink, sink_schema, sort_order))] /// Create a plan to write to `sink` pub fn new( input: Arc, @@ -120,6 +122,7 @@ impl DataSinkExec { } } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute_input_stream( &self, partition: usize, @@ -160,26 +163,31 @@ impl DataSinkExec { } } + #[tracing::instrument(level = "trace", skip(self))] /// Input execution plan pub fn input(&self) -> &Arc { &self.input } + #[tracing::instrument(level = "trace", skip(self))] /// Returns insert sink pub fn sink(&self) -> &dyn DataSink { self.sink.as_ref() } + #[tracing::instrument(level = "trace", skip(self))] /// Optional sort order for output data pub fn sort_order(&self) -> &Option> { &self.sort_order } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the metrics of the underlying [DataSink] pub fn metrics(&self) -> Option { self.sink.metrics() } + #[tracing::instrument(level = "trace", skip(input, schema))] fn create_schema( input: &Arc, schema: SchemaRef, @@ -194,6 +202,7 @@ impl DataSinkExec { } impl DisplayAs for DataSinkExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -209,37 +218,44 @@ impl DisplayAs for DataSinkExec { } impl ExecutionPlan for DataSinkExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "DataSinkExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn benefits_from_input_partitioning(&self) -> 
Vec { // DataSink is responsible for dynamically partitioning its // own input at execution time. vec![false] } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_distribution(&self) -> Vec { // DataSink is responsible for dynamically partitioning its // own input at execution time, and so requires a single input partition. vec![Distribution::SinglePartition; self.children().len()] } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_ordering(&self) -> Vec>> { // The required input ordering is set externally (e.g. by a `ListingTable`). // Otherwise, there is no specific requirement (i.e. `sort_expr` is `None`). vec![self.sort_order.as_ref().cloned()] } + #[tracing::instrument(level = "trace", skip(self))] fn maintains_input_order(&self) -> Vec { // Maintains ordering in the sense that the written file will reflect // the ordering of the input. For more context, see: @@ -248,10 +264,12 @@ impl ExecutionPlan for DataSinkExec { vec![true] } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.input.clone()] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -264,6 +282,7 @@ impl ExecutionPlan for DataSinkExec { ))) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] /// Execute the plan and return a stream of `RecordBatch`es for /// the specified partition. fn execute( @@ -291,6 +310,7 @@ impl ExecutionPlan for DataSinkExec { } } +#[tracing::instrument(level = "trace", skip(count))] /// Create a output record batch with a count /// /// ```text @@ -306,6 +326,7 @@ fn make_count_batch(count: u64) -> RecordBatch { RecordBatch::try_from_iter_with_nullable(vec![("count", array, false)]).unwrap() } +#[tracing::instrument(level = "trace", skip())] fn make_count_schema() -> SchemaRef { // define a schema. Arc::new(Schema::new(vec![Field::new( @@ -315,6 +336,7 @@ fn make_count_schema() -> SchemaRef { )])) } +#[tracing::instrument(level = "trace", skip(batch, column_indices))] fn check_not_null_contraits( batch: RecordBatch, column_indices: &Vec, diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 9d1de3715f54f..1a7c319261cf1 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -67,6 +67,7 @@ pub struct CrossJoinExec { } impl CrossJoinExec { + #[tracing::instrument(level = "trace", skip(left, right))] /// Create a new [CrossJoinExec]. pub fn new(left: Arc, right: Arc) -> Self { // left then right @@ -90,16 +91,19 @@ impl CrossJoinExec { } } + #[tracing::instrument(level = "trace", skip(self))] /// left (build) side which gets loaded in memory pub fn left(&self) -> &Arc { &self.left } + #[tracing::instrument(level = "trace", skip(self))] /// right side which gets combined with left side pub fn right(&self) -> &Arc { &self.right } + #[tracing::instrument(level = "trace", skip(left, right, schema))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties( left: &Arc, @@ -138,6 +142,7 @@ impl CrossJoinExec { } } +#[tracing::instrument(level = "trace", skip(left, context, metrics, reservation))] /// Asynchronously collect the result of the left child async fn load_left_input( left: Arc, @@ -181,6 +186,7 @@ async fn load_left_input( } impl DisplayAs for CrossJoinExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -195,26 +201,32 @@ impl DisplayAs for CrossJoinExec { } impl ExecutionPlan for CrossJoinExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "CrossJoinExec" } + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.left.clone(), self.right.clone()] } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -225,6 +237,7 @@ impl ExecutionPlan for CrossJoinExec { ))) } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_distribution(&self) -> Vec { vec![ Distribution::SinglePartition, @@ -232,6 +245,7 @@ impl ExecutionPlan for CrossJoinExec { ] } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -265,6 +279,7 @@ impl ExecutionPlan for CrossJoinExec { })) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { Ok(stats_cartesian_product( self.left.statistics()?, @@ -273,6 +288,7 @@ impl ExecutionPlan for CrossJoinExec { } } +#[tracing::instrument(level = "trace", skip(left_stats, right_stats))] /// [left/right]_col_count are required in case the column statistics are None fn stats_cartesian_product( left_stats: Statistics, @@ -336,6 +352,7 @@ struct CrossJoinStream { } impl RecordBatchStream for CrossJoinStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.schema.clone() } @@ -350,6 +367,7 @@ enum CrossJoinStreamState { } impl CrossJoinStreamState { + #[tracing::instrument(level = "trace", skip(self))] /// Tries to extract RecordBatch from CrossJoinStreamState enum. /// Returns an error if state is not BuildBatches state. fn try_as_record_batch(&mut self) -> Result<&RecordBatch> { @@ -360,6 +378,7 @@ impl CrossJoinStreamState { } } +#[tracing::instrument(level = "trace", skip(left_index, batch, left_data, schema))] fn build_batch( left_index: usize, batch: &RecordBatch, @@ -392,6 +411,7 @@ fn build_batch( impl Stream for CrossJoinStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: std::pin::Pin<&mut Self>, cx: &mut std::task::Context<'_>, @@ -401,6 +421,7 @@ impl Stream for CrossJoinStream { } impl CrossJoinStream { + #[tracing::instrument(level = "trace", skip(self, cx))] /// Separate implementation function that unpins the [`CrossJoinStream`] so /// that partial borrows work correctly fn poll_next_impl( @@ -422,6 +443,7 @@ impl CrossJoinStream { } } + #[tracing::instrument(level = "trace", skip(self, cx))] /// Collects build (left) side of the join into the state. In case of an empty build batch, /// the execution terminates. Otherwise, the state is updated to fetch probe (right) batch. 
fn collect_build_side( @@ -445,6 +467,7 @@ impl CrossJoinStream { Poll::Ready(Ok(result)) } + #[tracing::instrument(level = "trace", skip(self, cx))] /// Fetches the probe (right) batch, updates the metrics, and save the batch in the state. /// Then, the state is updated to build result batches. fn fetch_probe_batch( @@ -464,6 +487,7 @@ impl CrossJoinStream { Poll::Ready(Ok(StatefulStreamResult::Continue)) } + #[tracing::instrument(level = "trace", skip(self))] /// Joins the the indexed row of left data with the current probe batch. /// If all the results are produced, the state is set to fetch new probe batch. fn build_batches(&mut self) -> Result>> { @@ -496,6 +520,7 @@ mod tests { use datafusion_common::{assert_batches_sorted_eq, assert_contains}; use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; + #[tracing::instrument(level = "trace", skip(left, right, context))] async fn join_collect( left: Arc, right: Arc, @@ -706,6 +731,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(schema))] /// Returns the column names on the schema fn columns(schema: &Schema) -> Vec { schema.fields().iter().map(|f| f.name().clone()).collect() diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index d3abedbe3806b..19daabc319d77 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -95,6 +95,7 @@ struct JoinLeftData { } impl JoinLeftData { + #[tracing::instrument(level = "trace", skip(hash_map, batch, visited_indices_bitmap, probe_threads_counter, reservation))] /// Create a new `JoinLeftData` from its parts fn new( hash_map: JoinHashMap, @@ -112,21 +113,25 @@ impl JoinLeftData { } } + #[tracing::instrument(level = "trace", skip(self))] /// return a reference to the hash map fn hash_map(&self) -> &JoinHashMap { &self.hash_map } + #[tracing::instrument(level = "trace", skip(self))] /// returns a reference to the build side batch fn batch(&self) -> &RecordBatch { &self.batch } + #[tracing::instrument(level = "trace", skip(self))] /// returns a reference to the visited indices bitmap fn visited_indices_bitmap(&self) -> &SharedBitmapBuilder { &self.visited_indices_bitmap } + #[tracing::instrument(level = "trace", skip(self))] /// Decrements the counter of running threads, and returns `true` /// if caller is the last running thread fn report_probe_completed(&self) -> bool { @@ -328,6 +333,7 @@ pub struct HashJoinExec { } impl HashJoinExec { + #[tracing::instrument(level = "trace", skip(left, right, on, filter, join_type, projection, partition_mode, null_equals_null))] /// Tries to create a new [HashJoinExec]. 
/// /// # Error @@ -389,41 +395,49 @@ impl HashJoinExec { }) } + #[tracing::instrument(level = "trace", skip(self))] /// left (build) side which gets hashed pub fn left(&self) -> &Arc { &self.left } + #[tracing::instrument(level = "trace", skip(self))] /// right (probe) side which are filtered by the hash table pub fn right(&self) -> &Arc { &self.right } + #[tracing::instrument(level = "trace", skip(self))] /// Set of common columns used to join on pub fn on(&self) -> &[(PhysicalExprRef, PhysicalExprRef)] { &self.on } + #[tracing::instrument(level = "trace", skip(self))] /// Filters applied before join output pub fn filter(&self) -> Option<&JoinFilter> { self.filter.as_ref() } + #[tracing::instrument(level = "trace", skip(self))] /// How the join is performed pub fn join_type(&self) -> &JoinType { &self.join_type } + #[tracing::instrument(level = "trace", skip(self))] /// The partitioning mode of this hash join pub fn partition_mode(&self) -> &PartitionMode { &self.mode } + #[tracing::instrument(level = "trace", skip(self))] /// Get null_equals_null pub fn null_equals_null(&self) -> bool { self.null_equals_null } + #[tracing::instrument(level = "trace", skip(join_type))] /// Calculate order preservation flags for this hash join. fn maintains_input_order(join_type: JoinType) -> Vec { vec![ @@ -435,17 +449,20 @@ impl HashJoinExec { ] } + #[tracing::instrument(level = "trace", skip())] /// Get probe side information for the hash join. pub fn probe_side() -> JoinSide { // In current implementation right side is always probe side. JoinSide::Right } + #[tracing::instrument(level = "trace", skip(self))] /// Return whether the join contains a projection pub fn contain_projection(&self) -> bool { self.projection.is_some() } + #[tracing::instrument(level = "trace", skip(self, projection))] /// Return new instance of [HashJoinExec] with the given projection. pub fn with_projection(&self, projection: Option>) -> Result { // check if the projection is valid @@ -469,6 +486,7 @@ impl HashJoinExec { ) } + #[tracing::instrument(level = "trace", skip(left, right, schema, join_type, on, mode, projection))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties( left: &Arc, @@ -569,6 +587,7 @@ impl HashJoinExec { } impl DisplayAs for HashJoinExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { @@ -610,6 +629,7 @@ impl DisplayAs for HashJoinExec { } } +#[tracing::instrument(level = "trace", skip(projection_index, schema))] fn project_index_to_exprs( projection_index: &[usize], schema: &SchemaRef, @@ -630,18 +650,22 @@ fn project_index_to_exprs( } impl ExecutionPlan for HashJoinExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "HashJoinExec" } + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_distribution(&self) -> Vec { match self.mode { PartitionMode::CollectLeft => vec![ @@ -679,14 +703,17 @@ impl ExecutionPlan for HashJoinExec { // (probe side) table that have no match in the left (build side) table. 
Because the rows // are processed sequentially in the probe phase, and unmatched rows are directly output // as results, these results tend to retain the order of the probe side table. + #[tracing::instrument(level = "trace", skip(self))] fn maintains_input_order(&self) -> Vec { Self::maintains_input_order(self.join_type) } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.left.clone(), self.right.clone()] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -703,6 +730,7 @@ impl ExecutionPlan for HashJoinExec { )?)) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -796,10 +824,12 @@ impl ExecutionPlan for HashJoinExec { })) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { // TODO stats: it is not possible in general to know the output size of joins // There are some special cases though, for example: @@ -825,6 +855,7 @@ impl ExecutionPlan for HashJoinExec { } } +#[tracing::instrument(level = "trace", skip(partition, random_state, left, on_left, context, metrics, reservation, with_visited_indices_bitmap, probe_threads_count))] /// Reads the left (build) side of the input, buffering it in memory, to build a /// hash table (`LeftJoinData`) #[allow(clippy::too_many_arguments)] @@ -941,6 +972,7 @@ async fn collect_left_input( Ok(data) } +#[tracing::instrument(level = "trace", skip(on, batch, hash_map, offset, random_state, hashes_buffer, deleted_offset, fifo_hashmap))] /// Updates `hash_map` with new entries from `batch` evaluated against the expressions `on` /// using `offset` as a start value for `batch` row indices. /// @@ -1009,6 +1041,7 @@ struct BuildSideReadyState { } impl BuildSide { + #[tracing::instrument(level = "trace", skip(self))] /// Tries to extract BuildSideInitialState from BuildSide enum. /// Returns an error if state is not Initial. fn try_as_initial_mut(&mut self) -> Result<&mut BuildSideInitialState> { @@ -1018,6 +1051,7 @@ impl BuildSide { } } + #[tracing::instrument(level = "trace", skip(self))] /// Tries to extract BuildSideReadyState from BuildSide enum. /// Returns an error if state is not Ready. fn try_as_ready(&self) -> Result<&BuildSideReadyState> { @@ -1027,6 +1061,7 @@ impl BuildSide { } } + #[tracing::instrument(level = "trace", skip(self))] /// Tries to extract BuildSideReadyState from BuildSide enum. /// Returns an error if state is not Ready. fn try_as_ready_mut(&mut self) -> Result<&mut BuildSideReadyState> { @@ -1066,6 +1101,7 @@ enum HashJoinStreamState { } impl HashJoinStreamState { + #[tracing::instrument(level = "trace", skip(self))] /// Tries to extract ProcessProbeBatchState from HashJoinStreamState enum. /// Returns an error if state is not ProcessProbeBatchState. 
fn try_as_process_probe_batch_mut(&mut self) -> Result<&mut ProcessProbeBatchState> { @@ -1087,6 +1123,7 @@ struct ProcessProbeBatchState { } impl ProcessProbeBatchState { + #[tracing::instrument(level = "trace", skip(self, offset, joined_probe_idx))] fn advance(&mut self, offset: JoinHashMapOffset, joined_probe_idx: Option) { self.offset = offset; if joined_probe_idx.is_some() { @@ -1135,11 +1172,13 @@ struct HashJoinStream { } impl RecordBatchStream for HashJoinStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.schema.clone() } } +#[tracing::instrument(level = "trace", skip(build_hashmap, build_input_buffer, probe_batch, build_on, probe_on, null_equals_null, hashes_buffer, limit, offset))] /// Executes lookups by hash against JoinHashMap and resolves potential /// hash collisions. /// Returns build/probe indices satisfying the equality condition, along with @@ -1232,6 +1271,7 @@ fn lookup_join_hashmap( } // version of eq_dyn supporting equality on null arrays +#[tracing::instrument(level = "trace", skip(left, right, null_equals_null))] fn eq_dyn_null( left: &dyn Array, right: &dyn Array, @@ -1243,6 +1283,7 @@ fn eq_dyn_null( } } +#[tracing::instrument(level = "trace", skip(indices_left, indices_right, left_arrays, right_arrays, null_equals_null))] pub fn equal_rows_arr( indices_left: &UInt64Array, indices_right: &UInt32Array, @@ -1285,6 +1326,7 @@ pub fn equal_rows_arr( )) } +#[tracing::instrument(level = "trace", skip(shared_bitmap, join_type))] fn get_final_indices_from_shared_bitmap( shared_bitmap: &SharedBitmapBuilder, join_type: JoinType, @@ -1294,6 +1336,7 @@ fn get_final_indices_from_shared_bitmap( } impl HashJoinStream { + #[tracing::instrument(level = "trace", skip(self, cx))] /// Separate implementation function that unpins the [`HashJoinStream`] so /// that partial borrows work correctly fn poll_next_impl( @@ -1319,6 +1362,7 @@ impl HashJoinStream { } } + #[tracing::instrument(level = "trace", skip(self, cx))] /// Collects build-side data by polling `OnceFut` future from initialized build-side /// /// Updates build-side to `Ready`, and state to `FetchProbeSide` @@ -1341,6 +1385,7 @@ impl HashJoinStream { Poll::Ready(Ok(StatefulStreamResult::Continue)) } + #[tracing::instrument(level = "trace", skip(self, cx))] /// Fetches next batch from probe-side /// /// If non-empty batch has been fetched, updates state to `ProcessProbeBatchState`, @@ -1381,6 +1426,7 @@ impl HashJoinStream { Poll::Ready(Ok(StatefulStreamResult::Continue)) } + #[tracing::instrument(level = "trace", skip(self))] /// Joins current probe batch with build-side data and produces batch with matched output /// /// Updates state to `FetchProbeBatch` @@ -1492,6 +1538,7 @@ impl HashJoinStream { Ok(StatefulStreamResult::Ready(Some(result))) } + #[tracing::instrument(level = "trace", skip(self))] /// Processes unmatched build-side rows for certain join types and produces output batch /// /// Updates state to `Completed` @@ -1546,6 +1593,7 @@ impl HashJoinStream { impl Stream for HashJoinStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: std::pin::Pin<&mut Self>, cx: &mut std::task::Context<'_>, @@ -1578,19 +1626,23 @@ mod tests { use rstest::*; use rstest_reuse::*; + #[tracing::instrument(level = "trace", skip(a, b))] fn div_ceil(a: usize, b: usize) -> usize { (a + b - 1) / b } + #[tracing::instrument(level = "trace", skip(batch_size))] #[template] #[rstest] fn batch_sizes(#[values(8192, 10, 5, 2, 1)] batch_size: 
usize) {} + #[tracing::instrument(level = "trace", skip(batch_size))] fn prepare_task_ctx(batch_size: usize) -> Arc { let session_config = SessionConfig::default().with_batch_size(batch_size); Arc::new(TaskContext::default().with_session_config(session_config)) } + #[tracing::instrument(level = "trace", skip(a, b, c))] fn build_table( a: (&str, &Vec), b: (&str, &Vec), @@ -1601,6 +1653,7 @@ mod tests { Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None).unwrap()) } + #[tracing::instrument(level = "trace", skip(left, right, on, join_type, null_equals_null))] fn join( left: Arc, right: Arc, @@ -1620,6 +1673,7 @@ mod tests { ) } + #[tracing::instrument(level = "trace", skip(left, right, on, filter, join_type, null_equals_null))] fn join_with_filter( left: Arc, right: Arc, @@ -1640,6 +1694,7 @@ mod tests { ) } + #[tracing::instrument(level = "trace", skip(left, right, on, join_type, null_equals_null, context))] async fn join_collect( left: Arc, right: Arc, @@ -1657,6 +1712,7 @@ mod tests { Ok((columns_header, batches)) } + #[tracing::instrument(level = "trace", skip(left, right, on, join_type, null_equals_null, context))] async fn partitioned_join_collect( left: Arc, right: Arc, @@ -1677,6 +1733,7 @@ mod tests { .await } + #[tracing::instrument(level = "trace", skip(left, right, on, join_type, partition_mode, null_equals_null, context))] async fn join_collect_with_partition_mode( left: Arc, right: Arc, @@ -2168,6 +2225,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(a, b, c))] fn build_table_two_batches( a: (&str, &Vec), b: (&str, &Vec), @@ -2429,6 +2487,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip())] fn build_semi_anti_left_table() -> Arc { // just two line match // b1 = 10 @@ -2439,6 +2498,7 @@ mod tests { ) } + #[tracing::instrument(level = "trace", skip())] fn build_semi_anti_right_table() -> Arc { // just two line match // b2 = 10 @@ -3185,6 +3245,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip())] fn prepare_join_filter() -> JoinFilter { let column_indices = vec![ ColumnIndex { @@ -3863,6 +3924,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(schema))] /// Returns the column names on the schema fn columns(schema: &Schema) -> Vec { schema.fields().iter().map(|f| f.name().clone()).collect() diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 47e262c3c8f6d..22ce6d8d80ea1 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -74,6 +74,7 @@ struct JoinLeftData { } impl JoinLeftData { + #[tracing::instrument(level = "trace", skip(batch, bitmap, probe_threads_counter, reservation))] fn new( batch: RecordBatch, bitmap: SharedBitmapBuilder, @@ -88,14 +89,17 @@ impl JoinLeftData { } } + #[tracing::instrument(level = "trace", skip(self))] fn batch(&self) -> &RecordBatch { &self.batch } + #[tracing::instrument(level = "trace", skip(self))] fn bitmap(&self) -> &SharedBitmapBuilder { &self.bitmap } + #[tracing::instrument(level = "trace", skip(self))] /// Decrements counter of running threads, and returns `true` /// if caller is the last running thread fn report_probe_completed(&self) -> bool { @@ -161,6 +165,7 @@ pub struct NestedLoopJoinExec { } impl NestedLoopJoinExec { + #[tracing::instrument(level = "trace", skip(left, right, filter, join_type))] /// Try to create a nwe [`NestedLoopJoinExec`] pub fn try_new( left: Arc, @@ -189,26 +194,31 
@@ impl NestedLoopJoinExec { }) } + #[tracing::instrument(level = "trace", skip(self))] /// left side pub fn left(&self) -> &Arc { &self.left } + #[tracing::instrument(level = "trace", skip(self))] /// right side pub fn right(&self) -> &Arc { &self.right } + #[tracing::instrument(level = "trace", skip(self))] /// Filters applied before join output pub fn filter(&self) -> Option<&JoinFilter> { self.filter.as_ref() } + #[tracing::instrument(level = "trace", skip(self))] /// How the join is performed pub fn join_type(&self) -> &JoinType { &self.join_type } + #[tracing::instrument(level = "trace", skip(left, right, schema, join_type))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties( left: &Arc, @@ -255,6 +265,7 @@ impl NestedLoopJoinExec { } impl DisplayAs for NestedLoopJoinExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> std::fmt::Result { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { @@ -273,18 +284,22 @@ impl DisplayAs for NestedLoopJoinExec { } impl ExecutionPlan for NestedLoopJoinExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "NestedLoopJoinExec" } + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_distribution(&self) -> Vec { vec![ Distribution::SinglePartition, @@ -292,10 +307,12 @@ impl ExecutionPlan for NestedLoopJoinExec { ] } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.left.clone(), self.right.clone()] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -308,6 +325,7 @@ impl ExecutionPlan for NestedLoopJoinExec { )?)) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -344,10 +362,12 @@ impl ExecutionPlan for NestedLoopJoinExec { })) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { estimate_join_statistics( self.left.clone(), @@ -359,6 +379,7 @@ impl ExecutionPlan for NestedLoopJoinExec { } } +#[tracing::instrument(level = "trace", skip(input, context, join_metrics, reservation, with_visited_left_side, probe_threads_count))] /// Asynchronously collect input into a single batch, and creates `JoinLeftData` from it async fn collect_left_input( input: Arc, @@ -444,6 +465,7 @@ struct NestedLoopJoinStream { join_metrics: BuildProbeJoinMetrics, } +#[tracing::instrument(level = "trace", skip(left_row_index, right_batch, left_batch, filter))] fn build_join_indices( left_row_index: usize, right_batch: &RecordBatch, @@ -472,6 +494,7 @@ fn build_join_indices( } impl NestedLoopJoinStream { + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next_impl( &mut self, cx: &mut std::task::Context<'_>, @@ -568,6 +591,7 @@ impl NestedLoopJoinStream { } } +#[tracing::instrument(level = "trace", skip(left_batch, right_batch, join_type, filter, column_indices, schema, visited_left_side))] fn join_left_and_right_batch( left_batch: &RecordBatch, right_batch: &RecordBatch, @@ 
-634,6 +658,7 @@ fn join_left_and_right_batch( } } +#[tracing::instrument(level = "trace", skip(shared_bitmap, join_type))] fn get_final_indices_from_shared_bitmap( shared_bitmap: &SharedBitmapBuilder, join_type: JoinType, @@ -645,6 +670,7 @@ fn get_final_indices_from_shared_bitmap( impl Stream for NestedLoopJoinStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: std::pin::Pin<&mut Self>, cx: &mut std::task::Context<'_>, @@ -654,6 +680,7 @@ impl Stream for NestedLoopJoinStream { } impl RecordBatchStream for NestedLoopJoinStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.schema.clone() } @@ -675,6 +702,7 @@ mod tests { use datafusion_physical_expr::expressions::{BinaryExpr, Literal}; use datafusion_physical_expr::PhysicalExpr; + #[tracing::instrument(level = "trace", skip(a, b, c))] fn build_table( a: (&str, &Vec), b: (&str, &Vec), @@ -685,6 +713,7 @@ mod tests { Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None).unwrap()) } + #[tracing::instrument(level = "trace", skip())] fn build_left_table() -> Arc { build_table( ("a1", &vec![5, 9, 11]), @@ -693,6 +722,7 @@ mod tests { ) } + #[tracing::instrument(level = "trace", skip())] fn build_right_table() -> Arc { build_table( ("a2", &vec![12, 2, 10]), @@ -701,6 +731,7 @@ mod tests { ) } + #[tracing::instrument(level = "trace", skip())] fn prepare_join_filter() -> JoinFilter { let column_indices = vec![ ColumnIndex { @@ -745,6 +776,7 @@ mod tests { JoinFilter::new(filter_expression, column_indices, intermediate_schema) } + #[tracing::instrument(level = "trace", skip(left, right, join_type, join_filter, context))] async fn multi_partitioned_join_collect( left: Arc, right: Arc, @@ -1070,6 +1102,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(schema))] /// Returns the column names on the schema fn columns(schema: &Schema) -> Vec { schema.fields().iter().map(|f| f.name().clone()).collect() diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index 1cc7bf4700d1f..b58bc56f5c26e 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -88,6 +88,7 @@ pub struct SortMergeJoinExec { } impl SortMergeJoinExec { + #[tracing::instrument(level = "trace", skip(left, right, on, filter, join_type, sort_options, null_equals_null))] /// Tries to create a new [SortMergeJoinExec]. /// The inputs are sorted using `sort_options` are applied to the columns in the `on` /// # Error @@ -155,6 +156,7 @@ impl SortMergeJoinExec { }) } + #[tracing::instrument(level = "trace", skip(join_type))] /// Get probe side (e.g streaming side) information for this sort merge join. /// In current implementation, probe side is determined according to join type. pub fn probe_side(join_type: &JoinType) -> JoinSide { @@ -172,6 +174,7 @@ impl SortMergeJoinExec { } } + #[tracing::instrument(level = "trace", skip(join_type))] /// Calculate order preservation flags for this sort merge join. 
fn maintains_input_order(join_type: JoinType) -> Vec { match join_type { @@ -184,23 +187,28 @@ impl SortMergeJoinExec { } } + #[tracing::instrument(level = "trace", skip(self))] /// Set of common columns used to join on pub fn on(&self) -> &[(PhysicalExprRef, PhysicalExprRef)] { &self.on } + #[tracing::instrument(level = "trace", skip(self))] pub fn right(&self) -> &Arc { &self.right } + #[tracing::instrument(level = "trace", skip(self))] pub fn join_type(&self) -> JoinType { self.join_type } + #[tracing::instrument(level = "trace", skip(self))] pub fn left(&self) -> &Arc { &self.left } + #[tracing::instrument(level = "trace", skip(left, right, schema, join_type, join_on))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties( left: &Arc, @@ -237,6 +245,7 @@ impl SortMergeJoinExec { } impl DisplayAs for SortMergeJoinExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> std::fmt::Result { let display_filter = self.filter.as_ref().map_or_else( || "".to_string(), @@ -262,18 +271,22 @@ impl DisplayAs for SortMergeJoinExec { } impl ExecutionPlan for SortMergeJoinExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "SortMergeJoinExec" } + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_distribution(&self) -> Vec { let (left_expr, right_expr) = self.on.iter().map(|(l, r)| (l.clone(), r.clone())).unzip(); @@ -283,6 +296,7 @@ impl ExecutionPlan for SortMergeJoinExec { ] } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_ordering(&self) -> Vec>> { vec![ Some(PhysicalSortRequirement::from_sort_exprs( @@ -294,14 +308,17 @@ impl ExecutionPlan for SortMergeJoinExec { ] } + #[tracing::instrument(level = "trace", skip(self))] fn maintains_input_order(&self) -> Vec { Self::maintains_input_order(self.join_type) } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.left.clone(), self.right.clone()] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -320,6 +337,7 @@ impl ExecutionPlan for SortMergeJoinExec { } } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -369,10 +387,12 @@ impl ExecutionPlan for SortMergeJoinExec { )?)) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { // TODO stats: it is not possible in general to know the output size of joins // There are some special cases though, for example: @@ -406,6 +426,7 @@ struct SortMergeJoinMetrics { } impl SortMergeJoinMetrics { + #[tracing::instrument(level = "trace", skip(partition, metrics))] #[allow(dead_code)] pub fn new(partition: usize, metrics: &ExecutionPlanMetricsSet) -> Self { let join_time = MetricBuilder::new(metrics).subset_time("join_time", partition); @@ -499,6 +520,7 @@ struct StreamedBatch { } impl StreamedBatch { + #[tracing::instrument(level = "trace", skip(batch, on_column))] fn new(batch: RecordBatch, on_column: &[Arc]) -> Self { let 
join_arrays = join_arrays(&batch, on_column); StreamedBatch { @@ -511,6 +533,7 @@ impl StreamedBatch { } } + #[tracing::instrument(level = "trace", skip(schema))] fn new_empty(schema: SchemaRef) -> Self { StreamedBatch { batch: RecordBatch::new_empty(schema), @@ -522,6 +545,7 @@ impl StreamedBatch { } } + #[tracing::instrument(level = "trace", skip(self, buffered_batch_idx, buffered_idx))] /// Appends new pair consisting of current streamed index and `buffered_idx` /// index of buffered batch with `buffered_batch_idx` index. fn append_output_pair( @@ -568,6 +592,7 @@ struct BufferedBatch { } impl BufferedBatch { + #[tracing::instrument(level = "trace", skip(batch, range, on_column))] fn new( batch: RecordBatch, range: Range, @@ -656,6 +681,7 @@ struct SMJStream { } impl RecordBatchStream for SMJStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.schema.clone() } @@ -664,6 +690,7 @@ impl RecordBatchStream for SMJStream { impl Stream for SMJStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -758,6 +785,7 @@ impl Stream for SMJStream { } impl SMJStream { + #[tracing::instrument(level = "trace", skip(schema, sort_options, null_equals_null, streamed, buffered, on_streamed, on_buffered, filter, join_type, batch_size, join_metrics, reservation))] #[allow(clippy::too_many_arguments)] pub fn try_new( schema: SchemaRef, @@ -803,6 +831,7 @@ impl SMJStream { }) } + #[tracing::instrument(level = "trace", skip(self, cx))] /// Poll next streamed row fn poll_streamed_row(&mut self, cx: &mut Context) -> Poll>> { loop { @@ -845,6 +874,7 @@ impl SMJStream { } } + #[tracing::instrument(level = "trace", skip(self, cx))] /// Poll next buffered batches fn poll_buffered_batches(&mut self, cx: &mut Context) -> Poll>> { loop { @@ -954,6 +984,7 @@ impl SMJStream { } } + #[tracing::instrument(level = "trace", skip(self))] /// Get comparison result of streamed row and buffered batches fn compare_streamed_buffered(&self) -> Result { if self.streamed_state == StreamedState::Exhausted { @@ -973,6 +1004,7 @@ impl SMJStream { ); } + #[tracing::instrument(level = "trace", skip(self))] /// Produce join and fill output buffer until reaching target batch size /// or the join is finished fn join_partial(&mut self) -> Result<()> { @@ -1078,6 +1110,7 @@ impl SMJStream { Ok(()) } + #[tracing::instrument(level = "trace", skip(self))] fn freeze_all(&mut self) -> Result<()> { self.freeze_streamed()?; self.freeze_buffered(self.buffered_data.batches.len())?; @@ -1088,6 +1121,7 @@ impl SMJStream { // no longer needed: // 1. freezes all indices joined to streamed side // 2. freezes NULLs joined to dequeued buffered batch to "release" it + #[tracing::instrument(level = "trace", skip(self))] fn freeze_dequeuing_buffered(&mut self) -> Result<()> { self.freeze_streamed()?; self.freeze_buffered(1)?; @@ -1098,6 +1132,7 @@ impl SMJStream { // NULLs on streamed side. // // Applicable only in case of Full join. + #[tracing::instrument(level = "trace", skip(self, batch_count))] fn freeze_buffered(&mut self, batch_count: usize) -> Result<()> { if !matches!(self.join_type, JoinType::Full) { return Ok(()); @@ -1139,6 +1174,7 @@ impl SMJStream { // Produces and stages record batch for all output indices found // for current streamed batch and clears staged output indices. 
+ #[tracing::instrument(level = "trace", skip(self))] fn freeze_streamed(&mut self) -> Result<()> { for chunk in self.streamed_batch.output_indices.iter_mut() { let streamed_indices = chunk.streamed_indices.finish(); @@ -1355,6 +1391,7 @@ impl SMJStream { Ok(()) } + #[tracing::instrument(level = "trace", skip(self))] fn output_record_batch_and_reset(&mut self) -> Result { let record_batch = concat_batches(&self.schema, &self.output_record_batches)?; self.join_metrics.output_batches.add(1); @@ -1374,6 +1411,7 @@ impl SMJStream { } } +#[tracing::instrument(level = "trace", skip(join_filter, streamed_columns, buffered_columns))] /// Gets the arrays which join filters are applied on. fn get_filter_column( join_filter: &Option, @@ -1404,6 +1442,7 @@ fn get_filter_column( filter_columns } +#[tracing::instrument(level = "trace", skip(buffered_data, buffered_batch_idx, buffered_indices))] /// Get `buffered_indices` rows for `buffered_data[buffered_batch_idx]` #[inline(always)] fn get_buffered_columns( @@ -1425,6 +1464,7 @@ fn get_buffered_columns( // true = the row index matches the join filter // false = the row index doesn't match the join filter // `streamed_indices` have the same length as `mask` +#[tracing::instrument(level = "trace", skip(join_type, streamed_indices, mask))] fn get_filtered_join_mask( join_type: JoinType, streamed_indices: UInt64Array, @@ -1479,27 +1519,33 @@ struct BufferedData { } impl BufferedData { + #[tracing::instrument(level = "trace", skip(self))] pub fn head_batch(&self) -> &BufferedBatch { self.batches.front().unwrap() } + #[tracing::instrument(level = "trace", skip(self))] pub fn tail_batch(&self) -> &BufferedBatch { self.batches.back().unwrap() } + #[tracing::instrument(level = "trace", skip(self))] pub fn tail_batch_mut(&mut self) -> &mut BufferedBatch { self.batches.back_mut().unwrap() } + #[tracing::instrument(level = "trace", skip(self))] pub fn has_buffered_rows(&self) -> bool { self.batches.iter().any(|batch| !batch.range.is_empty()) } + #[tracing::instrument(level = "trace", skip(self))] pub fn scanning_reset(&mut self) { self.scanning_batch_idx = 0; self.scanning_offset = 0; } + #[tracing::instrument(level = "trace", skip(self))] pub fn scanning_advance(&mut self) { self.scanning_offset += 1; while !self.scanning_finished() && self.scanning_batch_finished() { @@ -1508,32 +1554,39 @@ impl BufferedData { } } + #[tracing::instrument(level = "trace", skip(self))] pub fn scanning_batch(&self) -> &BufferedBatch { &self.batches[self.scanning_batch_idx] } + #[tracing::instrument(level = "trace", skip(self))] pub fn scanning_batch_mut(&mut self) -> &mut BufferedBatch { &mut self.batches[self.scanning_batch_idx] } + #[tracing::instrument(level = "trace", skip(self))] pub fn scanning_idx(&self) -> usize { self.scanning_batch().range.start + self.scanning_offset } + #[tracing::instrument(level = "trace", skip(self))] pub fn scanning_batch_finished(&self) -> bool { self.scanning_offset == self.scanning_batch().range.len() } + #[tracing::instrument(level = "trace", skip(self))] pub fn scanning_finished(&self) -> bool { self.scanning_batch_idx == self.batches.len() } + #[tracing::instrument(level = "trace", skip(self))] pub fn scanning_finish(&mut self) { self.scanning_batch_idx = self.batches.len(); self.scanning_offset = 0; } } +#[tracing::instrument(level = "trace", skip(batch, on_column))] /// Get join array refs of given batch and join columns fn join_arrays(batch: &RecordBatch, on_column: &[PhysicalExprRef]) -> Vec { on_column @@ -1546,6 +1599,7 @@ fn 
join_arrays(batch: &RecordBatch, on_column: &[PhysicalExprRef]) -> Vec), b: (&str, &Vec), @@ -1738,11 +1794,13 @@ mod tests { Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None).unwrap()) } + #[tracing::instrument(level = "trace", skip(batches))] fn build_table_from_batches(batches: Vec) -> Arc { let schema = batches.first().unwrap().schema(); Arc::new(MemoryExec::try_new(&[batches], schema, None).unwrap()) } + #[tracing::instrument(level = "trace", skip(a, b, c))] fn build_date_table( a: (&str, &Vec), b: (&str, &Vec), @@ -1768,6 +1826,7 @@ mod tests { Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None).unwrap()) } + #[tracing::instrument(level = "trace", skip(a, b, c))] fn build_date64_table( a: (&str, &Vec), b: (&str, &Vec), @@ -1793,6 +1852,7 @@ mod tests { Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None).unwrap()) } + #[tracing::instrument(level = "trace", skip(a, b, c))] /// returns a table with 3 columns of i32 in memory pub fn build_table_i32_nullable( a: (&str, &Vec>), @@ -1816,6 +1876,7 @@ mod tests { Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None).unwrap()) } + #[tracing::instrument(level = "trace", skip(left, right, on, join_type))] fn join( left: Arc, right: Arc, @@ -1826,6 +1887,7 @@ mod tests { SortMergeJoinExec::try_new(left, right, on, None, join_type, sort_options, false) } + #[tracing::instrument(level = "trace", skip(left, right, on, join_type, sort_options, null_equals_null))] fn join_with_options( left: Arc, right: Arc, @@ -1845,6 +1907,7 @@ mod tests { ) } + #[tracing::instrument(level = "trace", skip(left, right, on, join_type))] async fn join_collect( left: Arc, right: Arc, @@ -1855,6 +1918,7 @@ mod tests { join_collect_with_options(left, right, on, join_type, sort_options, false).await } + #[tracing::instrument(level = "trace", skip(left, right, on, join_type, sort_options, null_equals_null))] async fn join_collect_with_options( left: Arc, right: Arc, @@ -1879,6 +1943,7 @@ mod tests { Ok((columns, batches)) } + #[tracing::instrument(level = "trace", skip(left, right, on, join_type))] async fn join_collect_batch_size_equals_two( left: Arc, right: Arc, @@ -2813,6 +2878,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(schema))] /// Returns the column names on the schema fn columns(schema: &Schema) -> Vec { schema.fields().iter().map(|f| f.name().clone()).collect() diff --git a/datafusion/physical-plan/src/joins/stream_join_utils.rs b/datafusion/physical-plan/src/joins/stream_join_utils.rs index f19eb30313e68..373948b97b9a5 100644 --- a/datafusion/physical-plan/src/joins/stream_join_utils.rs +++ b/datafusion/physical-plan/src/joins/stream_join_utils.rs @@ -53,20 +53,24 @@ impl JoinHashMapType for PruningJoinHashMap { type NextType = VecDeque; // Extend with zero + #[tracing::instrument(level = "trace", skip(self, len))] fn extend_zero(&mut self, len: usize) { self.next.resize(self.next.len() + len, 0) } + #[tracing::instrument(level = "trace", skip(self))] /// Get mutable references to the hash map and the next. fn get_mut(&mut self) -> (&mut RawTable<(u64, u64)>, &mut Self::NextType) { (&mut self.map, &mut self.next) } + #[tracing::instrument(level = "trace", skip(self))] /// Get a reference to the hash map. fn get_map(&self) -> &RawTable<(u64, u64)> { &self.map } + #[tracing::instrument(level = "trace", skip(self))] /// Get a reference to the next. 
fn get_list(&self) -> &Self::NextType { &self.next @@ -116,6 +120,7 @@ pub struct PruningJoinHashMap { } impl PruningJoinHashMap { + #[tracing::instrument(level = "trace", skip(capacity))] /// Constructs a new `PruningJoinHashMap` with the given capacity. /// Both the map and the list are pre-allocated with the provided capacity. /// @@ -131,6 +136,7 @@ impl PruningJoinHashMap { } } + #[tracing::instrument(level = "trace", skip(self, scale_factor))] /// Shrinks the capacity of the hash map, if necessary, based on the /// provided scale factor. /// @@ -154,6 +160,7 @@ impl PruningJoinHashMap { } } + #[tracing::instrument(level = "trace", skip(self))] /// Calculates the size of the `PruningJoinHashMap` in bytes. /// /// # Returns @@ -163,6 +170,7 @@ impl PruningJoinHashMap { + self.next.capacity() * std::mem::size_of::() } + #[tracing::instrument(level = "trace", skip(self, prune_length, deleting_offset, shrink_factor))] /// Removes hash values from the map and the list based on the given pruning /// length and deleting offset. /// @@ -203,6 +211,7 @@ impl PruningJoinHashMap { } } +#[tracing::instrument(level = "trace", skip(expr, reference))] fn check_filter_expr_contains_sort_information( expr: &Arc, reference: &Arc, @@ -214,6 +223,7 @@ fn check_filter_expr_contains_sort_information( .any(|e| check_filter_expr_contains_sort_information(e, reference)) } +#[tracing::instrument(level = "trace", skip(filter, schema, side))] /// Create a one to one mapping from main columns to filter columns using /// filter column indices. A column index looks like: /// ```text @@ -246,6 +256,7 @@ pub fn map_origin_col_to_filter_col( Ok(col_to_col_map) } +#[tracing::instrument(level = "trace", skip(side, filter, schema, sort_expr))] /// This function analyzes [`PhysicalSortExpr`] graphs with respect to monotonicity /// (sorting) properties. This is necessary since monotonically increasing and/or /// decreasing expressions are required when using join filter expressions for @@ -306,6 +317,7 @@ pub fn convert_sort_expr_with_filter_schema( Ok(None) } +#[tracing::instrument(level = "trace", skip(side, filter, schema, order))] /// This function is used to build the filter expression based on the sort order of input columns. /// /// It first calls the [`convert_sort_expr_with_filter_schema`] method to determine if the sort @@ -329,6 +341,7 @@ pub fn build_filter_input_order( .transpose() } +#[tracing::instrument(level = "trace", skip(input, column_map))] /// Convert a physical expression into a filter expression using the given /// column mapping information. 
fn convert_filter_columns( @@ -368,6 +381,7 @@ pub struct SortedFilterExpr { } impl SortedFilterExpr { + #[tracing::instrument(level = "trace", skip(origin_sorted_expr, filter_expr, filter_schema))] /// Constructor pub fn try_new( origin_sorted_expr: PhysicalSortExpr, @@ -382,32 +396,39 @@ impl SortedFilterExpr { node_index: 0, }) } + #[tracing::instrument(level = "trace", skip(self))] /// Get origin expr information pub fn origin_sorted_expr(&self) -> &PhysicalSortExpr { &self.origin_sorted_expr } + #[tracing::instrument(level = "trace", skip(self))] /// Get filter expr information pub fn filter_expr(&self) -> &Arc { &self.filter_expr } + #[tracing::instrument(level = "trace", skip(self))] /// Get interval information pub fn interval(&self) -> &Interval { &self.interval } + #[tracing::instrument(level = "trace", skip(self, interval))] /// Sets interval pub fn set_interval(&mut self, interval: Interval) { self.interval = interval; } + #[tracing::instrument(level = "trace", skip(self))] /// Node index in ExprIntervalGraph pub fn node_index(&self) -> usize { self.node_index } + #[tracing::instrument(level = "trace", skip(self, node_index))] /// Node index setter in ExprIntervalGraph pub fn set_node_index(&mut self, node_index: usize) { self.node_index = node_index; } } +#[tracing::instrument(level = "trace", skip(build_input_buffer, build_sorted_filter_expr, probe_batch, probe_sorted_filter_expr))] /// Calculate the filter expression intervals. /// /// This function updates the `interval` field of each `SortedFilterExpr` based @@ -490,6 +511,7 @@ pub fn calculate_filter_expr_intervals( ) } +#[tracing::instrument(level = "trace", skip(batch, sorted_expr))] /// This is a subroutine of the function [`calculate_filter_expr_intervals`]. /// It constructs the current interval using the given `batch` and updates /// the filter expression (i.e. `sorted_expr`) with this interval. @@ -518,6 +540,7 @@ pub fn update_filter_expr_interval( Ok(()) } +#[tracing::instrument(level = "trace", skip(prune_length, deleted_offset, visited_rows))] /// Get the anti join indices from the visited hash set. /// /// This method returns the indices from the original input that were not present in the visited hash set. @@ -552,6 +575,7 @@ where .collect() } +#[tracing::instrument(level = "trace", skip(prune_length, deleted_offset, visited_rows))] /// This method creates a boolean buffer from the visited rows hash set /// and the indices of the pruned record batch slice. /// @@ -587,6 +611,7 @@ where .collect() } +#[tracing::instrument(level = "trace", skip(output_schema, left_batch, right_batch))] pub fn combine_two_batches( output_schema: &SchemaRef, left_batch: Option, @@ -610,6 +635,7 @@ pub fn combine_two_batches( } } +#[tracing::instrument(level = "trace", skip(visited, offset, indices))] /// Records the visited indices from the input `PrimitiveArray` of type `T` into the given hash set `visited`. /// This function will insert the indices (offset by `offset`) into the `visited` hash set. /// @@ -999,6 +1025,7 @@ pub struct StreamJoinMetrics { } impl StreamJoinMetrics { + #[tracing::instrument(level = "trace", skip(partition, metrics))] pub fn new(partition: usize, metrics: &ExecutionPlanMetricsSet) -> Self { let input_batches = MetricBuilder::new(metrics).counter("input_batches", partition); @@ -1034,6 +1061,7 @@ impl StreamJoinMetrics { } } +#[tracing::instrument(level = "trace", skip(graph, sorted_exprs))] /// Updates sorted filter expressions with corresponding node indices from the /// expression interval graph. 
/// @@ -1061,6 +1089,7 @@ fn update_sorted_exprs_with_node_indices( } } +#[tracing::instrument(level = "trace", skip(filter, left, right, left_sort_exprs, right_sort_exprs))] /// Prepares and sorts expressions based on a given filter, left and right execution plans, and sort expressions. /// /// # Arguments diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 9d48c2a7d408b..548be7031431d 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -194,6 +194,7 @@ pub struct SymmetricHashJoinExec { } impl SymmetricHashJoinExec { + #[tracing::instrument(level = "trace", skip(left, right, on, filter, join_type, null_equals_null, left_sort_exprs, right_sort_exprs, mode))] /// Tries to create a new [SymmetricHashJoinExec]. /// # Error /// This function errors when: @@ -251,6 +252,7 @@ impl SymmetricHashJoinExec { }) } + #[tracing::instrument(level = "trace", skip(left, right, schema, join_type, join_on))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties( left: &Arc, @@ -286,51 +288,61 @@ impl SymmetricHashJoinExec { PlanProperties::new(eq_properties, output_partitioning, mode) } + #[tracing::instrument(level = "trace", skip(self))] /// left stream pub fn left(&self) -> &Arc { &self.left } + #[tracing::instrument(level = "trace", skip(self))] /// right stream pub fn right(&self) -> &Arc { &self.right } + #[tracing::instrument(level = "trace", skip(self))] /// Set of common columns used to join on pub fn on(&self) -> &[(PhysicalExprRef, PhysicalExprRef)] { &self.on } + #[tracing::instrument(level = "trace", skip(self))] /// Filters applied before join output pub fn filter(&self) -> Option<&JoinFilter> { self.filter.as_ref() } + #[tracing::instrument(level = "trace", skip(self))] /// How the join is performed pub fn join_type(&self) -> &JoinType { &self.join_type } + #[tracing::instrument(level = "trace", skip(self))] /// Get null_equals_null pub fn null_equals_null(&self) -> bool { self.null_equals_null } + #[tracing::instrument(level = "trace", skip(self))] /// Get partition mode pub fn partition_mode(&self) -> StreamJoinPartitionMode { self.mode } + #[tracing::instrument(level = "trace", skip(self))] /// Get left_sort_exprs pub fn left_sort_exprs(&self) -> Option<&[PhysicalSortExpr]> { self.left_sort_exprs.as_deref() } + #[tracing::instrument(level = "trace", skip(self))] /// Get right_sort_exprs pub fn right_sort_exprs(&self) -> Option<&[PhysicalSortExpr]> { self.right_sort_exprs.as_deref() } + #[tracing::instrument(level = "trace", skip(self))] /// Check if order information covers every column in the filter expression. 
pub fn check_if_order_information_available(&self) -> Result { if let Some(filter) = self.filter() { @@ -361,6 +373,7 @@ impl SymmetricHashJoinExec { } impl DisplayAs for SymmetricHashJoinExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { @@ -385,18 +398,22 @@ impl DisplayAs for SymmetricHashJoinExec { } impl ExecutionPlan for SymmetricHashJoinExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "SymmetricHashJoinExec" } + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_distribution(&self) -> Vec { match self.mode { StreamJoinPartitionMode::Partitioned => { @@ -416,6 +433,7 @@ impl ExecutionPlan for SymmetricHashJoinExec { } } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_ordering(&self) -> Vec>> { vec![ self.left_sort_exprs @@ -427,10 +445,12 @@ impl ExecutionPlan for SymmetricHashJoinExec { ] } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.left.clone(), self.right.clone()] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -448,15 +468,18 @@ impl ExecutionPlan for SymmetricHashJoinExec { )?)) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { // TODO stats: it is not possible in general to know the output size of joins Ok(Statistics::new_unknown(&self.schema())) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -564,6 +587,7 @@ struct SymmetricHashJoinStream { } impl RecordBatchStream for SymmetricHashJoinStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.schema.clone() } @@ -572,6 +596,7 @@ impl RecordBatchStream for SymmetricHashJoinStream { impl Stream for SymmetricHashJoinStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: std::pin::Pin<&mut Self>, cx: &mut std::task::Context<'_>, @@ -580,6 +605,7 @@ impl Stream for SymmetricHashJoinStream { } } +#[tracing::instrument(level = "trace", skip(buffer, build_side_filter_expr))] /// Determine the pruning length for `buffer`. /// /// This function evaluates the build side filter expression, converts the @@ -621,6 +647,7 @@ fn determine_prune_length( bisect::(&[batch_arr], &[target], &[origin_sorted_expr.options]) } +#[tracing::instrument(level = "trace", skip(build_side, join_type))] /// This method determines if the result of the join should be produced in the final step or not. /// /// # Arguments @@ -649,6 +676,7 @@ fn need_to_produce_result_in_final(build_side: JoinSide, join_type: JoinType) -> } } +#[tracing::instrument(level = "trace", skip(build_side, prune_length, visited_rows, deleted_offset, join_type))] /// Calculate indices by join type. /// /// This method returns a tuple of two arrays: build and probe indices. 
@@ -706,6 +734,7 @@ where Ok(result) } +#[tracing::instrument(level = "trace", skip(build_hash_joiner, output_schema, prune_length, probe_schema, join_type, column_indices))] /// This function produces unmatched record results based on the build side, /// join type and other parameters. /// @@ -763,6 +792,7 @@ pub(crate) fn build_side_determined_results( } } +#[tracing::instrument(level = "trace", skip(build_hash_joiner, probe_hash_joiner, schema, join_type, filter, probe_batch, column_indices, random_state, null_equals_null))] /// This method performs a join between the build side input buffer and the probe side batch. /// /// # Arguments @@ -858,6 +888,7 @@ pub(crate) fn join_with_probe_batch( } } +#[tracing::instrument(level = "trace", skip(build_hashmap, build_batch, probe_batch, build_on, probe_on, random_state, null_equals_null, hashes_buffer, deleted_offset))] /// This method performs lookups against JoinHashMap by hash values of join-key columns, and handles potential /// hash collisions. /// @@ -974,6 +1005,7 @@ pub struct OneSideHashJoiner { } impl OneSideHashJoiner { + #[tracing::instrument(level = "trace", skip(self))] pub fn size(&self) -> usize { let mut size = 0; size += std::mem::size_of_val(self); @@ -987,6 +1019,7 @@ impl OneSideHashJoiner { size += std::mem::size_of_val(&self.deleted_offset); size } + #[tracing::instrument(level = "trace", skip(build_side, on, schema))] pub fn new( build_side: JoinSide, on: Vec, @@ -1004,6 +1037,7 @@ impl OneSideHashJoiner { } } + #[tracing::instrument(level = "trace", skip(self, batch, random_state))] /// Updates the internal state of the [OneSideHashJoiner] with the incoming batch. /// /// # Arguments @@ -1038,6 +1072,7 @@ impl OneSideHashJoiner { Ok(()) } + #[tracing::instrument(level = "trace", skip(self, build_side_sorted_filter_expr, probe_side_sorted_filter_expr, graph))] /// Calculate prune length. 
/// /// # Arguments @@ -1082,6 +1117,7 @@ impl OneSideHashJoiner { determine_prune_length(&self.input_buffer, build_side_sorted_filter_expr) } + #[tracing::instrument(level = "trace", skip(self, prune_length))] pub(crate) fn prune_internal_state(&mut self, prune_length: usize) -> Result<()> { // Prune the hash values: self.hashmap.prune_hash_values( @@ -1104,6 +1140,7 @@ impl OneSideHashJoiner { } impl EagerJoinStream for SymmetricHashJoinStream { + #[tracing::instrument(level = "trace", skip(self, batch))] fn process_batch_from_right( &mut self, batch: RecordBatch, @@ -1118,6 +1155,7 @@ impl EagerJoinStream for SymmetricHashJoinStream { }) } + #[tracing::instrument(level = "trace", skip(self, batch))] fn process_batch_from_left( &mut self, batch: RecordBatch, @@ -1132,6 +1170,7 @@ impl EagerJoinStream for SymmetricHashJoinStream { }) } + #[tracing::instrument(level = "trace", skip(self, right_batch))] fn process_batch_after_left_end( &mut self, right_batch: RecordBatch, @@ -1139,6 +1178,7 @@ impl EagerJoinStream for SymmetricHashJoinStream { self.process_batch_from_right(right_batch) } + #[tracing::instrument(level = "trace", skip(self, left_batch))] fn process_batch_after_right_end( &mut self, left_batch: RecordBatch, @@ -1146,6 +1186,7 @@ impl EagerJoinStream for SymmetricHashJoinStream { self.process_batch_from_left(left_batch) } + #[tracing::instrument(level = "trace", skip(self))] fn process_batches_before_finalization( &mut self, ) -> Result>> { @@ -1181,24 +1222,29 @@ impl EagerJoinStream for SymmetricHashJoinStream { Ok(StatefulStreamResult::Continue) } + #[tracing::instrument(level = "trace", skip(self))] fn right_stream(&mut self) -> &mut SendableRecordBatchStream { &mut self.right_stream } + #[tracing::instrument(level = "trace", skip(self))] fn left_stream(&mut self) -> &mut SendableRecordBatchStream { &mut self.left_stream } + #[tracing::instrument(level = "trace", skip(self, state))] fn set_state(&mut self, state: EagerJoinStreamState) { self.state = state; } + #[tracing::instrument(level = "trace", skip(self))] fn state(&mut self) -> EagerJoinStreamState { self.state.clone() } } impl SymmetricHashJoinStream { + #[tracing::instrument(level = "trace", skip(self))] fn size(&self) -> usize { let mut size = 0; size += std::mem::size_of_val(&self.schema); @@ -1216,6 +1262,7 @@ impl SymmetricHashJoinStream { size } + #[tracing::instrument(level = "trace", skip(self, probe_batch, probe_side))] /// Performs a join operation for the specified `probe_side` (either left or right). /// This function: /// 1. Determines which side is the probe and which is the build side. 
@@ -1353,6 +1400,7 @@ mod tests { static TABLE_CACHE: Lazy>> = Lazy::new(|| Mutex::new(HashMap::new())); + #[tracing::instrument(level = "trace", skip(cardinality, batch_size))] fn get_or_create_table( cardinality: (i32, i32), batch_size: usize, @@ -1385,6 +1433,7 @@ mod tests { Ok((left_partition, right_partition)) } + #[tracing::instrument(level = "trace", skip(left, right, filter, join_type, on, task_ctx))] pub async fn experiment( left: Arc, right: Arc, @@ -1411,6 +1460,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(join_type, cardinality))] #[rstest] #[tokio::test(flavor = "multi_thread")] async fn complex_join_all_one_ascending_numeric( @@ -1495,6 +1545,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(join_type, case_expr))] #[rstest] #[tokio::test(flavor = "multi_thread")] async fn join_all_one_ascending_numeric( @@ -1562,6 +1613,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(join_type, case_expr))] #[rstest] #[tokio::test(flavor = "multi_thread")] async fn join_without_sort_information( @@ -1616,6 +1668,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(join_type))] #[rstest] #[tokio::test(flavor = "multi_thread")] async fn join_without_filter( @@ -1646,6 +1699,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(join_type, case_expr))] #[rstest] #[tokio::test(flavor = "multi_thread")] async fn join_all_one_descending_numeric_particular( @@ -1717,6 +1771,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip())] #[tokio::test(flavor = "multi_thread")] async fn build_null_columns_first() -> Result<()> { let join_type = JoinType::Full; @@ -1777,6 +1832,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip())] #[tokio::test(flavor = "multi_thread")] async fn build_null_columns_last() -> Result<()> { let join_type = JoinType::Full; @@ -1839,6 +1895,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip())] #[tokio::test(flavor = "multi_thread")] async fn build_null_columns_first_descending() -> Result<()> { let join_type = JoinType::Full; @@ -1902,6 +1959,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip())] #[tokio::test(flavor = "multi_thread")] async fn complex_join_all_one_ascending_numeric_missing_stat() -> Result<()> { let cardinality = (3, 4); @@ -1962,6 +2020,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip())] #[tokio::test(flavor = "multi_thread")] async fn complex_join_all_one_ascending_equivalence() -> Result<()> { let cardinality = (3, 4); @@ -2029,6 +2088,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(join_type, cardinality, case_expr))] #[rstest] #[tokio::test(flavor = "multi_thread")] async fn testing_with_temporal_columns( @@ -2114,6 +2174,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(join_type, cardinality))] #[rstest] #[tokio::test(flavor = "multi_thread")] async fn test_with_interval_columns( @@ -2191,6 +2252,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(join_type, cardinality, case_expr))] #[rstest] #[tokio::test(flavor = "multi_thread")] async fn testing_ascending_float_pruning( diff --git a/datafusion/physical-plan/src/joins/test_utils.rs b/datafusion/physical-plan/src/joins/test_utils.rs index 6fb3aef5d5bfd..0c3e7634bf21a 100644 --- a/datafusion/physical-plan/src/joins/test_utils.rs +++ b/datafusion/physical-plan/src/joins/test_utils.rs @@ -46,6 +46,7 @@ use 
datafusion_physical_expr::{LexOrdering, PhysicalExpr}; use rand::prelude::StdRng; use rand::{Rng, SeedableRng}; +#[tracing::instrument(level = "trace", skip(collected_1, collected_2))] pub fn compare_batches(collected_1: &[RecordBatch], collected_2: &[RecordBatch]) { // compare let first_formatted = pretty_format_batches(collected_1).unwrap().to_string(); @@ -67,6 +68,7 @@ pub fn compare_batches(collected_1: &[RecordBatch], collected_2: &[RecordBatch]) } } +#[tracing::instrument(level = "trace", skip(left, right, on, filter, join_type, null_equals_null, context))] pub async fn partitioned_sym_join_with_filter( left: Arc, right: Arc, @@ -115,6 +117,7 @@ pub async fn partitioned_sym_join_with_filter( Ok(batches) } +#[tracing::instrument(level = "trace", skip(left, right, on, filter, join_type, null_equals_null, context))] pub async fn partitioned_hash_join_with_filter( left: Arc, right: Arc, @@ -162,6 +165,7 @@ pub async fn partitioned_hash_join_with_filter( Ok(batches) } +#[tracing::instrument(level = "trace", skip(batch, batch_size))] pub fn split_record_batches( batch: &RecordBatch, batch_size: usize, @@ -184,6 +188,7 @@ struct AscendingRandomFloatIterator { } impl AscendingRandomFloatIterator { + #[tracing::instrument(level = "trace", skip(min, max))] fn new(min: f64, max: f64) -> Self { let mut rng = StdRng::seed_from_u64(42); let initial = rng.gen_range(min..max); @@ -198,6 +203,7 @@ impl AscendingRandomFloatIterator { impl Iterator for AscendingRandomFloatIterator { type Item = f64; + #[tracing::instrument(level = "trace", skip(self))] fn next(&mut self) -> Option { let value = self.rng.gen_range(self.prev..self.max); self.prev = value; @@ -205,6 +211,7 @@ impl Iterator for AscendingRandomFloatIterator { } } +#[tracing::instrument(level = "trace", skip(expr_id, left_col, right_col, schema))] pub fn join_expr_tests_fixture_temporal( expr_id: usize, left_col: Arc, @@ -404,6 +411,7 @@ macro_rules! 
join_expr_tests { join_expr_tests!(join_expr_tests_fixture_i32, i32, Int32); join_expr_tests!(join_expr_tests_fixture_f64, f64, Float64); +#[tracing::instrument(level = "trace", skip(table_size, key_cardinality))] pub fn build_sides_record_batches( table_size: i32, key_cardinality: (i32, i32), @@ -500,6 +508,7 @@ pub fn build_sides_record_batches( Ok((left, right)) } +#[tracing::instrument(level = "trace", skip(left_partition, right_partition, left_sorted, right_sorted))] pub fn create_memory_table( left_partition: Vec, right_partition: Vec, @@ -515,6 +524,7 @@ pub fn create_memory_table( Ok((Arc::new(left), Arc::new(right))) } +#[tracing::instrument(level = "trace", skip(filter_schema))] /// Filter expr for a + b > c + 10 AND a + b < c + 100 pub(crate) fn complicated_filter( filter_schema: &Schema, diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index acf9ed4d7ec87..47b9b96e08ef4 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -126,11 +126,13 @@ pub struct JoinHashMap { } impl JoinHashMap { + #[tracing::instrument(level = "trace", skip(map, next))] #[cfg(test)] pub(crate) fn new(map: RawTable<(u64, u64)>, next: Vec) -> Self { Self { map, next } } + #[tracing::instrument(level = "trace", skip(capacity))] pub(crate) fn with_capacity(capacity: usize) -> Self { JoinHashMap { map: RawTable::with_capacity(capacity), @@ -353,18 +355,22 @@ impl JoinHashMapType for JoinHashMap { type NextType = Vec; // Void implementation + #[tracing::instrument(level = "trace", skip(self))] fn extend_zero(&mut self, _: usize) {} + #[tracing::instrument(level = "trace", skip(self))] /// Get mutable references to the hash map and the next. fn get_mut(&mut self) -> (&mut RawTable<(u64, u64)>, &mut Self::NextType) { (&mut self.map, &mut self.next) } + #[tracing::instrument(level = "trace", skip(self))] /// Get a reference to the hash map. fn get_map(&self) -> &RawTable<(u64, u64)> { &self.map } + #[tracing::instrument(level = "trace", skip(self))] /// Get a reference to the next. fn get_list(&self) -> &Self::NextType { &self.next @@ -372,6 +378,7 @@ impl JoinHashMapType for JoinHashMap { } impl fmt::Debug for JoinHashMap { + #[tracing::instrument(level = "trace", skip(self, _f))] fn fmt(&self, _f: &mut fmt::Formatter) -> fmt::Result { Ok(()) } @@ -382,6 +389,7 @@ pub type JoinOn = Vec<(PhysicalExprRef, PhysicalExprRef)>; /// Reference for JoinOn. pub type JoinOnRef<'a> = &'a [(PhysicalExprRef, PhysicalExprRef)]; +#[tracing::instrument(level = "trace", skip(left, right, on))] /// Checks whether the schemas "left" and "right" and columns "on" represent a valid join. /// They are valid whenever their columns' intersection equals the set `on` pub fn check_join_is_valid(left: &Schema, right: &Schema, on: JoinOnRef) -> Result<()> { @@ -401,6 +409,7 @@ pub fn check_join_is_valid(left: &Schema, right: &Schema, on: JoinOnRef) -> Resu check_join_set_is_valid(&left, &right, on) } +#[tracing::instrument(level = "trace", skip(left, right, on))] /// Checks whether the sets left, right and on compose a valid join. 
/// They are valid whenever their intersection equals the set `on` fn check_join_set_is_valid( @@ -429,6 +438,7 @@ fn check_join_set_is_valid( Ok(()) } +#[tracing::instrument(level = "trace", skip(join_type, left_partitioning, right_partitioning, left_columns_len))] /// Calculate the OutputPartitioning for Partitioned Join pub fn partitioned_join_output_partitioning( join_type: JoinType, @@ -450,6 +460,7 @@ pub fn partitioned_join_output_partitioning( } } +#[tracing::instrument(level = "trace", skip(right_partitioning, left_columns_len))] /// Adjust the right out partitioning to new Column Index pub fn adjust_right_output_partitioning( right_partitioning: &Partitioning, @@ -467,6 +478,7 @@ pub fn adjust_right_output_partitioning( } } +#[tracing::instrument(level = "trace", skip(on_columns, right_ordering))] /// Replaces the right column (first index in the `on_column` tuple) with /// the left column (zeroth index in the tuple) inside `right_ordering`. fn replace_on_columns_of_right_ordering( @@ -492,6 +504,7 @@ fn replace_on_columns_of_right_ordering( Ok(()) } +#[tracing::instrument(level = "trace", skip(ordering, join_type, offset))] fn offset_ordering( ordering: LexOrderingRef, join_type: &JoinType, @@ -511,6 +524,7 @@ fn offset_ordering( } } +#[tracing::instrument(level = "trace", skip(left_ordering, right_ordering, join_type, on_columns, left_columns_len, maintains_input_order, probe_side))] /// Calculate the output ordering of a given join operation. pub fn calculate_join_output_ordering( left_ordering: LexOrderingRef, @@ -583,6 +597,7 @@ pub struct JoinFilter { } impl JoinFilter { + #[tracing::instrument(level = "trace", skip(expression, column_indices, schema))] /// Creates new JoinFilter pub fn new( expression: Arc, @@ -596,6 +611,7 @@ impl JoinFilter { } } + #[tracing::instrument(level = "trace", skip(left_indices, right_indices))] /// Helper for building ColumnIndex vector from left and right indices pub fn build_column_indices( left_indices: Vec, @@ -614,22 +630,26 @@ impl JoinFilter { .collect() } + #[tracing::instrument(level = "trace", skip(self))] /// Filter expression pub fn expression(&self) -> &Arc { &self.expression } + #[tracing::instrument(level = "trace", skip(self))] /// Column indices for intermediate batch creation pub fn column_indices(&self) -> &[ColumnIndex] { &self.column_indices } + #[tracing::instrument(level = "trace", skip(self))] /// Intermediate batch schema pub fn schema(&self) -> &Schema { &self.schema } } +#[tracing::instrument(level = "trace", skip(old_field, join_type, is_left))] /// Returns the output field given the input field. Outer joins may /// insert nulls even if the input was not null /// @@ -652,6 +672,7 @@ fn output_join_field(old_field: &Field, join_type: &JoinType, is_left: bool) -> } } +#[tracing::instrument(level = "trace", skip(left, right, join_type))] /// Creates a schema for a join operation. 
/// The fields from the left side are first pub fn build_join_schema( @@ -738,6 +759,7 @@ pub(crate) struct OnceAsync { } impl Default for OnceAsync { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self { fut: Mutex::new(None), @@ -746,12 +768,14 @@ impl Default for OnceAsync { } impl std::fmt::Debug for OnceAsync { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "OnceAsync") } } impl OnceAsync { + #[tracing::instrument(level = "trace", skip(self, f))] /// If this is the first call to this function on this object, will invoke /// `f` to obtain a future and return a [`OnceFut`] referring to this /// @@ -780,6 +804,7 @@ pub(crate) struct OnceFut { } impl Clone for OnceFut { + #[tracing::instrument(level = "trace", skip(self))] fn clone(&self) -> Self { Self { state: self.state.clone(), @@ -795,6 +820,7 @@ struct PartialJoinStatistics { pub column_statistics: Vec, } +#[tracing::instrument(level = "trace", skip(left, right, on, join_type, schema))] /// Estimate the statistics for the given join's output. pub(crate) fn estimate_join_statistics( left: Arc, @@ -819,6 +845,7 @@ pub(crate) fn estimate_join_statistics( } // Estimate the cardinality for the given join with input statistics. +#[tracing::instrument(level = "trace", skip(join_type, left_stats, right_stats, on))] fn estimate_join_cardinality( join_type: &JoinType, left_stats: Statistics, @@ -923,6 +950,7 @@ fn estimate_join_cardinality( } } +#[tracing::instrument(level = "trace", skip(left_stats, right_stats))] /// Estimate the inner join cardinality by using the basic building blocks of /// column-level statistics and the total row count. This is a very naive and /// a very conservative implementation that can quickly give up if there is not @@ -984,6 +1012,7 @@ fn estimate_inner_join_cardinality( } } +#[tracing::instrument(level = "trace", skip(left_stats, right_stats))] /// Estimates if inputs are non-overlapping, using input statistics. /// If inputs are disjoint, returns zero estimation, otherwise returns None fn estimate_disjoint_inputs( @@ -1036,6 +1065,7 @@ fn estimate_disjoint_inputs( None } +#[tracing::instrument(level = "trace", skip(num_rows, stats))] /// Estimate the number of maximum distinct values that can be present in the /// given column from its statistics. If distinct_count is available, uses it /// directly. 
Otherwise, if the column is numeric and has min/max values, it @@ -1105,6 +1135,7 @@ enum OnceFutState { } impl Clone for OnceFutState { + #[tracing::instrument(level = "trace", skip(self))] fn clone(&self) -> Self { match self { Self::Pending(p) => Self::Pending(p.clone()), @@ -1114,6 +1145,7 @@ impl Clone for OnceFutState { } impl OnceFut { + #[tracing::instrument(level = "trace", skip(fut))] /// Create a new [`OnceFut`] from a [`Future`] pub(crate) fn new(fut: Fut) -> Self where @@ -1128,6 +1160,7 @@ impl OnceFut { } } + #[tracing::instrument(level = "trace", skip(self, cx))] /// Get the result of the computation if it is ready, without consuming it pub(crate) fn get(&mut self, cx: &mut Context<'_>) -> Poll> { if let OnceFutState::Pending(fut) = &mut self.state { @@ -1146,6 +1179,7 @@ impl OnceFut { } } + #[tracing::instrument(level = "trace", skip(self, cx))] /// Get shared reference to the result of the computation if it is ready, without consuming it pub(crate) fn get_shared(&mut self, cx: &mut Context<'_>) -> Poll>> { if let OnceFutState::Pending(fut) = &mut self.state { @@ -1163,6 +1197,7 @@ impl OnceFut { } } +#[tracing::instrument(level = "trace", skip(join_type))] /// Some type `join_type` of join need to maintain the matched indices bit map for the left side, and /// use the bit map to generate the part of result of the join. /// @@ -1175,6 +1210,7 @@ pub(crate) fn need_produce_result_in_final(join_type: JoinType) -> bool { ) } +#[tracing::instrument(level = "trace", skip(left_bit_map, join_type))] /// In the end of join execution, need to use bit map of the matched /// indices to generate the final left and right indices. /// @@ -1208,6 +1244,7 @@ pub(crate) fn get_final_indices_from_bit_map( (left_indices, right_indices) } +#[tracing::instrument(level = "trace", skip(build_input_buffer, probe_batch, build_indices, probe_indices, filter, build_side))] pub(crate) fn apply_join_filter_to_indices( build_input_buffer: &RecordBatch, probe_batch: &RecordBatch, @@ -1243,6 +1280,7 @@ pub(crate) fn apply_join_filter_to_indices( )) } +#[tracing::instrument(level = "trace", skip(schema, build_input_buffer, probe_batch, build_indices, probe_indices, column_indices, build_side))] /// Returns a new [RecordBatch] by combining the `left` and `right` according to `indices`. /// The resulting batch has [Schema] `schema`. pub(crate) fn build_batch_from_indices( @@ -1297,6 +1335,7 @@ pub(crate) fn build_batch_from_indices( Ok(RecordBatch::try_new(Arc::new(schema.clone()), columns)?) } +#[tracing::instrument(level = "trace", skip(left_indices, right_indices, adjust_range, join_type))] /// The input is the matched indices for left and right and /// adjust the indices according to the join type pub(crate) fn adjust_indices_by_join_type( @@ -1346,6 +1385,7 @@ pub(crate) fn adjust_indices_by_join_type( } } +#[tracing::instrument(level = "trace", skip(left_indices, right_indices, right_unmatched_indices))] /// Appends the `right_unmatched_indices` to the `right_indices`, /// and fills Null to tail of `left_indices` to /// keep the length of `right_indices` and `left_indices` consistent. 
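The `append_right_indices` hunk above states the alignment invariant in prose; the following sketch restates it with plain `Vec`s of optional indices rather than the Arrow index arrays the real function operates on, purely to make the intent easy to see. Names and types here are illustrative, not the patch's code.

    // Simplified restatement of the append described above (illustrative only):
    // matched (left, right) index pairs stay aligned, and every unmatched right
    // index is appended with a null left slot so both vectors keep equal length.
    fn append_right_unmatched(
        left_indices: &mut Vec<Option<u64>>,
        right_indices: &mut Vec<Option<u32>>,
        right_unmatched_indices: &[u32],
    ) {
        for &idx in right_unmatched_indices {
            left_indices.push(None);       // fill Null on the build/left side
            right_indices.push(Some(idx)); // keep the unmatched probe/right index
        }
        debug_assert_eq!(left_indices.len(), right_indices.len());
    }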
@@ -1373,6 +1413,7 @@ pub(crate) fn append_right_indices( } } +#[tracing::instrument(level = "trace", skip(range, input_indices))] /// Returns `range` indices which are not present in `input_indices` pub(crate) fn get_anti_indices( range: Range, @@ -1402,6 +1443,7 @@ where .collect::>() } +#[tracing::instrument(level = "trace", skip(range, input_indices))] /// Returns intersection of `range` and `input_indices` omitting duplicates pub(crate) fn get_semi_indices( range: Range, @@ -1455,6 +1497,7 @@ pub(crate) struct BuildProbeJoinMetrics { } impl BuildProbeJoinMetrics { + #[tracing::instrument(level = "trace", skip(partition, metrics))] pub fn new(partition: usize, metrics: &ExecutionPlanMetricsSet) -> Self { let join_time = MetricBuilder::new(metrics).subset_time("join_time", partition); @@ -1573,6 +1616,7 @@ mod tests { use datafusion_common::stats::Precision::{Absent, Exact, Inexact}; use datafusion_common::{arrow_datafusion_err, arrow_err, ScalarValue}; + #[tracing::instrument(level = "trace", skip(left, right, on))] fn check( left: &[Column], right: &[Column], @@ -1624,6 +1668,7 @@ mod tests { impl Future for TestFut { type Output = ArrowResult<()>; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll( mut self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -1739,6 +1784,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(num_rows, column_stats, is_exact))] fn create_stats( num_rows: Option, column_stats: Vec, @@ -1756,6 +1802,7 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip(min, max, distinct_count, null_count))] fn create_column_stats( min: Precision, max: Precision, diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index 6a73b944792bc..b30aaa7051921 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -470,36 +470,44 @@ pub trait ExecutionPlanProperties { } impl ExecutionPlanProperties for Arc { + #[tracing::instrument(level = "trace", skip(self))] fn output_partitioning(&self) -> &Partitioning { self.properties().output_partitioning() } + #[tracing::instrument(level = "trace", skip(self))] fn execution_mode(&self) -> ExecutionMode { self.properties().execution_mode() } + #[tracing::instrument(level = "trace", skip(self))] fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { self.properties().output_ordering() } + #[tracing::instrument(level = "trace", skip(self))] fn equivalence_properties(&self) -> &EquivalenceProperties { self.properties().equivalence_properties() } } impl ExecutionPlanProperties for &dyn ExecutionPlan { + #[tracing::instrument(level = "trace", skip(self))] fn output_partitioning(&self) -> &Partitioning { self.properties().output_partitioning() } + #[tracing::instrument(level = "trace", skip(self))] fn execution_mode(&self) -> ExecutionMode { self.properties().execution_mode() } + #[tracing::instrument(level = "trace", skip(self))] fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { self.properties().output_ordering() } + #[tracing::instrument(level = "trace", skip(self))] fn equivalence_properties(&self) -> &EquivalenceProperties { self.properties().equivalence_properties() } @@ -527,11 +535,13 @@ pub enum ExecutionMode { } impl ExecutionMode { + #[tracing::instrument(level = "trace", skip(self))] /// Check whether the execution mode is unbounded or not. 
pub fn is_unbounded(&self) -> bool { matches!(self, ExecutionMode::Unbounded) } + #[tracing::instrument(level = "trace", skip(self))] /// Check whether the execution is pipeline friendly. If so, operator can /// execute safely. pub fn pipeline_friendly(&self) -> bool { @@ -539,6 +549,7 @@ impl ExecutionMode { } } +#[tracing::instrument(level = "trace", skip(children))] /// Conservatively "combines" execution modes of a given collection of operators. fn execution_mode_from_children<'a>( children: impl IntoIterator>, @@ -583,6 +594,7 @@ pub struct PlanProperties { } impl PlanProperties { + #[tracing::instrument(level = "trace", skip(eq_properties, partitioning, execution_mode))] /// Construct a new `PlanPropertiesCache` from the pub fn new( eq_properties: EquivalenceProperties, @@ -599,18 +611,21 @@ impl PlanProperties { } } + #[tracing::instrument(level = "trace", skip(self, partitioning))] /// Overwrite output partitioning with its new value. pub fn with_partitioning(mut self, partitioning: Partitioning) -> Self { self.partitioning = partitioning; self } + #[tracing::instrument(level = "trace", skip(self, execution_mode))] /// Overwrite the execution Mode with its new value. pub fn with_execution_mode(mut self, execution_mode: ExecutionMode) -> Self { self.execution_mode = execution_mode; self } + #[tracing::instrument(level = "trace", skip(self, eq_properties))] /// Overwrite equivalence properties with its new value. pub fn with_eq_properties(mut self, eq_properties: EquivalenceProperties) -> Self { // Changing equivalence properties also changes output ordering, so @@ -620,28 +635,34 @@ impl PlanProperties { self } + #[tracing::instrument(level = "trace", skip(self))] pub fn equivalence_properties(&self) -> &EquivalenceProperties { &self.eq_properties } + #[tracing::instrument(level = "trace", skip(self))] pub fn output_partitioning(&self) -> &Partitioning { &self.partitioning } + #[tracing::instrument(level = "trace", skip(self))] pub fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { self.output_ordering.as_deref() } + #[tracing::instrument(level = "trace", skip(self))] pub fn execution_mode(&self) -> ExecutionMode { self.execution_mode } + #[tracing::instrument(level = "trace", skip(self))] /// Get schema of the node. fn schema(&self) -> &SchemaRef { self.eq_properties.schema() } } +#[tracing::instrument(level = "trace", skip(plan))] /// Indicate whether a data exchange is needed for the input of `plan`, which will be very helpful /// especially for the distributed engine to judge whether need to deal with shuffling. /// Currently there are 3 kinds of execution plan which needs data exchange @@ -670,6 +691,7 @@ pub fn need_data_exchange(plan: Arc) -> bool { } } +#[tracing::instrument(level = "trace", skip(plan, children))] /// Returns a copy of this plan if we change any child according to the pointer comparison. /// The size of `children` must be equal to the size of `ExecutionPlan::children()`. pub fn with_new_children_if_necessary( @@ -691,6 +713,7 @@ pub fn with_new_children_if_necessary( } } +#[tracing::instrument(level = "trace", skip(plan))] /// Return a [wrapper](DisplayableExecutionPlan) around an /// [`ExecutionPlan`] which can be displayed in various easier to /// understand ways. 
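The `PlanProperties` hunks above show a three-argument constructor, builder-style `with_*` overrides, and plain accessors, each now wrapped in a trace span. The sketch below strings them together for orientation; the import paths, schema source, and the concrete partitioning and execution-mode values are assumptions for illustration and are not introduced by this patch.

    // Illustrative use of the PlanProperties API instrumented above. Import paths
    // reflect the crate layout assumed at the time of this patch.
    use arrow_schema::SchemaRef;
    use datafusion_physical_expr::{EquivalenceProperties, Partitioning};
    use datafusion_physical_plan::{ExecutionMode, PlanProperties};

    fn example_properties(schema: SchemaRef) -> PlanProperties {
        PlanProperties::new(
            EquivalenceProperties::new(schema),   // known orderings/equivalences
            Partitioning::UnknownPartitioning(1), // single, unknown partitioning
            ExecutionMode::Bounded,               // finite input
        )
        // Builder-style overrides, as shown in the hunks above:
        .with_partitioning(Partitioning::RoundRobinBatch(4))
        .with_execution_mode(ExecutionMode::Bounded)
    }

Because every accessor on `PlanProperties` is instrumented as well, plan-property lookups themselves become visible once tracing is enabled at TRACE level.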
@@ -698,6 +721,7 @@ pub fn displayable(plan: &dyn ExecutionPlan) -> DisplayableExecutionPlan<'_> { DisplayableExecutionPlan::new(plan) } +#[tracing::instrument(level = "trace", skip(plan, context))] /// Execute the [ExecutionPlan] and collect the results in memory pub async fn collect( plan: Arc, @@ -735,6 +759,7 @@ pub fn execute_stream( } } +#[tracing::instrument(level = "trace", skip(plan, context))] /// Execute the [ExecutionPlan] and collect the results in memory pub async fn collect_partitioned( plan: Arc, @@ -775,6 +800,7 @@ pub async fn collect_partitioned( Ok(batches) } +#[tracing::instrument(level = "trace", skip(plan, context))] /// Execute the [ExecutionPlan] and return a vec with one stream per output /// partition /// @@ -794,6 +820,7 @@ pub fn execute_stream_partitioned( Ok(streams) } +#[tracing::instrument(level = "trace", skip(plan))] /// Utility function yielding a string representation of the given [`ExecutionPlan`]. pub fn get_plan_string(plan: &Arc) -> Vec { let formatted = displayable(plan.as_ref()).indent(true).to_string(); @@ -821,12 +848,14 @@ mod tests { pub struct EmptyExec; impl EmptyExec { + #[tracing::instrument(level = "trace", skip(_schema))] pub fn new(_schema: SchemaRef) -> Self { Self } } impl DisplayAs for EmptyExec { + #[tracing::instrument(level = "trace", skip(self, _t, _f))] fn fmt_as( &self, _t: DisplayFormatType, @@ -837,18 +866,22 @@ mod tests { } impl ExecutionPlan for EmptyExec { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![] } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -856,6 +889,7 @@ mod tests { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self, _partition, _context))] fn execute( &self, _partition: usize, @@ -864,6 +898,7 @@ mod tests { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { unimplemented!() } @@ -873,12 +908,14 @@ mod tests { pub struct RenamedEmptyExec; impl RenamedEmptyExec { + #[tracing::instrument(level = "trace", skip(_schema))] pub fn new(_schema: SchemaRef) -> Self { Self } } impl DisplayAs for RenamedEmptyExec { + #[tracing::instrument(level = "trace", skip(self, _t, _f))] fn fmt_as( &self, _t: DisplayFormatType, @@ -889,6 +926,7 @@ mod tests { } impl ExecutionPlan for RenamedEmptyExec { + #[tracing::instrument(level = "trace", skip())] fn static_name() -> &'static str where Self: Sized, @@ -896,18 +934,22 @@ mod tests { "MyRenamedEmptyExec" } + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![] } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -915,6 +957,7 @@ mod tests { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self, _partition, _context))] fn execute( &self, _partition: usize, @@ -923,6 +966,7 @@ mod tests { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { unimplemented!() } diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs index 
4f8ff4c5606ee..12134a0c11a67 100644 --- a/datafusion/physical-plan/src/limit.rs +++ b/datafusion/physical-plan/src/limit.rs @@ -54,6 +54,7 @@ pub struct GlobalLimitExec { } impl GlobalLimitExec { + #[tracing::instrument(level = "trace", skip(input, skip, fetch))] /// Create a new GlobalLimitExec pub fn new(input: Arc, skip: usize, fetch: Option) -> Self { let cache = Self::compute_properties(&input); @@ -66,21 +67,25 @@ impl GlobalLimitExec { } } + #[tracing::instrument(level = "trace", skip(self))] /// Input execution plan pub fn input(&self) -> &Arc { &self.input } + #[tracing::instrument(level = "trace", skip(self))] /// Number of rows to skip before fetch pub fn skip(&self) -> usize { self.skip } + #[tracing::instrument(level = "trace", skip(self))] /// Maximum number of rows to fetch pub fn fetch(&self) -> Option { self.fetch } + #[tracing::instrument(level = "trace", skip(input))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(input: &Arc) -> PlanProperties { PlanProperties::new( @@ -92,6 +97,7 @@ impl GlobalLimitExec { } impl DisplayAs for GlobalLimitExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -111,35 +117,43 @@ impl DisplayAs for GlobalLimitExec { } impl ExecutionPlan for GlobalLimitExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "GlobalLimitExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.input.clone()] } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_distribution(&self) -> Vec { vec![Distribution::SinglePartition] } + #[tracing::instrument(level = "trace", skip(self))] fn maintains_input_order(&self) -> Vec { vec![true] } + #[tracing::instrument(level = "trace", skip(self))] fn benefits_from_input_partitioning(&self) -> Vec { vec![false] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -151,6 +165,7 @@ impl ExecutionPlan for GlobalLimitExec { ))) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -180,10 +195,12 @@ impl ExecutionPlan for GlobalLimitExec { ))) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { let input_stats = self.input.statistics()?; let skip = self.skip; @@ -275,6 +292,7 @@ pub struct LocalLimitExec { } impl LocalLimitExec { + #[tracing::instrument(level = "trace", skip(input, fetch))] /// Create a new LocalLimitExec partition pub fn new(input: Arc, fetch: usize) -> Self { let cache = Self::compute_properties(&input); @@ -286,16 +304,19 @@ impl LocalLimitExec { } } + #[tracing::instrument(level = "trace", skip(self))] /// Input execution plan pub fn input(&self) -> &Arc { &self.input } + #[tracing::instrument(level = "trace", skip(self))] /// Maximum number of rows to fetch pub fn fetch(&self) -> usize { self.fetch } + #[tracing::instrument(level = "trace", skip(input))] /// This function creates the cache object 
that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(input: &Arc) -> PlanProperties { PlanProperties::new( @@ -307,6 +328,7 @@ impl LocalLimitExec { } impl DisplayAs for LocalLimitExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -321,31 +343,38 @@ impl DisplayAs for LocalLimitExec { } impl ExecutionPlan for LocalLimitExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "LocalLimitExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.input.clone()] } + #[tracing::instrument(level = "trace", skip(self))] fn benefits_from_input_partitioning(&self) -> Vec { vec![false] } + #[tracing::instrument(level = "trace", skip(self))] fn maintains_input_order(&self) -> Vec { vec![true] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -359,6 +388,7 @@ impl ExecutionPlan for LocalLimitExec { } } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -375,10 +405,12 @@ impl ExecutionPlan for LocalLimitExec { ))) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { let input_stats = self.input.statistics()?; let col_stats = Statistics::unknown_column(&self.schema()); @@ -446,6 +478,7 @@ pub struct LimitStream { } impl LimitStream { + #[tracing::instrument(level = "trace", skip(input, skip, fetch, baseline_metrics))] pub fn new( input: SendableRecordBatchStream, skip: usize, @@ -462,6 +495,7 @@ impl LimitStream { } } + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_and_skip( &mut self, cx: &mut Context<'_>, @@ -495,6 +529,7 @@ impl LimitStream { } } + #[tracing::instrument(level = "trace", skip(self, batch))] /// fetches from the batch fn stream_limit(&mut self, batch: RecordBatch) -> Option { // records time on drop @@ -522,6 +557,7 @@ impl LimitStream { impl Stream for LimitStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -549,6 +585,7 @@ impl Stream for LimitStream { } impl RecordBatchStream for LimitStream { + #[tracing::instrument(level = "trace", skip(self))] /// Get the schema fn schema(&self) -> SchemaRef { self.schema.clone() @@ -685,6 +722,7 @@ mod tests { } // test cases for "skip" + #[tracing::instrument(level = "trace", skip(skip, fetch))] async fn skip_and_fetch(skip: usize, fetch: Option) -> Result { let task_ctx = Arc::new(TaskContext::default()); @@ -826,6 +864,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(skip, fetch))] async fn row_number_statistics_for_global_limit( skip: usize, fetch: Option, @@ -841,6 +880,7 @@ mod tests { Ok(offset.statistics()?.num_rows) } + #[tracing::instrument(level = "trace", skip(input_schema, columns))] pub fn build_group_by( input_schema: &SchemaRef, columns: Vec, @@ -852,6 +892,7 @@ mod tests { PhysicalGroupBy::new_single(group_by_expr.clone()) } + 
#[tracing::instrument(level = "trace", skip(skip, fetch))] async fn row_number_inexact_statistics_for_global_limit( skip: usize, fetch: Option, @@ -881,6 +922,7 @@ mod tests { Ok(offset.statistics()?.num_rows) } + #[tracing::instrument(level = "trace", skip(num_partitions, fetch))] async fn row_number_statistics_for_local_limit( num_partitions: usize, fetch: usize, @@ -894,6 +936,7 @@ mod tests { Ok(offset.statistics()?.num_rows) } + #[tracing::instrument(level = "trace", skip(sz))] /// Return a RecordBatch with a single array with row_count sz fn make_batch_no_column(sz: usize) -> RecordBatch { let schema = Arc::new(Schema::empty()); diff --git a/datafusion/physical-plan/src/memory.rs b/datafusion/physical-plan/src/memory.rs index 883cdb540a9e5..7d4c49711abb9 100644 --- a/datafusion/physical-plan/src/memory.rs +++ b/datafusion/physical-plan/src/memory.rs @@ -55,6 +55,7 @@ pub struct MemoryExec { } impl fmt::Debug for MemoryExec { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "partitions: [...]")?; write!(f, "schema: {:?}", self.projected_schema)?; @@ -67,6 +68,7 @@ impl fmt::Debug for MemoryExec { } impl DisplayAs for MemoryExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -103,24 +105,29 @@ impl DisplayAs for MemoryExec { } impl ExecutionPlan for MemoryExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "MemoryExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { // this is a leaf node and has no children vec![] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -133,6 +140,7 @@ impl ExecutionPlan for MemoryExec { } } + #[tracing::instrument(level = "trace", skip(self, partition, _context))] fn execute( &self, partition: usize, @@ -145,6 +153,7 @@ impl ExecutionPlan for MemoryExec { )?)) } + #[tracing::instrument(level = "trace", skip(self))] /// We recompute the statistics dynamically from the arrow metadata as it is pretty cheap to do so fn statistics(&self) -> Result { Ok(common::compute_record_batch_statistics( @@ -156,6 +165,7 @@ impl ExecutionPlan for MemoryExec { } impl MemoryExec { + #[tracing::instrument(level = "trace", skip(partitions, schema, projection))] /// Create a new execution plan for reading in-memory record batches /// The provided `schema` should not have the projection applied. pub fn try_new( @@ -176,20 +186,24 @@ impl MemoryExec { }) } + #[tracing::instrument(level = "trace", skip(self, show_sizes))] /// set `show_sizes` to determine whether to display partition sizes pub fn with_show_sizes(mut self, show_sizes: bool) -> Self { self.show_sizes = show_sizes; self } + #[tracing::instrument(level = "trace", skip(self))] pub fn partitions(&self) -> &[Vec] { &self.partitions } + #[tracing::instrument(level = "trace", skip(self))] pub fn projection(&self) -> &Option> { &self.projection } + #[tracing::instrument(level = "trace", skip(self, sort_information))] /// A memory table can be ordered by multiple expressions simultaneously. 
/// [`EquivalenceProperties`] keeps track of expressions that describe the /// global ordering of the schema. These columns are not necessarily same; e.g. @@ -218,10 +232,12 @@ impl MemoryExec { self } + #[tracing::instrument(level = "trace", skip(self))] pub fn original_schema(&self) -> SchemaRef { self.schema.clone() } + #[tracing::instrument(level = "trace", skip(schema, orderings, partitions))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties( schema: SchemaRef, @@ -252,6 +268,7 @@ pub struct MemoryStream { } impl MemoryStream { + #[tracing::instrument(level = "trace", skip(data, schema, projection))] /// Create an iterator for a vector of record batches pub fn try_new( data: Vec, @@ -267,6 +284,7 @@ impl MemoryStream { }) } + #[tracing::instrument(level = "trace", skip(self, reservation))] /// Set the memory reservation for the data pub(super) fn with_reservation(mut self, reservation: MemoryReservation) -> Self { self.reservation = Some(reservation); @@ -277,6 +295,7 @@ impl MemoryStream { impl Stream for MemoryStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self))] fn poll_next( mut self: std::pin::Pin<&mut Self>, _: &mut Context<'_>, @@ -297,12 +316,14 @@ impl Stream for MemoryStream { }) } + #[tracing::instrument(level = "trace", skip(self))] fn size_hint(&self) -> (usize, Option) { (self.data.len(), Some(self.data.len())) } } impl RecordBatchStream for MemoryStream { + #[tracing::instrument(level = "trace", skip(self))] /// Get the schema fn schema(&self) -> SchemaRef { self.schema.clone() diff --git a/datafusion/physical-plan/src/metrics/baseline.rs b/datafusion/physical-plan/src/metrics/baseline.rs index dc345cd8cdcd6..82045bdc5e032 100644 --- a/datafusion/physical-plan/src/metrics/baseline.rs +++ b/datafusion/physical-plan/src/metrics/baseline.rs @@ -56,6 +56,7 @@ pub struct BaselineMetrics { } impl BaselineMetrics { + #[tracing::instrument(level = "trace", skip(metrics, partition))] /// Create a new BaselineMetric structure, and set `start_time` to now pub fn new(metrics: &ExecutionPlanMetricsSet, partition: usize) -> Self { let start_time = MetricBuilder::new(metrics).start_timestamp(partition); @@ -68,6 +69,7 @@ impl BaselineMetrics { } } + #[tracing::instrument(level = "trace", skip(self))] /// Returns a [`BaselineMetrics`] that updates the same `elapsed_compute` ignoring /// all other metrics /// @@ -81,16 +83,19 @@ impl BaselineMetrics { } } + #[tracing::instrument(level = "trace", skip(self))] /// return the metric for cpu time spend in this operator pub fn elapsed_compute(&self) -> &Time { &self.elapsed_compute } + #[tracing::instrument(level = "trace", skip(self))] /// return the metric for the total number of output rows produced pub fn output_rows(&self) -> &Count { &self.output_rows } + #[tracing::instrument(level = "trace", skip(self))] /// Records the fact that this operator's execution is complete /// (recording the `end_time` metric). 
/// @@ -102,6 +107,7 @@ impl BaselineMetrics { self.end_time.record() } + #[tracing::instrument(level = "trace", skip(self, num_rows))] /// Record that some number of rows have been produced as output /// /// See the [`RecordOutput`] for conveniently recording record @@ -110,6 +116,7 @@ impl BaselineMetrics { self.output_rows.add(num_rows); } + #[tracing::instrument(level = "trace", skip(self))] /// If not previously recorded `done()`, record pub fn try_done(&self) { if self.end_time.value().is_none() { @@ -117,6 +124,7 @@ impl BaselineMetrics { } } + #[tracing::instrument(level = "trace", skip(self, poll))] /// Process a poll result of a stream producing output for an /// operator, recording the output rows and stream done time and /// returning the same poll result @@ -138,6 +146,7 @@ impl BaselineMetrics { } impl Drop for BaselineMetrics { + #[tracing::instrument(level = "trace", skip(self))] fn drop(&mut self) { self.try_done() } @@ -153,6 +162,7 @@ pub trait RecordOutput { } impl RecordOutput for usize { + #[tracing::instrument(level = "trace", skip(self, bm))] fn record_output(self, bm: &BaselineMetrics) -> Self { bm.record_output(self); self @@ -160,6 +170,7 @@ impl RecordOutput for usize { } impl RecordOutput for RecordBatch { + #[tracing::instrument(level = "trace", skip(self, bm))] fn record_output(self, bm: &BaselineMetrics) -> Self { bm.record_output(self.num_rows()); self @@ -167,6 +178,7 @@ impl RecordOutput for RecordBatch { } impl RecordOutput for &RecordBatch { + #[tracing::instrument(level = "trace", skip(self, bm))] fn record_output(self, bm: &BaselineMetrics) -> Self { bm.record_output(self.num_rows()); self @@ -174,6 +186,7 @@ impl RecordOutput for &RecordBatch { } impl RecordOutput for Option<&RecordBatch> { + #[tracing::instrument(level = "trace", skip(self, bm))] fn record_output(self, bm: &BaselineMetrics) -> Self { if let Some(record_batch) = &self { record_batch.record_output(bm); @@ -183,6 +196,7 @@ impl RecordOutput for Option<&RecordBatch> { } impl RecordOutput for Option { + #[tracing::instrument(level = "trace", skip(self, bm))] fn record_output(self, bm: &BaselineMetrics) -> Self { if let Some(record_batch) = &self { record_batch.record_output(bm); @@ -192,6 +206,7 @@ impl RecordOutput for Option { } impl RecordOutput for Result { + #[tracing::instrument(level = "trace", skip(self, bm))] fn record_output(self, bm: &BaselineMetrics) -> Self { if let Ok(record_batch) = &self { record_batch.record_output(bm); diff --git a/datafusion/physical-plan/src/metrics/builder.rs b/datafusion/physical-plan/src/metrics/builder.rs index 2037ddb70c2d0..9ba3febe55ec6 100644 --- a/datafusion/physical-plan/src/metrics/builder.rs +++ b/datafusion/physical-plan/src/metrics/builder.rs @@ -55,6 +55,7 @@ pub struct MetricBuilder<'a> { } impl<'a> MetricBuilder<'a> { + #[tracing::instrument(level = "trace", skip(metrics))] /// Create a new `MetricBuilder` that will register the result of `build()` with the `metrics` pub fn new(metrics: &'a ExecutionPlanMetricsSet) -> Self { Self { @@ -64,12 +65,14 @@ impl<'a> MetricBuilder<'a> { } } + #[tracing::instrument(level = "trace", skip(self, label))] /// Add a label to the metric being constructed pub fn with_label(mut self, label: Label) -> Self { self.labels.push(label); self } + #[tracing::instrument(level = "trace", skip(self, name, value))] /// Add a label to the metric being constructed pub fn with_new_label( self, @@ -79,12 +82,14 @@ impl<'a> MetricBuilder<'a> { self.with_label(Label::new(name.into(), value.into())) } + 
#[tracing::instrument(level = "trace", skip(self, partition))] /// Set the partition of the metric being constructed pub fn with_partition(mut self, partition: usize) -> Self { self.partition = Some(partition); self } + #[tracing::instrument(level = "trace", skip(self, value))] /// Consume self and create a metric of the specified value /// registered with the MetricsSet pub fn build(self, value: MetricValue) { @@ -97,6 +102,7 @@ impl<'a> MetricBuilder<'a> { metrics.register(metric); } + #[tracing::instrument(level = "trace", skip(self, partition))] /// Consume self and create a new counter for recording output rows pub fn output_rows(self, partition: usize) -> Count { let count = Count::new(); @@ -105,6 +111,7 @@ impl<'a> MetricBuilder<'a> { count } + #[tracing::instrument(level = "trace", skip(self, partition))] /// Consume self and create a new counter for recording the number of spills /// triggered by an operator pub fn spill_count(self, partition: usize) -> Count { @@ -114,6 +121,7 @@ impl<'a> MetricBuilder<'a> { count } + #[tracing::instrument(level = "trace", skip(self, partition))] /// Consume self and create a new counter for recording the total spilled bytes /// triggered by an operator pub fn spilled_bytes(self, partition: usize) -> Count { @@ -123,6 +131,7 @@ impl<'a> MetricBuilder<'a> { count } + #[tracing::instrument(level = "trace", skip(self, partition))] /// Consume self and create a new counter for recording the total spilled rows /// triggered by an operator pub fn spilled_rows(self, partition: usize) -> Count { @@ -132,6 +141,7 @@ impl<'a> MetricBuilder<'a> { count } + #[tracing::instrument(level = "trace", skip(self, partition))] /// Consume self and create a new gauge for reporting current memory usage pub fn mem_used(self, partition: usize) -> Gauge { let gauge = Gauge::new(); @@ -140,6 +150,7 @@ impl<'a> MetricBuilder<'a> { gauge } + #[tracing::instrument(level = "trace", skip(self, counter_name, partition))] /// Consumes self and creates a new [`Count`] for recording some /// arbitrary metric of an operator. pub fn counter( @@ -150,6 +161,7 @@ impl<'a> MetricBuilder<'a> { self.with_partition(partition).global_counter(counter_name) } + #[tracing::instrument(level = "trace", skip(self, gauge_name, partition))] /// Consumes self and creates a new [`Gauge`] for reporting some /// arbitrary metric of an operator. 
pub fn gauge( @@ -160,6 +172,7 @@ impl<'a> MetricBuilder<'a> { self.with_partition(partition).global_gauge(gauge_name) } + #[tracing::instrument(level = "trace", skip(self, counter_name))] /// Consumes self and creates a new [`Count`] for recording a /// metric of an overall operator (not per partition) pub fn global_counter(self, counter_name: impl Into>) -> Count { @@ -171,6 +184,7 @@ impl<'a> MetricBuilder<'a> { count } + #[tracing::instrument(level = "trace", skip(self, gauge_name))] /// Consumes self and creates a new [`Gauge`] for reporting a /// metric of an overall operator (not per partition) pub fn global_gauge(self, gauge_name: impl Into>) -> Gauge { @@ -182,6 +196,7 @@ impl<'a> MetricBuilder<'a> { gauge } + #[tracing::instrument(level = "trace", skip(self, partition))] /// Consume self and create a new Timer for recording the elapsed /// CPU time spent by an operator pub fn elapsed_compute(self, partition: usize) -> Time { @@ -191,6 +206,7 @@ impl<'a> MetricBuilder<'a> { time } + #[tracing::instrument(level = "trace", skip(self, subset_name, partition))] /// Consumes self and creates a new Timer for recording some /// subset of an operators execution time. pub fn subset_time( @@ -206,6 +222,7 @@ impl<'a> MetricBuilder<'a> { time } + #[tracing::instrument(level = "trace", skip(self, partition))] /// Consumes self and creates a new Timestamp for recording the /// starting time of execution for a partition pub fn start_timestamp(self, partition: usize) -> Timestamp { @@ -215,6 +232,7 @@ impl<'a> MetricBuilder<'a> { timestamp } + #[tracing::instrument(level = "trace", skip(self, partition))] /// Consumes self and creates a new Timestamp for recording the /// ending time of execution for a partition pub fn end_timestamp(self, partition: usize) -> Timestamp { diff --git a/datafusion/physical-plan/src/metrics/mod.rs b/datafusion/physical-plan/src/metrics/mod.rs index 9232865aa09cc..69c64da0dbd6b 100644 --- a/datafusion/physical-plan/src/metrics/mod.rs +++ b/datafusion/physical-plan/src/metrics/mod.rs @@ -79,6 +79,7 @@ pub struct Metric { } impl Display for Metric { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "{}", self.value.name())?; @@ -113,6 +114,7 @@ impl Display for Metric { } impl Metric { + #[tracing::instrument(level = "trace", skip(value, partition))] /// Create a new [`Metric`]. Consider using [`MetricBuilder`] /// rather than this function directly. pub fn new(value: MetricValue, partition: Option) -> Self { @@ -123,6 +125,7 @@ impl Metric { } } + #[tracing::instrument(level = "trace", skip(value, partition, labels))] /// Create a new [`Metric`]. Consider using [`MetricBuilder`] /// rather than this function directly. pub fn new_with_labels( @@ -137,27 +140,32 @@ impl Metric { } } + #[tracing::instrument(level = "trace", skip(self, label))] /// Add a new label to this metric pub fn with_label(mut self, label: Label) -> Self { self.labels.push(label); self } + #[tracing::instrument(level = "trace", skip(self))] /// What labels are present for this metric? 
pub fn labels(&self) -> &[Label] { &self.labels } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to the value of this metric pub fn value(&self) -> &MetricValue { &self.value } + #[tracing::instrument(level = "trace", skip(self))] /// Return a mutable reference to the value of this metric pub fn value_mut(&mut self) -> &mut MetricValue { &mut self.value } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to the partition pub fn partition(&self) -> Option { self.partition @@ -173,21 +181,25 @@ pub struct MetricsSet { } impl MetricsSet { + #[tracing::instrument(level = "trace", skip())] /// Create a new container of metrics pub fn new() -> Self { Default::default() } + #[tracing::instrument(level = "trace", skip(self, metric))] /// Add the specified metric pub fn push(&mut self, metric: Arc) { self.metrics.push(metric) } + #[tracing::instrument(level = "trace", skip(self))] /// Returns an iterator across all metrics pub fn iter(&self) -> impl Iterator> { self.metrics.iter() } + #[tracing::instrument(level = "trace", skip(self))] /// Convenience: return the number of rows produced, aggregated /// across partitions or `None` if no metric is present pub fn output_rows(&self) -> Option { @@ -195,6 +207,7 @@ impl MetricsSet { .map(|v| v.as_usize()) } + #[tracing::instrument(level = "trace", skip(self))] /// Convenience: return the count of spills, aggregated /// across partitions or `None` if no metric is present pub fn spill_count(&self) -> Option { @@ -202,6 +215,7 @@ impl MetricsSet { .map(|v| v.as_usize()) } + #[tracing::instrument(level = "trace", skip(self))] /// Convenience: return the total byte size of spills, aggregated /// across partitions or `None` if no metric is present pub fn spilled_bytes(&self) -> Option { @@ -209,6 +223,7 @@ impl MetricsSet { .map(|v| v.as_usize()) } + #[tracing::instrument(level = "trace", skip(self))] /// Convenience: return the total rows of spills, aggregated /// across partitions or `None` if no metric is present pub fn spilled_rows(&self) -> Option { @@ -216,6 +231,7 @@ impl MetricsSet { .map(|v| v.as_usize()) } + #[tracing::instrument(level = "trace", skip(self))] /// Convenience: return the amount of elapsed CPU time spent, /// aggregated across partitions or `None` if no metric is present pub fn elapsed_compute(&self) -> Option { @@ -223,6 +239,7 @@ impl MetricsSet { .map(|v| v.as_usize()) } + #[tracing::instrument(level = "trace", skip(self, f))] /// Sums the values for metrics for which `f(metric)` returns /// `true`, and returns the value. Returns `None` if no metrics match /// the predicate. @@ -248,6 +265,7 @@ impl MetricsSet { Some(accum) } + #[tracing::instrument(level = "trace", skip(self, metric_name))] /// Returns the sum of all the metrics with the specified name /// in the returned set. pub fn sum_by_name(&self, metric_name: &str) -> Option { @@ -266,6 +284,7 @@ impl MetricsSet { }) } + #[tracing::instrument(level = "trace", skip(self))] /// Returns a new derived `MetricsSet` where all metrics /// that had the same name have been /// aggregated together. 
The resulting `MetricsSet` has all @@ -299,6 +318,7 @@ impl MetricsSet { } } + #[tracing::instrument(level = "trace", skip(self))] /// Sort the order of metrics so the "most useful" show up first pub fn sorted_for_display(mut self) -> Self { self.metrics @@ -306,6 +326,7 @@ impl MetricsSet { self } + #[tracing::instrument(level = "trace", skip(self))] /// Remove all timestamp metrics (for more compact display) pub fn timestamps_removed(self) -> Self { let Self { metrics } = self; @@ -320,6 +341,7 @@ impl MetricsSet { } impl Display for MetricsSet { + #[tracing::instrument(level = "trace", skip(self, f))] /// Format the [`MetricsSet`] as a single string fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { let mut is_first = true; @@ -353,6 +375,7 @@ pub struct ExecutionPlanMetricsSet { } impl ExecutionPlanMetricsSet { + #[tracing::instrument(level = "trace", skip())] /// Create a new empty shared metrics set pub fn new() -> Self { Self { @@ -360,11 +383,13 @@ impl ExecutionPlanMetricsSet { } } + #[tracing::instrument(level = "trace", skip(self, metric))] /// Add the specified metric to the underlying metric set pub fn register(&self, metric: Arc) { self.inner.lock().push(metric) } + #[tracing::instrument(level = "trace", skip(self))] /// Return a clone of the inner [`MetricsSet`] pub fn clone_inner(&self) -> MetricsSet { let guard = self.inner.lock(); @@ -392,6 +417,7 @@ pub struct Label { } impl Label { + #[tracing::instrument(level = "trace", skip(name, value))] /// Create a new [`Label`] pub fn new( name: impl Into>, @@ -402,11 +428,13 @@ impl Label { Self { name, value } } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the name of this label pub fn name(&self) -> &str { self.name.as_ref() } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the value of this label pub fn value(&self) -> &str { self.value.as_ref() @@ -414,6 +442,7 @@ impl Label { } impl Display for Label { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "{}={}", self.name, self.value) } @@ -670,6 +699,7 @@ mod tests { MetricBuilder::new(&metrics).output_rows(0); let metrics = metrics.clone_inner(); + #[tracing::instrument(level = "trace", skip(metrics))] fn metric_names(metrics: &MetricsSet) -> String { let n = metrics.iter().map(|m| m.value().name()).collect::>(); n.join(", ") diff --git a/datafusion/physical-plan/src/metrics/value.rs b/datafusion/physical-plan/src/metrics/value.rs index 22db8f1e4e886..1f628dc1cb16c 100644 --- a/datafusion/physical-plan/src/metrics/value.rs +++ b/datafusion/physical-plan/src/metrics/value.rs @@ -41,24 +41,28 @@ pub struct Count { } impl PartialEq for Count { + #[tracing::instrument(level = "trace", skip(self, other))] fn eq(&self, other: &Self) -> bool { self.value().eq(&other.value()) } } impl Display for Count { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "{}", self.value()) } } impl Default for Count { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self::new() } } impl Count { + #[tracing::instrument(level = "trace", skip())] /// create a new counter pub fn new() -> Self { Self { @@ -66,6 +70,7 @@ impl Count { } } + #[tracing::instrument(level = "trace", skip(self, n))] /// Add `n` to the metric's value pub fn add(&self, n: usize) { // relaxed ordering for operations on `value` poses no issues @@ -73,6 +78,7 @@ impl Count { 
self.value.fetch_add(n, Ordering::Relaxed); } + #[tracing::instrument(level = "trace", skip(self))] /// Get the current value pub fn value(&self) -> usize { self.value.load(Ordering::Relaxed) @@ -90,24 +96,28 @@ pub struct Gauge { } impl PartialEq for Gauge { + #[tracing::instrument(level = "trace", skip(self, other))] fn eq(&self, other: &Self) -> bool { self.value().eq(&other.value()) } } impl Display for Gauge { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "{}", self.value()) } } impl Default for Gauge { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self::new() } } impl Gauge { + #[tracing::instrument(level = "trace", skip())] /// create a new gauge pub fn new() -> Self { Self { @@ -115,6 +125,7 @@ impl Gauge { } } + #[tracing::instrument(level = "trace", skip(self, n))] /// Add `n` to the metric's value pub fn add(&self, n: usize) { // relaxed ordering for operations on `value` poses no issues @@ -122,6 +133,7 @@ impl Gauge { self.value.fetch_add(n, Ordering::Relaxed); } + #[tracing::instrument(level = "trace", skip(self, n))] /// Sub `n` from the metric's value pub fn sub(&self, n: usize) { // relaxed ordering for operations on `value` poses no issues @@ -129,11 +141,13 @@ impl Gauge { self.value.fetch_sub(n, Ordering::Relaxed); } + #[tracing::instrument(level = "trace", skip(self, n))] /// Set metric's value to maximum of `n` and current value pub fn set_max(&self, n: usize) { self.value.fetch_max(n, Ordering::Relaxed); } + #[tracing::instrument(level = "trace", skip(self, n))] /// Set the metric's value to `n` and return the previous value pub fn set(&self, n: usize) -> usize { // relaxed ordering for operations on `value` poses no issues @@ -141,6 +155,7 @@ impl Gauge { self.value.swap(n, Ordering::Relaxed) } + #[tracing::instrument(level = "trace", skip(self))] /// Get the current value pub fn value(&self) -> usize { self.value.load(Ordering::Relaxed) @@ -155,18 +170,21 @@ pub struct Time { } impl Default for Time { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self::new() } } impl PartialEq for Time { + #[tracing::instrument(level = "trace", skip(self, other))] fn eq(&self, other: &Self) -> bool { self.value().eq(&other.value()) } } impl Display for Time { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { let duration = std::time::Duration::from_nanos(self.value() as u64); write!(f, "{duration:?}") @@ -174,6 +192,7 @@ impl Display for Time { } impl Time { + #[tracing::instrument(level = "trace", skip())] /// Create a new [`Time`] wrapper suitable for recording elapsed /// times for operations. 
pub fn new() -> Self { @@ -182,11 +201,13 @@ impl Time { } } + #[tracing::instrument(level = "trace", skip(self, start))] /// Add elapsed nanoseconds since `start`to self pub fn add_elapsed(&self, start: Instant) { self.add_duration(start.elapsed()); } + #[tracing::instrument(level = "trace", skip(self, duration))] /// Add duration of time to self /// /// Note: this will always increment the recorded time by at least 1 nanosecond @@ -202,11 +223,13 @@ impl Time { self.nanos.fetch_add(more_nanos.max(1), Ordering::Relaxed); } + #[tracing::instrument(level = "trace", skip(self, other))] /// Add the number of nanoseconds of other `Time` to self pub fn add(&self, other: &Time) { self.add_duration(Duration::from_nanos(other.value() as u64)) } + #[tracing::instrument(level = "trace", skip(self))] /// return a scoped guard that adds the amount of time elapsed /// between its creation and its drop or call to `stop` to the /// underlying metric. @@ -217,6 +240,7 @@ impl Time { } } + #[tracing::instrument(level = "trace", skip(self))] /// Get the number of nanoseconds record by this Time metric pub fn value(&self) -> usize { self.nanos.load(Ordering::Relaxed) @@ -232,12 +256,14 @@ pub struct Timestamp { } impl Default for Timestamp { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self::new() } } impl Timestamp { + #[tracing::instrument(level = "trace", skip())] /// Create a new timestamp and sets its value to 0 pub fn new() -> Self { Self { @@ -245,16 +271,19 @@ impl Timestamp { } } + #[tracing::instrument(level = "trace", skip(self))] /// Sets the timestamps value to the current time pub fn record(&self) { self.set(Utc::now()) } + #[tracing::instrument(level = "trace", skip(self, now))] /// Sets the timestamps value to a specified time pub fn set(&self, now: DateTime) { *self.timestamp.lock() = Some(now); } + #[tracing::instrument(level = "trace", skip(self))] /// return the timestamps value at the last time `record()` was /// called. 
/// @@ -263,6 +292,7 @@ impl Timestamp { *self.timestamp.lock() } + #[tracing::instrument(level = "trace", skip(self, other))] /// sets the value of this timestamp to the minimum of this and other pub fn update_to_min(&self, other: &Timestamp) { let min = match (self.value(), other.value()) { @@ -275,6 +305,7 @@ impl Timestamp { *self.timestamp.lock() = min; } + #[tracing::instrument(level = "trace", skip(self, other))] /// sets the value of this timestamp to the maximum of this and other pub fn update_to_max(&self, other: &Timestamp) { let max = match (self.value(), other.value()) { @@ -289,12 +320,14 @@ impl Timestamp { } impl PartialEq for Timestamp { + #[tracing::instrument(level = "trace", skip(self, other))] fn eq(&self, other: &Self) -> bool { self.value().eq(&other.value()) } } impl Display for Timestamp { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self.value() { None => write!(f, "NONE"), @@ -314,6 +347,7 @@ pub struct ScopedTimerGuard<'a> { } impl<'a> ScopedTimerGuard<'a> { + #[tracing::instrument(level = "trace", skip(self))] /// Stop the timer timing and record the time taken pub fn stop(&mut self) { if let Some(start) = self.start.take() { @@ -321,11 +355,13 @@ impl<'a> ScopedTimerGuard<'a> { } } + #[tracing::instrument(level = "trace", skip(self))] /// Restarts the timer recording from the current time pub fn restart(&mut self) { self.start = Some(Instant::now()) } + #[tracing::instrument(level = "trace", skip(self))] /// Stop the timer, record the time taken and consume self pub fn done(mut self) { self.stop() @@ -333,6 +369,7 @@ impl<'a> ScopedTimerGuard<'a> { } impl<'a> Drop for ScopedTimerGuard<'a> { + #[tracing::instrument(level = "trace", skip(self))] fn drop(&mut self) { self.stop() } @@ -403,6 +440,7 @@ pub enum MetricValue { } impl MetricValue { + #[tracing::instrument(level = "trace", skip(self))] /// Return the name of this SQL metric pub fn name(&self) -> &str { match self { @@ -420,6 +458,7 @@ impl MetricValue { } } + #[tracing::instrument(level = "trace", skip(self))] /// Return the value of the metric as a usize value pub fn as_usize(&self) -> usize { match self { @@ -445,6 +484,7 @@ impl MetricValue { } } + #[tracing::instrument(level = "trace", skip(self))] /// create a new MetricValue with the same type as `self` suitable /// for accumulating pub fn new_empty(&self) -> Self { @@ -472,6 +512,7 @@ impl MetricValue { } } + #[tracing::instrument(level = "trace", skip(self, other))] /// Aggregates the value of other to `self`. panic's if the types /// are mismatched or aggregating does not make sense for this /// value @@ -524,6 +565,7 @@ impl MetricValue { } } + #[tracing::instrument(level = "trace", skip(self))] /// Returns a number by which to sort metrics by display. 
Lower /// numbers are "more useful" (and displayed first) pub fn display_sort_key(&self) -> u8 { @@ -542,6 +584,7 @@ impl MetricValue { } } + #[tracing::instrument(level = "trace", skip(self))] /// returns true if this metric has a timestamp value pub fn is_timestamp(&self) -> bool { matches!(self, Self::StartTimestamp(_) | Self::EndTimestamp(_)) @@ -549,6 +592,7 @@ impl MetricValue { } impl Display for MetricValue { + #[tracing::instrument(level = "trace", skip(self, f))] /// Prints the value of this metric fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { diff --git a/datafusion/physical-plan/src/placeholder_row.rs b/datafusion/physical-plan/src/placeholder_row.rs index c94c2b0607d77..4d8e4bf626f39 100644 --- a/datafusion/physical-plan/src/placeholder_row.rs +++ b/datafusion/physical-plan/src/placeholder_row.rs @@ -47,6 +47,7 @@ pub struct PlaceholderRowExec { } impl PlaceholderRowExec { + #[tracing::instrument(level = "trace", skip(schema))] /// Create a new PlaceholderRowExec pub fn new(schema: SchemaRef) -> Self { let partitions = 1; @@ -58,6 +59,7 @@ impl PlaceholderRowExec { } } + #[tracing::instrument(level = "trace", skip(self, partitions))] /// Create a new PlaceholderRowExecPlaceholderRowExec with specified partition number pub fn with_partitions(mut self, partitions: usize) -> Self { self.partitions = partitions; @@ -67,6 +69,7 @@ impl PlaceholderRowExec { self } + #[tracing::instrument(level = "trace", skip(self))] fn data(&self) -> Result> { Ok({ let n_field = self.schema.fields.len(); @@ -90,10 +93,12 @@ impl PlaceholderRowExec { }) } + #[tracing::instrument(level = "trace", skip(n_partitions))] fn output_partitioning_helper(n_partitions: usize) -> Partitioning { Partitioning::UnknownPartitioning(n_partitions) } + #[tracing::instrument(level = "trace", skip(schema, n_partitions))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
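// The attribute this patch adds everywhere is, conceptually, a trace-level span
// opened for the duration of the function body. A minimal hand-written
// equivalent, assuming only the `tracing` crate; `compute_value` is an
// illustrative name, not part of DataFusion.
fn compute_value(count: &std::sync::atomic::AtomicUsize) -> usize {
    // `skip(self)` in the attribute form means no arguments are recorded as
    // span fields; only the span name and its place in the span tree are emitted.
    let span = tracing::trace_span!("compute_value");
    let _guard = span.enter();
    count.load(std::sync::atomic::Ordering::Relaxed)
}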
fn compute_properties(schema: SchemaRef, n_partitions: usize) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); @@ -105,6 +110,7 @@ impl PlaceholderRowExec { } impl DisplayAs for PlaceholderRowExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -119,23 +125,28 @@ impl DisplayAs for PlaceholderRowExec { } impl ExecutionPlan for PlaceholderRowExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "PlaceholderRowExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![] } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -143,6 +154,7 @@ impl ExecutionPlan for PlaceholderRowExec { Ok(self) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -165,6 +177,7 @@ impl ExecutionPlan for PlaceholderRowExec { )?)) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { let batch = self .data() diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index f72815c01a9e6..fe6759299082f 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -63,6 +63,7 @@ pub struct ProjectionExec { } impl ProjectionExec { + #[tracing::instrument(level = "trace", skip(expr, input))] /// Create a projection on an input pub fn try_new( expr: Vec<(Arc, String)>, @@ -104,16 +105,19 @@ impl ProjectionExec { }) } + #[tracing::instrument(level = "trace", skip(self))] /// The projection expressions stored as tuples of (expression, output column name) pub fn expr(&self) -> &[(Arc, String)] { &self.expr } + #[tracing::instrument(level = "trace", skip(self))] /// The input plan pub fn input(&self) -> &Arc { &self.input } + #[tracing::instrument(level = "trace", skip(input, projection_mapping, schema))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
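// On methods such as `execute`, the patch skips `self` and `context`: skipped
// arguments are not captured as span fields, so they need no `Debug` impl and the
// per-partition span stays small, while arguments that are not skipped (like the
// `usize` partition index) are recorded automatically via their `Debug` impl.
// Sketch only; `run_partition` and the `String` context are illustrative.
#[tracing::instrument(level = "trace", skip(context))]
fn run_partition(partition: usize, context: std::sync::Arc<String>) -> usize {
    // The emitted span is named "run_partition" and carries `partition` as a field.
    partition + context.len()
}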
fn compute_properties( input: &Arc, @@ -153,6 +157,7 @@ impl ProjectionExec { } impl DisplayAs for ProjectionExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -180,28 +185,34 @@ impl DisplayAs for ProjectionExec { } impl ExecutionPlan for ProjectionExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "ProjectionExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.input.clone()] } + #[tracing::instrument(level = "trace", skip(self))] fn maintains_input_order(&self) -> Vec { // tell optimizer this operator doesn't reorder its input vec![true] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, mut children: Vec>, @@ -210,6 +221,7 @@ impl ExecutionPlan for ProjectionExec { .map(|p| Arc::new(p) as _) } + #[tracing::instrument(level = "trace", skip(self))] fn benefits_from_input_partitioning(&self) -> Vec { let all_simple_exprs = self .expr @@ -220,6 +232,7 @@ impl ExecutionPlan for ProjectionExec { vec![!all_simple_exprs] } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -234,10 +247,12 @@ impl ExecutionPlan for ProjectionExec { })) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { Ok(stats_projection( self.input.statistics()?, @@ -247,6 +262,7 @@ impl ExecutionPlan for ProjectionExec { } } +#[tracing::instrument(level = "trace", skip(e, input_schema))] /// If e is a direct column reference, returns the field level /// metadata for that field, if any. 
Otherwise returns None fn get_field_metadata( @@ -261,6 +277,7 @@ fn get_field_metadata( .cloned() } +#[tracing::instrument(level = "trace", skip(stats, exprs, schema))] fn stats_projection( mut stats: Statistics, exprs: impl Iterator>, @@ -296,6 +313,7 @@ fn stats_projection( } impl ProjectionStream { + #[tracing::instrument(level = "trace", skip(self, batch))] fn batch_project(&self, batch: &RecordBatch) -> Result { // records time on drop let _timer = self.baseline_metrics.elapsed_compute().timer(); @@ -330,6 +348,7 @@ struct ProjectionStream { impl Stream for ProjectionStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -342,6 +361,7 @@ impl Stream for ProjectionStream { self.baseline_metrics.record_poll(poll) } + #[tracing::instrument(level = "trace", skip(self))] fn size_hint(&self) -> (usize, Option) { // same number of record batches self.input.size_hint() @@ -349,6 +369,7 @@ impl Stream for ProjectionStream { } impl RecordBatchStream for ProjectionStream { + #[tracing::instrument(level = "trace", skip(self))] /// Get the schema fn schema(&self) -> SchemaRef { self.schema.clone() @@ -380,6 +401,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip())] fn get_stats() -> Statistics { Statistics { num_rows: Precision::Exact(5), @@ -407,6 +429,7 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip())] fn get_schema() -> Schema { let field_0 = Field::new("col0", DataType::Int64, false); let field_1 = Field::new("col1", DataType::Utf8, false); diff --git a/datafusion/physical-plan/src/recursive_query.rs b/datafusion/physical-plan/src/recursive_query.rs index ed897d78f0c8a..d78f7d578df0f 100644 --- a/datafusion/physical-plan/src/recursive_query.rs +++ b/datafusion/physical-plan/src/recursive_query.rs @@ -72,6 +72,7 @@ pub struct RecursiveQueryExec { } impl RecursiveQueryExec { + #[tracing::instrument(level = "trace", skip(name, static_term, recursive_term, is_distinct))] /// Create a new RecursiveQueryExec pub fn try_new( name: String, @@ -95,6 +96,7 @@ impl RecursiveQueryExec { }) } + #[tracing::instrument(level = "trace", skip(schema))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); @@ -108,32 +110,39 @@ impl RecursiveQueryExec { } impl ExecutionPlan for RecursiveQueryExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "RecursiveQueryExec" } + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.static_term.clone(), self.recursive_term.clone()] } // TODO: control these hints and see whether we can // infer some from the child plans (static/recurisve terms). 
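// Several of the instrumented functions here (`poll_next`, `batch_project`,
// `size_hint`) sit on per-batch or per-poll hot paths. When no subscriber has
// trace level enabled, span creation falls through a cached callsite check, but
// any extra work done purely for logging is still worth gating explicitly.
// Minimal sketch, assuming the `tracing` crate; `note_batch` is illustrative.
fn note_batch(num_rows: usize) {
    if tracing::enabled!(tracing::Level::TRACE) {
        // Only pay for the event when a trace-level subscriber is listening.
        tracing::trace!(num_rows, "polled one projected batch");
    }
}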
+ #[tracing::instrument(level = "trace", skip(self))] fn maintains_input_order(&self) -> Vec { vec![false, false] } + #[tracing::instrument(level = "trace", skip(self))] fn benefits_from_input_partitioning(&self) -> Vec { vec![false, false] } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_distribution(&self) -> Vec { vec![ datafusion_physical_expr::Distribution::SinglePartition, @@ -141,6 +150,7 @@ impl ExecutionPlan for RecursiveQueryExec { ] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -154,6 +164,7 @@ impl ExecutionPlan for RecursiveQueryExec { .map(|e| Arc::new(e) as _) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -178,16 +189,19 @@ impl ExecutionPlan for RecursiveQueryExec { ))) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { Ok(Statistics::new_unknown(&self.schema())) } } impl DisplayAs for RecursiveQueryExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -248,6 +262,7 @@ struct RecursiveQueryStream { } impl RecursiveQueryStream { + #[tracing::instrument(level = "trace", skip(task_context, work_table, recursive_term, static_stream, baseline_metrics))] /// Create a new recursive query stream fn new( task_context: Arc, @@ -272,6 +287,7 @@ impl RecursiveQueryStream { } } + #[tracing::instrument(level = "trace", skip(self, batch))] /// Push a clone of the given batch to the in memory buffer, and then return /// a poll with it. fn push_batch( @@ -286,6 +302,7 @@ impl RecursiveQueryStream { Poll::Ready(Some(Ok(batch))) } + #[tracing::instrument(level = "trace", skip(self, cx))] /// Start polling for the next iteration, will be called either after the static term /// is completed or another term is completed. It will follow the algorithm above on /// to check whether the recursion has ended. @@ -320,6 +337,7 @@ impl RecursiveQueryStream { } } +#[tracing::instrument(level = "trace", skip(plan, work_table))] fn assign_work_table( plan: Arc, work_table: Arc, @@ -346,6 +364,7 @@ fn assign_work_table( .data() } +#[tracing::instrument(level = "trace", skip(plan))] /// Some plans will change their internal states after execution, making them unable to be executed again. /// This function uses `ExecutionPlan::with_new_children` to fork a new plan with initial states. /// @@ -368,6 +387,7 @@ fn reset_plan_states(plan: Arc) -> Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: std::pin::Pin<&mut Self>, cx: &mut Context<'_>, @@ -403,6 +423,7 @@ impl Stream for RecursiveQueryStream { } impl RecordBatchStream for RecursiveQueryStream { + #[tracing::instrument(level = "trace", skip(self))] /// Get the schema fn schema(&self) -> SchemaRef { self.schema.clone() diff --git a/datafusion/physical-plan/src/repartition/distributor_channels.rs b/datafusion/physical-plan/src/repartition/distributor_channels.rs index 675d26bbfb9fc..38abe7cc59411 100644 --- a/datafusion/physical-plan/src/repartition/distributor_channels.rs +++ b/datafusion/physical-plan/src/repartition/distributor_channels.rs @@ -51,6 +51,7 @@ use std::{ use parking_lot::Mutex; +#[tracing::instrument(level = "trace", skip(n))] /// Create `n` empty channels. 
pub fn channels( n: usize, @@ -82,6 +83,7 @@ pub fn channels( type PartitionAwareSenders = Vec>>; type PartitionAwareReceivers = Vec>>; +#[tracing::instrument(level = "trace", skip(n_in, n_out))] /// Create `n_out` empty channels for each of the `n_in` inputs. /// This way, each distinct partition will communicate via a dedicated channel. /// This SPSC structure enables us to track which partition input data comes from. @@ -99,12 +101,14 @@ pub fn partition_aware_channels( pub struct SendError(pub T); impl std::fmt::Debug for SendError { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_tuple("SendError").finish() } } impl std::fmt::Display for SendError { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "cannot send data, receiver is gone") } @@ -125,6 +129,7 @@ pub struct DistributionSender { } impl DistributionSender { + #[tracing::instrument(level = "trace", skip(self, element))] /// Send data. /// /// This fails if the [receiver](DistributionReceiver) is gone. @@ -138,6 +143,7 @@ impl DistributionSender { } impl Clone for DistributionSender { + #[tracing::instrument(level = "trace", skip(self))] fn clone(&self) -> Self { self.channel.n_senders.fetch_add(1, Ordering::SeqCst); @@ -149,6 +155,7 @@ impl Clone for DistributionSender { } impl Drop for DistributionSender { + #[tracing::instrument(level = "trace", skip(self))] fn drop(&mut self) { let n_senders_pre = self.channel.n_senders.fetch_sub(1, Ordering::SeqCst); // is the the last copy of the sender side? @@ -206,6 +213,7 @@ pub struct SendFuture<'a, T> { impl<'a, T> Future for SendFuture<'a, T> { type Output = Result<(), SendError>; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { let this = &mut *self; assert!(this.element.is_some(), "polled ready future"); @@ -259,6 +267,7 @@ pub struct DistributionReceiver { } impl DistributionReceiver { + #[tracing::instrument(level = "trace", skip(self))] /// Receive data from channel. /// /// Returns `None` if the channel is empty and no [senders](DistributionSender) are left. @@ -272,6 +281,7 @@ impl DistributionReceiver { } impl Drop for DistributionReceiver { + #[tracing::instrument(level = "trace", skip(self))] fn drop(&mut self) { let mut guard_channel_state = self.channel.state.lock(); let data = guard_channel_state.data.take().expect("not dropped yet"); @@ -298,6 +308,7 @@ pub struct RecvFuture<'a, T> { impl<'a, T> Future for RecvFuture<'a, T> { type Output = Option; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { let this = &mut *self; assert!(!this.rdy, "polled ready future"); @@ -368,6 +379,7 @@ struct Channel { } impl Channel { + #[tracing::instrument(level = "trace", skip(id))] /// Create new channel with one sender (so we don't need to [fetch-add](AtomicUsize::fetch_add) directly afterwards). fn new_with_one_sender(id: usize) -> Self { Channel { @@ -396,6 +408,7 @@ struct ChannelState { } impl ChannelState { + #[tracing::instrument(level = "trace", skip(self))] /// Get all [`recv_wakers`](Self::recv_wakers) and replace with identically-sized buffer. /// /// The wakers should be woken AFTER the lock to [this state](Self) was dropped. 
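// Constructors with no interesting arguments are annotated with an empty
// `skip()` list in this patch; `skip_all` is an equivalent spelling for the
// zero-argument case and will not start requiring `Debug` on parameters that are
// added later. Sketch only; `ChannelCounter` is a stand-in type, not DataFusion's.
#[derive(Default)]
struct ChannelCounter {
    senders: usize,
}

impl ChannelCounter {
    #[tracing::instrument(level = "trace", skip_all)]
    fn new() -> Self {
        Self::default()
    }
}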
@@ -428,6 +441,7 @@ struct Gate { } impl Gate { + #[tracing::instrument(level = "trace", skip(self, id))] /// Wake senders for a specific channel. /// /// This is helpful to signal that the receiver side is gone and the senders shall now error. @@ -455,6 +469,7 @@ impl Gate { } } + #[tracing::instrument(level = "trace", skip(self))] fn decr_empty_channels(&self) { let old_count = self.empty_channels.fetch_sub(1, Ordering::SeqCst); @@ -796,6 +811,7 @@ mod tests { assert!(waker.woken()); } + #[tracing::instrument(level = "trace", skip(fut))] /// Poll a given [`Future`] and ensure it is [ready](Poll::Ready). #[track_caller] fn poll_ready(fut: &mut F) -> F::Output @@ -808,6 +824,7 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip(fut))] /// Poll a given [`Future`] and ensure it is [pending](Poll::Pending). /// /// Returns a waker that can later be checked. @@ -823,6 +840,7 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip(fut))] fn poll(fut: &mut F) -> (Poll, Arc) where F: Future + Unpin, @@ -841,6 +859,7 @@ mod tests { } impl TestWaker { + #[tracing::instrument(level = "trace", skip(self))] /// Was [`wake`](Waker::wake) called? fn woken(&self) -> bool { self.woken.load(Ordering::SeqCst) @@ -848,6 +867,7 @@ mod tests { } impl ArcWake for TestWaker { + #[tracing::instrument(level = "trace", skip(arc_self))] fn wake_by_ref(arc_self: &Arc) { arc_self.woken.store(true, Ordering::SeqCst); } diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index e31fdc6ee2c21..9fe16b1493559 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -79,6 +79,7 @@ struct RepartitionExecState { } impl RepartitionExecState { + #[tracing::instrument(level = "trace", skip(input, partitioning, metrics, preserve_order, name, context))] fn new( input: Arc, partitioning: Partitioning, @@ -193,6 +194,7 @@ enum BatchPartitionerState { } impl BatchPartitioner { + #[tracing::instrument(level = "trace", skip(partitioning, timer))] /// Create a new [`BatchPartitioner`] with the provided [`Partitioning`] /// /// The time spent repartitioning will be recorded to `timer` @@ -217,6 +219,7 @@ impl BatchPartitioner { Ok(Self { state, timer }) } + #[tracing::instrument(level = "trace", skip(self, batch, f))] /// Partition the provided [`RecordBatch`] into one or more partitioned [`RecordBatch`] /// based on the [`Partitioning`] specified on construction /// @@ -236,6 +239,7 @@ impl BatchPartitioner { }) } + #[tracing::instrument(level = "trace", skip(self, batch))] /// Actual implementation of [`partition`](Self::partition). /// /// The reason this was pulled out is that we need to have a variant of `partition` that works w/ sync functions, @@ -317,6 +321,7 @@ impl BatchPartitioner { } // return the number of output partitions + #[tracing::instrument(level = "trace", skip(self))] fn num_partitions(&self) -> usize { match self.state { BatchPartitionerState::RoundRobin { num_partitions, .. 
} => num_partitions, @@ -417,6 +422,7 @@ struct RepartitionMetrics { } impl RepartitionMetrics { + #[tracing::instrument(level = "trace", skip(input_partition, num_output_partitions, metrics))] pub fn new( input_partition: usize, num_output_partitions: usize, @@ -450,22 +456,26 @@ impl RepartitionMetrics { } impl RepartitionExec { + #[tracing::instrument(level = "trace", skip(self))] /// Input execution plan pub fn input(&self) -> &Arc { &self.input } + #[tracing::instrument(level = "trace", skip(self))] /// Partitioning scheme to use pub fn partitioning(&self) -> &Partitioning { &self.partitioning } + #[tracing::instrument(level = "trace", skip(self))] /// Get preserve_order flag of the RepartitionExecutor /// `true` means `SortPreservingRepartitionExec`, `false` means `RepartitionExec` pub fn preserve_order(&self) -> bool { self.preserve_order } + #[tracing::instrument(level = "trace", skip(self))] /// Get name used to display this Exec pub fn name(&self) -> &str { "RepartitionExec" @@ -473,6 +483,7 @@ impl RepartitionExec { } impl DisplayAs for RepartitionExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -506,23 +517,28 @@ impl DisplayAs for RepartitionExec { } impl ExecutionPlan for RepartitionExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "RepartitionExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.input.clone()] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, mut children: Vec>, @@ -535,14 +551,17 @@ impl ExecutionPlan for RepartitionExec { Ok(Arc::new(repartition)) } + #[tracing::instrument(level = "trace", skip(self))] fn benefits_from_input_partitioning(&self) -> Vec { vec![matches!(self.partitioning, Partitioning::Hash(_, _))] } + #[tracing::instrument(level = "trace", skip(self))] fn maintains_input_order(&self) -> Vec { Self::maintains_input_order_helper(self.input(), self.preserve_order) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -653,16 +672,19 @@ impl ExecutionPlan for RepartitionExec { Ok(Box::pin(stream)) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { self.input.statistics() } } impl RepartitionExec { + #[tracing::instrument(level = "trace", skip(input, partitioning))] /// Create a new RepartitionExec, that produces output `partitioning`, and /// does not preserve the order of the input (see [`Self::with_preserve_order`] /// for more details) @@ -683,6 +705,7 @@ impl RepartitionExec { }) } + #[tracing::instrument(level = "trace", skip(input, preserve_order))] fn maintains_input_order_helper( input: &Arc, preserve_order: bool, @@ -691,6 +714,7 @@ impl RepartitionExec { vec![preserve_order || input.output_partitioning().partition_count() <= 1] } + #[tracing::instrument(level = "trace", skip(input, preserve_order))] fn eq_properties_helper( input: &Arc, preserve_order: bool, @@ -704,6 +728,7 @@ impl RepartitionExec { eq_properties } + #[tracing::instrument(level = "trace", 
skip(input, partitioning, preserve_order))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties( input: &Arc, @@ -720,6 +745,7 @@ impl RepartitionExec { ) } + #[tracing::instrument(level = "trace", skip(self))] /// Specify if this reparititoning operation should preserve the order of /// rows from its input when producing output. Preserving order is more /// expensive at runtime, so should only be set if the output of this @@ -739,6 +765,7 @@ impl RepartitionExec { self } + #[tracing::instrument(level = "trace", skip(self))] /// Return the sort expressions that are used to merge fn sort_exprs(&self) -> Option<&[PhysicalSortExpr]> { if self.preserve_order { @@ -748,6 +775,7 @@ impl RepartitionExec { } } + #[tracing::instrument(level = "trace", skip(input, partition, output_channels, partitioning, metrics, context))] /// Pulls data from the specified input plan, feeding it to the /// output partitions based on the desired partitioning /// @@ -830,6 +858,7 @@ impl RepartitionExec { Ok(()) } + #[tracing::instrument(level = "trace", skip(input_task, txs))] /// Waits for `input_task` which is consuming one of the inputs to /// complete. Upon each successful completion, sends a `None` to /// each of the output tx channels to signal one of the inputs is @@ -900,6 +929,7 @@ struct RepartitionStream { impl Stream for RepartitionStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -938,6 +968,7 @@ impl Stream for RepartitionStream { } impl RecordBatchStream for RepartitionStream { + #[tracing::instrument(level = "trace", skip(self))] /// Get the schema fn schema(&self) -> SchemaRef { self.schema.clone() @@ -964,6 +995,7 @@ struct PerPartitionStream { impl Stream for PerPartitionStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -988,6 +1020,7 @@ impl Stream for PerPartitionStream { } impl RecordBatchStream for PerPartitionStream { + #[tracing::instrument(level = "trace", skip(self))] /// Get the schema fn schema(&self) -> SchemaRef { self.schema.clone() @@ -1101,10 +1134,12 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip())] fn test_schema() -> Arc { Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt32, false)])) } + #[tracing::instrument(level = "trace", skip(schema, input_partitions, partitioning))] async fn repartition( schema: &SchemaRef, input_partitions: Vec>, @@ -1380,6 +1415,7 @@ mod tests { assert_eq!(batches_without_drop, batches_with_drop); } + #[tracing::instrument(level = "trace", skip(batches))] fn str_batches_to_vec(batches: &[RecordBatch]) -> Vec<&str> { batches .iter() @@ -1396,6 +1432,7 @@ mod tests { .collect::>() } + #[tracing::instrument(level = "trace", skip())] /// Create a BarrierExec that returns two partitions of two batches each fn make_barrier_exec() -> BarrierExec { let batch1 = RecordBatch::try_from_iter(vec![( @@ -1509,12 +1546,14 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(n))] /// Create vector batches fn create_vec_batches(n: usize) -> Vec { let batch = create_batch(); (0..n).map(|_| batch.clone()).collect() } + #[tracing::instrument(level = "trace", skip())] /// Create batch fn create_batch() -> RecordBatch { let schema = test_schema(); @@ -1624,10 +1663,12 @@ mod test { Ok(()) } + 
#[tracing::instrument(level = "trace", skip())] fn test_schema() -> Arc { Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt32, false)])) } + #[tracing::instrument(level = "trace", skip(schema))] fn sort_exprs(schema: &Schema) -> Vec { let options = SortOptions::default(); vec![PhysicalSortExpr { @@ -1636,10 +1677,12 @@ mod test { }] } + #[tracing::instrument(level = "trace", skip(schema))] fn memory_exec(schema: &SchemaRef) -> Arc { Arc::new(MemoryExec::try_new(&[vec![]], schema.clone(), None).unwrap()) } + #[tracing::instrument(level = "trace", skip(schema, sort_exprs))] fn sorted_memory_exec( schema: &SchemaRef, sort_exprs: Vec, diff --git a/datafusion/physical-plan/src/sorts/builder.rs b/datafusion/physical-plan/src/sorts/builder.rs index 3527d57382230..f66bdf601a102 100644 --- a/datafusion/physical-plan/src/sorts/builder.rs +++ b/datafusion/physical-plan/src/sorts/builder.rs @@ -50,6 +50,7 @@ pub struct BatchBuilder { } impl BatchBuilder { + #[tracing::instrument(level = "trace", skip(schema, stream_count, batch_size, reservation))] /// Create a new [`BatchBuilder`] with the provided `stream_count` and `batch_size` pub fn new( schema: SchemaRef, @@ -66,6 +67,7 @@ impl BatchBuilder { } } + #[tracing::instrument(level = "trace", skip(self, stream_idx, batch))] /// Append a new batch in `stream_idx` pub fn push_batch(&mut self, stream_idx: usize, batch: RecordBatch) -> Result<()> { self.reservation.try_grow(batch.get_array_memory_size())?; @@ -78,6 +80,7 @@ impl BatchBuilder { Ok(()) } + #[tracing::instrument(level = "trace", skip(self, stream_idx))] /// Append the next row from `stream_idx` pub fn push_row(&mut self, stream_idx: usize) { let cursor = &mut self.cursors[stream_idx]; @@ -86,21 +89,25 @@ impl BatchBuilder { self.indices.push((cursor.batch_idx, row_idx)); } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the number of in-progress rows in this [`BatchBuilder`] pub fn len(&self) -> usize { self.indices.len() } + #[tracing::instrument(level = "trace", skip(self))] /// Returns `true` if this [`BatchBuilder`] contains no in-progress rows pub fn is_empty(&self) -> bool { self.indices.is_empty() } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the schema of this [`BatchBuilder`] pub fn schema(&self) -> &SchemaRef { &self.schema } + #[tracing::instrument(level = "trace", skip(self))] /// Drains the in_progress row indexes, and builds a new RecordBatch from them /// /// Will then drop any batches for which all rows have been yielded to the output diff --git a/datafusion/physical-plan/src/sorts/cursor.rs b/datafusion/physical-plan/src/sorts/cursor.rs index df90c97faf68e..50b75eee62d23 100644 --- a/datafusion/physical-plan/src/sorts/cursor.rs +++ b/datafusion/physical-plan/src/sorts/cursor.rs @@ -79,16 +79,19 @@ pub struct Cursor { } impl Cursor { + #[tracing::instrument(level = "trace", skip(values))] /// Create a [`Cursor`] from the given [`CursorValues`] pub fn new(values: T) -> Self { Self { offset: 0, values } } + #[tracing::instrument(level = "trace", skip(self))] /// Returns true if there are no more rows in this cursor pub fn is_finished(&self) -> bool { self.offset == self.values.len() } + #[tracing::instrument(level = "trace", skip(self))] /// Advance the cursor, returning the previous row index pub fn advance(&mut self) -> usize { let t = self.offset; @@ -98,6 +101,7 @@ impl Cursor { } impl PartialEq for Cursor { + #[tracing::instrument(level = "trace", skip(self, other))] fn eq(&self, other: &Self) -> bool { T::eq(&self.values, 
self.offset, &other.values, other.offset) } @@ -106,12 +110,14 @@ impl PartialEq for Cursor { impl Eq for Cursor {} impl PartialOrd for Cursor { + #[tracing::instrument(level = "trace", skip(self, other))] fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for Cursor { + #[tracing::instrument(level = "trace", skip(self, other))] fn cmp(&self, other: &Self) -> Ordering { T::compare(&self.values, self.offset, &other.values, other.offset) } @@ -131,6 +137,7 @@ pub struct RowValues { } impl RowValues { + #[tracing::instrument(level = "trace", skip(rows, reservation))] /// Create a new [`RowValues`] from `rows` and a `reservation` /// that tracks its memory. There must be at least one row /// @@ -148,14 +155,17 @@ impl RowValues { } impl CursorValues for RowValues { + #[tracing::instrument(level = "trace", skip(self))] fn len(&self) -> usize { self.rows.num_rows() } + #[tracing::instrument(level = "trace", skip(l, l_idx, r, r_idx))] fn eq(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> bool { l.rows.row(l_idx) == r.rows.row(r_idx) } + #[tracing::instrument(level = "trace", skip(l, l_idx, r, r_idx))] fn compare(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> Ordering { l.rows.row(l_idx).cmp(&r.rows.row(r_idx)) } @@ -171,6 +181,7 @@ pub trait CursorArray: Array + 'static { impl CursorArray for PrimitiveArray { type Values = PrimitiveValues; + #[tracing::instrument(level = "trace", skip(self))] fn values(&self) -> Self::Values { PrimitiveValues(self.values().clone()) } @@ -180,14 +191,17 @@ impl CursorArray for PrimitiveArray { pub struct PrimitiveValues(ScalarBuffer); impl CursorValues for PrimitiveValues { + #[tracing::instrument(level = "trace", skip(self))] fn len(&self) -> usize { self.0.len() } + #[tracing::instrument(level = "trace", skip(l, l_idx, r, r_idx))] fn eq(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> bool { l.0[l_idx].is_eq(r.0[r_idx]) } + #[tracing::instrument(level = "trace", skip(l, l_idx, r, r_idx))] fn compare(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> Ordering { l.0[l_idx].compare(r.0[r_idx]) } @@ -199,6 +213,7 @@ pub struct ByteArrayValues { } impl ByteArrayValues { + #[tracing::instrument(level = "trace", skip(self, idx))] fn value(&self, idx: usize) -> &[u8] { assert!(idx < self.len()); // Safety: offsets are valid and checked bounds above @@ -211,14 +226,17 @@ impl ByteArrayValues { } impl CursorValues for ByteArrayValues { + #[tracing::instrument(level = "trace", skip(self))] fn len(&self) -> usize { self.offsets.len() - 1 } + #[tracing::instrument(level = "trace", skip(l, l_idx, r, r_idx))] fn eq(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> bool { l.value(l_idx) == r.value(r_idx) } + #[tracing::instrument(level = "trace", skip(l, l_idx, r, r_idx))] fn compare(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> Ordering { l.value(l_idx).cmp(r.value(r_idx)) } @@ -227,6 +245,7 @@ impl CursorValues for ByteArrayValues { impl CursorArray for GenericByteArray { type Values = ByteArrayValues; + #[tracing::instrument(level = "trace", skip(self))] fn values(&self) -> Self::Values { ByteArrayValues { offsets: self.offsets().clone(), @@ -248,6 +267,7 @@ pub struct ArrayValues { } impl ArrayValues { + #[tracing::instrument(level = "trace", skip(options, array))] /// Create a new [`ArrayValues`] from the provided `values` sorted according /// to `options`. 
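// The comparison impls instrumented above all produce spans named after the bare
// method ("eq", "cmp", "compare"), which can be hard to tell apart in output that
// mixes several cursor types; the attribute's `name` parameter disambiguates.
// Sketch only; `RowCursor` is an illustrative type, not DataFusion's.
use std::cmp::Ordering;

#[derive(PartialEq, Eq)]
struct RowCursor(usize);

impl PartialOrd for RowCursor {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for RowCursor {
    #[tracing::instrument(level = "trace", name = "row_cursor_cmp", skip(self, other))]
    fn cmp(&self, other: &Self) -> Ordering {
        self.0.cmp(&other.0)
    }
}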
/// @@ -266,16 +286,19 @@ impl ArrayValues { } } + #[tracing::instrument(level = "trace", skip(self, idx))] fn is_null(&self, idx: usize) -> bool { (idx < self.null_threshold) == self.options.nulls_first } } impl CursorValues for ArrayValues { + #[tracing::instrument(level = "trace", skip(self))] fn len(&self) -> usize { self.values.len() } + #[tracing::instrument(level = "trace", skip(l, l_idx, r, r_idx))] fn eq(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> bool { match (l.is_null(l_idx), r.is_null(r_idx)) { (true, true) => true, @@ -284,6 +307,7 @@ impl CursorValues for ArrayValues { } } + #[tracing::instrument(level = "trace", skip(l, l_idx, r, r_idx))] fn compare(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> Ordering { match (l.is_null(l_idx), r.is_null(r_idx)) { (true, true) => Ordering::Equal, @@ -307,6 +331,7 @@ impl CursorValues for ArrayValues { mod tests { use super::*; + #[tracing::instrument(level = "trace", skip(options, values, null_count))] fn new_primitive( options: SortOptions, values: ScalarBuffer, diff --git a/datafusion/physical-plan/src/sorts/merge.rs b/datafusion/physical-plan/src/sorts/merge.rs index 422ff3aebdb39..b739c83376464 100644 --- a/datafusion/physical-plan/src/sorts/merge.rs +++ b/datafusion/physical-plan/src/sorts/merge.rs @@ -99,6 +99,7 @@ pub(crate) struct SortPreservingMergeStream { } impl SortPreservingMergeStream { + #[tracing::instrument(level = "trace", skip(streams, schema, metrics, batch_size, fetch, reservation))] pub(crate) fn new( streams: CursorStream, schema: SchemaRef, @@ -123,6 +124,7 @@ impl SortPreservingMergeStream { } } + #[tracing::instrument(level = "trace", skip(self, cx, idx))] /// If the stream at the given index is not exhausted, and the last cursor for the /// stream is finished, poll the stream for the next RecordBatch and create a new /// cursor for the stream from the returned result @@ -146,6 +148,7 @@ impl SortPreservingMergeStream { } } + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next_inner( &mut self, cx: &mut Context<'_>, @@ -201,12 +204,14 @@ impl SortPreservingMergeStream { } } + #[tracing::instrument(level = "trace", skip(self))] fn fetch_reached(&mut self) -> bool { self.fetch .map(|fetch| self.produced + self.in_progress.len() >= fetch) .unwrap_or(false) } + #[tracing::instrument(level = "trace", skip(self, stream_idx))] fn advance(&mut self, stream_idx: usize) -> bool { let slot = &mut self.cursors[stream_idx]; match slot.as_mut() { @@ -221,6 +226,7 @@ impl SortPreservingMergeStream { } } + #[tracing::instrument(level = "trace", skip(self, a, b))] /// Returns `true` if the cursor at index `a` is greater than at index `b` #[inline] fn is_gt(&self, a: usize, b: usize) -> bool { @@ -231,6 +237,7 @@ impl SortPreservingMergeStream { } } + #[tracing::instrument(level = "trace", skip(self, cursor_index))] /// Find the leaf node index in the loser tree for the given cursor index /// /// Note that this is not necessarily a leaf node in the tree, but it can @@ -264,12 +271,14 @@ impl SortPreservingMergeStream { (self.cursors.len() + cursor_index) / 2 } + #[tracing::instrument(level = "trace", skip(self, node_idx))] /// Find the parent node index for the given node index #[inline] fn lt_parent_node_index(&self, node_idx: usize) -> usize { node_idx / 2 } + #[tracing::instrument(level = "trace", skip(self))] /// Attempts to initialize the loser tree with one value from each /// non exhausted input, if possible fn init_loser_tree(&mut self) { @@ -292,6 +301,7 @@ impl SortPreservingMergeStream { 
self.loser_tree_adjusted = true; } + #[tracing::instrument(level = "trace", skip(self))] /// Attempts to update the loser tree, following winner replacement, if possible fn update_loser_tree(&mut self) { let mut winner = self.loser_tree[0]; @@ -313,6 +323,7 @@ impl SortPreservingMergeStream { impl Stream for SortPreservingMergeStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -323,6 +334,7 @@ impl Stream for SortPreservingMergeStream { } impl RecordBatchStream for SortPreservingMergeStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.in_progress.schema().clone() } diff --git a/datafusion/physical-plan/src/sorts/partial_sort.rs b/datafusion/physical-plan/src/sorts/partial_sort.rs index d24bc5a670e5c..050b3620e2b03 100644 --- a/datafusion/physical-plan/src/sorts/partial_sort.rs +++ b/datafusion/physical-plan/src/sorts/partial_sort.rs @@ -98,6 +98,7 @@ pub struct PartialSortExec { } impl PartialSortExec { + #[tracing::instrument(level = "trace", skip(expr, input, common_prefix_length))] /// Create a new partial sort execution plan pub fn new( expr: Vec, @@ -118,11 +119,13 @@ impl PartialSortExec { } } + #[tracing::instrument(level = "trace", skip(self))] /// Whether this `PartialSortExec` preserves partitioning of the children pub fn preserve_partitioning(&self) -> bool { self.preserve_partitioning } + #[tracing::instrument(level = "trace", skip(self, preserve_partitioning))] /// Specify the partitioning behavior of this partial sort exec /// /// If `preserve_partitioning` is true, sorts each partition @@ -141,6 +144,7 @@ impl PartialSortExec { self } + #[tracing::instrument(level = "trace", skip(self, fetch))] /// Modify how many rows to include in the result /// /// If None, then all rows will be returned, in sorted order. @@ -153,21 +157,25 @@ impl PartialSortExec { self } + #[tracing::instrument(level = "trace", skip(self))] /// Input schema pub fn input(&self) -> &Arc { &self.input } + #[tracing::instrument(level = "trace", skip(self))] /// Sort expressions pub fn expr(&self) -> &[PhysicalSortExpr] { &self.expr } + #[tracing::instrument(level = "trace", skip(self))] /// If `Some(fetch)`, limits output to only the first "fetch" items pub fn fetch(&self) -> Option { self.fetch } + #[tracing::instrument(level = "trace", skip(input, preserve_partitioning))] fn output_partitioning_helper( input: &Arc, preserve_partitioning: bool, @@ -180,6 +188,7 @@ impl PartialSortExec { } } + #[tracing::instrument(level = "trace", skip(input, sort_exprs, preserve_partitioning))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
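// Where an argument is too large to dump but one property of it is worth keeping,
// `skip(...)` can be combined with an explicit `fields(...)` expression, for
// example recording only a batch's row count instead of the batch itself.
// Sketch only; `Batch` and `merge_one_batch` are illustrative names.
struct Batch {
    rows: Vec<u64>,
}

#[tracing::instrument(level = "trace", skip(batch), fields(num_rows = batch.rows.len()))]
fn merge_one_batch(batch: &Batch) -> usize {
    batch.rows.len()
}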
fn compute_properties( input: &Arc, @@ -205,6 +214,7 @@ impl PartialSortExec { } impl DisplayAs for PartialSortExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -226,18 +236,22 @@ impl DisplayAs for PartialSortExec { } impl ExecutionPlan for PartialSortExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "PartialSortExec" } + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_distribution(&self) -> Vec { if self.preserve_partitioning { vec![Distribution::UnspecifiedDistribution] @@ -246,14 +260,17 @@ impl ExecutionPlan for PartialSortExec { } } + #[tracing::instrument(level = "trace", skip(self))] fn benefits_from_input_partitioning(&self) -> Vec { vec![false] } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.input.clone()] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -269,6 +286,7 @@ impl ExecutionPlan for PartialSortExec { Ok(Arc::new(new_partial_sort)) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -298,10 +316,12 @@ impl ExecutionPlan for PartialSortExec { })) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics_set.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { self.input.statistics() } @@ -328,6 +348,7 @@ struct PartialSortStream { impl Stream for PartialSortStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -336,6 +357,7 @@ impl Stream for PartialSortStream { self.baseline_metrics.record_poll(poll) } + #[tracing::instrument(level = "trace", skip(self))] fn size_hint(&self) -> (usize, Option) { // we can't predict the size of incoming batches so re-use the size hint from the input self.input.size_hint() @@ -343,12 +365,14 @@ impl Stream for PartialSortStream { } impl RecordBatchStream for PartialSortStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.input.schema() } } impl PartialSortStream { + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next_inner( self: &mut Pin<&mut Self>, cx: &mut Context<'_>, @@ -383,6 +407,7 @@ impl PartialSortStream { Poll::Ready(Some(result)) } + #[tracing::instrument(level = "trace", skip(self))] /// Returns a sorted RecordBatch from in_mem_batches and clears in_mem_batches /// /// If fetch is specified for PartialSortStream `sort_in_mem_batches` will limit @@ -403,6 +428,7 @@ impl PartialSortStream { Ok(result) } + #[tracing::instrument(level = "trace", skip(self, common_prefix_len, batch))] /// Return the end index of the second last partition if the batch /// can be partitioned based on its already sorted columns /// @@ -651,6 +677,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip())] fn prepare_partitioned_input() -> Arc { let batch1 = test::build_table_i32( ("a", &vec![1; 100]), diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index c684748bb29a9..d91caa63bf496 100644 --- 
a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -77,6 +77,7 @@ struct ExternalSorterMetrics { } impl ExternalSorterMetrics { + #[tracing::instrument(level = "trace", skip(metrics, partition))] fn new(metrics: &ExecutionPlanMetricsSet, partition: usize) -> Self { Self { baseline: BaselineMetrics::new(metrics, partition), @@ -243,6 +244,7 @@ struct ExternalSorter { impl ExternalSorter { // TODO: make a builder or some other nicer API to avoid the // clippy warning + #[tracing::instrument(level = "trace", skip(partition_id, schema, expr, batch_size, fetch, sort_spill_reservation_bytes, sort_in_place_threshold_bytes, metrics, runtime))] #[allow(clippy::too_many_arguments)] pub fn new( partition_id: usize, @@ -281,6 +283,7 @@ impl ExternalSorter { } } + #[tracing::instrument(level = "trace", skip(self, input))] /// Appends an unsorted [`RecordBatch`] to `in_mem_batches` /// /// Updates memory usage metrics, and possibly triggers spilling to disk @@ -316,10 +319,12 @@ impl ExternalSorter { Ok(()) } + #[tracing::instrument(level = "trace", skip(self))] fn spilled_before(&self) -> bool { !self.spills.is_empty() } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the final sorted output of all batches inserted via /// [`Self::insert_batch`] as a stream of [`RecordBatch`]es. /// @@ -365,26 +370,31 @@ impl ExternalSorter { } } + #[tracing::instrument(level = "trace", skip(self))] /// How much memory is buffered in this `ExternalSorter`? fn used(&self) -> usize { self.reservation.size() } + #[tracing::instrument(level = "trace", skip(self))] /// How many bytes have been spilled to disk? fn spilled_bytes(&self) -> usize { self.metrics.spilled_bytes.value() } + #[tracing::instrument(level = "trace", skip(self))] /// How many rows have been spilled to disk? fn spilled_rows(&self) -> usize { self.metrics.spilled_rows.value() } + #[tracing::instrument(level = "trace", skip(self))] /// How many spill files have been created? fn spill_count(&self) -> usize { self.metrics.spill_count.value() } + #[tracing::instrument(level = "trace", skip(self))] /// Writes any `in_memory_batches` to a spill file and clears /// the batches. The contents of the spill file are sorted. /// @@ -411,6 +421,7 @@ impl ExternalSorter { Ok(used) } + #[tracing::instrument(level = "trace", skip(self))] /// Sorts the in_mem_batches in place async fn in_mem_sort(&mut self) -> Result<()> { if self.in_mem_batches_sorted { @@ -441,6 +452,7 @@ impl ExternalSorter { Ok(()) } + #[tracing::instrument(level = "trace", skip(self, metrics))] /// Consumes in_mem_batches returning a sorted stream of /// batches. This proceeds in one of two ways: /// @@ -541,6 +553,7 @@ impl ExternalSorter { ) } + #[tracing::instrument(level = "trace", skip(self, batch, metrics, reservation))] /// Sorts a single `RecordBatch` into a single stream. /// /// `reservation` accounts for the memory used by this batch and @@ -566,6 +579,7 @@ impl ExternalSorter { Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream))) } + #[tracing::instrument(level = "trace", skip(self))] /// If this sort may spill, pre-allocates /// `sort_spill_reservation_bytes` of memory to gurarantee memory /// left for the in memory sort/merge. 
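// `#[tracing::instrument]` also works on async fns such as the sorter's insert
// and spill paths: the span is attached to the returned future and entered on
// every poll rather than being held across `.await` points. Minimal sketch;
// `spill_rows` is an illustrative name, not DataFusion's API.
#[tracing::instrument(level = "trace", skip(rows))]
async fn spill_rows(rows: Vec<u64>) -> usize {
    // Stand-in for asynchronously writing the rows to a spill file.
    std::future::ready(()).await;
    rows.len()
}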
@@ -583,6 +597,7 @@ impl ExternalSorter { } impl Debug for ExternalSorter { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut Formatter) -> fmt::Result { f.debug_struct("ExternalSorter") .field("memory_used", &self.used()) @@ -593,6 +608,7 @@ impl Debug for ExternalSorter { } } +#[tracing::instrument(level = "trace", skip(batch, expressions, fetch))] pub(crate) fn sort_batch( batch: &RecordBatch, expressions: &[PhysicalSortExpr], @@ -625,6 +641,7 @@ pub(crate) fn sort_batch( )?) } +#[tracing::instrument(level = "trace", skip(sort_columns))] #[inline] fn is_multi_column_with_lists(sort_columns: &[SortColumn]) -> bool { sort_columns.iter().any(|c| { @@ -635,6 +652,7 @@ fn is_multi_column_with_lists(sort_columns: &[SortColumn]) -> bool { }) } +#[tracing::instrument(level = "trace", skip(sort_columns, limit))] pub(crate) fn lexsort_to_indices_multi_columns( sort_columns: Vec, limit: Option, @@ -667,6 +685,7 @@ pub(crate) fn lexsort_to_indices_multi_columns( Ok(indices) } +#[tracing::instrument(level = "trace", skip(batches, path, schema))] /// Spills sorted `in_memory_batches` to disk. /// /// Returns number of the rows spilled to disk. @@ -683,6 +702,7 @@ async fn spill_sorted_batches( } } +#[tracing::instrument(level = "trace", skip(path, schema))] pub(crate) fn read_spill_as_stream( path: RefCountedTempFile, schema: SchemaRef, @@ -701,6 +721,7 @@ pub(crate) fn read_spill_as_stream( Ok(builder.build()) } +#[tracing::instrument(level = "trace", skip(batches, path, schema))] fn write_sorted( batches: Vec, path: PathBuf, @@ -720,6 +741,7 @@ fn write_sorted( Ok(writer.num_rows) } +#[tracing::instrument(level = "trace", skip(sender, path))] fn read_spill(sender: Sender>, path: &Path) -> Result<()> { let file = BufReader::new(File::open(path)?); let reader = FileReader::try_new(file, None)?; @@ -753,6 +775,7 @@ pub struct SortExec { } impl SortExec { + #[tracing::instrument(level = "trace", skip(expr, input))] /// Create a new sort execution plan that produces a single, /// sorted output partition. pub fn new(expr: Vec, input: Arc) -> Self { @@ -768,11 +791,13 @@ impl SortExec { } } + #[tracing::instrument(level = "trace", skip(self))] /// Whether this `SortExec` preserves partitioning of the children pub fn preserve_partitioning(&self) -> bool { self.preserve_partitioning } + #[tracing::instrument(level = "trace", skip(self, preserve_partitioning))] /// Specify the partitioning behavior of this sort exec /// /// If `preserve_partitioning` is true, sorts each partition @@ -791,6 +816,7 @@ impl SortExec { self } + #[tracing::instrument(level = "trace", skip(self, fetch))] /// Modify how many rows to include in the result /// /// If None, then all rows will be returned, in sorted order. 
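// None of these spans produce output on their own: the binary driving the plan
// must install a subscriber that keeps trace level enabled. A minimal setup,
// assuming the `tracing-subscriber` crate with its default `fmt` feature; a
// per-crate filter (for example "datafusion_physical_plan=trace" via EnvFilter)
// is the usual refinement once a global trace level becomes too noisy.
fn init_trace_output() {
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::TRACE)
        .init();
}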
@@ -803,21 +829,25 @@ impl SortExec { self } + #[tracing::instrument(level = "trace", skip(self))] /// Input schema pub fn input(&self) -> &Arc { &self.input } + #[tracing::instrument(level = "trace", skip(self))] /// Sort expressions pub fn expr(&self) -> &[PhysicalSortExpr] { &self.expr } + #[tracing::instrument(level = "trace", skip(self))] /// If `Some(fetch)`, limits output to only the first "fetch" items pub fn fetch(&self) -> Option { self.fetch } + #[tracing::instrument(level = "trace", skip(input, preserve_partitioning))] fn output_partitioning_helper( input: &Arc, preserve_partitioning: bool, @@ -830,6 +860,7 @@ impl SortExec { } } + #[tracing::instrument(level = "trace", skip(input, sort_exprs, preserve_partitioning))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties( input: &Arc, @@ -860,6 +891,7 @@ impl SortExec { } impl DisplayAs for SortExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -881,18 +913,22 @@ impl DisplayAs for SortExec { } impl ExecutionPlan for SortExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "SortExec" } + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_distribution(&self) -> Vec { if self.preserve_partitioning { vec![Distribution::UnspecifiedDistribution] @@ -903,14 +939,17 @@ impl ExecutionPlan for SortExec { } } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.input.clone()] } + #[tracing::instrument(level = "trace", skip(self))] fn benefits_from_input_partitioning(&self) -> Vec { vec![false] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -922,6 +961,7 @@ impl ExecutionPlan for SortExec { Ok(Arc::new(new_sort)) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -985,10 +1025,12 @@ impl ExecutionPlan for SortExec { } } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics_set.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { self.input.statistics() } diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index 88c6c312b94be..9ce4ea06e74aa 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -78,6 +78,7 @@ pub struct SortPreservingMergeExec { } impl SortPreservingMergeExec { + #[tracing::instrument(level = "trace", skip(expr, input))] /// Create a new sort execution plan pub fn new(expr: Vec, input: Arc) -> Self { let cache = Self::compute_properties(&input); @@ -89,27 +90,32 @@ impl SortPreservingMergeExec { cache, } } + #[tracing::instrument(level = "trace", skip(self, fetch))] /// Sets the number of rows to fetch pub fn with_fetch(mut self, fetch: Option) -> Self { self.fetch = fetch; self } + #[tracing::instrument(level = "trace", skip(self))] /// Input schema pub fn input(&self) -> &Arc { &self.input } + #[tracing::instrument(level = "trace", skip(self))] /// Sort 
expressions pub fn expr(&self) -> &[PhysicalSortExpr] { &self.expr } + #[tracing::instrument(level = "trace", skip(self))] /// Fetch pub fn fetch(&self) -> Option { self.fetch } + #[tracing::instrument(level = "trace", skip(input))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(input: &Arc) -> PlanProperties { PlanProperties::new( @@ -121,6 +127,7 @@ impl SortPreservingMergeExec { } impl DisplayAs for SortPreservingMergeExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -144,39 +151,48 @@ impl DisplayAs for SortPreservingMergeExec { } impl ExecutionPlan for SortPreservingMergeExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "SortPreservingMergeExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_distribution(&self) -> Vec { vec![Distribution::UnspecifiedDistribution] } + #[tracing::instrument(level = "trace", skip(self))] fn benefits_from_input_partitioning(&self) -> Vec { vec![false] } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_ordering(&self) -> Vec>> { vec![Some(PhysicalSortRequirement::from_sort_exprs(&self.expr))] } + #[tracing::instrument(level = "trace", skip(self))] fn maintains_input_order(&self) -> Vec { vec![true] } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.input.clone()] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -187,6 +203,7 @@ impl ExecutionPlan for SortPreservingMergeExec { )) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -250,10 +267,12 @@ impl ExecutionPlan for SortPreservingMergeExec { } } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { self.input.statistics() } @@ -510,6 +529,7 @@ mod tests { .await; } + #[tracing::instrument(level = "trace", skip(partitions, exp, context))] async fn _test_merge( partitions: &[Vec], exp: &[&str], @@ -533,6 +553,7 @@ mod tests { assert_batches_eq!(exp, collected.as_slice()); } + #[tracing::instrument(level = "trace", skip(input, sort, context))] async fn sorted_merge( input: Arc, sort: Vec, @@ -544,6 +565,7 @@ mod tests { result.remove(0) } + #[tracing::instrument(level = "trace", skip(input, sort, context))] async fn partition_sort( input: Arc, sort: Vec, @@ -554,6 +576,7 @@ mod tests { sorted_merge(sort_exec, sort, context).await } + #[tracing::instrument(level = "trace", skip(src, sort, context))] async fn basic_sort( src: Arc, sort: Vec, @@ -600,6 +623,7 @@ mod tests { } // Split the provided record batch into multiple batch_size record batches + #[tracing::instrument(level = "trace", skip(sorted, batch_size))] fn split_batch(sorted: &RecordBatch, batch_size: usize) -> Vec { let batches = (sorted.num_rows() + batch_size - 1) / batch_size; @@ -622,6 +646,7 @@ mod tests { .collect() } + #[tracing::instrument(level = "trace", skip(sort, sizes, context))] 
async fn sorted_partitioned_input( sort: Vec, sizes: &[usize], @@ -916,6 +941,7 @@ mod tests { assert!(saw_end); } + #[tracing::instrument(level = "trace", skip(ts))] fn nanos_from_timestamp(ts: &Timestamp) -> i64 { ts.value().unwrap().timestamp_nanos_opt().unwrap() } diff --git a/datafusion/physical-plan/src/sorts/stream.rs b/datafusion/physical-plan/src/sorts/stream.rs index 135b4fbdece49..146765fbddce9 100644 --- a/datafusion/physical-plan/src/sorts/stream.rs +++ b/datafusion/physical-plan/src/sorts/stream.rs @@ -53,6 +53,7 @@ pub trait PartitionedStream: std::fmt::Debug + Send { struct FusedStreams(Vec>); impl std::fmt::Debug for FusedStreams { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("FusedStreams") .field("num_streams", &self.0.len()) @@ -61,6 +62,7 @@ impl std::fmt::Debug for FusedStreams { } impl FusedStreams { + #[tracing::instrument(level = "trace", skip(self, cx, stream_idx))] fn poll_next( &mut self, cx: &mut Context<'_>, @@ -90,6 +92,7 @@ pub struct RowCursorStream { } impl RowCursorStream { + #[tracing::instrument(level = "trace", skip(schema, expressions, streams, reservation))] pub fn try_new( schema: &Schema, expressions: &[PhysicalSortExpr], @@ -114,6 +117,7 @@ impl RowCursorStream { }) } + #[tracing::instrument(level = "trace", skip(self, batch))] fn convert_batch(&mut self, batch: &RecordBatch) -> Result { let cols = self .column_expressions @@ -134,10 +138,12 @@ impl RowCursorStream { impl PartitionedStream for RowCursorStream { type Output = Result<(RowValues, RecordBatch)>; + #[tracing::instrument(level = "trace", skip(self))] fn partitions(&self) -> usize { self.streams.0.len() } + #[tracing::instrument(level = "trace", skip(self, cx, stream_idx))] fn poll_next( &mut self, cx: &mut Context<'_>, @@ -162,6 +168,7 @@ pub struct FieldCursorStream { } impl std::fmt::Debug for FieldCursorStream { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("PrimitiveCursorStream") .field("num_streams", &self.streams) @@ -170,6 +177,7 @@ impl std::fmt::Debug for FieldCursorStream { } impl FieldCursorStream { + #[tracing::instrument(level = "trace", skip(sort, streams))] pub fn new(sort: PhysicalSortExpr, streams: Vec) -> Self { let streams = streams.into_iter().map(|s| s.fuse()).collect(); Self { @@ -179,6 +187,7 @@ impl FieldCursorStream { } } + #[tracing::instrument(level = "trace", skip(self, batch))] fn convert_batch(&mut self, batch: &RecordBatch) -> Result> { let value = self.sort.expr.evaluate(batch)?; let array = value.into_array(batch.num_rows())?; @@ -190,10 +199,12 @@ impl FieldCursorStream { impl PartitionedStream for FieldCursorStream { type Output = Result<(ArrayValues, RecordBatch)>; + #[tracing::instrument(level = "trace", skip(self))] fn partitions(&self) -> usize { self.streams.0.len() } + #[tracing::instrument(level = "trace", skip(self, cx, stream_idx))] fn poll_next( &mut self, cx: &mut Context<'_>, diff --git a/datafusion/physical-plan/src/sorts/streaming_merge.rs b/datafusion/physical-plan/src/sorts/streaming_merge.rs index 9e6618dd1af58..03cfe3238e6be 100644 --- a/datafusion/physical-plan/src/sorts/streaming_merge.rs +++ b/datafusion/physical-plan/src/sorts/streaming_merge.rs @@ -49,6 +49,7 @@ macro_rules! 
merge_helper { }}; } +#[tracing::instrument(level = "trace", skip(streams, schema, expressions, metrics, batch_size, fetch, reservation))] /// Perform a streaming merge of [`SendableRecordBatchStream`] based on provided sort expressions /// while preserving order. pub fn streaming_merge( diff --git a/datafusion/physical-plan/src/stream.rs b/datafusion/physical-plan/src/stream.rs index 99d9367740be4..6deb2c05c913c 100644 --- a/datafusion/physical-plan/src/stream.rs +++ b/datafusion/physical-plan/src/stream.rs @@ -56,6 +56,7 @@ pub(crate) struct ReceiverStreamBuilder { } impl ReceiverStreamBuilder { + #[tracing::instrument(level = "trace", skip(capacity))] /// create new channels with the specified buffer size pub fn new(capacity: usize) -> Self { let (tx, rx) = tokio::sync::mpsc::channel(capacity); @@ -67,11 +68,13 @@ impl ReceiverStreamBuilder { } } + #[tracing::instrument(level = "trace", skip(self))] /// Get a handle for sending data to the output pub fn tx(&self) -> Sender> { self.tx.clone() } + #[tracing::instrument(level = "trace", skip(self, task))] /// Spawn task that will be aborted if this builder (or the stream /// built from it) are dropped pub fn spawn(&mut self, task: F) @@ -82,6 +85,7 @@ impl ReceiverStreamBuilder { self.join_set.spawn(task); } + #[tracing::instrument(level = "trace", skip(self, f))] /// Spawn a blocking task that will be aborted if this builder (or the stream /// built from it) are dropped /// @@ -95,6 +99,7 @@ impl ReceiverStreamBuilder { self.join_set.spawn_blocking(f); } + #[tracing::instrument(level = "trace", skip(self))] /// Create a stream of all data written to `tx` pub fn build(self) -> BoxStream<'static, Result> { let Self { @@ -215,6 +220,7 @@ pub struct RecordBatchReceiverStreamBuilder { } impl RecordBatchReceiverStreamBuilder { + #[tracing::instrument(level = "trace", skip(schema, capacity))] /// create new channels with the specified buffer size pub fn new(schema: SchemaRef, capacity: usize) -> Self { Self { @@ -223,11 +229,13 @@ impl RecordBatchReceiverStreamBuilder { } } + #[tracing::instrument(level = "trace", skip(self))] /// Get a handle for sending [`RecordBatch`] to the output pub fn tx(&self) -> Sender> { self.inner.tx() } + #[tracing::instrument(level = "trace", skip(self, task))] /// Spawn task that will be aborted if this builder (or the stream /// built from it) are dropped /// @@ -242,6 +250,7 @@ impl RecordBatchReceiverStreamBuilder { self.inner.spawn(task) } + #[tracing::instrument(level = "trace", skip(self, f))] /// Spawn a blocking task that will be aborted if this builder (or the stream /// built from it) are dropped /// @@ -256,6 +265,7 @@ impl RecordBatchReceiverStreamBuilder { self.inner.spawn_blocking(f) } + #[tracing::instrument(level = "trace", skip(self, input, partition, context))] /// runs the `partition` of the `input` ExecutionPlan on the /// tokio threadpool and writes its outputs to this stream /// @@ -314,6 +324,7 @@ impl RecordBatchReceiverStreamBuilder { }); } + #[tracing::instrument(level = "trace", skip(self))] /// Create a stream of all [`RecordBatch`] written to `tx` pub fn build(self) -> SendableRecordBatchStream { Box::pin(RecordBatchStreamAdapter::new( @@ -327,6 +338,7 @@ impl RecordBatchReceiverStreamBuilder { pub struct RecordBatchReceiverStream {} impl RecordBatchReceiverStream { + #[tracing::instrument(level = "trace", skip(schema, capacity))] /// Create a builder with an internal buffer of capacity batches. pub fn builder( schema: SchemaRef, @@ -348,6 +360,7 @@ pin_project! 
{ } impl RecordBatchStreamAdapter { + #[tracing::instrument(level = "trace", skip(schema, stream))] /// Creates a new [`RecordBatchStreamAdapter`] from the provided schema and stream pub fn new(schema: SchemaRef, stream: S) -> Self { Self { schema, stream } @@ -355,6 +368,7 @@ impl RecordBatchStreamAdapter { } impl std::fmt::Debug for RecordBatchStreamAdapter { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("RecordBatchStreamAdapter") .field("schema", &self.schema) @@ -368,10 +382,12 @@ where { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { self.project().stream.poll_next(cx) } + #[tracing::instrument(level = "trace", skip(self))] fn size_hint(&self) -> (usize, Option) { self.stream.size_hint() } @@ -381,6 +397,7 @@ impl RecordBatchStream for RecordBatchStreamAdapter where S: Stream>, { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.schema.clone() } @@ -394,6 +411,7 @@ pub struct EmptyRecordBatchStream { } impl EmptyRecordBatchStream { + #[tracing::instrument(level = "trace", skip(schema))] /// Create an empty RecordBatchStream pub fn new(schema: SchemaRef) -> Self { Self { schema } @@ -401,6 +419,7 @@ impl EmptyRecordBatchStream { } impl RecordBatchStream for EmptyRecordBatchStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.schema.clone() } @@ -409,6 +428,7 @@ impl RecordBatchStream for EmptyRecordBatchStream { impl Stream for EmptyRecordBatchStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, _cx))] fn poll_next( self: Pin<&mut Self>, _cx: &mut Context<'_>, @@ -425,6 +445,7 @@ pub(crate) struct ObservedStream { } impl ObservedStream { + #[tracing::instrument(level = "trace", skip(inner, baseline_metrics))] pub fn new( inner: SendableRecordBatchStream, baseline_metrics: BaselineMetrics, @@ -437,6 +458,7 @@ impl ObservedStream { } impl RecordBatchStream for ObservedStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> arrow::datatypes::SchemaRef { self.inner.schema() } @@ -445,6 +467,7 @@ impl RecordBatchStream for ObservedStream { impl futures::Stream for ObservedStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -464,6 +487,7 @@ mod test { use arrow_schema::{DataType, Field, Schema}; use datafusion_common::exec_err; + #[tracing::instrument(level = "trace", skip())] fn schema() -> SchemaRef { Arc::new(Schema::new(vec![Field::new("a", DataType::Float32, true)])) } @@ -546,6 +570,7 @@ mod test { assert!(stream.next().await.is_none()); } + #[tracing::instrument(level = "trace", skip(input, max_batches))] /// Consumes all the input's partitions into a /// RecordBatchReceiverStream and runs it to completion /// diff --git a/datafusion/physical-plan/src/streaming.rs b/datafusion/physical-plan/src/streaming.rs index d174e3b8b6caa..79a5708c4bda9 100644 --- a/datafusion/physical-plan/src/streaming.rs +++ b/datafusion/physical-plan/src/streaming.rs @@ -66,6 +66,7 @@ pub struct StreamingTableExec { } impl StreamingTableExec { + #[tracing::instrument(level = "trace", skip(schema, partitions, projection, projected_output_ordering, infinite, limit))] /// Try to create a new [`StreamingTableExec`] returning an error if the schema is incorrect pub fn try_new( schema: 
SchemaRef, @@ -110,34 +111,42 @@ impl StreamingTableExec { }) } + #[tracing::instrument(level = "trace", skip(self))] pub fn partitions(&self) -> &Vec> { &self.partitions } + #[tracing::instrument(level = "trace", skip(self))] pub fn partition_schema(&self) -> &SchemaRef { self.partitions[0].schema() } + #[tracing::instrument(level = "trace", skip(self))] pub fn projection(&self) -> &Option> { &self.projection } + #[tracing::instrument(level = "trace", skip(self))] pub fn projected_schema(&self) -> &Schema { &self.projected_schema } + #[tracing::instrument(level = "trace", skip(self))] pub fn projected_output_ordering(&self) -> impl IntoIterator { self.projected_output_ordering.clone() } + #[tracing::instrument(level = "trace", skip(self))] pub fn is_infinite(&self) -> bool { self.infinite } + #[tracing::instrument(level = "trace", skip(self))] pub fn limit(&self) -> Option { self.limit } + #[tracing::instrument(level = "trace", skip(schema, orderings, partitions, is_infinite))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties( schema: SchemaRef, @@ -163,12 +172,14 @@ impl StreamingTableExec { } impl std::fmt::Debug for StreamingTableExec { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("LazyMemTableExec").finish_non_exhaustive() } } impl DisplayAs for StreamingTableExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -205,22 +216,27 @@ impl DisplayAs for StreamingTableExec { #[async_trait] impl ExecutionPlan for StreamingTableExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "StreamingTableExec" } + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -232,6 +248,7 @@ impl ExecutionPlan for StreamingTableExec { } } + #[tracing::instrument(level = "trace", skip(self, partition, ctx))] fn execute( &self, partition: usize, @@ -261,6 +278,7 @@ impl ExecutionPlan for StreamingTableExec { }) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } @@ -298,6 +316,7 @@ mod test { assert_eq!(counts, vec![75]); } + #[tracing::instrument(level = "trace", skip(exec))] /// Runs the provided execution plan and returns a vector of the number of /// rows in each partition async fn collect_num_rows(exec: Arc) -> Vec { @@ -320,10 +339,12 @@ mod test { } impl TestBuilder { + #[tracing::instrument(level = "trace", skip())] fn new() -> Self { Self::default() } + #[tracing::instrument(level = "trace", skip(self, batches))] /// Set the batches for the stream fn with_batches(mut self, batches: Vec) -> Self { let stream = TestPartitionStream::new_with_batches(batches); @@ -332,12 +353,14 @@ mod test { self } + #[tracing::instrument(level = "trace", skip(self, limit))] /// Set the limit for the stream fn with_limit(mut self, limit: Option) -> Self { self.limit = limit; self } + #[tracing::instrument(level = "trace", skip(self))] fn build(self) -> StreamingTableExec { StreamingTableExec::try_new( 
self.schema.unwrap(), diff --git a/datafusion/physical-plan/src/test.rs b/datafusion/physical-plan/src/test.rs index 377b919bb4077..53b84c30a5420 100644 --- a/datafusion/physical-plan/src/test.rs +++ b/datafusion/physical-plan/src/test.rs @@ -33,6 +33,7 @@ use crate::ExecutionPlan; pub mod exec; +#[tracing::instrument(level = "trace", skip(fut))] /// Asserts that given future is pending. pub fn assert_is_pending<'a, T>(fut: &mut Pin + Send + 'a>>) { let waker = futures::task::noop_waker(); @@ -42,6 +43,7 @@ pub fn assert_is_pending<'a, T>(fut: &mut Pin + Send assert!(poll.is_pending()); } +#[tracing::instrument(level = "trace", skip())] /// Get the schema for the aggregate_test_* csv files pub fn aggr_test_schema() -> SchemaRef { let mut f1 = Field::new("c1", DataType::Utf8, false); @@ -65,6 +67,7 @@ pub fn aggr_test_schema() -> SchemaRef { Arc::new(schema) } +#[tracing::instrument(level = "trace", skip(a, b, c))] /// returns record batch with 3 columns of i32 in memory pub fn build_table_i32( a: (&str, &Vec), @@ -88,6 +91,7 @@ pub fn build_table_i32( .unwrap() } +#[tracing::instrument(level = "trace", skip(a, b, c))] /// returns memory table scan wrapped around record batch with 3 columns of i32 pub fn build_table_scan_i32( a: (&str, &Vec), @@ -99,6 +103,7 @@ pub fn build_table_scan_i32( Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None).unwrap()) } +#[tracing::instrument(level = "trace", skip(sz))] /// Return a RecordBatch with a single Int32 array with values (0..sz) in a field named "i" pub fn make_partition(sz: i32) -> RecordBatch { let seq_start = 0; @@ -111,11 +116,13 @@ pub fn make_partition(sz: i32) -> RecordBatch { RecordBatch::try_new(schema, vec![arr]).unwrap() } +#[tracing::instrument(level = "trace", skip(partitions))] /// Returns a `MemoryExec` that scans `partitions` of 100 batches each pub fn scan_partitioned(partitions: usize) -> Arc { Arc::new(mem_exec(partitions)) } +#[tracing::instrument(level = "trace", skip(partitions))] /// Returns a `MemoryExec` that scans `partitions` of 100 batches each pub fn mem_exec(partitions: usize) -> MemoryExec { let data: Vec> = (0..partitions).map(|_| vec![make_partition(100)]).collect(); @@ -132,6 +139,7 @@ pub struct TestPartitionStream { } impl TestPartitionStream { + #[tracing::instrument(level = "trace", skip(batches))] /// Create a new stream partition with the provided batches pub fn new_with_batches(batches: Vec) -> Self { let schema = batches[0].schema(); @@ -139,9 +147,11 @@ impl TestPartitionStream { } } impl PartitionStream for TestPartitionStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> &SchemaRef { &self.schema } + #[tracing::instrument(level = "trace", skip(self, _ctx))] fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { let stream = futures::stream::iter(self.batches.clone().into_iter().map(Ok)); Box::pin(RecordBatchStreamAdapter::new(self.schema.clone(), stream)) diff --git a/datafusion/physical-plan/src/test/exec.rs b/datafusion/physical-plan/src/test/exec.rs index b4f1eac0a655d..1d3bc85fc3865 100644 --- a/datafusion/physical-plan/src/test/exec.rs +++ b/datafusion/physical-plan/src/test/exec.rs @@ -46,6 +46,7 @@ pub struct BatchIndex { } impl BatchIndex { + #[tracing::instrument(level = "trace", skip(self))] /// Return the current index pub fn value(&self) -> usize { let inner = self.inner.lock().unwrap(); @@ -53,6 +54,7 @@ impl BatchIndex { } // increment the current index by one + #[tracing::instrument(level = "trace", skip(self))] pub fn incr(&self) { let mut inner 
= self.inner.lock().unwrap(); *inner += 1; @@ -69,6 +71,7 @@ pub struct TestStream { } impl TestStream { + #[tracing::instrument(level = "trace", skip(data))] /// Create an iterator for a vector of record batches. Assumes at /// least one entry in data (for the schema) pub fn new(data: Vec) -> Self { @@ -78,6 +81,7 @@ impl TestStream { } } + #[tracing::instrument(level = "trace", skip(self))] /// Return a handle to the index counter for this stream pub fn index(&self) -> BatchIndex { self.index.clone() @@ -87,6 +91,7 @@ impl TestStream { impl Stream for TestStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self))] fn poll_next(self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll> { let next_batch = self.index.value(); @@ -99,12 +104,14 @@ impl Stream for TestStream { }) } + #[tracing::instrument(level = "trace", skip(self))] fn size_hint(&self) -> (usize, Option) { (self.data.len(), Some(self.data.len())) } } impl RecordBatchStream for TestStream { + #[tracing::instrument(level = "trace", skip(self))] /// Get the schema fn schema(&self) -> SchemaRef { self.data[0].schema() @@ -125,6 +132,7 @@ pub struct MockExec { } impl MockExec { + #[tracing::instrument(level = "trace", skip(data, schema))] /// Create a new `MockExec` with a single partition that returns /// the specified `Results`s. /// @@ -142,6 +150,7 @@ impl MockExec { } } + #[tracing::instrument(level = "trace", skip(self, use_task))] /// If `use_task` is true (the default) then the batches are sent /// back using a separate task to ensure the underlying stream is /// not immediately ready @@ -150,6 +159,7 @@ impl MockExec { self } + #[tracing::instrument(level = "trace", skip(schema))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); @@ -163,6 +173,7 @@ impl MockExec { } impl DisplayAs for MockExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -177,18 +188,22 @@ impl DisplayAs for MockExec { } impl ExecutionPlan for MockExec { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![] } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -196,6 +211,7 @@ impl ExecutionPlan for MockExec { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self, partition, _context))] /// Returns a stream which yields data fn execute( &self, @@ -243,6 +259,7 @@ impl ExecutionPlan for MockExec { } // Panics if one of the batches is an error + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { let data: Result> = self .data @@ -263,6 +280,7 @@ impl ExecutionPlan for MockExec { } } +#[tracing::instrument(level = "trace", skip(e))] fn clone_error(e: &DataFusionError) -> DataFusionError { use DataFusionError::*; match e { @@ -286,6 +304,7 @@ pub struct BarrierExec { } impl BarrierExec { + #[tracing::instrument(level = "trace", skip(data, schema))] /// Create a new exec with some number of partitions. 
pub fn new(data: Vec>, schema: SchemaRef) -> Self { // wait for all streams and the input @@ -299,6 +318,7 @@ impl BarrierExec { } } + #[tracing::instrument(level = "trace", skip(self))] /// wait until all the input streams and this function is ready pub async fn wait(&self) { println!("BarrierExec::wait waiting on barrier"); @@ -306,6 +326,7 @@ impl BarrierExec { println!("BarrierExec::wait done waiting"); } + #[tracing::instrument(level = "trace", skip(schema, data))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties( schema: SchemaRef, @@ -321,6 +342,7 @@ impl BarrierExec { } impl DisplayAs for BarrierExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -335,18 +357,22 @@ impl DisplayAs for BarrierExec { } impl ExecutionPlan for BarrierExec { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -354,6 +380,7 @@ impl ExecutionPlan for BarrierExec { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self, partition, _context))] /// Returns a stream which yields data fn execute( &self, @@ -385,6 +412,7 @@ impl ExecutionPlan for BarrierExec { Ok(builder.build()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { Ok(common::compute_record_batch_statistics( &self.data, @@ -401,12 +429,14 @@ pub struct ErrorExec { } impl Default for ErrorExec { + #[tracing::instrument(level = "trace", skip())] fn default() -> Self { Self::new() } } impl ErrorExec { + #[tracing::instrument(level = "trace", skip())] pub fn new() -> Self { let schema = Arc::new(Schema::new(vec![Field::new( "dummy", @@ -417,6 +447,7 @@ impl ErrorExec { Self { cache } } + #[tracing::instrument(level = "trace", skip(schema))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); @@ -430,6 +461,7 @@ impl ErrorExec { } impl DisplayAs for ErrorExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -444,18 +476,22 @@ impl DisplayAs for ErrorExec { } impl ExecutionPlan for ErrorExec { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -463,6 +499,7 @@ impl ExecutionPlan for ErrorExec { unimplemented!() } + #[tracing::instrument(level = "trace", skip(self, partition, _context))] /// Returns a stream which yields data fn execute( &self, @@ -481,6 +518,7 @@ pub struct StatisticsExec { cache: PlanProperties, } impl StatisticsExec { + #[tracing::instrument(level = "trace", skip(stats, schema))] pub fn new(stats: Statistics, schema: Schema) -> Self { assert_eq!( stats @@ -495,6 +533,7 @@ impl StatisticsExec { } } + #[tracing::instrument(level = "trace", skip(schema))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); @@ -508,6 +547,7 @@ impl StatisticsExec { } impl DisplayAs for StatisticsExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -527,18 +567,22 @@ impl DisplayAs for StatisticsExec { } impl ExecutionPlan for StatisticsExec { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![] } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -546,6 +590,7 @@ impl ExecutionPlan for StatisticsExec { Ok(self) } + #[tracing::instrument(level = "trace", skip(self, _partition, _context))] fn execute( &self, _partition: usize, @@ -554,6 +599,7 @@ impl ExecutionPlan for StatisticsExec { unimplemented!("This plan only serves for testing statistics") } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { Ok(self.stats.clone()) } @@ -573,6 +619,7 @@ pub struct BlockingExec { } impl BlockingExec { + #[tracing::instrument(level = "trace", skip(schema, n_partitions))] /// Create new [`BlockingExec`] with a give schema and number of partitions. pub fn new(schema: SchemaRef, n_partitions: usize) -> Self { let cache = Self::compute_properties(schema.clone(), n_partitions); @@ -583,6 +630,7 @@ impl BlockingExec { } } + #[tracing::instrument(level = "trace", skip(self))] /// Weak pointer that can be used for ref-counting this execution plan and its streams. 
/// /// Use [`Weak::strong_count`] to determine if the plan itself and its streams are dropped (should be 0 in that @@ -592,6 +640,7 @@ impl BlockingExec { Arc::downgrade(&self.refs) } + #[tracing::instrument(level = "trace", skip(schema, n_partitions))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(schema: SchemaRef, n_partitions: usize) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); @@ -605,6 +654,7 @@ impl BlockingExec { } impl DisplayAs for BlockingExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -619,19 +669,23 @@ impl DisplayAs for BlockingExec { } impl ExecutionPlan for BlockingExec { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { // this is a leaf node and has no children vec![] } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -639,6 +693,7 @@ impl ExecutionPlan for BlockingExec { internal_err!("Children cannot be replaced in {self:?}") } + #[tracing::instrument(level = "trace", skip(self, _partition, _context))] fn execute( &self, _partition: usize, @@ -664,6 +719,7 @@ pub struct BlockingStream { impl Stream for BlockingStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, _cx))] fn poll_next( self: Pin<&mut Self>, _cx: &mut Context<'_>, @@ -673,11 +729,13 @@ impl Stream for BlockingStream { } impl RecordBatchStream for BlockingStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { Arc::clone(&self.schema) } } +#[tracing::instrument(level = "trace", skip(refs))] /// Asserts that the strong count of the given [`Weak`] pointer converges to zero. /// /// This might take a while but has a timeout. @@ -711,6 +769,7 @@ pub struct PanicExec { } impl PanicExec { + #[tracing::instrument(level = "trace", skip(schema, n_partitions))] /// Create new [`PanicExec`] with a give schema and number of /// partitions, which will each panic immediately. pub fn new(schema: SchemaRef, n_partitions: usize) -> Self { @@ -723,12 +782,14 @@ impl PanicExec { } } + #[tracing::instrument(level = "trace", skip(self, partition, count))] /// Set the number of batches prior to panic for a partition pub fn with_partition_panic(mut self, partition: usize, count: usize) -> Self { self.batches_until_panics[partition] = count; self } + #[tracing::instrument(level = "trace", skip(schema, batches_until_panics))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties( schema: SchemaRef, @@ -746,6 +807,7 @@ impl PanicExec { } impl DisplayAs for PanicExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -760,19 +822,23 @@ impl DisplayAs for PanicExec { } impl ExecutionPlan for PanicExec { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { // this is a leaf node and has no children vec![] } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -780,6 +846,7 @@ impl ExecutionPlan for PanicExec { internal_err!("Children cannot be replaced in {:?}", self) } + #[tracing::instrument(level = "trace", skip(self, partition, _context))] fn execute( &self, partition: usize, @@ -813,6 +880,7 @@ struct PanicStream { impl Stream for PanicStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -835,6 +903,7 @@ impl Stream for PanicStream { } impl RecordBatchStream for PanicStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { Arc::clone(&self.schema) } diff --git a/datafusion/physical-plan/src/topk/mod.rs b/datafusion/physical-plan/src/topk/mod.rs index 6a77bfaf3ccd2..7efe60c052b79 100644 --- a/datafusion/physical-plan/src/topk/mod.rs +++ b/datafusion/physical-plan/src/topk/mod.rs @@ -92,6 +92,7 @@ pub struct TopK { } impl TopK { + #[tracing::instrument(level = "trace", skip(partition_id, schema, expr, k, batch_size, runtime, metrics, partition))] /// Create a new [`TopK`] that stores the top `k` values, as /// defined by the sort expressions in `expr`. // TOOD: make a builder or some other nicer API to avoid the @@ -142,6 +143,7 @@ impl TopK { }) } + #[tracing::instrument(level = "trace", skip(self, batch))] /// Insert `batch`, remembering if any of its values are among /// the top k seen so far. 
pub fn insert_batch(&mut self, batch: RecordBatch) -> Result<()> { @@ -188,6 +190,7 @@ impl TopK { Ok(()) } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the top k results broken into `batch_size` [`RecordBatch`]es, consuming the heap pub fn emit(self) -> Result { let Self { @@ -223,6 +226,7 @@ impl TopK { ))) } + #[tracing::instrument(level = "trace", skip(self))] /// return the size of memory used by this operator, in bytes fn size(&self) -> usize { std::mem::size_of::() @@ -241,6 +245,7 @@ struct TopKMetrics { } impl TopKMetrics { + #[tracing::instrument(level = "trace", skip(metrics, partition))] fn new(metrics: &ExecutionPlanMetricsSet, partition: usize) -> Self { Self { baseline: BaselineMetrics::new(metrics, partition), @@ -272,6 +277,7 @@ struct TopKHeap { } impl TopKHeap { + #[tracing::instrument(level = "trace", skip(k, batch_size, schema))] fn new(k: usize, batch_size: usize, schema: SchemaRef) -> Self { assert!(k > 0); Self { @@ -283,18 +289,21 @@ impl TopKHeap { } } + #[tracing::instrument(level = "trace", skip(self, batch))] /// Register a [`RecordBatch`] with the heap, returning the /// appropriate entry pub fn register_batch(&mut self, batch: RecordBatch) -> RecordBatchEntry { self.store.register(batch) } + #[tracing::instrument(level = "trace", skip(self, entry))] /// Insert a [`RecordBatchEntry`] created by a previous call to /// [`Self::register_batch`] into storage. pub fn insert_batch_entry(&mut self, entry: RecordBatchEntry) { self.store.insert(entry) } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the largest value stored by the heap if there are k /// items, otherwise returns None. Remember this structure is /// keeping the "smallest" k values @@ -306,6 +315,7 @@ impl TopKHeap { } } + #[tracing::instrument(level = "trace", skip(self, batch_entry, row, index))] /// Adds `row` to this heap. If inserting this new item would /// increase the size past `k`, removes the previously smallest /// item. 
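As background for the TopKHeap methods instrumented above: the heap keeps only the k smallest rows by pushing each candidate and evicting the current maximum once the size exceeds k. The same idea with plain integers and std::collections::BinaryHeap, illustrative only and not DataFusion's Row-based implementation:

    use std::collections::BinaryHeap;

    /// Return the `k` smallest values in ascending order
    /// (sketch of the TopK eviction pattern).
    fn top_k_smallest(values: impl IntoIterator<Item = i64>, k: usize) -> Vec<i64> {
        let mut heap = BinaryHeap::with_capacity(k + 1); // max-heap
        for v in values {
            heap.push(v);
            if heap.len() > k {
                heap.pop(); // evict the current largest
            }
        }
        heap.into_sorted_vec() // ascending
    }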
@@ -345,12 +355,14 @@ impl TopKHeap { self.inner.push(new_top_k) } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the values stored in this heap, from values low to /// high, as a single [`RecordBatch`], resetting the inner heap pub fn emit(&mut self) -> Result { Ok(self.emit_with_state()?.0) } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the values stored in this heap, from values low to /// high, as a single [`RecordBatch`], and a sorted vec of the /// current heap's contents @@ -398,6 +410,7 @@ impl TopKHeap { Ok((new_batch, topk_rows)) } + #[tracing::instrument(level = "trace", skip(self))] /// Compact this heap, rewriting all stored batches into a single /// input batch pub fn maybe_compact(&mut self) -> Result<()> { @@ -442,6 +455,7 @@ impl TopKHeap { Ok(()) } + #[tracing::instrument(level = "trace", skip(self))] /// return the size of memory used by this heap, in bytes fn size(&self) -> usize { std::mem::size_of::() @@ -469,6 +483,7 @@ struct TopKRow { } impl TopKRow { + #[tracing::instrument(level = "trace", skip(row, batch_id, index))] /// Create a new TopKRow with new allocation fn new(row: impl AsRef<[u8]>, batch_id: u32, index: usize) -> Self { Self { @@ -478,6 +493,7 @@ impl TopKRow { } } + #[tracing::instrument(level = "trace", skip(self, new_row, batch_id, index))] /// Create a new TopKRow reusing the existing allocation fn with_new_row( self, @@ -500,12 +516,14 @@ impl TopKRow { } } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the number of bytes owned by this row in the heap (not /// including itself) fn owned_size(&self) -> usize { self.row.capacity() } + #[tracing::instrument(level = "trace", skip(self))] /// Returns a slice to the owned row value fn row(&self) -> &[u8] { self.row.as_slice() @@ -515,12 +533,14 @@ impl TopKRow { impl Eq for TopKRow {} impl PartialOrd for TopKRow { + #[tracing::instrument(level = "trace", skip(self, other))] fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for TopKRow { + #[tracing::instrument(level = "trace", skip(self, other))] fn cmp(&self, other: &Self) -> Ordering { self.row.cmp(&other.row) } @@ -551,6 +571,7 @@ struct RecordBatchStore { } impl RecordBatchStore { + #[tracing::instrument(level = "trace", skip(schema))] fn new(schema: SchemaRef) -> Self { Self { next_id: 0, @@ -560,6 +581,7 @@ impl RecordBatchStore { } } + #[tracing::instrument(level = "trace", skip(self, batch))] /// Register this batch with the store and assign an ID. 
No /// attempt is made to compare this batch to other batches pub fn register(&mut self, batch: RecordBatch) -> RecordBatchEntry { @@ -568,6 +590,7 @@ impl RecordBatchStore { RecordBatchEntry { id, batch, uses: 0 } } + #[tracing::instrument(level = "trace", skip(self, entry))] /// Insert a record batch entry into this store, tracking its /// memory use, if it has any uses pub fn insert(&mut self, entry: RecordBatchEntry) { @@ -578,21 +601,25 @@ impl RecordBatchStore { } } + #[tracing::instrument(level = "trace", skip(self))] /// Clear all values in this store, invalidating all previous batch ids fn clear(&mut self) { self.batches.clear(); self.batches_size = 0; } + #[tracing::instrument(level = "trace", skip(self, id))] fn get(&self, id: u32) -> Option<&RecordBatchEntry> { self.batches.get(&id) } + #[tracing::instrument(level = "trace", skip(self))] /// returns the total number of batches stored in this store fn len(&self) -> usize { self.batches.len() } + #[tracing::instrument(level = "trace", skip(self))] /// Returns the total number of rows in batches minus the number /// which are in use fn unused_rows(&self) -> usize { @@ -602,16 +629,19 @@ impl RecordBatchStore { .sum() } + #[tracing::instrument(level = "trace", skip(self))] /// returns true if the store has nothing stored fn is_empty(&self) -> bool { self.batches.is_empty() } + #[tracing::instrument(level = "trace", skip(self))] /// return the schema of batches stored fn schema(&self) -> &SchemaRef { &self.schema } + #[tracing::instrument(level = "trace", skip(self, id))] /// remove a use from the specified batch id. If the use count /// reaches zero the batch entry is removed from the store /// @@ -633,6 +663,7 @@ impl RecordBatchStore { } } + #[tracing::instrument(level = "trace", skip(self))] /// returns the size of memory used by this store, including all /// referenced `RecordBatch`es, in bytes pub fn size(&self) -> usize { diff --git a/datafusion/physical-plan/src/tree_node.rs b/datafusion/physical-plan/src/tree_node.rs index 46460cbb66843..e9046d8451642 100644 --- a/datafusion/physical-plan/src/tree_node.rs +++ b/datafusion/physical-plan/src/tree_node.rs @@ -26,10 +26,12 @@ use datafusion_common::tree_node::{ConcreteTreeNode, DynTreeNode}; use datafusion_common::Result; impl DynTreeNode for dyn ExecutionPlan { + #[tracing::instrument(level = "trace", skip(self))] fn arc_children(&self) -> Vec> { self.children() } + #[tracing::instrument(level = "trace", skip(self, arc_self, new_children))] fn with_new_arc_children( &self, arc_self: Arc, @@ -53,6 +55,7 @@ pub struct PlanContext { } impl PlanContext { + #[tracing::instrument(level = "trace", skip(plan, data, children))] pub fn new(plan: Arc, data: T, children: Vec) -> Self { Self { plan, @@ -61,6 +64,7 @@ impl PlanContext { } } + #[tracing::instrument(level = "trace", skip(self))] pub fn update_plan_from_children(mut self) -> Result { let children_plans = self.children.iter().map(|c| c.plan.clone()).collect(); self.plan = with_new_children_if_necessary(self.plan, children_plans)?; @@ -70,6 +74,7 @@ impl PlanContext { } impl PlanContext { + #[tracing::instrument(level = "trace", skip(plan))] pub fn new_default(plan: Arc) -> Self { let children = plan.children().into_iter().map(Self::new_default).collect(); Self::new(plan, Default::default(), children) @@ -77,6 +82,7 @@ impl PlanContext { } impl Display for PlanContext { + #[tracing::instrument(level = "trace", skip(self, f))] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { let node_string = 
displayable(self.plan.as_ref()).one_line(); write!(f, "Node plan: {}", node_string)?; @@ -86,15 +92,18 @@ impl Display for PlanContext { } impl ConcreteTreeNode for PlanContext { + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec<&Self> { self.children.iter().collect() } + #[tracing::instrument(level = "trace", skip(self))] fn take_children(mut self) -> (Self, Vec) { let children = std::mem::take(&mut self.children); (self, children) } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children(mut self, children: Vec) -> Result { self.children = children; self.update_plan_from_children() diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 1354644788ea3..009f2c174f081 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -96,6 +96,7 @@ pub struct UnionExec { } impl UnionExec { + #[tracing::instrument(level = "trace", skip(inputs))] /// Create a new UnionExec pub fn new(inputs: Vec>) -> Self { let schema = union_schema(&inputs); @@ -107,11 +108,13 @@ impl UnionExec { } } + #[tracing::instrument(level = "trace", skip(self))] /// Get inputs of the execution plan pub fn inputs(&self) -> &Vec> { &self.inputs } + #[tracing::instrument(level = "trace", skip(inputs, schema))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties( inputs: &[Arc], @@ -169,6 +172,7 @@ impl UnionExec { } impl DisplayAs for UnionExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -183,23 +187,28 @@ impl DisplayAs for UnionExec { } impl ExecutionPlan for UnionExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "UnionExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { self.inputs.clone() } + #[tracing::instrument(level = "trace", skip(self))] fn maintains_input_order(&self) -> Vec { // If the Union has an output ordering, it maintains at least one // child's ordering (i.e. the meet). 
@@ -225,6 +234,7 @@ impl ExecutionPlan for UnionExec { } } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -232,6 +242,7 @@ impl ExecutionPlan for UnionExec { Ok(Arc::new(UnionExec::new(children))) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, mut partition: usize, @@ -261,10 +272,12 @@ impl ExecutionPlan for UnionExec { exec_err!("Partition {partition} not found in Union") } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { let stats = self .inputs @@ -278,6 +291,7 @@ impl ExecutionPlan for UnionExec { .unwrap_or_else(|| Statistics::new_unknown(&self.schema()))) } + #[tracing::instrument(level = "trace", skip(self))] fn benefits_from_input_partitioning(&self) -> Vec { vec![false; self.children().len()] } @@ -326,6 +340,7 @@ pub struct InterleaveExec { } impl InterleaveExec { + #[tracing::instrument(level = "trace", skip(inputs))] /// Create a new InterleaveExec pub fn try_new(inputs: Vec>) -> Result { if !can_interleave(inputs.iter()) { @@ -341,11 +356,13 @@ impl InterleaveExec { }) } + #[tracing::instrument(level = "trace", skip(self))] /// Get inputs of the execution plan pub fn inputs(&self) -> &Vec> { &self.inputs } + #[tracing::instrument(level = "trace", skip(inputs))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties(inputs: &[Arc]) -> PlanProperties { let schema = union_schema(inputs); @@ -360,6 +377,7 @@ impl InterleaveExec { } impl DisplayAs for InterleaveExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -374,27 +392,33 @@ impl DisplayAs for InterleaveExec { } impl ExecutionPlan for InterleaveExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "InterleaveExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { self.inputs.clone() } + #[tracing::instrument(level = "trace", skip(self))] fn maintains_input_order(&self) -> Vec { vec![false; self.inputs().len()] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -402,6 +426,7 @@ impl ExecutionPlan for InterleaveExec { Ok(Arc::new(InterleaveExec::try_new(children)?)) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -436,10 +461,12 @@ impl ExecutionPlan for InterleaveExec { exec_err!("Partition {partition} not found in InterleaveExec") } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { let stats = self .inputs @@ -453,11 +480,13 @@ impl ExecutionPlan for InterleaveExec { .unwrap_or_else(|| Statistics::new_unknown(&self.schema()))) } + #[tracing::instrument(level = "trace", skip(self))] fn benefits_from_input_partitioning(&self) -> Vec { vec![false; self.children().len()] } } 
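Several of the instrumented methods in these hunks (execute, poll_next, fmt) sit on hot paths, so enabling TRACE globally can be costly; a filter can scope the new spans to a single module instead. A sketch assuming the tracing-subscriber crate with its env-filter feature, which is not part of this patch:

    use tracing_subscriber::{fmt, EnvFilter};

    fn init_tracing_for_union_only() {
        // Keep everything at INFO, but record the trace-level spans
        // added in datafusion_physical_plan::union.
        let filter = EnvFilter::new("info").add_directive(
            "datafusion_physical_plan::union=trace"
                .parse()
                .expect("valid filter directive"),
        );
        fmt().with_env_filter(filter).init();
    }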
+#[tracing::instrument(level = "trace", skip(inputs))] /// If all the input partitions have the same Hash partition spec with the first_input_partition /// The InterleaveExec is partition aware. /// @@ -478,6 +507,7 @@ pub fn can_interleave>>( .all(|partition| partition == *reference) } +#[tracing::instrument(level = "trace", skip(inputs))] fn union_schema(inputs: &[Arc]) -> SchemaRef { let fields: Vec = (0..inputs[0].schema().fields().len()) .map(|i| { @@ -510,6 +540,7 @@ struct CombinedRecordBatchStream { } impl CombinedRecordBatchStream { + #[tracing::instrument(level = "trace", skip(schema, entries))] /// Create an CombinedRecordBatchStream pub fn new(schema: SchemaRef, entries: Vec) -> Self { Self { schema, entries } @@ -517,6 +548,7 @@ impl CombinedRecordBatchStream { } impl RecordBatchStream for CombinedRecordBatchStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.schema.clone() } @@ -525,6 +557,7 @@ impl RecordBatchStream for CombinedRecordBatchStream { impl Stream for CombinedRecordBatchStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -568,6 +601,7 @@ impl Stream for CombinedRecordBatchStream { } } +#[tracing::instrument(level = "trace", skip(left, right))] fn col_stats_union( mut left: ColumnStatistics, right: ColumnStatistics, @@ -580,6 +614,7 @@ fn col_stats_union( left } +#[tracing::instrument(level = "trace", skip(left, right))] fn stats_union(mut left: Statistics, right: Statistics) -> Statistics { left.num_rows = left.num_rows.add(&right.num_rows); left.total_byte_size = left.total_byte_size.add(&right.total_byte_size); @@ -605,6 +640,7 @@ mod tests { use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; // Generate a schema which consists of 7 columns (a, b, c, d, e, f, g) + #[tracing::instrument(level = "trace", skip())] fn create_test_schema() -> Result { let a = Field::new("a", DataType::Int32, true); let b = Field::new("b", DataType::Int32, true); @@ -619,6 +655,7 @@ mod tests { } // Convert each tuple to PhysicalSortExpr + #[tracing::instrument(level = "trace", skip(in_data))] fn convert_to_sort_exprs( in_data: &[(&Arc, SortOptions)], ) -> Vec { diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index 06dd8230d39ec..3dcd7799b643c 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -67,6 +67,7 @@ pub struct UnnestExec { } impl UnnestExec { + #[tracing::instrument(level = "trace", skip(input, columns, schema, options))] /// Create a new [UnnestExec]. pub fn new( input: Arc, @@ -85,6 +86,7 @@ impl UnnestExec { } } + #[tracing::instrument(level = "trace", skip(input, schema))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties( input: &Arc, @@ -101,6 +103,7 @@ impl UnnestExec { } impl DisplayAs for UnnestExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -115,22 +118,27 @@ impl DisplayAs for UnnestExec { } impl ExecutionPlan for UnnestExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "UnnestExec" } + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.input.clone()] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -143,10 +151,12 @@ impl ExecutionPlan for UnnestExec { ))) } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_distribution(&self) -> Vec { vec![Distribution::UnspecifiedDistribution] } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -164,6 +174,7 @@ impl ExecutionPlan for UnnestExec { })) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } @@ -184,6 +195,7 @@ struct UnnestMetrics { } impl UnnestMetrics { + #[tracing::instrument(level = "trace", skip(partition, metrics))] fn new(partition: usize, metrics: &ExecutionPlanMetricsSet) -> Self { let elapsed_compute = MetricBuilder::new(metrics).elapsed_compute(partition); @@ -222,6 +234,7 @@ struct UnnestStream { } impl RecordBatchStream for UnnestStream { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.schema.clone() } @@ -231,6 +244,7 @@ impl RecordBatchStream for UnnestStream { impl Stream for UnnestStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: std::pin::Pin<&mut Self>, cx: &mut std::task::Context<'_>, @@ -240,6 +254,7 @@ impl Stream for UnnestStream { } impl UnnestStream { + #[tracing::instrument(level = "trace", skip(self, cx))] /// Separate implementation function that unpins the [`UnnestStream`] so /// that partial borrows work correctly fn poll_next_impl( @@ -279,6 +294,7 @@ impl UnnestStream { } } +#[tracing::instrument(level = "trace", skip(batch, schema, columns, options))] /// For each row in a `RecordBatch`, some list columns need to be unnested. /// We will expand the values in each list into multiple rows, /// taking the longest length among these lists, and shorter lists are padded with NULLs. @@ -323,6 +339,7 @@ fn build_batch( batch_from_indices(batch, schema, &unnested_array_map, &take_indicies) } +#[tracing::instrument(level = "trace", skip(list_arrays, options))] /// Find the longest list length among the given list arrays for each row. 
/// /// For example if we have the following two list arrays: @@ -387,10 +404,12 @@ trait ListArrayType: Array { } impl ListArrayType for ListArray { + #[tracing::instrument(level = "trace", skip(self))] fn values(&self) -> &ArrayRef { self.values() } + #[tracing::instrument(level = "trace", skip(self, row))] fn value_offsets(&self, row: usize) -> (i64, i64) { let offsets = self.value_offsets(); (offsets[row].into(), offsets[row + 1].into()) @@ -398,10 +417,12 @@ impl ListArrayType for ListArray { } impl ListArrayType for LargeListArray { + #[tracing::instrument(level = "trace", skip(self))] fn values(&self) -> &ArrayRef { self.values() } + #[tracing::instrument(level = "trace", skip(self, row))] fn value_offsets(&self, row: usize) -> (i64, i64) { let offsets = self.value_offsets(); (offsets[row], offsets[row + 1]) @@ -409,16 +430,19 @@ impl ListArrayType for LargeListArray { } impl ListArrayType for FixedSizeListArray { + #[tracing::instrument(level = "trace", skip(self))] fn values(&self) -> &ArrayRef { self.values() } + #[tracing::instrument(level = "trace", skip(self, row))] fn value_offsets(&self, row: usize) -> (i64, i64) { let start = self.value_offset(row) as i64; (start, start + self.value_length() as i64) } } +#[tracing::instrument(level = "trace", skip(list_arrays, length_array, capacity))] /// Unnest multiple list arrays according to the length array. fn unnest_list_arrays( list_arrays: &[ArrayRef], @@ -451,6 +475,7 @@ fn unnest_list_arrays( } } +#[tracing::instrument(level = "trace", skip(list_array, length_array, capacity))] /// Unnest a list array according the target length array. /// /// Consider a list array like this: @@ -505,6 +530,7 @@ fn unnest_list_array( )?) } +#[tracing::instrument(level = "trace", skip(length_array, capacity))] /// Creates take indicies that will be used to expand all columns except for the unnest [`columns`](UnnestExec::columns). /// Every column value needs to be repeated multiple times according to the length array. /// @@ -537,6 +563,7 @@ fn create_take_indicies( builder.finish() } +#[tracing::instrument(level = "trace", skip(batch, schema, unnested_list_arrays, indices))] /// Create the final batch given the unnested column arrays and a `indices` array /// that is used by the take kernel to copy values. 
/// @@ -596,6 +623,7 @@ mod tests { // Create a GenericListArray with the following list values: // [A, B, C], [], NULL, [D], NULL, [NULL, F] + #[tracing::instrument(level = "trace", skip())] fn make_generic_array() -> GenericListArray where OffsetSize: OffsetSizeTrait, @@ -644,6 +672,7 @@ mod tests { // Create a FixedSizeListArray with the following list values: // [A, B], NULL, [C, D], NULL, [NULL, F], [NULL, NULL] + #[tracing::instrument(level = "trace", skip())] fn make_fixed_list() -> FixedSizeListArray { let values = Arc::new(StringArray::from_iter([ Some("A"), @@ -664,6 +693,7 @@ mod tests { FixedSizeListArray::new(field, 2, values, Some(valid)) } + #[tracing::instrument(level = "trace", skip(list_array, lengths, expected))] fn verify_unnest_list_array( list_array: &dyn ListArrayType, lengths: Vec, @@ -721,6 +751,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(list_arrays, preserve_nulls, expected))] fn verify_longest_length( list_arrays: &[ArrayRef], preserve_nulls: bool, diff --git a/datafusion/physical-plan/src/values.rs b/datafusion/physical-plan/src/values.rs index 2aa893fd29165..c5919a41ea51c 100644 --- a/datafusion/physical-plan/src/values.rs +++ b/datafusion/physical-plan/src/values.rs @@ -47,6 +47,7 @@ pub struct ValuesExec { } impl ValuesExec { + #[tracing::instrument(level = "trace", skip(schema, data))] /// create a new values exec from data as expr pub fn try_new( schema: SchemaRef, @@ -93,6 +94,7 @@ impl ValuesExec { Self::try_new_from_batches(schema, data) } + #[tracing::instrument(level = "trace", skip(schema, batches))] /// Create a new plan using the provided schema and batches. /// /// Errors if any of the batches don't match the provided schema, or if no @@ -122,11 +124,13 @@ impl ValuesExec { }) } + #[tracing::instrument(level = "trace", skip(self))] /// provides the data pub fn data(&self) -> Vec { self.data.clone() } + #[tracing::instrument(level = "trace", skip(schema))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); @@ -140,6 +144,7 @@ impl ValuesExec { } impl DisplayAs for ValuesExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -154,23 +159,28 @@ impl DisplayAs for ValuesExec { } impl ExecutionPlan for ValuesExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "ValuesExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![] } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -179,6 +189,7 @@ impl ExecutionPlan for ValuesExec { .map(|e| Arc::new(e) as _) } + #[tracing::instrument(level = "trace", skip(self, partition, _context))] fn execute( &self, partition: usize, @@ -198,6 +209,7 @@ impl ExecutionPlan for ValuesExec { )?)) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { let batch = self.data(); Ok(common::compute_record_batch_statistics( diff --git a/datafusion/physical-plan/src/visitor.rs b/datafusion/physical-plan/src/visitor.rs index ca826c50022d4..577f0575e5bdf 100644 --- a/datafusion/physical-plan/src/visitor.rs +++ b/datafusion/physical-plan/src/visitor.rs @@ -17,6 +17,7 @@ use super::ExecutionPlan; +#[tracing::instrument(level = "trace", skip(plan, visitor))] /// Visit all children of this plan, according to the order defined on `ExecutionPlanVisitor`. 
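The skip lists are not cosmetic: by default the attribute records every non-skipped argument as a span field via its `Debug` implementation, so arguments such as plans, visitors, streams, and record batches either cannot or should not be formatted and are therefore skipped. A small sketch with hypothetical types:

    // Sketch with made-up types: `PlanVisitor` has no Debug impl, so it must
    // appear in `skip(...)`; otherwise the attribute fails to compile because
    // it tries to record the argument with `Debug`. `depth` stays recorded.
    struct PlanVisitor {
        visited: usize,
    }

    #[tracing::instrument(level = "trace", skip(visitor))]
    fn visit_all(depth: usize, visitor: &mut PlanVisitor) {
        visitor.visited += depth;
    }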
// Note that this would be really nice if it were a method on // ExecutionPlan, but it can not be because it takes a generic @@ -79,6 +80,7 @@ pub trait ExecutionPlanVisitor { } } +#[tracing::instrument(level = "trace", skip(plan, visitor))] /// Recursively calls `pre_visit` and `post_visit` for this node and /// all of its children, as described on [`ExecutionPlanVisitor`] pub fn visit_execution_plan( diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index cff91283eb6e3..e4cc548e24db5 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -94,6 +94,7 @@ pub struct BoundedWindowAggExec { } impl BoundedWindowAggExec { + #[tracing::instrument(level = "trace", skip(window_expr, input, partition_keys, input_order_mode))] /// Create a new execution plan for window aggregates pub fn try_new( window_expr: Vec>, @@ -134,16 +135,19 @@ impl BoundedWindowAggExec { }) } + #[tracing::instrument(level = "trace", skip(self))] /// Window expressions pub fn window_expr(&self) -> &[Arc] { &self.window_expr } + #[tracing::instrument(level = "trace", skip(self))] /// Input plan pub fn input(&self) -> &Arc { &self.input } + #[tracing::instrument(level = "trace", skip(self))] /// Return the output sort order of partition keys: For example /// OVER(PARTITION BY a, ORDER BY b) -> would give sorting of the column a // We are sure that partition by columns are always at the beginning of sort_keys @@ -158,6 +162,7 @@ impl BoundedWindowAggExec { ) } + #[tracing::instrument(level = "trace", skip(self))] /// Initializes the appropriate [`PartitionSearcher`] implementation from /// the state. fn get_search_algo(&self) -> Result> { @@ -184,6 +189,7 @@ impl BoundedWindowAggExec { }) } + #[tracing::instrument(level = "trace", skip(input, schema, window_expr))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties( input: &Arc, @@ -208,6 +214,7 @@ impl BoundedWindowAggExec { } impl DisplayAs for BoundedWindowAggExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -237,23 +244,28 @@ impl DisplayAs for BoundedWindowAggExec { } impl ExecutionPlan for BoundedWindowAggExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "BoundedWindowAggExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.input.clone()] } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_ordering(&self) -> Vec>> { let partition_bys = self.window_expr()[0].partition_by(); let order_keys = self.window_expr()[0].order_by(); @@ -270,6 +282,7 @@ impl ExecutionPlan for BoundedWindowAggExec { } } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_distribution(&self) -> Vec { if self.partition_keys.is_empty() { debug!("No partition defined for BoundedWindowAggExec!!!"); @@ -279,10 +292,12 @@ impl ExecutionPlan for BoundedWindowAggExec { } } + #[tracing::instrument(level = "trace", skip(self))] fn maintains_input_order(&self) -> Vec { vec![true] } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -295,6 +310,7 @@ impl ExecutionPlan for BoundedWindowAggExec { )?)) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -312,10 +328,12 @@ impl ExecutionPlan for BoundedWindowAggExec { Ok(stream) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { let input_stat = self.input.statistics()?; let win_cols = self.window_expr.len(); @@ -458,6 +476,7 @@ pub struct LinearSearch { } impl PartitionSearcher for LinearSearch { + #[tracing::instrument(level = "trace", skip(self, input_buffer, window_agg_states, partition_buffers, window_expr))] /// This method constructs output columns using the result of each window expression. // Assume input buffer is | Partition Buffers would be (Where each partition and its data is seperated) // a, 2 | a, 2 @@ -543,6 +562,7 @@ impl PartitionSearcher for LinearSearch { get_arrayref_at_indices(&new_columns, &sorted_indices).map(Some) } + #[tracing::instrument(level = "trace", skip(self, record_batch, window_expr))] fn evaluate_partition_batches( &mut self, record_batch: &RecordBatch, @@ -565,11 +585,13 @@ impl PartitionSearcher for LinearSearch { .collect() } + #[tracing::instrument(level = "trace", skip(self, n_out))] fn prune(&mut self, n_out: usize) { // Delete hashes for the rows that are outputted. self.input_buffer_hashes.drain(0..n_out); } + #[tracing::instrument(level = "trace", skip(self, partition_buffers))] fn mark_partition_end(&self, partition_buffers: &mut PartitionBatches) { // We should be in the `PartiallySorted` case, otherwise we can not // tell when we are at the end of a given partition. 
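None of these spans produce output on their own; the application embedding DataFusion has to install a subscriber that enables the `trace` level for the relevant targets. A sketch assuming `tracing-subscriber` with its `env-filter` feature is available, and that the default target for this crate is its module path `datafusion_physical_plan`:

    use tracing_subscriber::EnvFilter;

    fn main() {
        // Honor RUST_LOG when present; otherwise enable TRACE for the
        // physical-plan crate only, so hot operators emit spans without
        // tracing everything else.
        let filter = EnvFilter::try_from_default_env()
            .unwrap_or_else(|_| EnvFilter::new("datafusion_physical_plan=trace"));

        tracing_subscriber::fmt().with_env_filter(filter).init();

        // ... set up a SessionContext and run queries here; instrumented
        // functions now record trace-level spans.
    }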
@@ -594,16 +616,19 @@ impl PartitionSearcher for LinearSearch { } } + #[tracing::instrument(level = "trace", skip(self))] fn is_mode_linear(&self) -> bool { self.ordered_partition_by_indices.is_empty() } + #[tracing::instrument(level = "trace", skip(self))] fn input_schema(&self) -> &SchemaRef { &self.input_schema } } impl LinearSearch { + #[tracing::instrument(level = "trace", skip(ordered_partition_by_indices, input_schema))] /// Initialize a new [`LinearSearch`] partition searcher. fn new(ordered_partition_by_indices: Vec, input_schema: SchemaRef) -> Self { LinearSearch { @@ -616,6 +641,7 @@ impl LinearSearch { } } + #[tracing::instrument(level = "trace", skip(self, record_batch, window_expr))] /// Calculates partition by expression results for each window expression /// on `record_batch`. fn evaluate_partition_by_column_values( @@ -635,6 +661,7 @@ impl LinearSearch { .collect() } + #[tracing::instrument(level = "trace", skip(self, columns, batch))] /// Calculate indices of each partition (according to PARTITION BY expression) /// `columns` contain partition by expression results. fn get_per_partition_indices( @@ -670,6 +697,7 @@ impl LinearSearch { Ok(result) } + #[tracing::instrument(level = "trace", skip(self, input_buffer, window_agg_states, window_expr))] /// Calculates partition keys and result indices for each partition. /// The return value is a vector of tuples where the first entry stores /// the partition key (unique for each partition) and the second entry @@ -738,6 +766,7 @@ pub struct SortedSearch { } impl PartitionSearcher for SortedSearch { + #[tracing::instrument(level = "trace", skip(self, _input_buffer, window_agg_states, partition_buffers, _window_expr))] /// This method constructs new output columns using the result of each window expression. fn calculate_out_columns( &mut self, @@ -757,6 +786,7 @@ impl PartitionSearcher for SortedSearch { } } + #[tracing::instrument(level = "trace", skip(self, record_batch, _window_expr))] fn evaluate_partition_batches( &mut self, record_batch: &RecordBatch, @@ -790,6 +820,7 @@ impl PartitionSearcher for SortedSearch { .collect::>>() } + #[tracing::instrument(level = "trace", skip(self, partition_buffers))] fn mark_partition_end(&self, partition_buffers: &mut PartitionBatches) { // In Sorted case. We can mark all partitions besides last partition as ended. // We are sure that those partitions will never receive any values. @@ -801,12 +832,14 @@ impl PartitionSearcher for SortedSearch { } } + #[tracing::instrument(level = "trace", skip(self))] fn input_schema(&self) -> &SchemaRef { &self.input_schema } } impl SortedSearch { + #[tracing::instrument(level = "trace", skip(self, window_agg_states, partition_buffers))] /// Calculates how many rows we can output. fn calculate_n_out_row( &mut self, @@ -850,6 +883,7 @@ impl SortedSearch { } } +#[tracing::instrument(level = "trace", skip(input_schema, window_expr))] fn create_schema( input_schema: &Schema, window_expr: &[Arc], @@ -893,6 +927,7 @@ pub struct BoundedWindowAggStream { } impl BoundedWindowAggStream { + #[tracing::instrument(level = "trace", skip(self, n_out))] /// Prunes sections of the state that are no longer needed when calculating /// results (as determined by window frame boundaries and number of results generated). 
// For instance, if first `n` (not necessarily same with `n_out`) elements are no longer needed to @@ -916,6 +951,7 @@ impl BoundedWindowAggStream { impl Stream for BoundedWindowAggStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -926,6 +962,7 @@ impl Stream for BoundedWindowAggStream { } impl BoundedWindowAggStream { + #[tracing::instrument(level = "trace", skip(schema, window_expr, input, baseline_metrics, search_mode))] /// Create a new BoundedWindowAggStream fn new( schema: SchemaRef, @@ -949,6 +986,7 @@ impl BoundedWindowAggStream { }) } + #[tracing::instrument(level = "trace", skip(self))] fn compute_aggregates(&mut self) -> Result { // calculate window cols for (cur_window_expr, state) in @@ -982,6 +1020,7 @@ impl BoundedWindowAggStream { } } + #[tracing::instrument(level = "trace", skip(self, cx))] #[inline] fn poll_next_inner( &mut self, @@ -1013,6 +1052,7 @@ impl BoundedWindowAggStream { Poll::Ready(Some(result)) } + #[tracing::instrument(level = "trace", skip(self))] /// Prunes the sections of the record batch (for each partition) /// that we no longer need to calculate the window function result. fn prune_partition_batches(&mut self) { @@ -1064,6 +1104,7 @@ impl BoundedWindowAggStream { } } + #[tracing::instrument(level = "trace", skip(self, n_out))] /// Prunes the section of the input batch whose aggregate results /// are calculated and emitted. fn prune_input_batch(&mut self, n_out: usize) -> Result<()> { @@ -1083,6 +1124,7 @@ impl BoundedWindowAggStream { Ok(()) } + #[tracing::instrument(level = "trace", skip(self))] /// Prunes emitted parts from WindowAggState `out_col` field. fn prune_out_columns(&mut self) { // We store generated columns for each window expression in the `out_col` @@ -1112,6 +1154,7 @@ impl BoundedWindowAggStream { } impl RecordBatchStream for BoundedWindowAggStream { + #[tracing::instrument(level = "trace", skip(self))] /// Get the schema fn schema(&self) -> SchemaRef { self.schema.clone() @@ -1119,11 +1162,13 @@ impl RecordBatchStream for BoundedWindowAggStream { } // Gets the index of minimum entry, returns None if empty. +#[tracing::instrument(level = "trace", skip(data))] fn argmin(data: impl Iterator) -> Option<(usize, T)> { data.enumerate() .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(Ordering::Equal)) } +#[tracing::instrument(level = "trace", skip(partition_window_agg_states, len_to_show))] /// Calculates the section we can show results for expression fn get_aggregate_result_out_column( partition_window_agg_states: &PartitionWindowAggStates, @@ -1161,6 +1206,7 @@ fn get_aggregate_result_out_column( .ok_or_else(|| DataFusionError::Execution("Should contain something".to_string())) } +#[tracing::instrument(level = "trace", skip(batch))] /// Constructs a batch from the last row of batch in the argument. 
pub(crate) fn get_last_row_batch(batch: &RecordBatch) -> Result { if batch.num_rows() == 0 { @@ -1219,10 +1265,12 @@ mod tests { } impl PartitionStream for TestStreamPartition { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> &SchemaRef { &self.schema } + #[tracing::instrument(level = "trace", skip(self, _ctx))] fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { // We create an iterator from the record batches and map them into Ok values, // converting the iterator into a futures::stream::Stream @@ -1233,6 +1281,7 @@ mod tests { impl Stream for TestStreamPartition { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -1248,6 +1297,7 @@ mod tests { } impl TestStreamPartition { + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next_inner( self: &mut Pin<&mut Self>, cx: &mut Context<'_>, @@ -1286,11 +1336,13 @@ mod tests { } impl RecordBatchStream for TestStreamPartition { + #[tracing::instrument(level = "trace", skip(self))] fn schema(&self) -> SchemaRef { self.schema.clone() } } + #[tracing::instrument(level = "trace", skip(input, n_future_range, hash, order_by))] fn bounded_window_exec_pb_latent_range( input: Arc, n_future_range: usize, @@ -1335,6 +1387,7 @@ mod tests { )?)) } + #[tracing::instrument(level = "trace", skip(input))] fn projection_exec(input: Arc) -> Result> { let schema = input.schema(); let exprs = input @@ -1355,6 +1408,7 @@ mod tests { Ok(Arc::new(ProjectionExec::try_new(exprs, input)?)) } + #[tracing::instrument(level = "trace", skip())] fn task_context_helper() -> TaskContext { let task_ctx = TaskContext::default(); // Create session context with config @@ -1365,10 +1419,12 @@ mod tests { task_ctx.with_session_config(session_config) } + #[tracing::instrument(level = "trace", skip())] fn task_context() -> Arc { Arc::new(task_context_helper()) } + #[tracing::instrument(level = "trace", skip(stream, results))] pub async fn collect_stream( mut stream: SendableRecordBatchStream, results: &mut Vec, @@ -1379,6 +1435,7 @@ mod tests { Ok(()) } + #[tracing::instrument(level = "trace", skip(plan, context, timeout_duration))] /// Execute the [ExecutionPlan] and collect the results in memory pub async fn collect_with_timeout( plan: Arc, @@ -1399,6 +1456,7 @@ mod tests { Ok(results) } + #[tracing::instrument(level = "trace", skip(plan, context))] /// Execute the [ExecutionPlan] and collect the results in memory #[allow(dead_code)] pub async fn collect_bonafide( @@ -1413,6 +1471,7 @@ mod tests { Ok(results) } + #[tracing::instrument(level = "trace", skip())] fn test_schema() -> SchemaRef { Arc::new(Schema::new(vec![ Field::new("sn", DataType::UInt64, true), @@ -1420,6 +1479,7 @@ mod tests { ])) } + #[tracing::instrument(level = "trace", skip(schema))] fn schema_orders(schema: &SchemaRef) -> Result> { let orderings = vec![vec![PhysicalSortExpr { expr: col("sn", schema)?, @@ -1431,10 +1491,12 @@ mod tests { Ok(orderings) } + #[tracing::instrument(level = "trace", skip(lhs, rhs))] fn is_integer_division_safe(lhs: usize, rhs: usize) -> bool { let res = lhs / rhs; res * rhs == lhs } + #[tracing::instrument(level = "trace", skip(schema, n_row, n_chunk))] fn generate_batches( schema: &SchemaRef, n_row: usize, @@ -1472,6 +1534,7 @@ mod tests { Ok(batches) } + #[tracing::instrument(level = "trace", skip(n_rows, chunk_length, n_partition, is_infinite, send_exit, per_batch_wait_duration_in_millis))] fn generate_never_ending_source( n_rows: usize, 
chunk_length: usize, diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 42c630741cc93..ac179514f2ee0 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -53,6 +53,7 @@ pub use datafusion_physical_expr::window::{ }; pub use window_agg_exec::WindowAggExec; +#[tracing::instrument(level = "trace", skip(args, schema, window_fn, fn_name))] /// Build field from window function and add it into schema pub fn schema_add_window_field( args: &[Arc], @@ -78,6 +79,7 @@ pub fn schema_add_window_field( Ok(Arc::new(Schema::new(window_fields))) } +#[tracing::instrument(level = "trace", skip(fun, name, args, partition_by, order_by, window_frame, input_schema, ignore_nulls))] /// Create a physical expression for window function #[allow(clippy::too_many_arguments)] pub fn create_window_expr( @@ -147,6 +149,7 @@ pub fn create_window_expr( }) } +#[tracing::instrument(level = "trace", skip(partition_by, order_by, window_frame, aggregate))] /// Creates an appropriate [`WindowExpr`] based on the window frame and fn window_expr_from_aggregate_expr( partition_by: &[Arc], @@ -174,6 +177,7 @@ fn window_expr_from_aggregate_expr( } } +#[tracing::instrument(level = "trace", skip(args, index))] fn get_scalar_value_from_args( args: &[Arc], index: usize, @@ -193,6 +197,7 @@ fn get_scalar_value_from_args( }) } +#[tracing::instrument(level = "trace", skip(default_value, dtype))] fn get_casted_value( default_value: Option, dtype: &DataType, @@ -204,6 +209,7 @@ fn get_casted_value( } } +#[tracing::instrument(level = "trace", skip(fun, args, input_schema, name, ignore_nulls))] fn create_built_in_window_expr( fun: &BuiltInWindowFunction, args: &[Arc], @@ -310,6 +316,7 @@ fn create_built_in_window_expr( }) } +#[tracing::instrument(level = "trace", skip(fun, args, input_schema, name))] /// Creates a `BuiltInWindowFunctionExpr` suitable for a user defined window function fn create_udwf_window_expr( fun: &Arc, @@ -345,32 +352,39 @@ struct WindowUDFExpr { } impl BuiltInWindowFunctionExpr for WindowUDFExpr { + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn std::any::Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn field(&self) -> Result { let nullable = true; Ok(Field::new(&self.name, self.data_type.clone(), nullable)) } + #[tracing::instrument(level = "trace", skip(self))] fn expressions(&self) -> Vec> { self.args.clone() } + #[tracing::instrument(level = "trace", skip(self))] fn create_evaluator(&self) -> Result> { self.fun.partition_evaluator_factory() } + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &str { &self.name } + #[tracing::instrument(level = "trace", skip(self))] fn reverse_expr(&self) -> Option> { None } } +#[tracing::instrument(level = "trace", skip(partition_by_exprs, orderby_sort_exprs))] pub(crate) fn calc_requirements< T: Borrow>, S: Borrow, @@ -394,6 +408,7 @@ pub(crate) fn calc_requirements< (!sort_reqs.is_empty()).then_some(sort_reqs) } +#[tracing::instrument(level = "trace", skip(partition_by_exprs, input))] /// This function calculates the indices such that when partition by expressions reordered with the indices /// resulting expressions define a preset for existing ordering. 
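Because each attribute only opens a span for the duration of the call, instrumented functions that call other instrumented functions yield nested spans, and any events emitted in a body attach to the innermost open span. A sketch with made-up helpers:

    // Invented helpers mirroring the call pattern in this patch: the outer
    // span becomes the parent of the inner one, and the event below is
    // attached to the innermost open span.
    #[tracing::instrument(level = "trace", skip(values))]
    fn best_fitting(values: &[i64]) -> i64 {
        partial_sum(values)
    }

    #[tracing::instrument(level = "trace", skip(values))]
    fn partial_sum(values: &[i64]) -> i64 {
        let total: i64 = values.iter().sum();
        tracing::trace!(total, "computed partial sum");
        total
    }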
/// For instance, if input is ordered by a, b, c and PARTITION BY b, a is used, @@ -409,6 +424,7 @@ pub fn get_ordered_partition_by_indices( indices } +#[tracing::instrument(level = "trace", skip(input, partition_by_exprs, ordered_partition_by_indices))] pub(crate) fn get_partition_by_sort_exprs( input: &Arc, partition_by_exprs: &[Arc], @@ -430,6 +446,7 @@ pub(crate) fn get_partition_by_sort_exprs( } } +#[tracing::instrument(level = "trace", skip(schema, input, window_expr))] pub(crate) fn window_equivalence_properties( schema: &SchemaRef, input: &Arc, @@ -450,6 +467,7 @@ pub(crate) fn window_equivalence_properties( window_eq_properties } +#[tracing::instrument(level = "trace", skip(window_exprs, input, physical_partition_keys))] /// Constructs the best-fitting windowing operator (a `WindowAggExec` or a /// `BoundedWindowExec`) for the given `input` according to the specifications /// of `window_exprs` and `physical_partition_keys`. Here, best-fitting means @@ -527,6 +545,7 @@ pub fn get_best_fitting_window( } } +#[tracing::instrument(level = "trace", skip(partitionby_exprs, orderby_keys, input))] /// Compares physical ordering (output ordering of the `input` operator) with /// `partitionby_exprs` and `orderby_keys` to decide whether existing ordering /// is sufficient to run the current window operator. @@ -591,6 +610,7 @@ mod tests { use InputOrderMode::{Linear, PartiallySorted, Sorted}; + #[tracing::instrument(level = "trace", skip())] fn create_test_schema() -> Result { let nullable_column = Field::new("nullable_col", DataType::Int32, true); let non_nullable_column = Field::new("non_nullable_col", DataType::Int32, false); @@ -599,6 +619,7 @@ mod tests { Ok(schema) } + #[tracing::instrument(level = "trace", skip())] fn create_test_schema2() -> Result { let a = Field::new("a", DataType::Int32, true); let b = Field::new("b", DataType::Int32, true); @@ -610,6 +631,7 @@ mod tests { } // Generate a schema which consists of 5 columns (a, b, c, d, e) + #[tracing::instrument(level = "trace", skip())] fn create_test_schema3() -> Result { let a = Field::new("a", DataType::Int32, true); let b = Field::new("b", DataType::Int32, false); @@ -620,11 +642,13 @@ mod tests { Ok(schema) } + #[tracing::instrument(level = "trace", skip(name, schema))] /// make PhysicalSortExpr with default options pub fn sort_expr(name: &str, schema: &Schema) -> PhysicalSortExpr { sort_expr_options(name, schema, SortOptions::default()) } + #[tracing::instrument(level = "trace", skip(name, schema, options))] /// PhysicalSortExpr with specified options pub fn sort_expr_options( name: &str, @@ -637,6 +661,7 @@ mod tests { } } + #[tracing::instrument(level = "trace", skip(schema, sort_exprs, infinite_source))] /// Created a sorted Streaming Table exec pub fn streaming_table_exec( schema: &SchemaRef, diff --git a/datafusion/physical-plan/src/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs index 1507902c22ea7..6849d8aeb2324 100644 --- a/datafusion/physical-plan/src/windows/window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/window_agg_exec.rs @@ -68,6 +68,7 @@ pub struct WindowAggExec { } impl WindowAggExec { + #[tracing::instrument(level = "trace", skip(window_expr, input, partition_keys))] /// Create a new execution plan for window aggregates pub fn try_new( window_expr: Vec>, @@ -91,16 +92,19 @@ impl WindowAggExec { }) } + #[tracing::instrument(level = "trace", skip(self))] /// Window expressions pub fn window_expr(&self) -> &[Arc] { &self.window_expr } + 
#[tracing::instrument(level = "trace", skip(self))] /// Input plan pub fn input(&self) -> &Arc { &self.input } + #[tracing::instrument(level = "trace", skip(self))] /// Return the output sort order of partition keys: For example /// OVER(PARTITION BY a, ORDER BY b) -> would give sorting of the column a // We are sure that partition by columns are always at the beginning of sort_keys @@ -115,6 +119,7 @@ impl WindowAggExec { ) } + #[tracing::instrument(level = "trace", skip(schema, input, window_expr))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn compute_properties( schema: SchemaRef, @@ -143,6 +148,7 @@ impl WindowAggExec { } impl DisplayAs for WindowAggExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -171,27 +177,33 @@ impl DisplayAs for WindowAggExec { } impl ExecutionPlan for WindowAggExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "WindowAggExec" } + #[tracing::instrument(level = "trace", skip(self))] /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![self.input.clone()] } + #[tracing::instrument(level = "trace", skip(self))] fn maintains_input_order(&self) -> Vec { vec![true] } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_ordering(&self) -> Vec>> { let partition_bys = self.window_expr()[0].partition_by(); let order_keys = self.window_expr()[0].order_by(); @@ -206,6 +218,7 @@ impl ExecutionPlan for WindowAggExec { } } + #[tracing::instrument(level = "trace", skip(self))] fn required_input_distribution(&self) -> Vec { if self.partition_keys.is_empty() { vec![Distribution::SinglePartition] @@ -214,6 +227,7 @@ impl ExecutionPlan for WindowAggExec { } } + #[tracing::instrument(level = "trace", skip(self, children))] fn with_new_children( self: Arc, children: Vec>, @@ -225,6 +239,7 @@ impl ExecutionPlan for WindowAggExec { )?)) } + #[tracing::instrument(level = "trace", skip(self, partition, context))] fn execute( &self, partition: usize, @@ -242,10 +257,12 @@ impl ExecutionPlan for WindowAggExec { Ok(stream) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { let input_stat = self.input.statistics()?; let win_cols = self.window_expr.len(); @@ -265,6 +282,7 @@ impl ExecutionPlan for WindowAggExec { } } +#[tracing::instrument(level = "trace", skip(input_schema, window_expr))] fn create_schema( input_schema: &Schema, window_expr: &[Arc], @@ -279,6 +297,7 @@ fn create_schema( Ok(builder.finish()) } +#[tracing::instrument(level = "trace", skip(window_expr, batch))] /// Compute the window aggregate columns fn compute_window_aggregates( window_expr: &[Arc], @@ -303,6 +322,7 @@ pub struct WindowAggStream { } impl WindowAggStream { + #[tracing::instrument(level = "trace", skip(schema, window_expr, input, baseline_metrics, partition_by_sort_keys, ordered_partition_by_indices))] /// Create a new WindowAggStream pub fn new( schema: SchemaRef, @@ -328,6 +348,7 @@ impl WindowAggStream { }) } + #[tracing::instrument(level = "trace", skip(self))] fn compute_aggregates(&self) -> 
Result { // record compute time on drop let _timer = self.baseline_metrics.elapsed_compute().timer(); @@ -373,6 +394,7 @@ impl WindowAggStream { impl Stream for WindowAggStream { type Item = Result; + #[tracing::instrument(level = "trace", skip(self, cx))] fn poll_next( mut self: Pin<&mut Self>, cx: &mut Context<'_>, @@ -383,6 +405,7 @@ impl Stream for WindowAggStream { } impl WindowAggStream { + #[tracing::instrument(level = "trace", skip(self, cx))] #[inline] fn poll_next_inner( &mut self, @@ -410,6 +433,7 @@ impl WindowAggStream { } impl RecordBatchStream for WindowAggStream { + #[tracing::instrument(level = "trace", skip(self))] /// Get the schema fn schema(&self) -> SchemaRef { self.schema.clone() diff --git a/datafusion/physical-plan/src/work_table.rs b/datafusion/physical-plan/src/work_table.rs index b3c9043d4fdc4..342498f44433b 100644 --- a/datafusion/physical-plan/src/work_table.rs +++ b/datafusion/physical-plan/src/work_table.rs @@ -43,6 +43,7 @@ pub(super) struct ReservedBatches { } impl ReservedBatches { + #[tracing::instrument(level = "trace", skip(batches, reservation))] pub(super) fn new(batches: Vec, reservation: MemoryReservation) -> Self { ReservedBatches { batches, @@ -60,6 +61,7 @@ pub(super) struct WorkTable { } impl WorkTable { + #[tracing::instrument(level = "trace", skip())] /// Create a new work table. pub(super) fn new() -> Self { Self { @@ -67,6 +69,7 @@ impl WorkTable { } } + #[tracing::instrument(level = "trace", skip(self))] /// Take the previously written batches from the work table. /// This will be called by the [`WorkTableExec`] when it is executed. fn take(&self) -> Result { @@ -77,6 +80,7 @@ impl WorkTable { .ok_or_else(|| internal_datafusion_err!("Unexpected empty work table")) } + #[tracing::instrument(level = "trace", skip(self, batches))] /// Update the results of a recursive query iteration to the work table. pub(super) fn update(&self, batches: ReservedBatches) { self.batches.lock().unwrap().replace(batches); @@ -108,6 +112,7 @@ pub struct WorkTableExec { } impl WorkTableExec { + #[tracing::instrument(level = "trace", skip(name, schema))] /// Create a new execution plan for a worktable exec. pub fn new(name: String, schema: SchemaRef) -> Self { let cache = Self::compute_properties(schema.clone()); @@ -120,6 +125,7 @@ impl WorkTableExec { } } + #[tracing::instrument(level = "trace", skip(self, work_table))] pub(super) fn with_work_table(&self, work_table: Arc) -> Self { Self { name: self.name.clone(), @@ -130,6 +136,7 @@ impl WorkTableExec { } } + #[tracing::instrument(level = "trace", skip(schema))] /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); @@ -143,6 +150,7 @@ impl WorkTableExec { } impl DisplayAs for WorkTableExec { + #[tracing::instrument(level = "trace", skip(self, t, f))] fn fmt_as( &self, t: DisplayFormatType, @@ -157,30 +165,37 @@ impl DisplayAs for WorkTableExec { } impl ExecutionPlan for WorkTableExec { + #[tracing::instrument(level = "trace", skip(self))] fn name(&self) -> &'static str { "WorkTableExec" } + #[tracing::instrument(level = "trace", skip(self))] fn as_any(&self) -> &dyn Any { self } + #[tracing::instrument(level = "trace", skip(self))] fn properties(&self) -> &PlanProperties { &self.cache } + #[tracing::instrument(level = "trace", skip(self))] fn children(&self) -> Vec> { vec![] } + #[tracing::instrument(level = "trace", skip(self))] fn maintains_input_order(&self) -> Vec { vec![false] } + #[tracing::instrument(level = "trace", skip(self))] fn benefits_from_input_partitioning(&self) -> Vec { vec![false] } + #[tracing::instrument(level = "trace", skip(self))] fn with_new_children( self: Arc, _: Vec>, @@ -188,6 +203,7 @@ impl ExecutionPlan for WorkTableExec { Ok(self.clone()) } + #[tracing::instrument(level = "trace", skip(self, partition, _context))] /// Stream the batches that were written to the work table. fn execute( &self, @@ -207,10 +223,12 @@ impl ExecutionPlan for WorkTableExec { )) } + #[tracing::instrument(level = "trace", skip(self))] fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } + #[tracing::instrument(level = "trace", skip(self))] fn statistics(&self) -> Result { Ok(Statistics::new_unknown(&self.schema())) }
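The test helpers touched above are instrumented as well, so a test can opt into seeing the spans by installing a capture-friendly subscriber first. A sketch, assuming `tracing-subscriber` is available to the test build:

    #[cfg(test)]
    mod tracing_smoke {
        #[test]
        fn spans_are_emitted() {
            // `with_test_writer` routes output through the test harness's
            // captured stdout; `try_init` tolerates a subscriber already
            // having been installed by another test.
            let _ = tracing_subscriber::fmt()
                .with_max_level(tracing::Level::TRACE)
                .with_test_writer()
                .try_init();

            let span = tracing::trace_span!("smoke");
            let _guard = span.enter();
            tracing::trace!("tracing is wired up");
        }
    }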