From a5f36d0166b6fbb30ee34a5c36833ae99673e772 Mon Sep 17 00:00:00 2001 From: Devin D'Angelo Date: Wed, 20 Dec 2023 15:04:55 -0500 Subject: [PATCH 1/2] remove listingtable single_file option --- datafusion/core/src/datasource/listing/table.rs | 12 +----------- .../core/src/datasource/listing_table_factory.rs | 9 ++------- docs/source/user-guide/sql/write_options.md | 13 ++----------- 3 files changed, 5 insertions(+), 29 deletions(-) diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 4c13d9d443ca..127c22ef680c 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -246,9 +246,6 @@ pub struct ListingOptions { /// multiple equivalent orderings, the outer `Vec` will have a /// single element. pub file_sort_order: Vec>, - /// This setting when true indicates that the table is backed by a single file. - /// Any inserts to the table may only append to this existing file. - pub single_file: bool, /// This setting holds file format specific options which should be used /// when inserting into this table. pub file_type_write_options: Option, @@ -269,7 +266,6 @@ impl ListingOptions { collect_stat: true, target_partitions: 1, file_sort_order: vec![], - single_file: false, file_type_write_options: None, } } @@ -421,12 +417,6 @@ impl ListingOptions { self } - /// Configure if this table is backed by a sigle file - pub fn with_single_file(mut self, single_file: bool) -> Self { - self.single_file = single_file; - self - } - /// Configure file format specific writing options. pub fn with_write_options( mut self, @@ -797,7 +787,7 @@ impl TableProvider for ListingTable { // all of the data at the input is sink after execution finishes. See discussion for rationale: // https://github.com/apache/arrow-datafusion/pull/7610#issuecomment-1728979918 unbounded_input: is_plan_streaming(&input)?, - single_file_output: self.options.single_file, + single_file_output: false, overwrite, file_type_writer_options, }; diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index 7c859ee988d5..f443722699cb 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -135,12 +135,8 @@ impl TableProviderFactory for ListingTableFactory { let mut statement_options = StatementOptions::from(&cmd.options); - // Extract ListingTable specific options if present or set default - let single_file = statement_options - .take_bool_option("single_file")? - .unwrap_or(false); - - // Backwards compatibility (#8547) + // Backwards compatibility (#8547), discard deprecated options + statement_options.take_bool_option("single_file")?; if let Some(s) = statement_options.take_str_option("insert_mode") { if !s.eq_ignore_ascii_case("append_new_files") { return plan_err!("Unknown or unsupported insert mode {s}. Only append_new_files supported"); @@ -195,7 +191,6 @@ impl TableProviderFactory for ListingTableFactory { .with_target_partitions(state.config().target_partitions()) .with_table_partition_cols(table_partition_cols) .with_file_sort_order(cmd.order_exprs.clone()) - .with_single_file(single_file) .with_write_options(file_type_writer_options); let resolved_schema = match provided_schema { diff --git a/docs/source/user-guide/sql/write_options.md b/docs/source/user-guide/sql/write_options.md index 94adee960996..1a37661fbfb6 100644 --- a/docs/source/user-guide/sql/write_options.md +++ b/docs/source/user-guide/sql/write_options.md @@ -42,12 +42,11 @@ WITH HEADER ROW DELIMITER ';' LOCATION '/test/location/my_csv_table/' OPTIONS( -CREATE_LOCAL_PATH 'true', NULL_VALUE 'NAN' ); ``` -When running `INSERT INTO my_table ...`, the options from the `CREATE TABLE` will be respected (gzip compression, special delimiter, and header row included). Note that compression, header, and delimiter settings can also be specified within the `OPTIONS` tuple list. Dedicated syntax within the SQL statement always takes precedence over arbitrary option tuples, so if both are specified the `OPTIONS` setting will be ignored. CREATE_LOCAL_PATH is a special option that indicates if DataFusion should create local file paths when writing new files if they do not already exist. This option is useful if you wish to create an external table from scratch, using only DataFusion SQL statements. Finally, NULL_VALUE is a CSV format specific option that determines how null values should be encoded within the CSV file. +When running `INSERT INTO my_table ...`, the options from the `CREATE TABLE` will be respected (gzip compression, special delimiter, and header row included). Note that compression, header, and delimiter settings can also be specified within the `OPTIONS` tuple list. Dedicated syntax within the SQL statement always takes precedence over arbitrary option tuples, so if both are specified the `OPTIONS` setting will be ignored. NULL_VALUE is a CSV format specific option that determines how null values should be encoded within the CSV file. Finally, options can be passed when running a `COPY` command. @@ -72,15 +71,7 @@ The following special options are specific to the `COPY` command. | Option | Description | Default Value | | ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------- | | SINGLE_FILE_OUTPUT | If true, COPY query will write output to a single file. Otherwise, multiple files will be written to a directory in parallel. | true | -| FORMAT | Specifies the file format COPY query will write out. If single_file_output is false or the format cannot be inferred from the file extension, then FORMAT must be specified. | N/A | - -### CREATE EXTERNAL TABLE Specific Options - -The following special options are specific to creating an external table. - -| Option | Description | Default Value | -| ----------- | --------------------------------------------------------------------------------------------------------------------- | ------------- | -| SINGLE_FILE | If true, indicates that this external table is backed by a single file. INSERT INTO queries will append to this file. | false | +| FORMAT | Specifies the file format COPY query will write out. If single_file_output is false or the format cannot be inferred from the file extension, then FORMAT must be specified. | N/A | | ### JSON Format Specific Options From e61d139e8f363aa20f6f647e7652e0fdc95a0460 Mon Sep 17 00:00:00 2001 From: Devin D'Angelo Date: Wed, 20 Dec 2023 15:15:06 -0500 Subject: [PATCH 2/2] prettier --- docs/source/user-guide/sql/write_options.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user-guide/sql/write_options.md b/docs/source/user-guide/sql/write_options.md index 1a37661fbfb6..470591afafff 100644 --- a/docs/source/user-guide/sql/write_options.md +++ b/docs/source/user-guide/sql/write_options.md @@ -69,7 +69,7 @@ In this example, we write the entirety of `source_table` out to a folder of parq The following special options are specific to the `COPY` command. | Option | Description | Default Value | -| ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------- | +| ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------- | --- | | SINGLE_FILE_OUTPUT | If true, COPY query will write output to a single file. Otherwise, multiple files will be written to a directory in parallel. | true | | FORMAT | Specifies the file format COPY query will write out. If single_file_output is false or the format cannot be inferred from the file extension, then FORMAT must be specified. | N/A | |