diff --git a/crates/polars-mem-engine/src/executors/hive_scan.rs b/crates/polars-mem-engine/src/executors/hive_scan.rs index 3b07e41e8ce4..f55a6a61e88e 100644 --- a/crates/polars-mem-engine/src/executors/hive_scan.rs +++ b/crates/polars-mem-engine/src/executors/hive_scan.rs @@ -11,7 +11,7 @@ use polars_io::predicates::BatchStats; use polars_io::prelude::FileMetadata; use polars_io::RowIndex; -use super::Executor; +use super::{CsvExec, Executor}; use crate::executors::ParquetExec; use crate::prelude::*; @@ -88,6 +88,13 @@ fn source_to_scan_exec( .clone() }), )) as _, + FileScan::Csv { options, .. } => Box::new(CsvExec { + sources: source, + file_info: file_info.clone(), + options: options.clone(), + file_options: file_options.clone(), + predicate: None, + }), _ => todo!(), }) } diff --git a/crates/polars-mem-engine/src/executors/scan/csv.rs b/crates/polars-mem-engine/src/executors/scan/csv.rs index c9abea10df69..8c49ce8c3d09 100644 --- a/crates/polars-mem-engine/src/executors/scan/csv.rs +++ b/crates/polars-mem-engine/src/executors/scan/csv.rs @@ -224,10 +224,9 @@ impl ScanExec for CsvExec { self.predicate = predicate; self.file_options.row_index = row_index; - self.file_info.reader_schema = Some(arrow::Either::Left(Arc::new( - schema.to_arrow(CompatLevel::newest()), - ))); - self.file_info.schema = Arc::new(schema); + let schema = Arc::new(schema); + self.file_info.reader_schema = Some(arrow::Either::Right(schema.clone())); + self.file_info.schema = schema.clone(); self.options.schema.take(); // self.options.schema_overwrite.take(); @@ -260,20 +259,15 @@ impl ScanExec for CsvExec { )? as IdxSize; let schema = infer_file_schema( &get_reader_bytes(&mut std::io::Cursor::new(bytes))?, - popt.separator, + self.options.parse_options.as_ref(), self.options.infer_schema_length, self.options.has_header, self.options.schema_overwrite.as_deref(), self.options.skip_rows, + self.options.skip_lines, self.options.skip_rows_after_header, - popt.comment_prefix.as_ref(), - popt.quote_char, - popt.eol_char, - popt.null_values.as_ref(), - popt.try_parse_dates, self.options.raise_if_empty, &mut self.options.n_threads, - popt.decimal_comma, )? .0; diff --git a/crates/polars-mem-engine/src/planner/lp.rs b/crates/polars-mem-engine/src/planner/lp.rs index 1c8c2e16dcde..e473ea413794 100644 --- a/crates/polars-mem-engine/src/planner/lp.rs +++ b/crates/polars-mem-engine/src/planner/lp.rs @@ -302,13 +302,28 @@ fn create_physical_plan_impl( match scan_type.clone() { #[cfg(feature = "csv")] - FileScan::Csv { options, .. } => Ok(Box::new(executors::CsvExec { - sources, - file_info, - options, - predicate, - file_options, - })), + FileScan::Csv { options, .. } => { + if sources.len() > 1 + && std::env::var("POLARS_NEW_MULTIFILE").as_deref() == Ok("1") + { + Ok(Box::new(executors::MultiScanExec::new( + sources, + file_info, + hive_parts, + predicate, + file_options, + scan_type, + ))) + } else { + Ok(Box::new(executors::CsvExec { + sources, + file_info, + options, + predicate, + file_options, + })) + } + }, #[cfg(feature = "ipc")] FileScan::Ipc { options,