From 3efe6465762352ab6e8c4234283779b89bd9c3db Mon Sep 17 00:00:00 2001 From: Nico Wagner Date: Wed, 29 May 2024 06:12:19 +0200 Subject: [PATCH] Update `polars` from 0.38 to 0.40 (#785) Signed-off-by: Nico Wagner --- Cargo.toml | 2 +- crates/pica-toolkit/src/filter_list.rs | 43 +++++++++++++------------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 02fb24281..21c40fc36 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,7 @@ clap = "4.5" clap_complete = "4.5" csv = "1.3" flate2 = "1.0" -polars = { version = "0.38", features = ["ipc", "decompress", "performant"] } +polars = { version = "0.40", features = ["ipc", "decompress", "performant"] } quickcheck = "1.0" rand = "0.8" regex = "1.10" diff --git a/crates/pica-toolkit/src/filter_list.rs b/crates/pica-toolkit/src/filter_list.rs index 66b5ec4a1..02cd92e15 100644 --- a/crates/pica-toolkit/src/filter_list.rs +++ b/crates/pica-toolkit/src/filter_list.rs @@ -93,35 +93,36 @@ impl FilterList { ) -> Result { let extension = path.extension().and_then(OsStr::to_str); let path_str = path.to_str().unwrap_or_default(); + let path = path.to_owned(); + + let options = CsvReadOptions::default() + .with_has_header(true) + .with_infer_schema_length(Some(0)); match extension { Some("ipc" | "arrow" | "feather") => { Ok(IpcReader::new(File::open(path)?) - .memory_mapped(false) + .memory_mapped(None) .finish()?) } - Some("csv") => Ok(CsvReader::from_path(path)? - .infer_schema(Some(0)) - .has_header(true) + Some("csv") => Ok(options + .try_into_reader_with_file_path(Some(path))? .finish()?), - Some("gz") if path_str.ends_with(".csv.gz") => { - Ok(CsvReader::from_path(path)? - .infer_schema(Some(0)) - .has_header(true) - .finish()?) - } - Some("tsv") => Ok(CsvReader::from_path(path)? - .with_separator(b'\t') - .has_header(true) - .infer_schema(Some(0)) + Some("gz") if path_str.ends_with(".csv.gz") => Ok(options + .try_into_reader_with_file_path(Some(path))? + .finish()?), + Some("tsv") => Ok(options + .with_parse_options( + CsvParseOptions::default().with_separator(b'\t'), + ) + .try_into_reader_with_file_path(Some(path))? + .finish()?), + Some("gz") if path_str.ends_with(".tsv.gz") => Ok(options + .with_parse_options( + CsvParseOptions::default().with_separator(b'\t'), + ) + .try_into_reader_with_file_path(Some(path))? .finish()?), - Some("gz") if path_str.ends_with(".tsv.gz") => { - Ok(CsvReader::from_path(path)? - .with_separator(b'\t') - .infer_schema(Some(0)) - .has_header(true) - .finish()?) - } _ => { Err(FilterListError::InvalidFileFormat(path_str.into())) }