From 5e8b117f70a8efb6f9fabaa9305b308c5d0f57e6 Mon Sep 17 00:00:00 2001 From: Nico Wagner Date: Tue, 28 May 2024 22:17:59 +0200 Subject: [PATCH 1/5] Update `clap` from 4.4 to 4.5 (#786) Signed-off-by: Nico Wagner --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 128b75bfc..a45ff4d57 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,8 +31,8 @@ pica-utils = { version = "0.24", path = "./crates/pica-utils" } anyhow = "1.0" bstr = "1.9" chrono = { version = "0.4", default-features = false } -clap = "4.4" -clap_complete = "4.4" +clap = "4.5" +clap_complete = "4.5" csv = "1.3" flate2 = "1.0" polars = { version = "0.38", features = ["ipc", "decompress", "performant"] } From e3ccc6efe26e0a74ea137d9b0aea4065bc8a6103 Mon Sep 17 00:00:00 2001 From: Nico Wagner Date: Tue, 28 May 2024 22:23:44 +0200 Subject: [PATCH 2/5] Remove unused dev dependencies (#787) Signed-off-by: Nico Wagner --- crates/pica-toolkit/Cargo.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/crates/pica-toolkit/Cargo.toml b/crates/pica-toolkit/Cargo.toml index d01f40de2..d4a9b945a 100644 --- a/crates/pica-toolkit/Cargo.toml +++ b/crates/pica-toolkit/Cargo.toml @@ -34,11 +34,8 @@ toml = { workspace = true } unicode-normalization = { version = "0.1" } [dev-dependencies] -assert_cmd = "2.0" -predicates = "3.0" quickcheck = "1.0" quickcheck_macros = "1.0" -tempfile = "3.8" trycmd = "0.15" [[bin]] From 703f3171625ca8fdabc351d10e2c890dd7c6f9eb Mon Sep 17 00:00:00 2001 From: Nico Wagner Date: Tue, 28 May 2024 22:41:29 +0200 Subject: [PATCH 3/5] Bump MSRV to `1.76.0` (#788) Signed-off-by: Nico Wagner --- .github/workflows/ci.yml | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a648db10c..fd7939255 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,15 +33,15 @@ jobs: matrix: target: - { os: ubuntu-latest, toolchain: stable, triple: x86_64-unknown-linux-gnu } - - { os: ubuntu-latest, toolchain: 1.74.1, triple: x86_64-unknown-linux-gnu } + - { os: ubuntu-latest, toolchain: 1.76, triple: x86_64-unknown-linux-gnu } - { os: ubuntu-latest, toolchain: beta, triple: x86_64-unknown-linux-gnu } - { os: ubuntu-latest, toolchain: nightly, triple: x86_64-unknown-linux-gnu } - { os: macos-latest, toolchain: stable, triple: x86_64-apple-darwin } - - { os: macos-latest, toolchain: 1.74.1, triple: x86_64-apple-darwin } + - { os: macos-latest, toolchain: 1.76, triple: x86_64-apple-darwin } - { os: windows-latest, toolchain: stable, triple: x86_64-pc-windows-gnu } - - { os: windows-latest, toolchain: 1.74.1, triple: x86_64-pc-windows-gnu } + - { os: windows-latest, toolchain: 1.76, triple: x86_64-pc-windows-gnu } - { os: windows-latest, toolchain: stable, triple: i686-pc-windows-msvc } - - { os: windows-latest, toolchain: 1.74.1, triple: i686-pc-windows-msvc } + - { os: windows-latest, toolchain: 1.76, triple: i686-pc-windows-msvc } runs-on: ${{ matrix.target.os }} steps: - uses: actions/checkout@v4 diff --git a/Cargo.toml b/Cargo.toml index a45ff4d57..02fb24281 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ license = "EUPL-1.2" readme = "./README.md" keywords = ["PICA+", "code4lib"] edition = "2021" -rust-version = "1.74.1" +rust-version = "1.76" [workspace.dependencies] pica-matcher = { version = "0.24", path = "./crates/pica-matcher" } From 1fc468226cd8f0bbc4a736819c007ed298bfeba5 Mon Sep 17 00:00:00 2001 From: Nico Wagner Date: Tue, 28 May 2024 22:43:57 +0200 Subject: [PATCH 4/5] Run `cargo-outdated` for all workspace members (#789) Signed-off-by: Nico Wagner --- .github/workflows/ci.yml | 3 ++- .github/workflows/daily.yml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fd7939255..bb9df39e9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -183,7 +183,8 @@ jobs: - uses: dtolnay/rust-toolchain@stable - uses: dtolnay/install@cargo-outdated - uses: Swatinem/rust-cache@v2 - - run: cargo outdated --exit-code 1 + - run: cargo update + - run: cargo outdated --workspace --exit-code 1 miri: name: miri diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index 81bbae7be..1ffc0e04d 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -25,4 +25,5 @@ jobs: - uses: dtolnay/rust-toolchain@stable - uses: dtolnay/install@cargo-outdated - uses: Swatinem/rust-cache@v2 - - run: cargo outdated --exit-code 1 + - run: cargo update + - run: cargo outdated --workspace --exit-code 1 From 3efe6465762352ab6e8c4234283779b89bd9c3db Mon Sep 17 00:00:00 2001 From: Nico Wagner Date: Wed, 29 May 2024 06:12:19 +0200 Subject: [PATCH 5/5] Update `polars` from 0.38 to 0.40 (#785) Signed-off-by: Nico Wagner --- Cargo.toml | 2 +- crates/pica-toolkit/src/filter_list.rs | 43 +++++++++++++------------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 02fb24281..21c40fc36 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,7 @@ clap = "4.5" clap_complete = "4.5" csv = "1.3" flate2 = "1.0" -polars = { version = "0.38", features = ["ipc", "decompress", "performant"] } +polars = { version = "0.40", features = ["ipc", "decompress", "performant"] } quickcheck = "1.0" rand = "0.8" regex = "1.10" diff --git a/crates/pica-toolkit/src/filter_list.rs b/crates/pica-toolkit/src/filter_list.rs index 66b5ec4a1..02cd92e15 100644 --- a/crates/pica-toolkit/src/filter_list.rs +++ b/crates/pica-toolkit/src/filter_list.rs @@ -93,35 +93,36 @@ impl FilterList { ) -> Result { let extension = path.extension().and_then(OsStr::to_str); let path_str = path.to_str().unwrap_or_default(); + let path = path.to_owned(); + + let options = CsvReadOptions::default() + .with_has_header(true) + .with_infer_schema_length(Some(0)); match extension { Some("ipc" | "arrow" | "feather") => { Ok(IpcReader::new(File::open(path)?) - .memory_mapped(false) + .memory_mapped(None) .finish()?) } - Some("csv") => Ok(CsvReader::from_path(path)? - .infer_schema(Some(0)) - .has_header(true) + Some("csv") => Ok(options + .try_into_reader_with_file_path(Some(path))? .finish()?), - Some("gz") if path_str.ends_with(".csv.gz") => { - Ok(CsvReader::from_path(path)? - .infer_schema(Some(0)) - .has_header(true) - .finish()?) - } - Some("tsv") => Ok(CsvReader::from_path(path)? - .with_separator(b'\t') - .has_header(true) - .infer_schema(Some(0)) + Some("gz") if path_str.ends_with(".csv.gz") => Ok(options + .try_into_reader_with_file_path(Some(path))? + .finish()?), + Some("tsv") => Ok(options + .with_parse_options( + CsvParseOptions::default().with_separator(b'\t'), + ) + .try_into_reader_with_file_path(Some(path))? + .finish()?), + Some("gz") if path_str.ends_with(".tsv.gz") => Ok(options + .with_parse_options( + CsvParseOptions::default().with_separator(b'\t'), + ) + .try_into_reader_with_file_path(Some(path))? .finish()?), - Some("gz") if path_str.ends_with(".tsv.gz") => { - Ok(CsvReader::from_path(path)? - .with_separator(b'\t') - .infer_schema(Some(0)) - .has_header(true) - .finish()?) - } _ => { Err(FilterListError::InvalidFileFormat(path_str.into())) }