From c4d085222b625b5a6a456b5cb95263beed84fef8 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Mon, 16 Dec 2024 10:49:33 +0100 Subject: [PATCH] feat: Use better error messages when opening files (#20307) --- crates/polars-io/src/partition.rs | 3 ++- crates/polars-io/src/utils/file.rs | 11 ++++++----- crates/polars-mem-engine/src/executors/scan/ipc.rs | 3 ++- crates/polars-python/src/batched_csv.rs | 3 ++- crates/polars-python/src/file.rs | 3 ++- py-polars/tests/unit/io/test_utils.py | 11 ++++++++++- 6 files changed, 24 insertions(+), 10 deletions(-) diff --git a/crates/polars-io/src/partition.rs b/crates/polars-io/src/partition.rs index bea0b9958a63..ce64a4cb45e7 100644 --- a/crates/polars-io/src/partition.rs +++ b/crates/polars-io/src/partition.rs @@ -5,6 +5,7 @@ use std::path::Path; use polars_core::prelude::*; use polars_core::series::IsSorted; use polars_core::POOL; +use polars_utils::create_file; use rayon::prelude::*; use crate::parquet::write::ParquetWriteOptions; @@ -110,7 +111,7 @@ where }; let write_part = |df: DataFrame, path: &Path| { - let f = std::fs::File::create(path)?; + let f = create_file(path)?; file_write_options.write_df_to_file(df, f)?; PolarsResult::Ok(()) }; diff --git a/crates/polars-io/src/utils/file.rs b/crates/polars-io/src/utils/file.rs index fce0c620ed0b..9bf69c3da0f1 100644 --- a/crates/polars-io/src/utils/file.rs +++ b/crates/polars-io/src/utils/file.rs @@ -1,7 +1,9 @@ use std::io::Write; +use std::path::Path; use polars_core::config; -use polars_error::{feature_gated, PolarsError, PolarsResult}; +use polars_error::{feature_gated, PolarsResult}; +use polars_utils::create_file; use polars_utils::mmap::ensure_not_mapped; use crate::cloud::CloudOptions; @@ -24,8 +26,7 @@ pub fn try_get_writeable( } if path.starts_with("file://") { - std::fs::File::create(&path[const { "file://".len() }..]) - .map_err(PolarsError::from)?; + create_file(Path::new(&path[const { "file://".len() }..]))?; } let writer = crate::pl_async::get_runtime() @@ -45,7 +46,7 @@ pub fn try_get_writeable( ) } - std::fs::File::create(&path).map_err(PolarsError::from)?; + create_file(&path)?; let path = std::fs::canonicalize(&path)?; ensure_not_mapped(&path.metadata()?)?; @@ -69,7 +70,7 @@ pub fn try_get_writeable( }) } else { let path = resolve_homedir(&path); - std::fs::File::create(&path).map_err(PolarsError::from)?; + create_file(&path)?; if verbose { eprintln!( diff --git a/crates/polars-mem-engine/src/executors/scan/ipc.rs b/crates/polars-mem-engine/src/executors/scan/ipc.rs index 3fcb4b66e263..c35e1d11441a 100644 --- a/crates/polars-mem-engine/src/executors/scan/ipc.rs +++ b/crates/polars-mem-engine/src/executors/scan/ipc.rs @@ -6,6 +6,7 @@ use polars_io::cloud::CloudOptions; use polars_io::path_utils::is_cloud_url; use polars_io::predicates::apply_predicate; use polars_utils::mmap::MemSlice; +use polars_utils::open_file; use rayon::prelude::*; use super::*; @@ -77,7 +78,7 @@ impl IpcExec { let memslice = match source { ScanSourceRef::Path(path) => { let file = match idx_to_cached_file(index) { - None => std::fs::File::open(path)?, + None => open_file(path)?, Some(f) => f?, }; diff --git a/crates/polars-python/src/batched_csv.rs b/crates/polars-python/src/batched_csv.rs index 06bd35ccaa4d..ad761bfe671a 100644 --- a/crates/polars-python/src/batched_csv.rs +++ b/crates/polars-python/src/batched_csv.rs @@ -5,6 +5,7 @@ use polars::io::csv::read::OwnedBatchedCsvReader; use polars::io::mmap::MmapBytesReader; use polars::io::RowIndex; use polars::prelude::*; +use polars_utils::open_file; use pyo3::prelude::*; use pyo3::pybacked::PyBackedStr; @@ -91,7 +92,7 @@ impl PyBatchedCsv { .collect::>() }); - let file = std::fs::File::open(path).map_err(PyPolarsErr::from)?; + let file = open_file(&path).map_err(PyPolarsErr::from)?; let reader = Box::new(file) as Box; let reader = CsvReadOptions::default() .with_infer_schema_length(infer_schema_length) diff --git a/crates/polars-python/src/file.rs b/crates/polars-python/src/file.rs index e417c59c9f92..29b1df01cecc 100644 --- a/crates/polars-python/src/file.rs +++ b/crates/polars-python/src/file.rs @@ -12,6 +12,7 @@ use std::sync::Arc; use polars::io::mmap::MmapBytesReader; use polars_error::polars_err; use polars_io::cloud::CloudOptions; +use polars_utils::create_file; use polars_utils::mmap::MemSlice; use pyo3::exceptions::PyTypeError; use pyo3::prelude::*; @@ -350,7 +351,7 @@ fn get_either_buffer_or_path( if let Ok(s) = py_f.extract::>() { let file_path = resolve_homedir(&&*s); let f = if write { - File::create(&file_path)? + create_file(&file_path).map_err(PyPolarsErr::from)? } else { polars_utils::open_file(&file_path).map_err(PyPolarsErr::from)? }; diff --git a/py-polars/tests/unit/io/test_utils.py b/py-polars/tests/unit/io/test_utils.py index e115aec4f71f..5a1aad084b71 100644 --- a/py-polars/tests/unit/io/test_utils.py +++ b/py-polars/tests/unit/io/test_utils.py @@ -1,9 +1,10 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import pytest +import polars as pl from polars.io._utils import looks_like_url, parse_columns_arg, parse_row_index_args if TYPE_CHECKING: @@ -60,3 +61,11 @@ def test_parse_row_index_args() -> None: ) def test_looks_like_url(url: str, result: bool) -> None: assert looks_like_url(url) == result + + +@pytest.mark.parametrize( + "scan", [pl.scan_csv, pl.scan_parquet, pl.scan_ndjson, pl.scan_ipc] +) +def test_filename_in_err(scan: Any) -> None: + with pytest.raises(FileNotFoundError, match=r".*does not exist"): + scan("does not exist").collect()