diff --git a/python/core/Cargo.lock b/python/core/Cargo.lock index b8a8b7a31..931413f13 100644 --- a/python/core/Cargo.lock +++ b/python/core/Cargo.lock @@ -968,11 +968,13 @@ dependencies = [ "arrow", "arrow-array", "arrow-buffer", + "bytes", "flatgeobuf", "geo", "geoarrow", "geozero", "numpy", + "parquet", "pyo3", "pyo3-asyncio", "sqlx", diff --git a/python/core/Cargo.toml b/python/core/Cargo.toml index ba6d93df5..4fc53e48b 100644 --- a/python/core/Cargo.toml +++ b/python/core/Cargo.toml @@ -20,7 +20,9 @@ crate-type = ["cdylib"] arrow-array = "50" arrow-buffer = "50" arrow = { version = "50", features = ["ffi"] } +bytes = "1" flatgeobuf = { version = "4", default-features = false } +parquet = "50" pyo3 = { version = "0.20.0", features = [ "abi3-py38", "multiple-pymethods", diff --git a/python/core/python/geoarrow/rust/core/_rust.pyi b/python/core/python/geoarrow/rust/core/_rust.pyi index 0a0680aa7..498f024b0 100644 --- a/python/core/python/geoarrow/rust/core/_rust.pyi +++ b/python/core/python/geoarrow/rust/core/_rust.pyi @@ -1,6 +1,7 @@ from __future__ import annotations -from typing import List, Optional, Self, Sequence, Tuple +from pathlib import Path +from typing import BinaryIO, List, Optional, Self, Sequence, Tuple, Union try: import numpy as np @@ -915,19 +916,35 @@ def from_wkt( ): ... def to_wkb(input: ArrowArrayExportable) -> WKBArray: ... def read_csv( - path: str, geometry_column_name: str, batch_size: Optional[int] = None + file: str | Path | BinaryIO, + geometry_column_name: str, + *, + batch_size: int = 65536, ) -> GeoTable: ... -def read_flatgeobuf(path: str, batch_size: Optional[int] = None) -> GeoTable: ... -def read_geojson(path: str, batch_size: Optional[int] = None) -> GeoTable: ... -def read_geojson_lines(path: str, batch_size: Optional[int] = None) -> GeoTable: ... -def read_parquet(path: str, batch_size: Optional[int] = None) -> GeoTable: ... 
+def read_flatgeobuf( + file: Union[str, Path, BinaryIO], *, batch_size: int = 65536 +) -> GeoTable: ... +def read_geojson( + file: Union[str, Path, BinaryIO], *, batch_size: int = 65536 +) -> GeoTable: ... +def read_geojson_lines( + file: Union[str, Path, BinaryIO], *, batch_size: int = 65536 +) -> GeoTable: ... +def read_parquet(path: str, *, batch_size: int = 65536) -> GeoTable: ... def read_postgis(connection_url: str, sql: str) -> Optional[GeoTable]: ... async def read_postgis_async(connection_url: str, sql: str) -> Optional[GeoTable]: ... -def write_csv(table: ArrowStreamExportable, path: str) -> GeoTable: ... +def write_csv( + table: ArrowStreamExportable, file: str | Path | BinaryIO +) -> GeoTable: ... def write_flatgeobuf( - table: ArrowStreamExportable, path: str, *, write_index: bool = True + table: ArrowStreamExportable, + file: str | Path | BinaryIO, + *, + write_index: bool = True, +) -> GeoTable: ... +def write_geojson( + table: ArrowStreamExportable, file: Union[str, Path, BinaryIO] ) -> GeoTable: ... -def write_geojson(table: ArrowStreamExportable, path: str) -> GeoTable: ... # Interop def from_shapely(
/// batch_size: the number of rows to include in each internal batch of the table. /// /// Returns: /// Table from CSV file. #[pyfunction] +#[pyo3(signature = (file, geometry_column_name, *, batch_size=65536))] pub fn read_csv( - path: String, + py: Python, + file: PyObject, geometry_column_name: &str, - batch_size: Option, + batch_size: usize, ) -> PyGeoArrowResult { - let f = File::open(path).map_err(|err| PyFileNotFoundError::new_err(err.to_string()))?; - let mut reader = BufReader::new(f); - let options = CSVReaderOptions::new(Default::default(), batch_size.unwrap_or(65536)); + let mut reader = file.extract::(py)?; + let options = CSVReaderOptions::new(Default::default(), batch_size); let table = _read_csv(&mut reader, geometry_column_name, options)?; Ok(GeoTable(table)) } @@ -35,15 +33,15 @@ pub fn read_csv( /// /// Args: /// table: the table to write. -/// path: the path to the file. +/// file: the path to the file or a Python file object in binary write mode. /// /// Returns: /// None #[pyfunction] -pub fn write_csv(table: &PyAny, path: String) -> PyGeoArrowResult<()> { +#[pyo3(signature = (table, file))] +pub fn write_csv(py: Python, table: &PyAny, file: PyObject) -> PyGeoArrowResult<()> { let mut table: GeoTable = FromPyObject::extract(table)?; - let f = File::create(path).map_err(|err| PyFileNotFoundError::new_err(err.to_string()))?; - let writer = BufWriter::new(f); + let writer = file.extract::(py)?; _write_csv(&mut table.0, writer)?; Ok(()) } diff --git a/python/core/src/io/file.rs b/python/core/src/io/file.rs new file mode 100644 index 000000000..10f1b6ff7 --- /dev/null +++ b/python/core/src/io/file.rs @@ -0,0 +1,426 @@ +//! Vendored and derived from https://github.com/omerbenamram/pyo3-file under the MIT/Apache 2 +//! 
license + +use pyo3::exceptions::{PyFileNotFoundError, PyTypeError}; +use pyo3::intern; +use pyo3::prelude::*; +use pyo3::types::{PyBytes, PyString, PyType}; + +use std::fs::File; +use std::io::{self, BufRead, BufReader, BufWriter}; +use std::io::{Read, Seek, SeekFrom, Write}; +use std::os::fd::{AsRawFd, RawFd}; +use std::path::Path; + +#[derive(Debug)] +pub struct PyFileLikeObject { + inner: PyObject, + is_text_io: bool, +} + +/// Wraps a `PyObject`, and implements read, seek, and write for it. +impl PyFileLikeObject { + /// Creates an instance of a `PyFileLikeObject` from a `PyObject`. + /// To assert the object has the required methods, + /// instantiate it with `PyFileLikeObject::with_requirements` + pub fn new(object: PyObject) -> PyResult { + Python::with_gil(|py| { + let io = PyModule::import(py, "io")?; + let text_io = io.getattr("TextIOBase")?; + + let text_io_type = text_io.extract::<&PyType>()?; + let is_text_io = object.as_ref(py).is_instance(text_io_type)?; + + Ok(PyFileLikeObject { + inner: object, + is_text_io, + }) + }) + } + + /// Same as `PyFileLikeObject::new`, but validates that the underlying + /// python object has the `read`, `write`, and `seek` methods required by the parameters. + /// Will return a `TypeError` if the object is missing any of the requested methods.
+ pub fn with_requirements( + object: PyObject, + read: bool, + write: bool, + seek: bool, + fileno: bool, + ) -> PyResult { + Python::with_gil(|py| { + if read && object.getattr(py, "read").is_err() { + return Err(PyErr::new::( + "Object does not have a .read() method.", + )); + } + + if seek && object.getattr(py, "seek").is_err() { + return Err(PyErr::new::( + "Object does not have a .seek() method.", + )); + } + + if write && object.getattr(py, "write").is_err() { + return Err(PyErr::new::( + "Object does not have a .write() method.", + )); + } + + if fileno && object.getattr(py, "fileno").is_err() { + return Err(PyErr::new::( + "Object does not have a .fileno() method.", + )); + } + + PyFileLikeObject::new(object) + }) + } + + pub fn name(&self, py: Python) -> Option { + self.inner + .getattr(py, intern!(py, "name")) + .ok() + .map(|x| x.to_string()) + } +} + +impl Clone for PyFileLikeObject { + fn clone(&self) -> Self { + Python::with_gil(|py| { + PyFileLikeObject::new(self.inner.clone_ref(py)).expect("Failed to clone") + }) + } +} + +/// Extracts a string repr from the Python error, and returns an IO error to send back to rust.
+fn pyerr_to_io_err(e: PyErr) -> io::Error { + Python::with_gil(|py| { + let e_as_object: PyObject = e.into_py(py); + + match e_as_object.call_method(py, "__str__", (), None) { + Ok(repr) => match repr.extract::(py) { + Ok(s) => io::Error::new(io::ErrorKind::Other, s), + Err(_e) => io::Error::new(io::ErrorKind::Other, "An unknown error has occurred"), + }, + Err(_) => io::Error::new(io::ErrorKind::Other, "Err doesn't have __str__"), + } + }) +} + +impl Read for PyFileLikeObject { + fn read(&mut self, mut buf: &mut [u8]) -> Result { + Python::with_gil(|py| { + if self.is_text_io { + if buf.len() < 4 { + return Err(io::Error::new( + io::ErrorKind::Other, + "buffer size must be at least 4 bytes", + )); + } + let res = self + .inner + .call_method(py, "read", (buf.len() / 4,), None) + .map_err(pyerr_to_io_err)?; + let pystring: &PyString = res + .downcast(py) + .expect("Expecting to be able to downcast into str from read result."); + let bytes = pystring.to_str().unwrap().as_bytes(); + buf.write_all(bytes)?; + Ok(bytes.len()) + } else { + let res = self + .inner + .call_method(py, "read", (buf.len(),), None) + .map_err(pyerr_to_io_err)?; + let pybytes: &PyBytes = res + .downcast(py) + .expect("Expecting to be able to downcast into bytes from read result."); + let bytes = pybytes.as_bytes(); + buf.write_all(bytes)?; + Ok(bytes.len()) + } + }) + } +} + +impl Write for PyFileLikeObject { + fn write(&mut self, buf: &[u8]) -> Result { + Python::with_gil(|py| { + let arg = if self.is_text_io { + let s = std::str::from_utf8(buf) + .expect("Tried to write non-utf8 data to a TextIO object."); + PyString::new(py, s).to_object(py) + } else { + PyBytes::new(py, buf).to_object(py) + }; + + let number_bytes_written = self + .inner + .call_method(py, "write", (arg,), None) + .map_err(pyerr_to_io_err)?; + + if number_bytes_written.is_none(py) { + return Err(io::Error::new( + io::ErrorKind::Other, + "write() returned None, expected number of bytes written", + )); + } + + 
number_bytes_written.extract(py).map_err(pyerr_to_io_err) + }) + } + + fn flush(&mut self) -> Result<(), io::Error> { + Python::with_gil(|py| { + self.inner + .call_method(py, "flush", (), None) + .map_err(pyerr_to_io_err)?; + + Ok(()) + }) + } +} + +impl Seek for PyFileLikeObject { + fn seek(&mut self, pos: SeekFrom) -> Result { + Python::with_gil(|py| { + let (whence, offset) = match pos { + SeekFrom::Start(i) => (0, i as i64), + SeekFrom::Current(i) => (1, i), + SeekFrom::End(i) => (2, i), + }; + + let new_position = self + .inner + .call_method(py, "seek", (offset, whence), None) + .map_err(pyerr_to_io_err)?; + + new_position.extract(py).map_err(pyerr_to_io_err) + }) + } +} + +impl AsRawFd for PyFileLikeObject { + fn as_raw_fd(&self) -> RawFd { + Python::with_gil(|py| { + let fileno = self + .inner + .getattr(py, "fileno") + .expect("Object does not have a fileno() method."); + + let fd = fileno + .call(py, (), None) + .expect("fileno() method did not return a file descriptor."); + + fd.extract(py).expect("File descriptor is not an integer.") + }) + } +} + +// impl Length for PyFileLikeObject { +// fn len(&self) -> u64 { +// let len = self.seek(SeekFrom::End(0)).unwrap(); +// len +// } +// } + +// impl ChunkReader for PyFileLikeObject { +// type T = Self; + +// fn get_read(&self, start: u64) -> parquet::errors::Result { +// self.seek(SeekFrom::Start(start))?; +// Ok(self.clone()) +// } + +// fn get_bytes(&self, start: u64, length: usize) -> parquet::errors::Result { +// todo!() +// } +// } + +/// Implements Read + Seek +pub enum BinaryFileReader { + String(BufReader), + FileLike(BufReader), +} + +impl Read for BinaryFileReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self { + Self::String(reader) => reader.read(buf), + Self::FileLike(reader) => reader.read(buf), + } + } +} + +impl Seek for BinaryFileReader { + fn seek(&mut self, pos: SeekFrom) -> Result { + match self { + Self::String(reader) => reader.seek(pos), + Self::FileLike(reader) 
=> reader.seek(pos), + } + } +} + +impl BufRead for BinaryFileReader { + fn fill_buf(&mut self) -> io::Result<&[u8]> { + match self { + Self::String(reader) => reader.fill_buf(), + Self::FileLike(reader) => reader.fill_buf(), + } + } + + fn consume(&mut self, amt: usize) { + match self { + Self::String(reader) => reader.consume(amt), + Self::FileLike(reader) => reader.consume(amt), + } + } +} + +// impl Length for BinaryFileReader { +// fn len(&self) -> u64 { +// match self { +// Self::String(reader) => reader.get_ref().len(), +// Self::FileLike(reader) => reader.get_ref().len(), +// } +// } +// } + +// impl ChunkReader for BinaryFileReader { +// type T = Self; + +// fn get_read(&self, start: u64) -> parquet::errors::Result { +// match self { +// Self::String(reader) => Ok(BinaryFileReader::String(reader.get_ref().get_read(start)?)), +// Self::FileLike(reader) => Ok(BinaryFileReader::FileLike(BufReader::new( +// reader.get_ref().get_read(start)?, +// ))), +// } +// } + +// fn get_bytes(&self, start: u64, length: usize) -> parquet::errors::Result { +// match self { +// Self::String(reader) => reader.get_ref().get_bytes(start, length), +// Self::FileLike(reader) => reader.get_ref().get_bytes(start, length), +// } +// } +// } + +impl<'a> FromPyObject<'a> for BinaryFileReader { + fn extract(ob: &'a PyAny) -> PyResult { + if let Ok(string_ref) = ob.downcast::() { + let path = string_ref.to_string_lossy().to_string(); + let reader = BufReader::new( + File::open(path).map_err(|err| PyFileNotFoundError::new_err(err.to_string()))?, + ); + return Ok(Self::String(reader)); + } + + Python::with_gil(|py| { + let module = PyModule::import(py, intern!(py, "pathlib"))?; + let path = module.getattr(intern!(py, "Path"))?; + let path_type = path.extract::<&PyType>()?; + if ob.is_instance(path_type)? 
{ + let path = ob.to_string(); + let reader = BufReader::new( + File::open(path) + .map_err(|err| PyFileNotFoundError::new_err(err.to_string()))?, + ); + return Ok(Self::String(reader)); + } + + match PyFileLikeObject::with_requirements(ob.into(), true, false, true, false) { + Ok(f) => Ok(Self::FileLike(BufReader::new(f))), + Err(e) => Err(e), + } + }) + } +} + +pub enum BinaryFileWriter { + String(String, BufWriter), + FileLike(BufWriter), +} + +impl Write for BinaryFileWriter { + fn write(&mut self, buf: &[u8]) -> io::Result { + match self { + Self::String(_path, writer) => writer.write(buf), + Self::FileLike(writer) => writer.write(buf), + } + } + + fn flush(&mut self) -> io::Result<()> { + match self { + Self::String(_path, writer) => writer.flush(), + Self::FileLike(writer) => writer.flush(), + } + } +} + +impl Seek for BinaryFileWriter { + fn seek(&mut self, pos: SeekFrom) -> Result { + match self { + Self::String(_path, writer) => writer.seek(pos), + Self::FileLike(writer) => writer.seek(pos), + } + } +} + +impl BinaryFileWriter { + pub fn file_stem(&self, py: Python) -> Option { + match self { + Self::String(path, _writer) => { + let path = Path::new(path); + Some(path.file_stem()?.to_str()?.to_string()) + } + Self::FileLike(writer) => { + let name = writer.get_ref().name(py)?; + let path = Path::new(&name).file_stem()?; + Some(path.to_str()?.to_string()) + } + } + } + + pub fn file_name(&self, py: Python) -> Option { + match self { + Self::String(path, _writer) => { + let path = Path::new(path); + Some(path.file_name()?.to_str()?.to_string()) + } + Self::FileLike(writer) => writer.get_ref().name(py), + } + } +} + +impl<'a> FromPyObject<'a> for BinaryFileWriter { + fn extract(ob: &'a PyAny) -> PyResult { + if let Ok(string_ref) = ob.downcast::() { + let path = string_ref.to_string_lossy().to_string(); + let writer = BufWriter::new( + File::create(&path).map_err(|err| PyFileNotFoundError::new_err(err.to_string()))?, + ); + return Ok(Self::String(path, 
writer)); + } + + Python::with_gil(|py| { + let module = PyModule::import(py, intern!(py, "pathlib"))?; + let path = module.getattr(intern!(py, "Path"))?; + let path_type = path.extract::<&PyType>()?; + if ob.is_instance(path_type)? { + let path = ob.to_string(); + let writer = BufWriter::new( + File::create(&path) + .map_err(|err| PyFileNotFoundError::new_err(err.to_string()))?, + ); + return Ok(Self::String(path, writer)); + } + + match PyFileLikeObject::with_requirements(ob.into(), false, true, true, false) { + Ok(f) => Ok(Self::FileLike(BufWriter::new(f))), + Err(e) => Err(e), + } + }) + } +} diff --git a/python/core/src/io/flatgeobuf.rs b/python/core/src/io/flatgeobuf.rs index 970d2401a..debf286e5 100644 --- a/python/core/src/io/flatgeobuf.rs +++ b/python/core/src/io/flatgeobuf.rs @@ -1,26 +1,28 @@ -use std::fs::File; -use std::io::{BufReader, BufWriter}; - use crate::error::PyGeoArrowResult; +use crate::io::file::{BinaryFileReader, BinaryFileWriter}; use crate::table::GeoTable; use flatgeobuf::FgbWriterOptions; use geoarrow::io::flatgeobuf::read_flatgeobuf as _read_flatgeobuf; use geoarrow::io::flatgeobuf::write_flatgeobuf_with_options as _write_flatgeobuf; -use pyo3::exceptions::PyFileNotFoundError; use pyo3::prelude::*; /// Read a FlatGeobuf file from a path on disk into a GeoTable. /// /// Args: -/// path: the path to the file +/// file: the path to the file or a Python file object in binary read mode. +/// batch_size: the number of rows to include in each internal batch of the table. /// /// Returns: /// Table from FlatGeobuf file. 
#[pyfunction] -pub fn read_flatgeobuf(path: String, batch_size: Option) -> PyGeoArrowResult { - let f = File::open(path).map_err(|err| PyFileNotFoundError::new_err(err.to_string()))?; - let mut reader = BufReader::new(f); - let table = _read_flatgeobuf(&mut reader, Default::default(), batch_size)?; +#[pyo3(signature = (file, *, batch_size=65536))] +pub fn read_flatgeobuf( + py: Python, + file: PyObject, + batch_size: usize, +) -> PyGeoArrowResult { + let mut reader = file.extract::(py)?; + let table = _read_flatgeobuf(&mut reader, Default::default(), Some(batch_size))?; Ok(GeoTable(table)) } @@ -28,20 +30,26 @@ pub fn read_flatgeobuf(path: String, batch_size: Option) -> PyGeoArrowRes /// /// Args: /// table: the table to write. -/// path: the path to the file. +/// file: the path to the file or a Python file object in binary write mode. /// /// Returns: /// None #[pyfunction] -#[pyo3(signature = (table, path, *, write_index=true))] -pub fn write_flatgeobuf(table: &PyAny, path: String, write_index: bool) -> PyGeoArrowResult<()> { +#[pyo3(signature = (table, file, *, write_index=true))] +pub fn write_flatgeobuf( + py: Python, + table: &PyAny, + file: PyObject, + write_index: bool, +) -> PyGeoArrowResult<()> { let mut table: GeoTable = FromPyObject::extract(table)?; - let f = File::create(path).map_err(|err| PyFileNotFoundError::new_err(err.to_string()))?; - let writer = BufWriter::new(f); + let writer = file.extract::(py)?; + let name = writer.file_stem(py); + let options = FgbWriterOptions { write_index, ..Default::default() }; - _write_flatgeobuf(&mut table.0, writer, "", options)?; + _write_flatgeobuf(&mut table.0, writer, name.as_deref().unwrap_or(""), options)?; Ok(()) } diff --git a/python/core/src/io/geojson.rs b/python/core/src/io/geojson.rs index 6b2663b8d..4dad03605 100644 --- a/python/core/src/io/geojson.rs +++ b/python/core/src/io/geojson.rs @@ -1,26 +1,23 @@ -use std::fs::File; -use std::io::{BufReader, BufWriter}; - use crate::error::PyGeoArrowResult; 
+use crate::io::file::{BinaryFileReader, BinaryFileWriter}; use crate::table::GeoTable; use geoarrow::io::geojson::read_geojson as _read_geojson; use geoarrow::io::geojson::write_geojson as _write_geojson; -use pyo3::exceptions::PyFileNotFoundError; use pyo3::prelude::*; /// Read a GeoJSON file from a path on disk into a GeoTable. /// /// Args: -/// path: the path to the file +/// file: the path to the file or a Python file object in binary read mode. /// batch_size: the number of rows to include in each internal batch of the table. /// /// Returns: /// Table from GeoJSON file. #[pyfunction] -pub fn read_geojson(path: String, batch_size: Option) -> PyGeoArrowResult { - let f = File::open(path).map_err(|err| PyFileNotFoundError::new_err(err.to_string()))?; - let mut reader = BufReader::new(f); - let table = _read_geojson(&mut reader, batch_size)?; +#[pyo3(signature = (file, *, batch_size=65536))] +pub fn read_geojson(py: Python, file: PyObject, batch_size: usize) -> PyGeoArrowResult { + let mut reader = file.extract::(py)?; + let table = _read_geojson(&mut reader, Some(batch_size))?; Ok(GeoTable(table)) } @@ -31,15 +28,14 @@ pub fn read_geojson(path: String, batch_size: Option) -> PyGeoArrowResult /// /// Args: /// table: the table to write. -/// path: the path to the file. +/// file: the path to the file or a Python file object in binary write mode. 
/// /// Returns: /// None #[pyfunction] -pub fn write_geojson(table: &PyAny, path: String) -> PyGeoArrowResult<()> { +pub fn write_geojson(py: Python, table: &PyAny, file: PyObject) -> PyGeoArrowResult<()> { let mut table: GeoTable = FromPyObject::extract(table)?; - let f = File::create(path).map_err(|err| PyFileNotFoundError::new_err(err.to_string()))?; - let writer = BufWriter::new(f); + let writer = file.extract::(py)?; _write_geojson(&mut table.0, writer)?; Ok(()) } diff --git a/python/core/src/io/geojson_lines.rs b/python/core/src/io/geojson_lines.rs index 0525de8c1..efa8621b6 100644 --- a/python/core/src/io/geojson_lines.rs +++ b/python/core/src/io/geojson_lines.rs @@ -1,10 +1,7 @@ -use std::fs::File; -use std::io::BufReader; - use crate::error::PyGeoArrowResult; +use crate::io::file::BinaryFileReader; use crate::table::GeoTable; use geoarrow::io::geojson_lines::read_geojson_lines as _read_geojson_lines; -use pyo3::exceptions::PyFileNotFoundError; use pyo3::prelude::*; /// Read a GeoJSON Lines file from a path on disk into a GeoTable. @@ -13,14 +10,18 @@ use pyo3::prelude::*; /// each Feature. /// /// Args: -/// path: the path to the file +/// file: the path to the file or a Python file object in binary read mode. /// /// Returns: /// Table from GeoJSON file. 
#[pyfunction] -pub fn read_geojson_lines(path: String, batch_size: Option) -> PyGeoArrowResult { - let f = File::open(path).map_err(|err| PyFileNotFoundError::new_err(err.to_string()))?; - let mut reader = BufReader::new(f); - let table = _read_geojson_lines(&mut reader, batch_size)?; +#[pyo3(signature = (file, *, batch_size=65536))] +pub fn read_geojson_lines( + py: Python, + file: PyObject, + batch_size: usize, +) -> PyGeoArrowResult { + let mut reader = file.extract::(py)?; + let table = _read_geojson_lines(&mut reader, Some(batch_size))?; Ok(GeoTable(table)) } diff --git a/python/core/src/io/mod.rs b/python/core/src/io/mod.rs index 59fb3a212..d717384a4 100644 --- a/python/core/src/io/mod.rs +++ b/python/core/src/io/mod.rs @@ -2,6 +2,7 @@ pub mod csv; pub mod ewkb; +pub mod file; pub mod flatgeobuf; pub mod geojson; pub mod geojson_lines; diff --git a/python/core/src/io/parquet.rs b/python/core/src/io/parquet.rs index c8b3fe87f..70980724f 100644 --- a/python/core/src/io/parquet.rs +++ b/python/core/src/io/parquet.rs @@ -11,14 +11,16 @@ use pyo3::prelude::*; /// /// Args: /// path: the path to the file +/// batch_size: the number of rows to include in each internal batch of the table. /// /// Returns: /// Table from GeoParquet file. 
#[pyfunction] -pub fn read_parquet(path: String, batch_size: Option) -> PyGeoArrowResult { +#[pyo3(signature = (path, *, batch_size=65536))] +pub fn read_parquet(path: String, batch_size: usize) -> PyGeoArrowResult { let file = File::open(path).map_err(|err| PyFileNotFoundError::new_err(err.to_string()))?; - let options = GeoParquetReaderOptions::new(batch_size.unwrap_or(65536), Default::default()); + let options = GeoParquetReaderOptions::new(batch_size, Default::default()); let table = _read_geoparquet(file, options)?; Ok(GeoTable(table)) } diff --git a/src/io/flatgeobuf/writer.rs b/src/io/flatgeobuf/writer.rs index d63ab840b..4e2ee76ed 100644 --- a/src/io/flatgeobuf/writer.rs +++ b/src/io/flatgeobuf/writer.rs @@ -17,6 +17,8 @@ pub fn write_flatgeobuf( } /// Write a GeoTable to a FlatGeobuf file with specific writer options. +/// +/// Note: the `name` argument is what OGR observes as the layer name of the file. pub fn write_flatgeobuf_with_options( table: &mut GeoTable, writer: W,