Skip to content

Commit

Permalink
Accept python file-like objects for IO (#441)
Browse files Browse the repository at this point in the history
closes #348

- Should move `BinaryFileInput` into `file.rs`
- Update input arg in rust and pyi file
- Add writer option
  • Loading branch information
kylebarron authored Jan 23, 2024
1 parent 28f606d commit 835db98
Show file tree
Hide file tree
Showing 11 changed files with 517 additions and 62 deletions.
2 changes: 2 additions & 0 deletions python/core/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions python/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ crate-type = ["cdylib"]
arrow-array = "50"
arrow-buffer = "50"
arrow = { version = "50", features = ["ffi"] }
bytes = "1"
flatgeobuf = { version = "4", default-features = false }
parquet = "50"
pyo3 = { version = "0.20.0", features = [
"abi3-py38",
"multiple-pymethods",
Expand Down
35 changes: 26 additions & 9 deletions python/core/python/geoarrow/rust/core/_rust.pyi
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from typing import List, Optional, Self, Sequence, Tuple
from pathlib import Path
from typing import BinaryIO, List, Optional, Self, Sequence, Tuple, Union

try:
import numpy as np
Expand Down Expand Up @@ -915,19 +916,35 @@ def from_wkt(
): ...
# Stub: takes any ArrowArrayExportable and is typed to return a WKBArray.
def to_wkb(input: ArrowArrayExportable) -> WKBArray: ...
# Read a CSV source into a GeoTable.
#
# `file` is a filesystem path (str or Path) or a Python file object opened in
# binary read mode. `batch_size` is keyword-only and defaults to 65536, which
# mirrors the Rust binding's
# `#[pyo3(signature = (file, geometry_column_name, *, batch_size=65536))]`.
def read_csv(
    file: Union[str, Path, BinaryIO],
    geometry_column_name: str,
    *,
    batch_size: int = 65536,
) -> GeoTable: ...
# File readers. Each accepts a filesystem path (str or Path) or a Python file
# object opened in binary mode, and is typed to return a GeoTable.
# NOTE(review): read_csv makes `batch_size` keyword-only; confirm against the
# Rust bindings whether these readers should do the same.
def read_flatgeobuf(
    file: Union[str, Path, BinaryIO], batch_size: int = 65536
) -> GeoTable: ...
def read_geojson(
    file: Union[str, Path, BinaryIO], batch_size: int = 65536
) -> GeoTable: ...
def read_geojson_lines(
    file: Union[str, Path, BinaryIO], batch_size: int = 65536
) -> GeoTable: ...

# read_parquet still takes only a string path in this commit.
def read_parquet(path: str, batch_size: int = 65536) -> GeoTable: ...
# Synchronous stub: takes a connection URL and a SQL string; the Optional
# return type means callers must handle a None result.
def read_postgis(connection_url: str, sql: str) -> Optional[GeoTable]: ...
# Coroutine variant with the same parameters; awaiting it yields
# Optional[GeoTable], so callers must handle a None result.
async def read_postgis_async(connection_url: str, sql: str) -> Optional[GeoTable]: ...
# Write a table to CSV.
#
# `file` is a filesystem path (str or Path) or a Python file object opened in
# binary write mode. The Rust binding returns `PyGeoArrowResult<()>` and its
# docs say "Returns: None", so the stub is annotated `-> None`.
def write_csv(
    table: ArrowStreamExportable, file: Union[str, Path, BinaryIO]
) -> None: ...
# Write a table to FlatGeobuf.
#
# `file` is a filesystem path (str or Path) or a Python file object opened in
# binary write mode; `write_index` is keyword-only and defaults to True.
# NOTE(review): the Rust write_csv binding returns None; confirm whether this
# writer really returns a GeoTable or should be annotated `-> None` as well.
def write_flatgeobuf(
    table: ArrowStreamExportable,
    file: Union[str, Path, BinaryIO],
    *,
    write_index: bool = True,
) -> GeoTable: ...
# Write a table to GeoJSON.
#
# `file` is a filesystem path (str or Path) or a Python file object opened in
# binary write mode.
# NOTE(review): the Rust write_csv binding returns None; confirm whether this
# writer really returns a GeoTable or should be annotated `-> None` as well.
def write_geojson(
    table: ArrowStreamExportable, file: Union[str, Path, BinaryIO]
) -> GeoTable: ...

# Interop
def from_shapely(
Expand Down
26 changes: 12 additions & 14 deletions python/core/src/io/csv.rs
Original file line number Diff line number Diff line change
@@ -1,32 +1,30 @@
use std::fs::File;
use std::io::{BufReader, BufWriter};

use crate::error::PyGeoArrowResult;
use crate::io::file::{BinaryFileReader, BinaryFileWriter};
use crate::table::GeoTable;
use geoarrow::io::csv::read_csv as _read_csv;
use geoarrow::io::csv::write_csv as _write_csv;
use geoarrow::io::csv::CSVReaderOptions;
use pyo3::exceptions::PyFileNotFoundError;
use pyo3::prelude::*;

/// Read a CSV file into a GeoTable.
///
/// Args:
///     file: the path to the file or a Python file object in binary read mode.
///     geometry_column_name: the name of the geometry column within the CSV.
///     batch_size: the number of rows to include in each internal batch of the table.
///         Keyword-only; defaults to 65536.
///
/// Returns:
///     Table from CSV file.
#[pyfunction]
#[pyo3(signature = (file, geometry_column_name, *, batch_size=65536))]
pub fn read_csv(
    py: Python,
    file: PyObject,
    geometry_column_name: &str,
    batch_size: usize,
) -> PyGeoArrowResult<GeoTable> {
    // Conversion of the Python argument is delegated to `BinaryFileReader`
    // (defined in `crate::io::file`); a failed extraction propagates via `?`.
    let mut reader = file.extract::<BinaryFileReader>(py)?;
    // The default now lives in the pyo3 signature, so `batch_size` is used as-is.
    let options = CSVReaderOptions::new(Default::default(), batch_size);
    let table = _read_csv(&mut reader, geometry_column_name, options)?;
    Ok(GeoTable(table))
}
Expand All @@ -35,15 +33,15 @@ pub fn read_csv(
///
/// Args:
/// table: the table to write.
/// path: the path to the file.
/// file: the path to the file or a Python file object in binary write mode.
///
/// Returns:
/// None
#[pyfunction]
pub fn write_csv(table: &PyAny, path: String) -> PyGeoArrowResult<()> {
#[pyo3(signature = (table, file))]
pub fn write_csv(py: Python, table: &PyAny, file: PyObject) -> PyGeoArrowResult<()> {
let mut table: GeoTable = FromPyObject::extract(table)?;
let f = File::create(path).map_err(|err| PyFileNotFoundError::new_err(err.to_string()))?;
let writer = BufWriter::new(f);
let writer = file.extract::<BinaryFileWriter>(py)?;
_write_csv(&mut table.0, writer)?;
Ok(())
}
Loading

0 comments on commit 835db98

Please sign in to comment.