diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index b14f930..0399d4d 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -48,7 +48,7 @@ jobs: - name: Install the package run: | set -e - python -m pip install cog3pio[tests] --find-links dist --force-reinstall + python -m pip install cog3pio[benchmark,tests] --find-links dist --force-reinstall python -m pip list # Run the benchmark tests diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bd85433..979f012 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -70,7 +70,7 @@ jobs: runs-on: windows-2022 strategy: matrix: - target: [x64, x86] + target: [x64] steps: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 @@ -89,7 +89,6 @@ jobs: name: wheels-windows-${{ matrix.target }} path: dist - name: pytest - if: ${{ !startsWith(matrix.target, 'aarch64') }} shell: bash run: | set -e diff --git a/Cargo.lock b/Cargo.lock index 3393c06..846734a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -369,8 +369,7 @@ dependencies = [ [[package]] name = "geo" version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f811f663912a69249fa620dcd2a005db7254529da2d8a0b23942e81f47084501" +source = "git+https://github.com/georust/geo.git?rev=481196b4e50a488442b3919e02496ad909fc5412#481196b4e50a488442b3919e02496ad909fc5412" dependencies = [ "earcutr", "float_next_after", diff --git a/Cargo.toml b/Cargo.toml index 9066972..2e9a722 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,7 @@ crate-type = ["cdylib"] [dependencies] bytes = "1.5.0" -geo = "0.28.0" +geo = { git = "https://github.com/georust/geo.git", version = "0.28.0", rev = "481196b4e50a488442b3919e02496ad909fc5412" } ndarray = "0.15.6" numpy = "0.21.0" object_store = { version = "0.9.0", features = ["http"] } diff --git a/README.md b/README.md index 220f254..0e3c3c5 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,7 @@ async fn main() { Cursor::new(bytes) }; + // Read GeoTIFF into an ndarray::Array let arr: Array3 = read_geotiff(stream).unwrap(); assert_eq!(arr.dim(), (1, 549, 549)); assert_eq!(arr[[0, 500, 500]], 0.13482364); @@ -61,10 +62,13 @@ async fn main() { ### Python +#### NumPy + ```python import numpy as np from cog3pio import read_geotiff +# Read GeoTIFF into a numpy array array: np.ndarray = read_geotiff( path="https://github.com/cogeotiff/rio-tiler/raw/6.4.0/tests/fixtures/cog_nodata_nan.tif" ) @@ -72,6 +76,20 @@ assert array.shape == (1, 549, 549) # bands, height, width assert array.dtype == "float32" ``` +#### Xarray + +```python +import xarray as xr + +# Read GeoTIFF into an xarray.DataArray +dataarray: xr.DataArray = xr.open_dataarray( + filename_or_obj="https://github.com/cogeotiff/rio-tiler/raw/6.4.1/tests/fixtures/cog_nodata_nan.tif", + engine="cog3pio", +) +assert dataarray.sizes == {'band': 1, 'y': 549, 'x': 549} +assert dataarray.dtype == "float32" +``` + > [!NOTE] > Currently, this crate/library only supports reading single or multi-band float32 > GeoTIFF files, i.e. other dtypes (e.g. uint16) don't work yet. See roadmap below on @@ -81,16 +99,18 @@ assert array.dtype == "float32" ## Roadmap Short term (Q1 2024): -- [ ] Implement single-band GeoTIFF reader (for uint/int/float dtypes) to - [`ndarray`](https://github.com/rust-ndarray/ndarray) -- [x] Multi-band reader (relying on - [`image-tiff`](https://github.com/image-rs/image-tiff)) -- [x] Read from remote storage (using +- [x] Multi-band reader to [`ndarray`](https://github.com/rust-ndarray/ndarray) (relying + on [`image-tiff`](https://github.com/image-rs/image-tiff)) +- [x] Read from HTTP remote storage (using [`object-store`](https://github.com/apache/arrow-rs/tree/object_store_0.9.0/object_store)) Medium term (Q2 2024): -- [ ] Integration with `xarray` as a +- [x] Integration with `xarray` as a [`BackendEntrypoint`](https://docs.xarray.dev/en/v2024.02.0/internals/how-to-add-new-backend.html) +- [ ] Implement single-band GeoTIFF reader for multiple dtypes (uint/int/float) (relying + on [`geotiff`](https://github.com/georust/geotiff) crate) + +Longer term (Q3-Q4 2024): - [ ] Parallel reader (TBD on multi-threaded or asynchronous) - [ ] Direct-to-GPU loading diff --git a/pyproject.toml b/pyproject.toml index c01e625..8ce3f88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,15 +16,22 @@ classifiers = [ ] dependencies = [ "numpy>=1.23", + "xarray>=2022.6.0", ] dynamic = ["version"] [project.optional-dependencies] +benchmark = [ + "pytest-codspeed", + "rioxarray", +] tests = [ "pytest", - "pytest-codspeed", ] +[project.entry-points."xarray.backends"] +cog3pio = "cog3pio.xarray_backend:Cog3pioBackendEntrypoint" + [tool.maturin] python-source = "python" features = ["pyo3/extension-module"] diff --git a/python/cog3pio/xarray_backend.py b/python/cog3pio/xarray_backend.py new file mode 100644 index 0000000..b7adf74 --- /dev/null +++ b/python/cog3pio/xarray_backend.py @@ -0,0 +1,56 @@ +""" +An xarray backend for reading GeoTIFF files using the 'cog3pio' engine. +""" + +import os + +import numpy as np +import xarray as xr +from xarray.backends import BackendEntrypoint + +from cog3pio import CogReader + + +# %% +class Cog3pioBackendEntrypoint(BackendEntrypoint): + """ + Xarray backend to read GeoTIFF files using 'cog3pio' engine. + """ + + description = "Use .tif files in Xarray" + open_dataset_parameters = ["filename_or_obj"] + url = "https://github.com/weiji14/cog3pio" + + def open_dataset( + self, + filename_or_obj: str, + *, + drop_variables=None, + # other backend specific keyword arguments + # `chunks` and `cache` DO NOT go here, they are handled by xarray + ) -> xr.Dataset: + reader = CogReader(path=filename_or_obj) + + array: np.ndarray = reader.to_numpy() + x_coords, y_coords = reader.xy_coords() + + channels, height, width = array.shape + dataarray: xr.DataArray = xr.DataArray( + data=array, + coords={ + "band": np.arange(stop=channels, dtype=np.uint8), + "y": y_coords, + "x": x_coords, + }, + name=None, + attrs=None, + ) + + return dataarray.to_dataset(name="raster") + + def guess_can_open(self, filename_or_obj): + try: + _, ext = os.path.splitext(filename_or_obj) + except TypeError: + return False + return ext in {".tif", ".tiff"} diff --git a/python/tests/test_xarray_backend.py b/python/tests/test_xarray_backend.py new file mode 100644 index 0000000..2d4ba31 --- /dev/null +++ b/python/tests/test_xarray_backend.py @@ -0,0 +1,46 @@ +""" +Tests for xarray 'cog3pio' backend engine. +""" + +import numpy as np +import pytest +import xarray as xr + +try: + import rioxarray + + HAS_RIOXARRAY = True +except ImportError: + HAS_RIOXARRAY = False + + +# %% +@pytest.mark.benchmark +@pytest.mark.parametrize( + "engine", + [ + "cog3pio", + pytest.param( + "rasterio", + marks=pytest.mark.skipif( + condition=not HAS_RIOXARRAY, reason="Could not import 'rioxarray'" + ), + ), + ], +) +def test_xarray_backend_open_dataarray(engine): + """ + Ensure that passing engine='cog3pio' to xarray.open_dataarray works, and benchmark + against engine="rasterio" (rioxarray). + """ + with xr.open_dataarray( + filename_or_obj="https://github.com/cogeotiff/rio-tiler/raw/6.4.1/tests/fixtures/cog_nodata_nan.tif", + engine=engine, + ) as da: + assert da.sizes == {'band': 1, 'y': 549, 'x': 549} + assert da.x.min() == 500080.0 + assert da.x.max() == 609680.0 + assert da.y.min() == 5190340.0 + assert da.y.max() == 5299940.0 + assert da.dtype == "float32" + # np.testing.assert_allclose(actual=da.mean(), desired=0.181176) diff --git a/src/io/geotiff.rs b/src/io/geotiff.rs index 496a4df..6a68854 100644 --- a/src/io/geotiff.rs +++ b/src/io/geotiff.rs @@ -1,7 +1,7 @@ use std::io::{Read, Seek}; use geo::AffineTransform; -use ndarray::Array3; +use ndarray::{Array, Array1, Array3}; use tiff::decoder::{Decoder, DecodingResult, Limits}; use tiff::tags::Tag; use tiff::{ColorType, TiffError, TiffFormatError, TiffResult, TiffUnsupportedError}; @@ -109,6 +109,32 @@ impl CogReader { Ok(transform) } + + /// Get list of x and y coordinates + pub fn xy_coords(&mut self) -> TiffResult<(Array1, Array1)> { + let transform = self.transform()?; // affine transformation matrix + + // Get spatial resolution in x and y dimensions + let x_res: &f64 = &transform.a(); + let y_res: &f64 = &transform.e(); + + // Get xy coordinate of the center of the top left pixel + let x_origin: &f64 = &(transform.xoff() + x_res / 2.0); + let y_origin: &f64 = &(transform.yoff() + y_res / 2.0); + + // Get number of pixels along the x and y dimensions + let (x_pixels, y_pixels): (u32, u32) = self.decoder.dimensions()?; + + // Get xy coordinate of the center of the bottom right pixel + let x_end: f64 = x_origin + x_res * x_pixels as f64; + let y_end: f64 = y_origin + y_res * y_pixels as f64; + + // Get array of x-coordinates and y-coordinates + let x_coords = Array::range(x_origin.to_owned(), x_end, x_res.to_owned()); + let y_coords = Array::range(y_origin.to_owned(), y_end, y_res.to_owned()); + + Ok((x_coords, y_coords)) + } } /// Synchronously read a GeoTIFF file into an [`ndarray::Array`] diff --git a/src/python/adapters.rs b/src/python/adapters.rs index 45040c0..da6cff3 100644 --- a/src/python/adapters.rs +++ b/src/python/adapters.rs @@ -2,7 +2,7 @@ use std::io::Cursor; use bytes::Bytes; use ndarray::Array3; -use numpy::{PyArray3, ToPyArray}; +use numpy::{PyArray1, PyArray3, ToPyArray}; use object_store::{parse_url, ObjectStore}; use pyo3::exceptions::{PyBufferError, PyFileNotFoundError, PyValueError}; use pyo3::prelude::{pyclass, pyfunction, pymethods, pymodule, PyModule, PyResult, Python}; @@ -68,6 +68,19 @@ impl PyCogReader { // Convert from ndarray (Rust) to numpy ndarray (Python) Ok(array_data.to_pyarray_bound(py)) } + + /// Get x and y coordinates as numpy.ndarray + fn xy_coords<'py>( + &mut self, + py: Python<'py>, + ) -> PyResult<(Bound<'py, PyArray1>, Bound<'py, PyArray1>)> { + let (x_coords, y_coords) = self + .inner + .xy_coords() + .map_err(|err| PyValueError::new_err(err.to_string()))?; + + Ok((x_coords.to_pyarray_bound(py), y_coords.to_pyarray_bound(py))) + } } /// Read from a filepath or url into a byte stream