Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ Implement cog3pio xarray BackendEntrypoint #14

Merged
merged 10 commits into from
May 9, 2024
Merged
2 changes: 1 addition & 1 deletion .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ jobs:
- name: Install the package
run: |
set -e
python -m pip install cog3pio[tests] --find-links dist --force-reinstall
python -m pip install cog3pio[benchmark,tests] --find-links dist --force-reinstall
python -m pip list

# Run the benchmark tests
Expand Down
3 changes: 1 addition & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ jobs:
runs-on: windows-2022
strategy:
matrix:
target: [x64, x86]
target: [x64]
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
Expand All @@ -89,7 +89,6 @@ jobs:
name: wheels-windows-${{ matrix.target }}
path: dist
- name: pytest
if: ${{ !startsWith(matrix.target, 'aarch64') }}
shell: bash
run: |
set -e
Expand Down
3 changes: 1 addition & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ crate-type = ["cdylib"]

[dependencies]
bytes = "1.5.0"
geo = "0.28.0"
geo = { git = "https://github.com/georust/geo.git", version = "0.28.0", rev = "481196b4e50a488442b3919e02496ad909fc5412" }
ndarray = "0.15.6"
numpy = "0.21.0"
object_store = { version = "0.9.0", features = ["http"] }
Expand Down
32 changes: 26 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ async fn main() {
Cursor::new(bytes)
};

// Read GeoTIFF into an ndarray::Array
let arr: Array3<f32> = read_geotiff(stream).unwrap();
assert_eq!(arr.dim(), (1, 549, 549));
assert_eq!(arr[[0, 500, 500]], 0.13482364);
Expand All @@ -61,17 +62,34 @@ async fn main() {

### Python

#### NumPy

```python
import numpy as np
from cog3pio import read_geotiff

# Read GeoTIFF into a numpy array
array: np.ndarray = read_geotiff(
path="https://github.com/cogeotiff/rio-tiler/raw/6.4.0/tests/fixtures/cog_nodata_nan.tif"
)
assert array.shape == (1, 549, 549) # bands, height, width
assert array.dtype == "float32"
```

#### Xarray

```python
import xarray as xr

# Read GeoTIFF into an xarray.DataArray
dataarray: xr.DataArray = xr.open_dataarray(
filename_or_obj="https://github.com/cogeotiff/rio-tiler/raw/6.4.1/tests/fixtures/cog_nodata_nan.tif",
engine="cog3pio",
)
assert dataarray.sizes == {'band': 1, 'y': 549, 'x': 549}
assert dataarray.dtype == "float32"
```

> [!NOTE]
> Currently, this crate/library only supports reading single or multi-band float32
> GeoTIFF files, i.e. other dtypes (e.g. uint16) don't work yet. See roadmap below on
Expand All @@ -81,16 +99,18 @@ assert array.dtype == "float32"
## Roadmap

Short term (Q1 2024):
- [ ] Implement single-band GeoTIFF reader (for uint/int/float dtypes) to
[`ndarray`](https://github.com/rust-ndarray/ndarray)
- [x] Multi-band reader (relying on
[`image-tiff`](https://github.com/image-rs/image-tiff))
- [x] Read from remote storage (using
- [x] Multi-band reader to [`ndarray`](https://github.com/rust-ndarray/ndarray) (relying
on [`image-tiff`](https://github.com/image-rs/image-tiff))
- [x] Read from HTTP remote storage (using
[`object-store`](https://github.com/apache/arrow-rs/tree/object_store_0.9.0/object_store))

Medium term (Q2 2024):
- [ ] Integration with `xarray` as a
- [x] Integration with `xarray` as a
[`BackendEntrypoint`](https://docs.xarray.dev/en/v2024.02.0/internals/how-to-add-new-backend.html)
- [ ] Implement single-band GeoTIFF reader for multiple dtypes (uint/int/float) (relying
on [`geotiff`](https://github.com/georust/geotiff) crate)

Longer term (Q3-Q4 2024):
- [ ] Parallel reader (TBD on multi-threaded or asynchronous)
- [ ] Direct-to-GPU loading

Expand Down
9 changes: 8 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,22 @@ classifiers = [
]
dependencies = [
"numpy>=1.23",
"xarray>=2022.6.0",
]
dynamic = ["version"]

[project.optional-dependencies]
benchmark = [
"pytest-codspeed",
"rioxarray",
]
tests = [
"pytest",
"pytest-codspeed",
]

[project.entry-points."xarray.backends"]
cog3pio = "cog3pio.xarray_backend:Cog3pioBackendEntrypoint"

[tool.maturin]
python-source = "python"
features = ["pyo3/extension-module"]
56 changes: 56 additions & 0 deletions python/cog3pio/xarray_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""
An xarray backend for reading GeoTIFF files using the 'cog3pio' engine.
"""

import os

import numpy as np
import xarray as xr
from xarray.backends import BackendEntrypoint

from cog3pio import CogReader


# %%
class Cog3pioBackendEntrypoint(BackendEntrypoint):
"""
Xarray backend to read GeoTIFF files using 'cog3pio' engine.
"""

description = "Use .tif files in Xarray"
open_dataset_parameters = ["filename_or_obj"]
url = "https://github.com/weiji14/cog3pio"

def open_dataset(
self,
filename_or_obj: str,
*,
drop_variables=None,
# other backend specific keyword arguments
# `chunks` and `cache` DO NOT go here, they are handled by xarray
) -> xr.Dataset:
reader = CogReader(path=filename_or_obj)

array: np.ndarray = reader.to_numpy()
x_coords, y_coords = reader.xy_coords()

channels, height, width = array.shape
dataarray: xr.DataArray = xr.DataArray(
data=array,
coords={
"band": np.arange(stop=channels, dtype=np.uint8),
"y": y_coords,
"x": x_coords,
},
name=None,
attrs=None,
)

return dataarray.to_dataset(name="raster")

def guess_can_open(self, filename_or_obj):
try:
_, ext = os.path.splitext(filename_or_obj)
except TypeError:
return False
return ext in {".tif", ".tiff"}
46 changes: 46 additions & 0 deletions python/tests/test_xarray_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""
Tests for xarray 'cog3pio' backend engine.
"""

import numpy as np
import pytest
import xarray as xr

try:
import rioxarray

HAS_RIOXARRAY = True
except ImportError:
HAS_RIOXARRAY = False


# %%
@pytest.mark.benchmark
@pytest.mark.parametrize(
"engine",
[
"cog3pio",
pytest.param(
"rasterio",
marks=pytest.mark.skipif(
condition=not HAS_RIOXARRAY, reason="Could not import 'rioxarray'"
),
),
],
)
def test_xarray_backend_open_dataarray(engine):
"""
Ensure that passing engine='cog3pio' to xarray.open_dataarray works, and benchmark
against engine="rasterio" (rioxarray).
"""
with xr.open_dataarray(
filename_or_obj="https://github.com/cogeotiff/rio-tiler/raw/6.4.1/tests/fixtures/cog_nodata_nan.tif",
engine=engine,
) as da:
assert da.sizes == {'band': 1, 'y': 549, 'x': 549}
assert da.x.min() == 500080.0
assert da.x.max() == 609680.0
assert da.y.min() == 5190340.0
assert da.y.max() == 5299940.0
assert da.dtype == "float32"
# np.testing.assert_allclose(actual=da.mean(), desired=0.181176)
28 changes: 27 additions & 1 deletion src/io/geotiff.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::io::{Read, Seek};

use geo::AffineTransform;
use ndarray::Array3;
use ndarray::{Array, Array1, Array3};
use tiff::decoder::{Decoder, DecodingResult, Limits};
use tiff::tags::Tag;
use tiff::{ColorType, TiffError, TiffFormatError, TiffResult, TiffUnsupportedError};
Expand Down Expand Up @@ -109,6 +109,32 @@ impl<R: Read + Seek> CogReader<R> {

Ok(transform)
}

/// Get list of x and y coordinates
pub fn xy_coords(&mut self) -> TiffResult<(Array1<f64>, Array1<f64>)> {
let transform = self.transform()?; // affine transformation matrix

// Get spatial resolution in x and y dimensions
let x_res: &f64 = &transform.a();
let y_res: &f64 = &transform.e();

// Get xy coordinate of the center of the top left pixel
let x_origin: &f64 = &(transform.xoff() + x_res / 2.0);
let y_origin: &f64 = &(transform.yoff() + y_res / 2.0);

// Get number of pixels along the x and y dimensions
let (x_pixels, y_pixels): (u32, u32) = self.decoder.dimensions()?;

// Get xy coordinate of the center of the bottom right pixel
let x_end: f64 = x_origin + x_res * x_pixels as f64;
let y_end: f64 = y_origin + y_res * y_pixels as f64;

// Get array of x-coordinates and y-coordinates
let x_coords = Array::range(x_origin.to_owned(), x_end, x_res.to_owned());
let y_coords = Array::range(y_origin.to_owned(), y_end, y_res.to_owned());

Ok((x_coords, y_coords))
}
}

/// Synchronously read a GeoTIFF file into an [`ndarray::Array`]
Expand Down
15 changes: 14 additions & 1 deletion src/python/adapters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::io::Cursor;

use bytes::Bytes;
use ndarray::Array3;
use numpy::{PyArray3, ToPyArray};
use numpy::{PyArray1, PyArray3, ToPyArray};
use object_store::{parse_url, ObjectStore};
use pyo3::exceptions::{PyBufferError, PyFileNotFoundError, PyValueError};
use pyo3::prelude::{pyclass, pyfunction, pymethods, pymodule, PyModule, PyResult, Python};
Expand Down Expand Up @@ -68,6 +68,19 @@ impl PyCogReader {
// Convert from ndarray (Rust) to numpy ndarray (Python)
Ok(array_data.to_pyarray_bound(py))
}

/// Get x and y coordinates as numpy.ndarray
fn xy_coords<'py>(
&mut self,
py: Python<'py>,
) -> PyResult<(Bound<'py, PyArray1<f64>>, Bound<'py, PyArray1<f64>>)> {
let (x_coords, y_coords) = self
.inner
.xy_coords()
.map_err(|err| PyValueError::new_err(err.to_string()))?;

Ok((x_coords.to_pyarray_bound(py), y_coords.to_pyarray_bound(py)))
}
}

/// Read from a filepath or url into a byte stream
Expand Down
Loading