Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Example: Converting GeoTIFF to GeoZarr with Overviews and Metadata #42

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 132 additions & 0 deletions examples/geozarr_with_overviews.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""Create a GeoTIFF, convert to a premature(!) "GeoZarr" with pyramid overviews."""

# This is a toy problem to demonstrate the creation of a GeoTIFF and the conversion to a Zarr store with overviews.

import shutil

import affine
import numpy as np
import pandas as pd
import rasterio
import rioxarray
import xarray as xr


def get_some_test_data(bands, height, width):
"""Create some test data."""
data = np.zeros((bands, height, width), dtype=np.float32)
for b in range(bands):
random_value = np.random.rand()
data[b] = np.sin(np.linspace(0, np.pi, height)[:, None] * random_value)
return data


def save_to_geotiff(
data: np.ndarray, transform: affine.Affine, filename: str = "some.tif", overviews: list[int] = None
) -> None:
"""Save data to a GeoTIFF file."""
kwargs = {
"driver": "GTiff",
"count": data.shape[0],
"height": data.shape[1],
"width": data.shape[2],
"dtype": rasterio.float32,
"crs": "EPSG:6933",
"transform": transform,
"compress": "DEFLATE",
}

with rasterio.open(filename, "w", **kwargs) as dst:
dst.write(data)
overviews = overviews
dst.build_overviews(overviews, resampling=rasterio.enums.Resampling.average)
dst.update_tags(ns="rio_overview", resampling="average")


def add_geozarr_metadata(
da: xr.DataArray, variable_name: str = "reflectance", crs: str = "EPSG:6933", overviews: list[int] = None
):
"""Add GeoZarr-specific metadata and CF conventions to an xarray DataArray.

Based on https://github.com/zarr-developers/geozarr-spec/blob/b9f4ad3/geozarr-spec.md.

NEEDS REVIEWS!!

Args:
da: xarray DataArray to add metadata to.
variable_name: Standard name of the variable for CF 'standard_name' attribute.
crs: Coordinate Reference System in EPSG code.
overviews: aggregation overviews
"""
# _ARRAY_DIMENSIONS: Specifies the dimension names in the order they appear.
da.attrs["_ARRAY_DIMENSIONS"] = list(da.dims)

# Climate and Forecast (CF) Attributes
# CF standard name for the physical quantity.
da.attrs["standard_name"] = variable_name
# Points to the variable containing CRS info.
da.attrs["grid_mapping"] = "spatial_ref"

# Example of adding grid_mapping info directly in the DataArray
da["spatial_ref"] = xr.DataArray(
0,
attrs={
"grid_mapping_name": "transverse_mercator",
"epsg_code": crs,
"semi_major_axis": 6378137,
"inverse_flattening": 298.257223563,
},
)

# Multiscales Metadata
# This is a placeholder showing where multiscale metadata could be added.
da.attrs["multiscales"] = [
{
"datasets": [{"path": f"overview_{overview}", "overview": str(overview)} for overview in overviews],
"type": "overview",
"version": "1.0",
}
]

# Additional CF Attributes for enhanced description and interoperability
# Assuming reflectance is unitless; adjust based on actual variable.
da.attrs["units"] = "1"
da.attrs["scale_overview"] = 1.0
da.attrs["add_offset"] = 0.0

# Including documentation and descriptive metadata
da.attrs["long_name"] = f"{variable_name} measured as reflectance"
da.attrs["comment"] = "Generated for demonstration purposes"

# Versioning information for tracking dataset updates
da.attrs["version"] = "1.0.0"
da.attrs["history"] = f"Created on {pd.Timestamp.now().strftime('%Y-%m-%d')}"


def to_pyramid_zarr(da: xr.DataArray, zarr_file: str, overviews: list[int]):
"""Convert dataarray to coarsened dataset."""
shutil.rmtree(zarr_file, ignore_errors=True)
ds = da.to_dataset(name="data")
add_geozarr_metadata(ds, overviews=overviews)
ds.to_zarr(zarr_file, mode="w")

for overview in overviews:
da_coarsened = da.coarsen(x=overview, y=overview, boundary="trim").mean()
# Do we need to copy metadata?
da_coarsened.attrs = da.attrs # Copy metadata
add_geozarr_metadata(da_coarsened, overviews=overviews)
group_path = f"/overview_{overview}"
# Add coarsened data to the zarr store
da_coarsened.to_dataset(name="data").to_zarr(zarr_file, mode="a", group=group_path)


if __name__ == "__main__":
overviews = [2, 4, 8, 16, 32, 64]
width, height, bands = 8192, 8192, 3
# create affine transformation
transform = affine.Affine(10.0, 0.0, 400000.0, 0.0, -10.0, 6000000.0)
data = get_some_test_data(bands, height, width)
tif_filename = "some.tif"
save_to_geotiff(data, transform, filename=tif_filename, overviews=overviews)
da = rioxarray.open_rasterio(tif_filename, masked=True)
to_pyramid_zarr(da, "multiscale_zarr", overviews)
66 changes: 66 additions & 0 deletions examples/tif_to_zarr_with_crs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""Create a GeoTIFF and convert it to a Zarr file with CRS metadata, that gets read by QGIS with GDAL 3.8+."""

# (`conda install qgis; qgis` to get qgis with GDAL 3.8+)."""
# There's an open issue to remove `attrs` in Zarr V3, though GDAL's behaviour currently relies on attrs https://github.com/zarr-developers/zarr-python/issues/1624

import affine
import numpy as np
import rasterio
import rioxarray as rxr


def get_some_test_data(bands: int, height: int, width: int) -> np.ndarray:
"""Create some test data."""
data = np.zeros((bands, height, width), dtype=np.float32)
for b in range(bands):
random_value = np.random.rand()
data[b] = np.sin(np.linspace(0, np.pi, height)[:, None] * random_value)
return data


def save_to_geotiff(data: np.ndarray, transform: affine.Affine, filename: str = "some.tif") -> None:
"""Save data to a GeoTIFF file."""
kwargs = {
"driver": "GTiff",
"count": data.shape[0],
"height": data.shape[1],
"width": data.shape[2],
"dtype": rasterio.float32,
"crs": "EPSG:6933",
"transform": transform,
"compress": "DEFLATE",
}

with rasterio.open(filename, "w", **kwargs) as dst:
dst.write(data)


def tif_to_crs_zarr(tif_path: str, zarr_path: str, x: str = "x", y: str = "y") -> None:
"""Convert a GeoTIFF file to a Zarr file with valid CRS metadata."""
da = rxr.open_rasterio(tif_path)
name = tif_path.split(".")[0].split("/")[-1]
ds = da.to_dataset(name=name)
# get the epsg code from the tif file
epsg = ds.rio.crs.to_epsg()

# Once your data goes beyond X, Y and 1 data variable,
# qgis let's you choose which variable to open, as it inherits gdal behavior:
# https://gdal.org/drivers/raster/zarr.html#particularities-of-the-classic-raster-api
ds = ds.drop_vars(["band", "spatial_ref"], errors="ignore")
# renamed x and y to X and Y for gdal to automatically recognize them as spatial dimensions
ds = ds.rename({x: "X", y: "Y"})
# add CRS to the dataset following the gdal implementation
ds.some.attrs["_CRS"] = {"url": f"http://www.opengis.net/def/crs/EPSG/0/{epsg}"}
ds.to_zarr(f"{name}.zarr", mode="w", consolidated=True)


if __name__ == "__main__":
width, height, bands = 8192, 8192, 3
# create affine transformation
transform = affine.Affine(10.0, 0.0, 400000.0, 0.0, -10.0, 6000000.0)
# create some test data
data = get_some_test_data(bands, height, width)
tif_filename = "some.tif"
save_to_geotiff(data, transform, filename=tif_filename)
# convert tif to zarr
tif_to_crs_zarr(tif_filename, "some.zarr", x="x", y="y")