Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WPSDataset: Wraps GDAL dataset and provides common wps operations #4010

Merged
merged 18 commits into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
"allclose",
"anyio",
"APCP",
"astype",
"asyncpg",
"autoflush",
"Behaviour",
Expand All @@ -69,6 +70,7 @@
"cutline",
"CWFIS",
"determinates",
"dtype",
"excinfo",
"fastapi",
"FBAN",
Expand Down Expand Up @@ -111,6 +113,7 @@
"PRIMEM",
"PROJCS",
"pydantic",
"pyproj",
"RDPS",
"reduxjs",
"reproject",
Expand Down
8 changes: 4 additions & 4 deletions api/app/geospatial.py → api/app/geospatial/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
""" Geospatial things
"""
"""Module for geospatial logic"""

from typing import Final
from pyproj import CRS

Expand All @@ -8,7 +8,7 @@
# BCGOV standard is to store everything in NAD83 / BC Albers (EPSG:3005).
NAD83_BC_ALBERS: Final = 3005
# NAD 83 alone (EPSG:4269), uses geographic coordinates.
NAD83: Final = 'epsg:4269'
NAD83: Final = "epsg:4269"
NAD83_CRS: Final = CRS(NAD83)
# De facto standard is to expose data in WGS84 (EPSG:4326).
WGS84: Final = 'epsg:4326'
WGS84: Final = "epsg:4326"
210 changes: 210 additions & 0 deletions api/app/geospatial/wps_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
from typing import Optional
from osgeo import gdal, osr
import numpy as np

from app.utils.geospatial import GDALResamplingMethod


class WPSDataset:
"""
A wrapper around gdal datasets for common operations
"""

def __init__(self, ds_path: Optional[str], ds=None, band=1, chunk_size=256, access=gdal.GA_ReadOnly, datatype=gdal.GDT_Byte):
self.ds = ds
self.ds_path = ds_path
self.band = band
self.chunk_size = chunk_size
self.access = access
self.datatype = datatype

def __enter__(self):
if self.ds is None:
self.ds: gdal.Dataset = gdal.Open(self.ds_path, self.access)
dgboss marked this conversation as resolved.
Show resolved Hide resolved
return self

def __exit__(self, *_):
self.ds = None

def __mul__(self, other):
"""
Multiplies this WPSDataset with the other WPSDataset

:param other: WPSDataset
:raises ValueError: Raised if this and other WPSDataset have mismatched raster dimensions
:return: a new WPSDataset
"""
# Get raster dimensions
x_size = self.ds.RasterXSize
y_size = self.ds.RasterYSize

# Check if the dimensions of both rasters match
if x_size != other.ds.RasterXSize or y_size != other.ds.RasterYSize:
conbrad marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError("The dimensions of the two rasters do not match.")

# Get the geotransform and projection from the first raster
geotransform = self.ds.GetGeoTransform()
projection = self.ds.GetProjection()

# Check if projection matches
if projection != other.ds.GetProjection():
raise ValueError("The projections of the two rasters do not match.")

# Check if origin matches
if geotransform[0] != other.ds.GetGeoTransform()[0] or geotransform[3] != other.ds.GetGeoTransform()[3]:
raise ValueError("The origins of the two rasters do not match.")

# Create the output raster
driver: gdal.Driver = gdal.GetDriverByName("MEM")
out_ds: gdal.Dataset = driver.Create("memory", x_size, y_size, 1, self.datatype)

# Set the geotransform and projection
out_ds.SetGeoTransform(geotransform)
out_ds.SetProjection(projection)

self_band: gdal.Band = self.ds.GetRasterBand(self.band)
other_band: gdal.Band = other.ds.GetRasterBand(self.band)

# Process in chunks
for y in range(0, y_size, self.chunk_size):
y_chunk_size = min(self.chunk_size, y_size - y)

for x in range(0, x_size, self.chunk_size):
x_chunk_size = min(self.chunk_size, x_size - x)

# Read chunks from both rasters
self_chunk = self_band.ReadAsArray(x, y, x_chunk_size, y_chunk_size)
other_chunk = other_band.ReadAsArray(x, y, x_chunk_size, y_chunk_size)
wider_type = np.promote_types(self_chunk.dtype, other_chunk.dtype)

self_chunk = self_chunk.astype(wider_type)
other_chunk = other_chunk.astype(wider_type)

other_chunk[other_chunk >= 1] = 1
other_chunk[other_chunk < 1] = 0

# Multiply the chunks
self_chunk *= other_chunk

# Write the result to the output raster
out_ds.GetRasterBand(self.band).WriteArray(self_chunk, x, y)
self_chunk = None
other_chunk = None

return WPSDataset(ds_path=None, ds=out_ds)

def warp_to_match(self, other, output_path: str, resample_method: GDALResamplingMethod = GDALResamplingMethod.NEAREST_NEIGHBOUR):
"""
Warp the dataset to match the extent, pixel size, and projection of the other dataset.

:param ds_to_match: the reference dataset raster to match the source against
:param output_path: output path of the resulting raster
:param resample_method: gdal resampling algorithm
:return: warped raster dataset
"""
source_geotransform = other.ds.GetGeoTransform()
x_res = source_geotransform[1]
y_res = -source_geotransform[5]
minx = source_geotransform[0]
maxy = source_geotransform[3]
maxx = minx + source_geotransform[1] * other.ds.RasterXSize
miny = maxy + source_geotransform[5] * other.ds.RasterYSize
extent = [minx, miny, maxx, maxy]

# Warp to match input option parameters
warped_ds = gdal.Warp(
output_path,
self.ds,
options=gdal.WarpOptions(
dstSRS=other.ds.GetProjection(),
outputBounds=extent,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe @brettedw can chime in, but I think there are some subtleties we need to be cautious of here when setting the extent and xRes/yRes in WarpOptions.

If we're warping a raster with a geographic coordinate system (think coordinates are lat/lon in degrees) to a projected coordinate system (coordinates are in feet or meters as measured from an arbitrary point) and vice versa, we're going to run into problems I think.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting, this was pulled over from

return gdal.Warp(output_path, source_ds, dstSRS=ds_to_match.GetProjection(), outputBounds=extent, xRes=x_res, yRes=y_res, resampleAlg=resample_method.value)
that we currently use.

Copy link
Collaborator

@dgboss dgboss Oct 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I didn't notice the issue with the original implementation. I caught it as I was generating those test files. I created the first test file in EPSG4326 which uses lat/lon coordinates and has x and y resolution measured in degrees. I basically used the warp_to_match code to then create the EPSG3857 test file but I kept getting errors about not having enough disk space. The problem was that I was taking the x/y res in degrees which was very small (0.003333333333067) and passing that in to gdal.Warp which then tries to create an output raster where the x/y resolution is 0.003333333333067 metres!
Additionally, I ran into problems with the 4326 extent not making sense in the 3857 spatial reference.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm going to do a quick test and see if specifying a source spatial reference works around the issue.

Copy link
Collaborator

@dgboss dgboss Oct 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Boo...specifying the srcSRS WarpOption doesn't help.
ERROR 3: 3005_lats.tif: Free disk space available is 478468026368 bytes, whereas 635088851384008 are at least necessary.

Copy link
Collaborator

@brettedw brettedw Oct 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Riiiiiight, ya that's an interesting one. Gonna have to think about a solution to that one. Maybe for now we can just implement some sort of check to see if both datasets are in geographic or projected. My brain can't do that right now but maybe we have something like this to use as a check (I don't know if this works, came from chatgippity):

def is_geographic(crs_wkt: str) -> bool:
    """
    Check if the given CRS (WKT string) is geographic (i.e., using degrees).
    :param crs_wkt: The WKT string of the CRS.
    :return: True if geographic, False if projected.
    """
    srs = osr.SpatialReference()
    srs.ImportFromWkt(crs_wkt)
    return srs.IsGeographic()

def get_crs_units(crs_wkt: str) -> str:
    """
    Get the units of the CRS (either 'degrees' for geographic, or 'meters/feet' for projected).
    :param crs_wkt: The WKT string of the CRS.
    :return: The units of the CRS ('degrees', 'meters', 'feet', etc.)
    """
    srs = osr.SpatialReference()
    srs.ImportFromWkt(crs_wkt)
    
    if srs.IsGeographic():
        return 'degrees'
    else:
        return srs.GetLinearUnitsName()

Otherwise maybe we have to do some units conversion of meters to degrees? Need to think on that one

Copy link
Collaborator

@dgboss dgboss Oct 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I think I figured out one path forward, but we might want to dev chat next week. Right now when this warps, we don't specify the dimensions of the array. We can use height and width parameters in WarpOptions and set those to self.band.YSize and self.band.XSize. Also, we can keep the outputBounds and pass in outputBoundsSRS. Untested as of yet, but my brain is done.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My brain is working again (sort of). Specifying the number of pixels for height and width might not be a good idea afterall. I think it could lead to non-square pixels when re-projecting.

xRes=x_res,
yRes=y_res,
resampleAlg=resample_method.value,
),
)
return WPSDataset(ds_path=None, ds=warped_ds)

def replace_nodata_with(self, new_no_data_value: int):
"""
Modifies the dataset inplace by replacing the nodata value with the supplied one

:param new_no_data_value: the new nodata value
"""
band: gdal.Band = self.ds.GetRasterBand(self.band)
nodata_value = band.GetNoDataValue()
array = band.ReadAsArray()

if nodata_value is not None:
modified_array = np.where(array != nodata_value, array, new_no_data_value)
band.WriteArray(modified_array)
band.SetNoDataValue(new_no_data_value)
band.FlushCache()

def generate_latitude_array(self):
"""
Transforms this dataset to 4326 to compute the latitude coordinates

:return: array of latitude coordinates
"""
geotransform = self.ds.GetGeoTransform()
projection = self.ds.GetProjection()

src_srs = osr.SpatialReference()
src_srs.ImportFromWkt(projection)

x_size = self.ds.RasterXSize
y_size = self.ds.RasterYSize

tgt_srs = osr.SpatialReference()
tgt_srs.ImportFromEPSG(4326)

transform = osr.CoordinateTransformation(src_srs, tgt_srs)

# empty array to store latitude values
latitudes = np.zeros((y_size, x_size))

for y in range(y_size):
for x in range(x_size):
x_coord = geotransform[0] + x * geotransform[1] + y * geotransform[2]
y_coord = geotransform[3] + x * geotransform[4] + y * geotransform[5]

_, lat, _ = transform.TransformPoint(x_coord, y_coord)

latitudes[y, x] = lat

return latitudes

def export_to_geotiff(self, output_path):
"""
Exports the dataset to a geotiff with the given path

:param output_path: path to export the geotiff to
"""
driver: gdal.Driver = gdal.GetDriverByName("GTiff")

geotransform = self.ds.GetGeoTransform()
projection = self.ds.GetProjection()

band: gdal.Band = self.ds.GetRasterBand(self.band)
nodata_value = band.GetNoDataValue()
array = band.ReadAsArray()

rows, cols = array.shape
output_dataset: gdal.Dataset = driver.Create(output_path, cols, rows, 1, self.datatype)
output_dataset.SetGeoTransform(geotransform)
output_dataset.SetProjection(projection)

output_band: gdal.Band = output_dataset.GetRasterBand(self.band)
output_band.WriteArray(array)
if nodata_value is not None:
output_band.SetNoDataValue(nodata_value)

output_band.FlushCache()
output_dataset = None
del output_dataset
output_band = None
del output_band

def as_gdal_ds(self) -> gdal.Dataset:
return self.ds
Binary file added api/app/tests/geospatial/3005_lats.tif
Binary file not shown.
Binary file added api/app/tests/geospatial/4326_lats.tif
Binary file not shown.
Binary file not shown.
Loading
Loading