Skip to content

Commit

Permalink
Merge pull request #452 from databrickslabs/python/fix/raster-tests
Browse files (browse the repository at this point in the history)
Python/fix/raster tests
  • Loading branch information
Milos Colic authored Nov 10, 2023
2 parents 7cf8f49 + 40b80c0 commit fe15127
Show file tree
Hide file tree
Showing 37 changed files with 363 additions and 137 deletions.
2 changes: 1 addition & 1 deletion .github/actions/python_build/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ runs:
shell: bash
run: |
cd python
pip install build wheel pyspark==${{ matrix.spark }}
pip install build wheel pyspark==${{ matrix.spark }} numpy==${{ matrix.numpy }}
pip install .
- name: Test and build python package
shell: bash
Expand Down
21 changes: 12 additions & 9 deletions .github/actions/scala_build/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,21 @@ runs:
with:
java-version: '8'
distribution: 'zulu'
- name: Configure python interpreter
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python }}
- name: Add packaged GDAL dependencies
shell: bash
run : |
sudo apt-get update && sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7
pip install databricks-mosaic-gdal==${{ matrix.gdal }}
sudo tar -xf /opt/hostedtoolcache/Python/${{ matrix.python }}/x64/lib/python3.9/site-packages/databricks-mosaic-gdal/resources/gdal-${{ matrix.gdal }}-filetree.tar.xz -C /
sudo tar -xhf /opt/hostedtoolcache/Python/${{ matrix.python }}/x64/lib/python3.9/site-packages/databricks-mosaic-gdal/resources/gdal-${{ matrix.gdal }}-symlinks.tar.xz -C /
- name: Test and build the scala JAR - skip tests is false
if: inputs.skip_tests == 'false'
shell: bash
run: |
pip install databricks-mosaic-gdal==3.4.3
sudo tar -xf /home/runner/.local/lib/python3.8/site-packages/databricks-mosaic-gdal/resources/gdal-3.4.3-filetree.tar.xz -C /
sudo tar -xhf /home/runner/.local/lib/python3.8/site-packages/databricks-mosaic-gdal/resources/gdal-3.4.3-symlinks.tar.xz -C /
sudo add-apt-repository ppa:ubuntugis/ubuntugis-unstable
sudo apt clean && sudo apt -o Acquire::Retries=3 update --fix-missing -y
sudo apt-get -o Acquire::Retries=3 update -y
sudo apt-get -o Acquire::Retries=3 install -y gdal-bin=3.4.3+dfsg-1~focal0 libgdal-dev=3.4.3+dfsg-1~focal0 python3-gdal=3.4.3+dfsg-1~focal0
sudo mvn -q clean install
run: sudo mvn -q clean install
- name: Build the scala JAR - skip tests is true
if: inputs.skip_tests == 'true'
shell: bash
Expand Down
14 changes: 8 additions & 6 deletions .github/workflows/build_main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ name: build main
on:
push:
branches-ignore:
- "R/*"
- "r/*"
- "python/*"
- "scala/*"
- "R/**"
- "r/**"
- "python/**"
- "scala/**"
pull_request:
branches:
- "**"
Expand All @@ -16,8 +16,10 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
strategy:
matrix:
python: [ 3.9 ]
spark: [ 3.2.1 ]
python: [ 3.9.5 ]
numpy: [ 1.21.5 ]
gdal: [ 3.4.3 ]
spark: [ 3.3.2 ]
R: [ 4.1.2 ]
steps:
- name: checkout code
Expand Down
8 changes: 5 additions & 3 deletions .github/workflows/build_python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: build_python
on:
push:
branches:
- "python/*"
- "python/**"

jobs:
build:
Expand All @@ -12,8 +12,10 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
strategy:
matrix:
python: [ 3.9 ]
spark: [ 3.2.1 ]
python: [ 3.9.5 ]
numpy: [ 1.21.5 ]
gdal: [ 3.4.3 ]
spark: [ 3.3.2 ]
R: [ 4.1.2 ]
steps:
- name: checkout code
Expand Down
10 changes: 6 additions & 4 deletions .github/workflows/build_r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ name: build_R
on:
push:
branches:
- 'r/*'
- 'R/*'
- 'r/**'
- 'R/**'

jobs:
build:
Expand All @@ -13,8 +13,10 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
strategy:
matrix:
python: [ 3.9 ]
spark: [ 3.2.1 ]
python: [ 3.9.5 ]
numpy: [ 1.21.5 ]
gdal: [ 3.4.3 ]
spark: [ 3.3.2 ]
R: [ 4.1.2 ]
steps:
- name: checkout code
Expand Down
8 changes: 5 additions & 3 deletions .github/workflows/build_scala.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: build_scala
on:
push:
branches:
- "scala/"
- "scala/**"

jobs:
build:
Expand All @@ -11,8 +11,10 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
strategy:
matrix:
python: [ 3.9 ]
spark: [ 3.2.1 ]
python: [ 3.9.5 ]
numpy: [ 1.21.5 ]
gdal: [ 3.4.3 ]
spark: [ 3.3.2 ]
R: [ 4.1.2 ]
steps:
- name: checkout code
Expand Down
8 changes: 2 additions & 6 deletions python/mosaic/api/aggregators.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,7 @@ def st_intersection_aggregate(
)


def st_intersection_agg(
leftIndex: ColumnOrName, rightIndex: ColumnOrName
) -> Column:
def st_intersection_agg(leftIndex: ColumnOrName, rightIndex: ColumnOrName) -> Column:
"""
Computes the intersection of all `leftIndex` : `rightIndex` pairs
and unions these to produce a single geometry.
Expand Down Expand Up @@ -100,9 +98,7 @@ def st_intersects_aggregate(
)


def st_intersects_agg(
leftIndex: ColumnOrName, rightIndex: ColumnOrName
) -> Column:
def st_intersects_agg(leftIndex: ColumnOrName, rightIndex: ColumnOrName) -> Column:
"""
Tests if any `leftIndex` : `rightIndex` pairs intersect.
Expand Down
28 changes: 0 additions & 28 deletions python/mosaic/api/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
"st_convexhull",
"st_buffer",
"st_bufferloop",
"st_buffer_cap_style",
"st_dump",
"st_envelope",
"st_srid",
Expand Down Expand Up @@ -208,33 +207,6 @@ def st_bufferloop(
)


def st_buffer_cap_style(geom: ColumnOrName, radius: ColumnOrName, cap_style: ColumnOrName) -> Column:
    """
    Buffer a geometry by a radius using the requested cap style.

    Each argument is converted with `pyspark_to_java_column` and handed,
    in order, to the `st_buffer_cap_style` function exposed by the active
    Mosaic context.

    Parameters
    ----------
    geom : Column
        The input geometry
    radius : Column
        The radius of buffering
    cap_style : Column
        The cap style of the buffer

    Returns
    -------
    Column
        A geometry

    """
    # Resolve the dispatcher first so attribute lookup happens before any
    # argument conversion, matching the order of a single call expression.
    invoke = config.mosaic_context.invoke_function
    java_columns = [
        pyspark_to_java_column(column) for column in (geom, radius, cap_style)
    ]
    return invoke("st_buffer_cap_style", *java_columns)


def st_dump(geom: ColumnOrName) -> Column:
"""
Explodes a multi-geometry into one row per constituent geometry.
Expand Down
7 changes: 2 additions & 5 deletions python/mosaic/api/gdal.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,11 @@ def setup_gdal(
-------
"""
sc = spark.sparkContext
mosaicContextClass = getattr(
sc._jvm.com.databricks.labs.mosaic.functions, "MosaicContext"
mosaicGDALObject = getattr(
spark.sparkContext._jvm.com.databricks.labs.mosaic.gdal, "MosaicGDAL"
)
mosaicGDALObject = getattr(sc._jvm.com.databricks.labs.mosaic.gdal, "MosaicGDAL")
mosaicGDALObject.prepareEnvironment(spark._jsparkSession, init_script_path)
print("GDAL setup complete.\n")
print(f"Shared objects (*.so) stored in: {shared_objects_path}.\n")
print(f"Init script stored in: {init_script_path}.\n")
print(
"Please restart the cluster with the generated init script to complete the setup.\n"
Expand Down
53 changes: 32 additions & 21 deletions python/mosaic/api/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,29 +13,29 @@
"rst_boundingbox",
"rst_clip",
"rst_combineavg",
"rst_fromfile",
"rst_frombands",
"rst_fromfile",
"rst_georeference",
"ret_getnodata",
"rst_getnodata",
"rst_getsubdataset",
"rst_height",
"rst_isempty",
"rst_initnodata",
"rst_isempty",
"rst_memsize",
"rst_metadata",
"rst_merge",
"rst_numbands",
"rst_metadata",
"rst_ndvi",
"rst_numbands",
"rst_pixelheight",
"rst_pixelwidth",
"rst_rastertogridavg",
"rst_rastertogridcount",
"rst_rastertogridmax",
"rst_rastertogridmin",
"rst_rastertogridmedian",
"rst_rastertoworldcoord",
"rst_rastertogridmin",
"rst_rastertoworldcoordx",
"rst_rastertoworldcoordy",
"rst_rastertoworldcoord",
"rst_retile",
"rst_rotation",
"rst_scalex",
Expand All @@ -45,17 +45,17 @@
"rst_skewy",
"rst_srid",
"rst_subdatasets",
"rst_summary",
"rst_subdivide",
"rst_summary",
"rst_tessellate",
"rst_to_overlapping_tiles",
"rst_tryopen",
"rst_upperleftx",
"rst_upperlefty",
"rst_width",
"rst_worldtorastercoord",
"rst_worldtorastercoordx",
"rst_worldtorastercoordy",
"rst_worldtorastercoord",
]


Expand Down Expand Up @@ -172,7 +172,7 @@ def rst_georeference(raster: ColumnOrName) -> Column:
)


def ret_getnodata(raster: ColumnOrName) -> Column:
def rst_getnodata(raster: ColumnOrName) -> Column:
"""
Returns the nodata value of the band.
Expand All @@ -190,7 +190,7 @@ def ret_getnodata(raster: ColumnOrName) -> Column:
"""
return config.mosaic_context.invoke_function(
"ret_getnodata", pyspark_to_java_column(raster)
"rst_getnodata", pyspark_to_java_column(raster)
)


Expand Down Expand Up @@ -253,8 +253,7 @@ def rst_initnodata(raster: ColumnOrName) -> Column:
"""
return config.mosaic_context.invoke_function(
"rst_initnodata",
pyspark_to_java_column(raster)
"rst_initnodata", pyspark_to_java_column(raster)
)


Expand Down Expand Up @@ -897,13 +896,16 @@ def rst_fromfile(raster: ColumnOrName, sizeInMB: ColumnOrName) -> Column:
"""

return config.mosaic_context.invoke_function(
"rst_fromfile",
pyspark_to_java_column(raster),
pyspark_to_java_column(sizeInMB)
"rst_fromfile", pyspark_to_java_column(raster), pyspark_to_java_column(sizeInMB)
)


def rst_to_overlapping_tiles(raster: ColumnOrName, width: ColumnOrName, height: ColumnOrName, overlap: ColumnOrName) -> Column:
def rst_to_overlapping_tiles(
raster: ColumnOrName,
width: ColumnOrName,
height: ColumnOrName,
overlap: ColumnOrName,
) -> Column:
"""
Tiles the raster into tiles of the given size.
:param raster:
Expand All @@ -916,7 +918,7 @@ def rst_to_overlapping_tiles(raster: ColumnOrName, width: ColumnOrName, height:
pyspark_to_java_column(raster),
pyspark_to_java_column(width),
pyspark_to_java_column(height),
pyspark_to_java_column(overlap)
pyspark_to_java_column(overlap),
)


Expand Down Expand Up @@ -1048,7 +1050,10 @@ def rst_worldtorastercoord(
"""
return config.mosaic_context.invoke_function(
"rst_worldtorastercoord", pyspark_to_java_column(raster)
"rst_worldtorastercoord",
pyspark_to_java_column(raster),
pyspark_to_java_column(x),
pyspark_to_java_column(y),
)


Expand All @@ -1074,7 +1079,10 @@ def rst_worldtorastercoordx(
"""
return config.mosaic_context.invoke_function(
"rst_worldtorastercoordx", pyspark_to_java_column(raster)
"rst_worldtorastercoordx",
pyspark_to_java_column(raster),
pyspark_to_java_column(x),
pyspark_to_java_column(y),
)


Expand All @@ -1100,5 +1108,8 @@ def rst_worldtorastercoordy(
"""
return config.mosaic_context.invoke_function(
"rst_worldtorastercoordy", pyspark_to_java_column(raster)
"rst_worldtorastercoordy",
pyspark_to_java_column(raster),
pyspark_to_java_column(x),
pyspark_to_java_column(y),
)
1 change: 1 addition & 0 deletions python/mosaic/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
display_handler: DisplayHandler
ipython_hook: InteractiveShell
notebook_utils = None
default_gdal_init_script_path: str = "/dbfs/FileStore/geospatial/mosaic/gdal/"
4 changes: 1 addition & 3 deletions python/mosaic/core/mosaic_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,7 @@ def __init__(self, spark: SparkSession):
IndexSystem = self._indexSystemFactory.getIndexSystem(self._index_system)
GeometryAPIClass = getattr(self._mosaicPackageObject, self._geometry_api)

self._context = self._mosaicContextClass.build(
IndexSystem, GeometryAPIClass()
)
self._context = self._mosaicContextClass.build(IndexSystem, GeometryAPIClass())

def invoke_function(self, name: str, *args: Any) -> MosaicColumn:
func = getattr(self._context.functions(), name)
Expand Down
6 changes: 5 additions & 1 deletion python/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,14 @@ classifiers =
[options]
packages = find:
python_requires = >=3.7.0
setup_requires =
pyspark==3.3.2
ipython>=7.22.0

install_requires =
keplergl==0.3.2
h3==3.7.3
ipython>=7.22.0
gdal[numpy]==3.4.3

[options.package_data]
mosaic =
Expand Down
Binary file not shown.
Loading

0 comments on commit fe15127

Please sign in to comment.