diff --git a/.jvmopts b/.jvmopts new file mode 100644 index 000000000..d5d677f51 --- /dev/null +++ b/.jvmopts @@ -0,0 +1 @@ +-Djts.overlay=ng \ No newline at end of file diff --git a/modules/python/gdal_package/databricks-mosaic-gdal/resources/scripts/mosaic-gdal-3.4.3-filetree-init.sh b/modules/python/gdal_package/databricks-mosaic-gdal/resources/scripts/mosaic-gdal-3.4.3-filetree-init.sh index 8d90034e3..56f8bdc11 100644 --- a/modules/python/gdal_package/databricks-mosaic-gdal/resources/scripts/mosaic-gdal-3.4.3-filetree-init.sh +++ b/modules/python/gdal_package/databricks-mosaic-gdal/resources/scripts/mosaic-gdal-3.4.3-filetree-init.sh @@ -2,12 +2,14 @@ # # File: mosaic-gdal-3.4.3-filetree-init.sh # Author: Michael Johns -# Modified: 2023-03-21 +# Modified: 2023-07-23 +# +# !!! FOR DBR 11.x and 12.x ONLY [Ubuntu 20.04] !!! +# !!! NOT for DBR 13.x [Ubuntu 22.04] !!! +# # 1. script is using custom tarballs for offline / self-contained install of GDAL # 2. This will unpack files directly into the filetree across cluster nodes (vs run apt install) -# 3. Note: Mosaic will be able to auto-detect and handle tarball unpacking -# without this init script on/around APR 2023 (so this is an alt to that capability) - +# # -- install databricks-mosaic-gdal on cluster # - from pypi.org (once available) pip install databricks-mosaic-gdal==3.4.3 diff --git a/src/main/scala/com/databricks/labs/mosaic/core/index/H3IndexSystem.scala b/src/main/scala/com/databricks/labs/mosaic/core/index/H3IndexSystem.scala index d48ce3191..79ba98390 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/index/H3IndexSystem.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/index/H3IndexSystem.scala @@ -3,7 +3,7 @@ package com.databricks.labs.mosaic.core.index import com.databricks.labs.mosaic.core.geometry.MosaicGeometry import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.types.model.Coordinates -import com.databricks.labs.mosaic.core.types.model.GeometryTypeEnum.POLYGON +import com.databricks.labs.mosaic.core.types.model.GeometryTypeEnum.{LINESTRING, POLYGON} import com.uber.h3core.H3Core import com.uber.h3core.util.GeoCoord import org.apache.spark.sql.types.LongType @@ -11,6 +11,7 @@ import org.apache.spark.unsafe.types.UTF8String import org.locationtech.jts.geom.Geometry import scala.collection.JavaConverters._ +import scala.collection.mutable import scala.util.{Success, Try} /** @@ -93,10 +94,9 @@ object H3IndexSystem extends IndexSystem(LongType) with Serializable { override def indexToGeometry(index: Long, geometryAPI: GeometryAPI): MosaicGeometry = { val boundary = h3.h3ToGeoBoundary(index).asScala val extended = boundary ++ List(boundary.head) - geometryAPI.geometry( - extended.map(p => geometryAPI.fromGeoCoord(Coordinates(p.lat, p.lng))), - POLYGON - ) + + if (crossesNorthPole(index) || crossesSouthPole(index)) makePoleGeometry(boundary, crossesNorthPole(index), geometryAPI) + else makeSafeGeometry(extended, geometryAPI) } /** @@ -198,10 +198,9 @@ object H3IndexSystem extends IndexSystem(LongType) with Serializable { override def indexToGeometry(index: String, geometryAPI: GeometryAPI): MosaicGeometry = { val boundary = h3.h3ToGeoBoundary(index).asScala val extended = boundary ++ List(boundary.head) - geometryAPI.geometry( - extended.map(p => geometryAPI.fromGeoCoord(Coordinates(p.lat, p.lng))), - POLYGON - ) + + if (crossesNorthPole(index) || crossesSouthPole(index)) makePoleGeometry(boundary, crossesNorthPole(index), geometryAPI) + else makeSafeGeometry(extended, geometryAPI) } override def format(id: Long): String = { @@ -227,4 +226,172 @@ object H3IndexSystem extends IndexSystem(LongType) with Serializable { override def distance(cellId: Long, cellId2: Long): Long = Try(h3.h3Distance(cellId, cellId2)).map(_.toLong).getOrElse(0) + // Find all cells that cross the north pole. There always is exactly one cell per resolution. + private lazy val northPoleCells = Range.inclusive(0, 15).map(h3.geoToH3(90, 0, _)) + + // Find all cells that cross the south pole. There always is exactly one cell per resolution. + private lazy val southPoleCells = Range.inclusive(0, 15).map(h3.geoToH3(-90, 0, _)) + + private def crossesNorthPole(cell_id: Long): Boolean = northPoleCells contains cell_id + private def crossesSouthPole(cell_id: Long): Boolean = southPoleCells contains cell_id + private def crossesNorthPole(cell_id: String): Boolean = northPoleCells contains h3.stringToH3(cell_id) + private def crossesSouthPole(cell_id: String): Boolean = southPoleCells contains h3.stringToH3(cell_id) + + /** + * Check if H3 cell crosses the anti-meridian. This check is not + * generalizable for arbitrary polygons. + * @param geometry + * H3 Geometry to be checked. + * @return + * boolean True if the geometry crosses the anti-meridian, false + * otherwise. + */ + private def crossesAntiMeridian(geometry: MosaicGeometry): Boolean = { + val minX = geometry.minMaxCoord("X", "MIN") + val maxX = geometry.minMaxCoord("X", "MAX") + minX < 0 && maxX >= 0 && ((maxX - minX > 180) || !geometry.isValid) + } + + /** + * Shift point that falls into the western hemisphere by 360 degrees to the + * east. + * @param lng + * Longitude of the point to be shifted. + * @param lat + * Latitude of the point to be shifted. + * @return + * Shifted point. + */ + private def shiftEast(lng: Double, lat: Double): (Double, Double) = { + if (lng < 0) (lng + 360.0, lat) + else (lng, lat) + } + + /** + * Shift point that lie east of the eastern hemisphere by 360 degrees to + * the west. + * @param lng + * Longitude of the point to be shifted. + * @param lat + * Latitude of the point to be shifted. + * @return + * Shifted point. + */ + private def shiftWest(lng: Double, lat: Double): (Double, Double) = { + if (lng >= 180.0) (lng - 360.0, lat) + else (lng, lat) + } + + /** + * @param coordinates + * A collection of [[GeoCoord]]s to be used to create a + * [[MosaicGeometry]]. + * @param geometryAPI + * An instance of [[GeometryAPI]] to be used to create a + * [[MosaicGeometry]]. + * @return + * A [[MosaicGeometry]] instance. Generates a polygon using the + * cooridaates for the outer ring in the order they are provided. This + * method will not check for validity of the geometry and may return an + * invalid geometry. + */ + private def makeUnsafeGeometry(coordinates: mutable.Buffer[GeoCoord], geometryAPI: GeometryAPI): MosaicGeometry = { + geometryAPI.geometry( + coordinates.map(p => geometryAPI.fromGeoCoord(Coordinates(p.lat, p.lng))), + POLYGON + ) + } + + /** + * A BBox that covers the eastern Hemisphere + * @param geometryAPI + * An instance of [[GeometryAPI]] to be used to create the geometry. + * @return + * A [[MosaicGeometry]] instance. + */ + private def makeEastBBox(geometryAPI: GeometryAPI): MosaicGeometry = + makeUnsafeGeometry( + mutable.Buffer(new GeoCoord(-90, 0), new GeoCoord(90, 0), new GeoCoord(90, 180), new GeoCoord(-90, 180), new GeoCoord(-90, 0)), + geometryAPI: GeometryAPI + ) + + /** + * A BBox that covers the western Hemisphere shifted by 360 degrees to the + * East + * @param geometryAPI + * An instance of [[GeometryAPI]] to be used to create the geometry. + * @return + * A [[MosaicGeometry]] instance. + */ + private def makeShiftedWestBBox(geometryAPI: GeometryAPI): MosaicGeometry = + makeUnsafeGeometry( + mutable + .Buffer(new GeoCoord(-90, 180), new GeoCoord(90, 180), new GeoCoord(90, 360), new GeoCoord(-90, 360), new GeoCoord(-90, 180)), + geometryAPI: GeometryAPI + ) + + /** + * Generate a pole-safe H3 geometry. Pole geometries require two additional + * vertices where the pole touches the anti-meridian. This method is not + * generalizable for arbitrary polygons. + * + * @param coordinates + * A collection of [[GeoCoord]]s to be used to create a + * [[MosaicGeometry]]. + * @param isNorthPole + * Boolean indicating if the pole is the north or south pole. + * @param geometryAPI + * An instance of [[GeometryAPI]] to be used to create a + * [[MosaicGeometry]]. + * @return + * A [[MosaicGeometry]] instance. + */ + private def makePoleGeometry(coordinates: mutable.Buffer[GeoCoord], isNorthPole: Boolean, geometryAPI: GeometryAPI): MosaicGeometry = { + + val lat = if (isNorthPole) 90 else -90 + + val coords = coordinates.map(geoCoord => shiftEast(geoCoord.lng, geoCoord.lat)).sortBy(_._1) + val lineString = geometryAPI.geometry( + coords.map(p => geometryAPI.fromGeoCoord(Coordinates(p._2, p._1))), + LINESTRING + ) + + val westernLine = lineString.intersection(makeEastBBox(geometryAPI)) + val easternLine = lineString.intersection(makeShiftedWestBBox(geometryAPI)).mapXY(shiftWest) + + val vertices = westernLine.getShellPoints.head ++ + Seq(geometryAPI.fromGeoCoord(Coordinates(lat, 180)), geometryAPI.fromGeoCoord(Coordinates(lat, -180))) ++ + easternLine.getShellPoints.head ++ Seq(westernLine.getShellPoints.head.head) + + geometryAPI.geometry(vertices, POLYGON) + + } + + /** + * Generate a pole-safe and antimeridian-safe H3 geometry. This method is + * not generalizable for arbitrary polygons. + * + * @param coordinates + * A collection of [[GeoCoord]]s to be used to create a + * [[MosaicGeometry]]. + * @param geometryAPI + * An instance of [[GeometryAPI]] to be used to create a + * [[MosaicGeometry]]. + * @return + * A [[MosaicGeometry]] instance. + */ + private def makeSafeGeometry(coordinates: mutable.Buffer[GeoCoord], geometryAPI: GeometryAPI): MosaicGeometry = { + + val unsafeGeometry = makeUnsafeGeometry(coordinates, geometryAPI) + + if (crossesAntiMeridian(unsafeGeometry)) { + val shiftedGeometry = unsafeGeometry.mapXY(shiftEast) + val westGeom = shiftedGeometry.intersection(makeEastBBox(geometryAPI: GeometryAPI)) + val eastGeom = shiftedGeometry.intersection(makeShiftedWestBBox(geometryAPI: GeometryAPI)).mapXY(shiftWest) + westGeom.union(eastGeom) + } else { + unsafeGeometry + } + } + } diff --git a/src/test/scala/com/databricks/labs/mosaic/core/index/H3IndexSystemTest.scala b/src/test/scala/com/databricks/labs/mosaic/core/index/H3IndexSystemTest.scala index 6b370a277..4b013f580 100644 --- a/src/test/scala/com/databricks/labs/mosaic/core/index/H3IndexSystemTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/core/index/H3IndexSystemTest.scala @@ -1,15 +1,20 @@ package com.databricks.labs.mosaic.core.index -import com.databricks.labs.mosaic.core.geometry.api.{ESRI, JTS} +import com.databricks.labs.mosaic.core.geometry.api.{ESRI, GeometryAPI, JTS} import com.databricks.labs.mosaic.core.geometry.{MosaicGeometryESRI, MosaicGeometryJTS} +import com.databricks.labs.mosaic.core.index.H3IndexSystem.indexToGeometry import com.databricks.labs.mosaic.core.types.model.GeometryTypeEnum.{LINESTRING, MULTILINESTRING, MULTIPOINT, MULTIPOLYGON, POINT, POLYGON} import com.databricks.labs.mosaic.core.types.model.GeometryTypeEnum +import com.uber.h3core.H3Core import org.apache.spark.sql.types.{BooleanType, LongType, StringType} import org.apache.spark.unsafe.types.UTF8String +import org.scalactic.Tolerance import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers._ -class H3IndexSystemTest extends AnyFunSuite { +import scala.jdk.CollectionConverters.collectionAsScalaIterableConverter + +class H3IndexSystemTest extends AnyFunSuite with Tolerance { test("H3IndexSystem auxiliary methods") { val indexRes = H3IndexSystem.pointToIndex(10, 10, 10) @@ -129,4 +134,32 @@ class H3IndexSystemTest extends AnyFunSuite { H3IndexSystem.coerceChipGeometry(geomsWKTs4.map(MosaicGeometryESRI.fromWKT)).isEmpty shouldBe true } + test("indexToGeometry should return valid and correct geometries") { + val h3: H3Core = H3Core.newInstance() + + val esriGeomAPI: GeometryAPI = GeometryAPI("ESRI") + val jtsGeomAPI: GeometryAPI = GeometryAPI("JTS") + val apis = Seq(esriGeomAPI, jtsGeomAPI) + + val baseCells = h3.getRes0Indexes.asScala.toList + val lvl1Cells = baseCells.flatMap(h3.h3ToChildren(_, 1).asScala) + val testCells = Seq(baseCells, lvl1Cells) + + val baseCellsStr = baseCells.map(h3.h3ToString(_)) + val lvl1CellsStr = lvl1Cells.map(h3.h3ToString(_)) + val testCellsStr = Seq(baseCellsStr, lvl1CellsStr) + + apis.foreach(api => { + testCells.foreach(cells => { + val geoms = cells.map(indexToGeometry(_, api)) + geoms.foreach(geom => geom.isValid shouldBe true) + geoms.foldLeft(0.0)((acc, geom) => acc + geom.getArea) shouldBe ((180.0 * 360.0) +- 0.0001) + }) + testCellsStr.foreach(cells => { + val geoms = cells.map(indexToGeometry(_, api)) + geoms.foreach(geom => geom.isValid shouldBe true) + geoms.foldLeft(0.0)((acc, geom) => acc + geom.getArea) shouldBe ((180.0 * 360.0) +- 0.0001) + }) + }) + } }