Spark 3 & Hadoop 3 support, Scala 2.13 cross compilation (#3294)
* Update Spark and Spire, drop Scala 2.11

* Work around Cassandra JMX issue and comment out a failing test

* Add Scala 2.13 cross-compilation

* Adjust Raster and Spark tests

* Fix Raster tests

* Fix GDAL tests

* Fix HBase tests

* Increase CircleCI no output timeout

* Supply empty argument lists explicitly; omitting them is deprecated since Scala 2.13 (see the sketch below)

* Update to Hadoop 3.2.1 and HBase 2.4.2

* Move Accumulo into a separate executor

* Update SBT to 1.5.0

* Bump Spire to 0.17.0

* fromRGBA(double) => fromRGBAPct

* Bump proj4j version
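
A minimal sketch of the empty-argument-list change (standard library only; not taken from the diff): Scala 2.13 deprecates "auto-application", i.e. calling a method declared with an empty parameter list without writing the parentheses, and Scala 3 makes it an error for non-Java methods.

import scala.util.Random

object EmptyParensExample {
  def main(args: Array[String]): Unit = {
    // `nextInt` is declared as `nextInt(): Int`, so under Scala 2.13 it
    // should be applied with explicit parentheses; the bare
    // `Random.nextInt` form emits a deprecation warning.
    val n = Random.nextInt()
    println(n)
  }
}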
pomadchin authored Apr 13, 2021
1 parent 017dfbf commit b071b33
Showing 389 changed files with 2,122 additions and 1,979 deletions.
7 changes: 7 additions & 0 deletions .circleci/build-and-test-accumulo.sh
@@ -0,0 +1,7 @@
#!/bin/bash

.circleci/unzip-rasters.sh

./sbt -Dsbt.supershell=false "++$SCALA_VERSION" \
"project accumulo" test \
"project accumulo-spark" test || { exit 1; }
5 changes: 2 additions & 3 deletions .circleci/build-and-test-set-1.sh
@@ -10,9 +10,8 @@
"project vector" test \
"project vectortile" test \
"project util" test \
"project raster" test \
"project accumulo" test \
"project accumulo-spark" test \
"project raster" test && \
./sbt -Dsbt.supershell=false "++$SCALA_VERSION" \
"project mdoc" mdoc && \
./sbt -Dsbt.supershell=false "++$SCALA_VERSION" \
"project gdal" test || { exit 1; }
38 changes: 32 additions & 6 deletions .circleci/config.yml
@@ -52,7 +52,7 @@ executors:
environment:
_JAVA_OPTIONS: "-Xms64m -Xmx1536m"
# https://github.com/pomadchin/hbase-docker
- image: daunnc/hbase:2.1.4
- image: daunnc/hbase:2.2.4
environment:
_JAVA_OPTIONS: "-Xms1m -Xmx512m"
HBASE_DOCKER_HOSTNAME: localhost
@@ -74,6 +74,7 @@ jobs:
export SCALA_VERSION=<< parameters.scala-version >>
export RUN_SET=<< parameters.test-set >>
.circleci/build-and-test.sh
no_output_timeout: 30m
- save_cache: *save_build_cache

cassandra:
@@ -121,6 +122,21 @@ jobs:
.circleci/build-and-test-hbase.sh
- save_cache: *save_build_cache

accumulo:
parameters:
scala-version:
type: string
executor: executor-gdal
steps:
- checkout
- restore_cache: *restore_build_cache
- run:
name: Test Accumulo
command: |
export SCALA_VERSION=<< parameters.scala-version >>
.circleci/build-and-test-accumulo.sh
- save_cache: *save_build_cache

scaladocs:
parameters:
scala-version:
@@ -159,7 +175,7 @@ workflows:
- common:
matrix:
parameters:
scala-version: ["2.12.13"]
scala-version: ["2.12.13", "2.13.5"]
test-set: ["1", "2"]
filters:
branches:
@@ -170,7 +186,7 @@
- cassandra:
matrix:
parameters:
scala-version: ["2.12.13"]
scala-version: ["2.12.13", "2.13.5"]
filters:
branches:
only: /.*/
@@ -180,7 +196,7 @@
- s3:
matrix:
parameters:
scala-version: ["2.12.13"]
scala-version: ["2.12.13", "2.13.5"]
filters:
branches:
only: /.*/
@@ -190,7 +206,7 @@
- hbase:
matrix:
parameters:
scala-version: ["2.12.13"]
scala-version: ["2.12.13", "2.13.5"]
filters:
branches:
only: /.*/
tags:
only: /^v.*/

- accumulo:
matrix:
parameters:
scala-version: ["2.12.13", "2.13.5"]
filters:
branches:
only: /.*/
@@ -213,7 +239,7 @@
- hbase
matrix:
parameters:
scala-version: ["2.12.13"]
scala-version: ["2.12.13", "2.13.5"]
filters:
branches:
only: master
29 changes: 29 additions & 0 deletions .locationtech/deploy-213.sh
@@ -0,0 +1,29 @@
#!/usr/bin/env bash

set -e
set -x

./sbt -213 "project macros" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project vector" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project proj4" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project raster" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project spark" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project spark-pipeline" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project s3" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project s3-spark" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project accumulo" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project accumulo-spark" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project hbase" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project hbase-spark" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project cassandra" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project cassandra-spark" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project geotools" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project shapefile" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project layer" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project store" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project util" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project vectortile" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project raster-testkit" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project vector-testkit" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project spark-testkit" publish -no-colors -J-Drelease=locationtech \
&& ./sbt -213 "project gdal" publish -no-colors -J-Drelease=locationtech
2 changes: 1 addition & 1 deletion .locationtech/deploy.sh
@@ -3,4 +3,4 @@
set -e
set -x

./.locationtech/deploy-212.sh
./.locationtech/deploy-212.sh && ./.locationtech/deploy-213.sh
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added
- Add method SpatialIndex#pointsInExtentAsIterable [#3349](https://github.com/locationtech/geotrellis/issues/3349)
- Spark 3 & Hadoop 3 Support [#3218](https://github.com/locationtech/geotrellis/issues/3218)
- Scala 2.13 cross compilation [#2893](https://github.com/locationtech/geotrellis/issues/2893)

### Changed
- Deprecate method SpatialIndex#traversePointsInExtent [#3349](https://github.com/locationtech/geotrellis/issues/3349)
@@ -43,9 +43,9 @@ class GenericRasterBench {
// that are injected on-demand by the framework.
params.getBenchmark.split('.').last match {
case "genericRasterMap" =>
genericRaster = new GRaster(init(len)(Random.nextInt))
genericRaster = new GRaster(init(len)(Random.nextInt()))
case "rasterMap" =>
tile = ArrayTile(init(len)(Random.nextInt), size, size)
tile = ArrayTile(init(len)(Random.nextInt()), size, size)
case _ => throw new MatchError("Have a new benchmark without initialization?")
}
}
@@ -44,7 +44,7 @@ class RasterizingReprojectBench {
@Setup(Level.Trial)
def setup(params: BenchmarkParams): Unit = {
val len = size * size
raster = ProjectedRaster(ArrayTile(init(len)(Random.nextInt), size, size), srcExtent, srcCrs)
raster = ProjectedRaster(ArrayTile(init(len)(Random.nextInt()), size, size), srcExtent, srcCrs)
destRE = ProjectedRasterExtent(raster.projectedExtent.reproject(destCrs), destCrs, size, size)
}

@@ -37,12 +37,12 @@ class PolygonalSummaryBench {
@Setup(Level.Trial)
def setup(): Unit = {
val geotiff = SinglebandGeoTiff(s"${geotiffPath}/singleband.tif")
raster = Raster(geotiff.tile.toArrayTile, geotiff.extent)
geom = geotiff.extent.toPolygon
raster = Raster(geotiff.tile.toArrayTile(), geotiff.extent)
geom = geotiff.extent.toPolygon()

val multibandGeoTiff = MultibandGeoTiff(s"${geotiffPath}/multiband.tif")
multibandRaster = Raster(multibandGeoTiff.tile.toArrayTile, multibandGeoTiff.extent)
multibandGeom = multibandGeoTiff.extent.toPolygon
multibandRaster = Raster(multibandGeoTiff.tile.toArrayTile(), multibandGeoTiff.extent)
multibandGeom = multibandGeoTiff.extent.toPolygon()
}

// Bench the MeanVisitor because it uses a class instead of an AnyVal
2 changes: 1 addition & 1 deletion build.sbt
@@ -2,7 +2,7 @@ import sbt.Keys._

ThisBuild / scalaVersion := "2.12.13"
ThisBuild / organization := "org.locationtech.geotrellis"
ThisBuild / crossScalaVersions := List("2.12.13")
ThisBuild / crossScalaVersions := List("2.12.13", "2.13.5")

lazy val root = Project("geotrellis", file("."))
.aggregate(
@@ -29,16 +29,16 @@ trait CassandraTestEnvironment extends TestEnvironment { self: Suite =>
conf.set("spark.kryo.registrator", classOf[KryoRegistrator].getName)
.set("spark.kryo.registrationRequired", "false")

override def beforeAll = {
super.beforeAll
override def beforeAll() = {
super.beforeAll()
try {
val session = BaseCassandraInstance(Seq("127.0.0.1")).getSession
session.closeAsync()
session.getCluster.closeAsync()
} catch {
case e: Exception =>
println("\u001b[0;33mA script for setting up the Cassandra environment necessary to run these tests can be found at scripts/cassandraTestDB.sh - requires a working docker setup\u001b[m")
cancel
cancel()
}
}

@@ -115,6 +115,10 @@ object BaseCassandraInstance {
val builder =
Cluster
.builder()
// Spark 3 brings dropwizard 4.1.1
// https://docs.datastax.com/en/developer/java-driver/3.5/manual/metrics/#metrics-4-compatibility
// TODO: update the Cassandra driver to 4.9
.withoutJMXReporting()
.withLoadBalancingPolicy(getLoadBalancingPolicy)
.addContactPoints(hosts: _*)
.withPort(cassandraConfig.port)
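
For context, the workaround above in isolation: a minimal sketch against the 3.x DataStax driver (contact point and lifecycle are illustrative only). Spark 3 pulls in dropwizard-metrics 4.x, while the 3.x driver's JMX reporting expects metrics 3.x, so JMX reporting is disabled until the driver itself is upgraded.

import com.datastax.driver.core.Cluster

object WithoutJmxExample {
  def main(args: Array[String]): Unit = {
    // Build a Cluster with JMX metrics reporting turned off; everything
    // else about the connection stays the same.
    val cluster = Cluster
      .builder()
      .withoutJMXReporting()
      .addContactPoint("127.0.0.1")
      .build()
    cluster.close()
  }
}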
@@ -99,7 +99,7 @@ object COGSparkExamples {
val layer: TileLayerRDD[SpatialKey] = reader.read[SpatialKey, Tile](LayerId("example_cog_layer", zoom))

// Let's stitch the layer into tile
val raster: Raster[Tile] = layer.stitch
val raster: Raster[Tile] = layer.stitch()

// Create a tiff
val tiff = GeoTiff(raster.reproject(layer.metadata.crs, WebMercator), WebMercator)
@@ -105,8 +105,11 @@ object ClipToGridExamples {
}
}
}
.reduceByKey { case (Feature(poly1, (accum1, count1)), Feature(poly2, (accum2, count2))) =>
Feature(poly1, (accum1 + accum2, count1 + count2))
.reduceByKey { (l, r) =>
(l, r) match {
case (Feature(poly1, (accum1, count1)), Feature(poly2, (accum2, count2))) =>
Feature(poly1, (accum1 + accum2, count1 + count2))
}
}
.map { case (_, feature) =>
// We no longer need the UUID; also compute the mean
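
The change above rewrites a pattern-matching anonymous function into one that takes both arguments explicitly and matches on the pair inside the body, presumably to satisfy Scala 2.13's stricter checking of `{ case ... }` lambdas. A self-contained sketch of the same shape, with a hypothetical `Acc` standing in for the `Feature` payload:

object ReduceByPatternExample {
  case class Acc(sum: Double, count: Long)

  def main(args: Array[String]): Unit = {
    val accs = List(Acc(2.0, 1L), Acc(3.0, 2L))
    // State the function's arity up front, then destructure the pair in a
    // match, instead of relying on the compiler to adapt a `{ case ... }`
    // literal into a two-argument function.
    val total = accs.reduce { (l, r) =>
      (l, r) match {
        case (Acc(s1, c1), Acc(s2, c2)) => Acc(s1 + s2, c1 + c2)
      }
    }
    println(total) // Acc(5.0,3)
  }
}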
@@ -37,7 +37,7 @@ object EuclideanDistanceExamples {

val sc: SparkContext = ???

val geomWKT = scala.io.Source.fromFile("geotrellis/spark/src/test/resources/wkt/schools.wkt").getLines.mkString
val geomWKT = scala.io.Source.fromFile("geotrellis/spark/src/test/resources/wkt/schools.wkt").getLines().mkString
val LayoutLevel(z, ld) = ZoomedLayoutScheme(WebMercator).levelForZoom(12)
val maptrans = ld.mapTransform

Expand All @@ -57,7 +57,7 @@ object EuclideanDistanceExamples {

val tileRDD: RDD[(SpatialKey, Tile)] = inputRDD.euclideanDistance(ld)

val maxDistance = tileRDD.map(_._2.findMinMaxDouble).collect.foldLeft(-1.0/0.0){ (max, minMax) => scala.math.max(max, minMax._2) }
val maxDistance = tileRDD.map(_._2.findMinMaxDouble).collect().foldLeft(-1.0/0.0){ (max, minMax) => scala.math.max(max, minMax._2) }
val cm = ColorMap(Range.BigDecimal.inclusive(0.0, maxDistance, maxDistance/512).map(_.toDouble).toArray, ColorRamps.BlueToRed)
tileRDD.stitch().renderPng(cm).write("schools.png")
}
@@ -93,8 +93,8 @@ object LandsatMultibandRDDExample {
// Round the center coordinates in case there's any floating point errors
val center =
(
BigDecimal(x).setScale(5, RoundingMode.HALF_UP).doubleValue(),
BigDecimal(y).setScale(5, RoundingMode.HALF_UP).doubleValue()
BigDecimal(x).setScale(5, RoundingMode.HALF_UP).doubleValue,
BigDecimal(y).setScale(5, RoundingMode.HALF_UP).doubleValue
)

// Get the scene ID from the path
@@ -142,7 +142,7 @@ object LandsatMultibandRDDExample {
MultibandTile(red, green, blue)
}
}
.stitch
.stitch()

GeoTiff(raster, metadata.crs).write("/tmp/landsat-test.tif")
} finally {
@@ -84,7 +84,7 @@ object PipelineSparkExamples {
// the result type of evaluation in this case would be Stream[(Int, TileLayerRDD[SpatialKey])]
node.eval.foreach { case (zoom, rdd) =>
println(s"ZOOM: ${zoom}")
println(s"COUNT: ${rdd.count}")
println(s"COUNT: ${rdd.count()}")
}
}
}
@@ -97,7 +97,7 @@ object SparkExamples {
.filter() // Use the filter/query API to
.where(Intersects(areaOfInterest)) // filter so that only tiles intersecting
.result // the Extent are contained in the result
.stitch // Stitch together this RDD into a Raster[Tile]
.stitch() // Stitch together this RDD into a Raster[Tile]

GeoTiff(raster, metadata.crs).write("/some/path/result.tif")
}
@@ -164,7 +164,7 @@ object SparkExamples {
.map { case (key, tile) => (key.getComponent[SpatialKey], tile) }
.reduceByKey(_.localMax(_))
}
.stitch
.stitch()

GeoTiff(raster, queryResult.metadata.crs).write("/path/to/result.tif")
}
@@ -29,7 +29,7 @@ object VectorExamples {
// GeoJson methods implicitly added to vector types,
// including any Traversable[Feature[G, D]]

val geojson: String = features.toGeoJson
val geojson: String = features.toGeoJson()

println(geojson)
}
@@ -37,7 +37,7 @@ class ShardingKeyIndexSpec extends AnyFunSpec with Matchers {
))

val zspaceTime: KeyIndex[SpaceTimeKey] =
ZCurveKeyIndexMethod.byDay.createIndex(KeyBounds(
ZCurveKeyIndexMethod.byDay().createIndex(KeyBounds(
SpaceTimeKey(0, 0, 1),
SpaceTimeKey(9, 9, 10)
))
@@ -23,8 +23,6 @@ import geotrellis.spark._
import geotrellis.spark.testkit._
import geotrellis.spark.testkit.TestEnvironment

import org.joda.time._

import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

@@ -64,7 +62,7 @@ class SparkExamplesTests extends AnyFunSuite with Matchers with TestEnvironment
band.focalMax(neighborhood, Some(bufferedTile.targetArea))
}
}
.collect
.collect()
.toMap

// Check some values
@@ -58,7 +58,7 @@ class GDALRasterSourceRDDSpec extends AnyFunSpec with TestEnvironment with Befor
val expectedKeys =
layout
.mapTransform
.keysForGeometry(reprojectedSource.extent.toPolygon)
.keysForGeometry(reprojectedSource.extent.toPolygon())
.toSeq
.sortBy { key => (key.col, key.row) }

@@ -151,8 +151,8 @@ class GDALRasterSourceRDDSpec extends AnyFunSpec with TestEnvironment with Befor
// geotrellis.raster.io.geotiff.GeoTiff(reprojectedExpectedRDD.stitch, targetCRS).write("/tmp/expected.tif")
// geotrellis.raster.io.geotiff.GeoTiff(reprojectedSourceRDD.stitch, targetCRS).write("/tmp/actual.tif")

val actual = reprojectedSourceRDD.stitch.tile.band(0)
val expected = reprojectedExpectedRDD.stitch.tile.band(0)
val actual = reprojectedSourceRDD.stitch().tile.band(0)
val expected = reprojectedExpectedRDD.stitch().tile.band(0)

var (diff, pixels, mismatched) = (0d, 0d, 0)
cfor(0)(_ < math.min(actual.cols, expected.cols), _ + 1) { c =>
@@ -268,7 +268,7 @@ class GDALRasterSourceRDDSpec extends AnyFunSpec with TestEnvironment with Befor
dirtyCalls(reprojRS(i).source)
})
}
}.parSequence.unsafeRunSync
}.parSequence.unsafeRunSync()

println(java.lang.Thread.activeCount())
