Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spark 3 & Hadoop 3 support, Scala 2.13 cross compilation #3294

Merged
merged 22 commits into from
Apr 13, 2021
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
7 changes: 7 additions & 0 deletions .circleci/build-and-test-accumulo.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# CI helper: compiles and runs the Accumulo test suites against the Scala
# version selected by the $SCALA_VERSION environment variable (set by the
# CircleCI job that invokes this script).

# Prepare test fixtures — presumably unpacks zipped raster test data; see
# .circleci/unzip-rasters.sh for details.
.circleci/unzip-rasters.sh

# -Dsbt.supershell=false disables sbt's interactive progress display, which
# garbles CI logs; "++$SCALA_VERSION" switches the cross-build Scala version.
# The `|| { exit 1; }` makes the script fail the CI step if either suite fails.
./sbt -Dsbt.supershell=false "++$SCALA_VERSION" \
"project accumulo" test \
"project accumulo-spark" test || { exit 1; }
5 changes: 2 additions & 3 deletions .circleci/build-and-test-set-1.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@
"project vector" test \
"project vectortile" test \
"project util" test \
"project raster" test \
"project accumulo" test \
"project accumulo-spark" test \
"project raster" test && \
./sbt -Dsbt.supershell=false "++$SCALA_VERSION" \
"project mdoc" mdoc && \
./sbt -Dsbt.supershell=false "++$SCALA_VERSION" \
"project gdal" test || { exit 1; }
38 changes: 32 additions & 6 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ executors:
environment:
_JAVA_OPTIONS: "-Xms64m -Xmx1536m"
# https://github.com/pomadchin/hbase-docker
- image: daunnc/hbase:2.1.4
- image: daunnc/hbase:2.2.4
environment:
_JAVA_OPTIONS: "-Xms1m -Xmx512m"
HBASE_DOCKER_HOSTNAME: localhost
Expand All @@ -74,6 +74,7 @@ jobs:
export SCALA_VERSION=<< parameters.scala-version >>
export RUN_SET=<< parameters.test-set >>
.circleci/build-and-test.sh
no_output_timeout: 30m
- save_cache: *save_build_cache

cassandra:
Expand Down Expand Up @@ -121,6 +122,21 @@ jobs:
.circleci/build-and-test-hbase.sh
- save_cache: *save_build_cache

accumulo:
parameters:
scala-version:
type: string
executor: executor-gdal
steps:
- checkout
- restore_cache: *restore_build_cache
- run:
name: Test Accumulo
command: |
export SCALA_VERSION=<< parameters.scala-version >>
.circleci/build-and-test-accumulo.sh
- save_cache: *save_build_cache

scaladocs:
parameters:
scala-version:
Expand Down Expand Up @@ -159,7 +175,7 @@ workflows:
- common:
matrix:
parameters:
scala-version: ["2.12.13"]
scala-version: ["2.12.13", "2.13.5"]
test-set: ["1", "2"]
filters:
branches:
Expand All @@ -170,7 +186,7 @@ workflows:
- cassandra:
matrix:
parameters:
scala-version: ["2.12.13"]
scala-version: ["2.12.13", "2.13.5"]
filters:
branches:
only: /.*/
Expand All @@ -180,7 +196,7 @@ workflows:
- s3:
matrix:
parameters:
scala-version: ["2.12.13"]
scala-version: ["2.12.13", "2.13.5"]
filters:
branches:
only: /.*/
Expand All @@ -190,7 +206,17 @@ workflows:
- hbase:
matrix:
parameters:
scala-version: ["2.12.13"]
scala-version: ["2.12.13", "2.13.5"]
filters:
branches:
only: /.*/
tags:
only: /^v.*/

- accumulo:
matrix:
parameters:
scala-version: ["2.12.13", "2.13.5"]
filters:
branches:
only: /.*/
Expand All @@ -213,7 +239,7 @@ workflows:
- hbase
matrix:
parameters:
scala-version: ["2.12.13"]
scala-version: ["2.12.13", "2.13.5"]
filters:
branches:
only: master
Expand Down
29 changes: 29 additions & 0 deletions .locationtech/deploy-213.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env bash
# Publishes every GeoTrellis module to the LocationTech repository, built
# against Scala 2.13 (the `-213` sbt launcher flag). Modules are published
# one sbt invocation at a time, in dependency-friendly order; the script
# aborts on the first failed publish.

set -e
set -x

# Modules to publish, in order. Each entry is passed to sbt as "project <name>".
modules=(
  macros
  vector
  proj4
  raster
  spark
  spark-pipeline
  s3
  s3-spark
  accumulo
  accumulo-spark
  hbase
  hbase-spark
  cassandra
  cassandra-spark
  geotools
  shapefile
  layer
  store
  util
  vectortile
  raster-testkit
  vector-testkit
  spark-testkit
  gdal
)

# `set -e` aborts the loop (and the script) on the first failing publish,
# matching the behavior of the original `&&` chain.
for module in "${modules[@]}"; do
  ./sbt -213 "project ${module}" publish -no-colors -J-Drelease=locationtech
done
2 changes: 1 addition & 1 deletion .locationtech/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
set -e
set -x

./.locationtech/deploy-212.sh
./.locationtech/deploy-212.sh && ./.locationtech/deploy-213.sh
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added
- Add method SpatialIndex#pointsInExtentAsIterable [#3349](https://github.com/locationtech/geotrellis/issues/3349)
- Spark 3 & Hadoop 3 Support [#3218](https://github.com/locationtech/geotrellis/issues/3218)
- Scala 2.13 cross compilation [#2893](https://github.com/locationtech/geotrellis/issues/2893)

### Changed
- Deprecate method SpatialIndex#traversePointsInExtent [#3349](https://github.com/locationtech/geotrellis/issues/3349)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ class GenericRasterBench {
// that are injected on-demand by the framework.
params.getBenchmark.split('.').last match {
case "genericRasterMap" =>
genericRaster = new GRaster(init(len)(Random.nextInt))
genericRaster = new GRaster(init(len)(Random.nextInt()))
case "rasterMap" =>
tile = ArrayTile(init(len)(Random.nextInt), size, size)
tile = ArrayTile(init(len)(Random.nextInt()), size, size)
case _ => throw new MatchError("Have a new benchmark without initialization?")
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class RasterizingReprojectBench {
@Setup(Level.Trial)
def setup(params: BenchmarkParams): Unit = {
val len = size * size
raster = ProjectedRaster(ArrayTile(init(len)(Random.nextInt), size, size), srcExtent, srcCrs)
raster = ProjectedRaster(ArrayTile(init(len)(Random.nextInt()), size, size), srcExtent, srcCrs)
destRE = ProjectedRasterExtent(raster.projectedExtent.reproject(destCrs), destCrs, size, size)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,12 @@ class PolygonalSummaryBench {
@Setup(Level.Trial)
def setup(): Unit = {
val geotiff = SinglebandGeoTiff(s"${geotiffPath}/singleband.tif")
raster = Raster(geotiff.tile.toArrayTile, geotiff.extent)
geom = geotiff.extent.toPolygon
raster = Raster(geotiff.tile.toArrayTile(), geotiff.extent)
geom = geotiff.extent.toPolygon()

val multibandGeoTiff = MultibandGeoTiff(s"${geotiffPath}/multiband.tif")
multibandRaster = Raster(multibandGeoTiff.tile.toArrayTile, multibandGeoTiff.extent)
multibandGeom = multibandGeoTiff.extent.toPolygon
multibandRaster = Raster(multibandGeoTiff.tile.toArrayTile(), multibandGeoTiff.extent)
multibandGeom = multibandGeoTiff.extent.toPolygon()
}

// Bench the MeanVisitor because it uses a class instead of an AnyVal
Expand Down
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import sbt.Keys._

ThisBuild / scalaVersion := "2.12.13"
ThisBuild / organization := "org.locationtech.geotrellis"
ThisBuild / crossScalaVersions := List("2.12.13")
ThisBuild / crossScalaVersions := List("2.12.13", "2.13.5")

lazy val root = Project("geotrellis", file("."))
.aggregate(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,16 @@ trait CassandraTestEnvironment extends TestEnvironment { self: Suite =>
conf.set("spark.kryo.registrator", classOf[KryoRegistrator].getName)
.set("spark.kryo.registrationRequired", "false")

override def beforeAll = {
super.beforeAll
override def beforeAll() = {
super.beforeAll()
try {
val session = BaseCassandraInstance(Seq("127.0.0.1")).getSession
session.closeAsync()
session.getCluster.closeAsync()
} catch {
case e: Exception =>
println("\u001b[0;33mA script for setting up the Cassandra environment necessary to run these tests can be found at scripts/cassandraTestDB.sh - requires a working docker setup\u001b[m")
cancel
cancel()
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ object BaseCassandraInstance {
val builder =
Cluster
.builder()
// Spark 3 brings dropwizard 4.1.1
// https://docs.datastax.com/en/developer/java-driver/3.5/manual/metrics/#metrics-4-compatibility
// TODO: Upd cassandra driver up to 4.9
.withoutJMXReporting()
pomadchin marked this conversation as resolved.
Show resolved Hide resolved
.withLoadBalancingPolicy(getLoadBalancingPolicy)
.addContactPoints(hosts: _*)
.withPort(cassandraConfig.port)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ object COGSparkExamples {
val layer: TileLayerRDD[SpatialKey] = reader.read[SpatialKey, Tile](LayerId("example_cog_layer", zoom))

// Let's stitch the layer into tile
val raster: Raster[Tile] = layer.stitch
val raster: Raster[Tile] = layer.stitch()

// Create a tiff
val tiff = GeoTiff(raster.reproject(layer.metadata.crs, WebMercator), WebMercator)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,11 @@ object ClipToGridExamples {
}
}
}
.reduceByKey { case (Feature(poly1, (accum1, count1)), Feature(poly2, (accum2, count2))) =>
Feature(poly1, (accum1 + accum2, count1 + count2))
.reduceByKey { (l, r) =>
(l, r) match {
case (Feature(poly1, (accum1, count1)), Feature(poly2, (accum2, count2))) =>
Feature(poly1, (accum1 + accum2, count1 + count2))
}
}
.map { case (_, feature) =>
// We no longer need the UUID; also compute the mean
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ object EuclideanDistanceExamples {

val sc: SparkContext = ???

val geomWKT = scala.io.Source.fromFile("geotrellis/spark/src/test/resources/wkt/schools.wkt").getLines.mkString
val geomWKT = scala.io.Source.fromFile("geotrellis/spark/src/test/resources/wkt/schools.wkt").getLines().mkString
val LayoutLevel(z, ld) = ZoomedLayoutScheme(WebMercator).levelForZoom(12)
val maptrans = ld.mapTransform

Expand All @@ -57,7 +57,7 @@ object EuclideanDistanceExamples {

val tileRDD: RDD[(SpatialKey, Tile)] = inputRDD.euclideanDistance(ld)

val maxDistance = tileRDD.map(_._2.findMinMaxDouble).collect.foldLeft(-1.0/0.0){ (max, minMax) => scala.math.max(max, minMax._2) }
val maxDistance = tileRDD.map(_._2.findMinMaxDouble).collect().foldLeft(-1.0/0.0){ (max, minMax) => scala.math.max(max, minMax._2) }
val cm = ColorMap(Range.BigDecimal.inclusive(0.0, maxDistance, maxDistance/512).map(_.toDouble).toArray, ColorRamps.BlueToRed)
tileRDD.stitch().renderPng(cm).write("schools.png")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ object LandsatMultibandRDDExample {
// Round the center coordinates in case there's any floating point errors
val center =
(
BigDecimal(x).setScale(5, RoundingMode.HALF_UP).doubleValue(),
BigDecimal(y).setScale(5, RoundingMode.HALF_UP).doubleValue()
BigDecimal(x).setScale(5, RoundingMode.HALF_UP).doubleValue,
BigDecimal(y).setScale(5, RoundingMode.HALF_UP).doubleValue
)

// Get the scene ID from the path
Expand Down Expand Up @@ -142,7 +142,7 @@ object LandsatMultibandRDDExample {
MultibandTile(red, green, blue)
}
}
.stitch
.stitch()

GeoTiff(raster, metadata.crs).write("/tmp/landsat-test.tif")
} finally {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ object PipelineSparkExamples {
// the result type of evaluation in this case would ben Stream[(Int, TileLayerRDD[SpatialKey])]
node.eval.foreach { case (zoom, rdd) =>
println(s"ZOOM: ${zoom}")
println(s"COUNT: ${rdd.count}")
println(s"COUNT: ${rdd.count()}")
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ object SparkExamples {
.filter() // Use the filter/query API to
.where(Intersects(areaOfInterest)) // filter so that only tiles intersecting
.result // the Extent are contained in the result
.stitch // Stitch together this RDD into a Raster[Tile]
.stitch() // Stitch together this RDD into a Raster[Tile]

GeoTiff(raster, metadata.crs).write("/some/path/result.tif")
}
Expand Down Expand Up @@ -164,7 +164,7 @@ object SparkExamples {
.map { case (key, tile) => (key.getComponent[SpatialKey], tile) }
.reduceByKey(_.localMax(_))
}
.stitch
.stitch()

GeoTiff(raster, queryResult.metadata.crs).write("/path/to/result.tif")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ object VectorExamples {
// GeoJson methods implicitly added to vector types,
// including any Traversable[Feature[G, D]]

val geojson: String = features.toGeoJson
val geojson: String = features.toGeoJson()

println(geojson)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class ShardingKeyIndexSpec extends AnyFunSpec with Matchers {
))

val zspaceTime: KeyIndex[SpaceTimeKey] =
ZCurveKeyIndexMethod.byDay.createIndex(KeyBounds(
ZCurveKeyIndexMethod.byDay().createIndex(KeyBounds(
SpaceTimeKey(0, 0, 1),
SpaceTimeKey(9, 9, 10)
))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ import geotrellis.spark._
import geotrellis.spark.testkit._
import geotrellis.spark.testkit.TestEnvironment

import org.joda.time._

import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

Expand Down Expand Up @@ -64,7 +62,7 @@ class SparkExamplesTests extends AnyFunSuite with Matchers with TestEnvironment
band.focalMax(neighborhood, Some(bufferedTile.targetArea))
}
}
.collect
.collect()
.toMap

// Check some values
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class GDALRasterSourceRDDSpec extends AnyFunSpec with TestEnvironment with Befor
val expectedKeys =
layout
.mapTransform
.keysForGeometry(reprojectedSource.extent.toPolygon)
.keysForGeometry(reprojectedSource.extent.toPolygon())
.toSeq
.sortBy { key => (key.col, key.row) }

Expand Down Expand Up @@ -151,8 +151,8 @@ class GDALRasterSourceRDDSpec extends AnyFunSpec with TestEnvironment with Befor
// geotrellis.raster.io.geotiff.GeoTiff(reprojectedExpectedRDD.stitch, targetCRS).write("/tmp/expected.tif")
// geotrellis.raster.io.geotiff.GeoTiff(reprojectedSourceRDD.stitch, targetCRS).write("/tmp/actual.tif")

val actual = reprojectedSourceRDD.stitch.tile.band(0)
val expected = reprojectedExpectedRDD.stitch.tile.band(0)
val actual = reprojectedSourceRDD.stitch().tile.band(0)
val expected = reprojectedExpectedRDD.stitch().tile.band(0)

var (diff, pixels, mismatched) = (0d, 0d, 0)
cfor(0)(_ < math.min(actual.cols, expected.cols), _ + 1) { c =>
Expand Down Expand Up @@ -268,7 +268,7 @@ class GDALRasterSourceRDDSpec extends AnyFunSpec with TestEnvironment with Befor
dirtyCalls(reprojRS(i).source)
})
}
}.parSequence.unsafeRunSync
}.parSequence.unsafeRunSync()

println(java.lang.Thread.activeCount())

Expand Down
Loading