From 02f7bf0d5f33bb9a6f9c6ce34451d8aeafea018a Mon Sep 17 00:00:00 2001 From: Robert Lovelock Date: Mon, 12 Feb 2018 17:59:11 +0000 Subject: [PATCH 1/3] Issue #183 - Support Parsing of MultiPolygon in WKT Format --- build.sbt | 2 +- src/main/scala/magellan/WKTParser.scala | 134 +++++++++++++----- src/main/scala/magellan/dsl/package.scala | 4 +- .../sql/catalyst/expressions/functions.scala | 93 +++++++++++- src/test/scala/magellan/WKTParserSuite.scala | 104 +++++++++++++- .../scala/magellan/catalyst/WKTSuite.scala | 47 +++++- 6 files changed, 334 insertions(+), 50 deletions(-) diff --git a/build.sbt b/build.sbt index 862df8d..f0c8b43 100644 --- a/build.sbt +++ b/build.sbt @@ -8,7 +8,7 @@ scalaVersion := "2.11.8" crossScalaVersions := Seq("2.11.8") -sparkVersion := "2.2.0" +sparkVersion := "2.2.1" scalacOptions += "-optimize" diff --git a/src/main/scala/magellan/WKTParser.scala b/src/main/scala/magellan/WKTParser.scala index 7fb8958..8bbb2e1 100644 --- a/src/main/scala/magellan/WKTParser.scala +++ b/src/main/scala/magellan/WKTParser.scala @@ -23,73 +23,126 @@ import scala.collection.mutable.ListBuffer object WKTParser { - def whitespace: P[String] = P(" ") map {_.toString} + def whitespace: P[String] = P(" ") map { + _.toString + } - val posInt: P[String] = P(CharIn('0'to'9').rep(1).!) + val posInt: P[String] = P(CharIn('0' to '9').rep(1).!) - val negInt: P[String] = P("-" ~ posInt) map {"-" + _} + val negInt: P[String] = P("-" ~ posInt) map { + "-" + _ + } val int: P[String] = P(posInt | negInt) - val float: P[String] = P(int ~ P(".") ~ posInt) map { case (x , y) => (x + "." + y)} + val float: P[String] = P(int ~ P(".") ~ posInt) map { case (x, y) => (x + "." 
+ y) } - val number = P(float | int) map {_.toDouble} + val number = P(float | int) map { + _.toDouble + } - def point0: P[String] = P("""POINT""") map {_.toString} + def multi0: P[String] = P("""MULTI""") map { + _.toString + } - def empty0: P[String] = P("""EMPTY""") map {_.toString} + def point0: P[String] = P("""POINT""") map { + _.toString + } - def comma: P[String] = P(",") map {_.toString} + def empty0: P[String] = P("""EMPTY""") map { + _.toString + } - def leftBrace: P[String] = P("(") map {_.toString} + def comma: P[String] = P(",") map { + _.toString + } - def rightBrace: P[String] = P(")") map {_.toString} + def leftBrace: P[String] = P("(") map { + _.toString + } - def coords: P[Point] = P(number ~ whitespace ~ number) map { + def rightBrace: P[String] = P(")") map { + _.toString + } + + def coords: P[Point] = P(number ~ whitespace ~ number) map { case (x, _, y) => Point(x, y) } def ring: P[Array[Point]] = P(leftBrace ~ coords.rep(1, (comma ~ whitespace | comma)) ~ rightBrace) map { - case (_, x ,_) => x.toArray + case (_, x, _) => x.toArray + } + + def pointCoords: P[Point] = P(leftBrace ~ coords ~ rightBrace) map { + case (_, x, _) => x + } + + def point: P[Point] = P(point0 ~ whitespace.? ~ pointCoords) map { + case (_, _, p) => p } - def point: P[Point] = P(point0 ~ whitespace.? ~ leftBrace ~ coords ~ rightBrace) map { - case (_ , _, _, p, _) => p + def multipoint: P[Array[Point]] = P(multi0 ~ point0 ~ whitespace.? ~ leftBrace ~ (pointCoords.rep(1, (comma ~ whitespace | comma)) | coords.rep(1, (comma ~ whitespace | comma))) ~ rightBrace) map { + case (_, _, _, _, p, _) => p.toArray } - def pointEmpty: P[Shape] = P(point0 ~ whitespace ~ empty0) map {_ => NullShape} + def pointEmpty: P[Shape] = P(point0 ~ whitespace ~ empty0) map { _ => NullShape } - def linestring0: P[String] = P("""LINESTRING""") map {_.toString} + def linestring0: P[String] = P("""LINESTRING""") map { + _.toString + } def linestring: P[PolyLine] = P(linestring0 ~ whitespace.? 
~ ring) map { - case (_ , _, x) => PolyLine(Array(0), x) + case (_, _, x) => PolyLine(Array(0), x) } - def polygon0: P[String] = P("""POLYGON""") map {_.toString} + def multilinestring: P[Array[PolyLine]] = P(multi0 ~ linestring0 ~ whitespace.? ~ leftBrace ~ ring.rep(1, (comma ~ whitespace | comma)) ~ rightBrace) map { + case (_, _, _, _, p, _) => p.map(points => PolyLine(Array(0), points)).toArray + } - def polygonWithoutHoles: P[Polygon] = - P(polygon0 ~ whitespace.? ~ P("((") ~ coords.rep(1, (comma ~ whitespace | comma)) ~ P("))")) map { - case (_ , _, x ) => Polygon(Array(0), x.toArray) + def polygon0: P[String] = P("""POLYGON""") map { + _.toString } - def polygonWithHoles: P[Polygon] = - P(polygon0 ~ whitespace.? ~ P("(") ~ ring.rep(1, (comma ~ whitespace | comma)) ~ P(")")) map { - case (_ , _, x) => - val indices = ListBuffer[Int]() - val points = ListBuffer[Point]() - var prev = 0 - var i = 0 - val numRings = x.size - while (i < numRings) { - indices.+= (prev) - prev += x(i).length - points.++=(x(i)) - i += 1 - } - Polygon(indices.toArray, points.toArray) + def polygonWithoutHoles: P[Polygon] = polygon + + + def polygonWithHoles: P[Polygon] = polygon + + def polygon: P[Polygon] = + P(polygon0 ~ whitespace.? ~ polygonCoords) map { + case (_, _, x) => x + } + + def polygonCoords: P[Polygon] = + P(P("(") ~ ring.rep(1, (comma ~ whitespace | comma)) ~ P(")")) map { + case (x) => + val indices = ListBuffer[Int]() + val points = ListBuffer[Point]() + var prev = 0 + var i = 0 + val numRings = x.size + while (i < numRings) { + indices.+=(prev) + prev += x(i).length + points.++=(x(i)) + i += 1 + } + Polygon(indices.toArray, points.toArray) + } + + + def multipolygon: P[Array[Polygon]] = P(multi0 ~ polygon0 ~ whitespace.? 
~ leftBrace ~ polygonCoords.rep(1, (comma ~ whitespace | comma)) ~ rightBrace) map { + case (_, _, _, _, p, _) => p.toArray + } + + def expr: P[Shape] = P((point | pointEmpty | linestring | polygon) ~ End) + + def singleShapeArray: P[Array[Shape]] = P(point | pointEmpty | linestring | polygon) map { + case (p) => Array(p) } - def expr: P[Shape] = P(point | pointEmpty | linestring | polygonWithoutHoles | polygonWithHoles ~ End) + + def exprArray: P[Array[_ <: Shape]] = P((singleShapeArray | multipoint | multilinestring | multipolygon) ~ End) def parseAll(text: String): Shape = { expr.parse(text) match { @@ -98,4 +151,11 @@ object WKTParser { } } + def parseAllArray(text: String): Array[_ <: Shape] = { + exprArray.parse(text) match { + case Success(value, _) => value + case Failure(parser, index, stack) => throw new RuntimeException(stack.toString) + } + } + } diff --git a/src/main/scala/magellan/dsl/package.scala b/src/main/scala/magellan/dsl/package.scala index a3e0039..adb481b 100644 --- a/src/main/scala/magellan/dsl/package.scala +++ b/src/main/scala/magellan/dsl/package.scala @@ -48,11 +48,13 @@ package object dsl { def withinRange(origin: Point, radius: Double): Column = Column(WithinCircleRange(c.expr, origin, radius)) } - + implicit def point(x: Column, y: Column) = Column(PointConverter(x.expr, y.expr)) implicit def wkt(x: Column) = Column(WKT(x.expr)) + implicit def wktArray(x: Column) = Column(WKTArray(x.expr)) + implicit class DslDataset[T](c: Dataset[T]) { def df: Dataset[T] = c diff --git a/src/main/scala/org/apache/spark/sql/catalyst/expressions/functions.scala b/src/main/scala/org/apache/spark/sql/catalyst/expressions/functions.scala index 3621a9c..ecf6754 100644 --- a/src/main/scala/org/apache/spark/sql/catalyst/expressions/functions.scala +++ b/src/main/scala/org/apache/spark/sql/catalyst/expressions/functions.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.expressions import magellan.catalyst.MagellanExpression import
magellan.index.{ZOrderCurve, ZOrderCurveIndexer} -import magellan.{Point, Relate, Shape} +import magellan.{Point, Relate, Shape, WKTParser} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.util.GenericArrayData @@ -156,6 +156,97 @@ case class WKT(override val child: Expression) } +/** + * Extracts shapes from WKT Text. + * + * @param child expression evaluating to the WKT text to parse + */ +case class WKTArray(override val child: Expression) + extends UnaryExpression with MagellanExpression { + + private val pointUDT = new PointUDT() + private val lineUDT = new LineUDT() + private val polyLineUDT = new PolyLineUDT() + private val polygonUDT = new PolygonUDT() + + override protected def nullSafeEval(input: Any): Any = { + + val text = input.asInstanceOf[UTF8String] + val shapes = WKTParser.parseAllArray(text.toString) + new GenericArrayData( + shapes.map(s => { + val (udt: UserDefinedType[Shape], indexVar) = s.getType() match { + case 1 => (pointUDT, 0) + case 2 => (lineUDT, 1) + case 3 => (polyLineUDT, 1) + case 5 => (polygonUDT, 2) + } + + val row = new GenericInternalRow(3) + row.update(indexVar, udt.serialize(s)) + row + }) + ) + } + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val serializersVar = ctx.freshName("serializers") + + ctx.addMutableState(classOf[java.util.HashMap[Integer, UserDefinedType[Shape]]].getName, s"$serializersVar", + s"$serializersVar = new java.util.HashMap<Integer, org.apache.spark.sql.types.UserDefinedType<magellan.Shape>>() ;" + + s"$serializersVar.put(1, new org.apache.spark.sql.types.PointUDT());" + + s"$serializersVar.put(2, new org.apache.spark.sql.types.LineUDT());" + + s"$serializersVar.put(3, new org.apache.spark.sql.types.PolyLineUDT());" + + s"$serializersVar.put(5, new org.apache.spark.sql.types.PolygonUDT());" + + "") + + val childTypeVar = ctx.freshName("childType") + val childShapeVar = ctx.freshName("childShape") + val serializerVar = ctx.freshName("serializer") + + val indexVar =
ctx.freshName("index") + val resultVar = ctx.freshName("result") + + val j = ctx.freshName("j") + val n = ctx.freshName("n") + val values = ctx.freshName("values") + val arrayClass = classOf[GenericArrayData].getName + val internalRowClass = classOf[InternalRow].getName + val genericInternalRowClass = classOf[GenericInternalRow].getName + + nullSafeCodeGen(ctx, ev, (c1) => { + s"" + + s"String text = ${c1}.toString();\n" + + s"magellan.Shape[] $childShapeVar = (magellan.Shape[]) " + + s"magellan.WKTParser.parseAllArray(text);\n" + + s"final int $n = $childShapeVar.length;\n" + + s"final $internalRowClass[] $values = new $internalRowClass[$n];\n" + + s"for (int $j = 0; $j < $n; $j++) {\n" + + s"Integer $childTypeVar = $childShapeVar[$j].getType();\n" + + s"org.apache.spark.sql.types.UserDefinedType $serializerVar =" + + s" (org.apache.spark.sql.types.UserDefinedType) $serializersVar.get($childTypeVar);\n" + + s"Integer $indexVar = -1; \n" + + s"if ($childTypeVar == 1) {$indexVar = 0;}\n" + + s"else if ($childTypeVar == 2 || $childTypeVar == 3) {$indexVar = 1;} \n" + + s"else {$indexVar = 2;} \n" + + s"$genericInternalRowClass $resultVar = new $genericInternalRowClass(3);\n" + + s"$resultVar.update($indexVar, $serializerVar.serialize($childShapeVar[$j])); \n" + + s"$values[$j] = $resultVar;\n" + + s"}\n" + + s"${ev.value} = new $arrayClass($values); \n" + }) + } + + override def dataType: DataType = { + ArrayType( + StructType(List(StructField("point", new PointUDT(), true), + StructField("polyline", new PolyLineUDT(), true), + StructField("polygon", new PolygonUDT(), true)) + )) + } + +} + /** * Geohash Indexes a given shape expression to a specified precision. 
* diff --git a/src/test/scala/magellan/WKTParserSuite.scala b/src/test/scala/magellan/WKTParserSuite.scala index 3a179d0..4a84a06 100644 --- a/src/test/scala/magellan/WKTParserSuite.scala +++ b/src/test/scala/magellan/WKTParserSuite.scala @@ -24,7 +24,7 @@ class WKTParserSuite extends FunSuite { test("parse int") { val parsed = WKTParser.int.parse("-30") assert(parsed.index === 3) - assert(parsed.get.value === "-30") + assert(parsed.get.value === "-30") } test("parse float") { @@ -45,6 +45,43 @@ class WKTParserSuite extends FunSuite { assert(p.getY() === 10.0) } + test("parse multipoint, single value") { + val parsed = WKTParser.multipoint.parse("MULTIPOINT (30 10)") + assert(parsed.index == 18) + val p = parsed.get.value + + assert(p.length === 1) + + assert(p(0).getX() === 30.0) + assert(p(0).getY() === 10.0) + } + + test("parse multipoint, two values") { + val parsed = WKTParser.multipoint.parse("MULTIPOINT(30 10, 40 20)") + assert(parsed.index == 24) + val p = parsed.get.value + + assert(p.length === 2) + + assert(p(0).getX() === 30.0) + assert(p(0).getY() === 10.0) + assert(p(1).getX() === 40.0) + assert(p(1).getY() === 20.0) + } + + test("parse multipoint, two bracketed values") { + val parsed = WKTParser.multipoint.parse("MULTIPOINT((30 10), (40 20))") + assert(parsed.index == 28) + val p = parsed.get.value + + assert(p.length === 2) + + assert(p(0).getX() === 30.0) + assert(p(0).getY() === 10.0) + assert(p(1).getX() === 40.0) + assert(p(1).getY() === 20.0) + } + test("parse linestring") { var parsed = WKTParser.linestring.parse("LINESTRING (30 10, 10 30, 40 40)") var p: PolyLine = parsed.get.value @@ -59,10 +96,37 @@ class WKTParserSuite extends FunSuite { } + test("parse multilinestring, single value") { + val parsed = WKTParser.multilinestring.parse("MULTILINESTRING((30 10, 10 30, 40 40))") + assert(parsed.index == 38) + val p = parsed.get.value + + assert(p.length === 1) + + assert(p(0).length() === 3) + } + + test("parse multilinestring, two values") { 
+ val parsed = WKTParser.multilinestring.parse("MULTILINESTRING((30 10, 10 30, 40 40),(-79.470579 35.442827,-79.469465 35.444889,-79.468907 35.445829,-79.468294 35.446608,-79.46687 35.447893))") + assert(parsed.index == 144) + val p = parsed.get.value + + assert(p.length === 2) + + assert(p(0).length() === 3) + assert(p(1).length() === 5) + } + test("parse polygon without holes") { var parsed = WKTParser.polygonWithoutHoles.parse("POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))") val p: Polygon = parsed.get.value assert(p.length === 5) + assert(p.getNumRings() === 1) + assert(p.getVertex(0) === Point(30, 10)) + assert(p.getVertex(1) === Point(40, 40)) + assert(p.getVertex(2) === Point(20, 40)) + assert(p.getVertex(3) === Point(10, 20)) + assert(p.getVertex(4) === Point(30, 10)) } test("parse polygon with holes") { @@ -84,18 +148,52 @@ class WKTParserSuite extends FunSuite { assert(p.getVertex(5) === Point(20.0, 30.0)) } + test("parse multipolygon") { + val parsed = WKTParser.multipolygon.parse("MULTIPOLYGON(((30 10, 40 40, 20 40, 10 20, 30 10)), ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30)))") + val p = parsed.get.value + + assert(p.length === 2) + + assert(p(0).length === 5) + assert(p(0).getNumRings() === 1) + assert(p(0).getVertex(0) === Point(30, 10)) + assert(p(0).getVertex(1) === Point(40, 40)) + assert(p(0).getVertex(2) === Point(20, 40)) + assert(p(0).getVertex(3) === Point(10, 20)) + assert(p(0).getVertex(4) === Point(30, 10)) + + assert(p(1).getNumRings() == 2) + assert(p(1).getRing(1) == 5) + assert(p(1).getVertex(4) === Point(35.0, 10.0)) + assert(p(1).getVertex(5) === Point(20.0, 30.0)) + } + + test("parse") { val shape = WKTParser.parseAll("LINESTRING (30 10, 10 30, 40 40)") assert(shape.isInstanceOf[PolyLine]) } + test("parse array") { + val shape = WKTParser.parseAllArray("LINESTRING (30 10, 10 30, 40 40)") + assert(shape.length === 1) + assert(shape(0).isInstanceOf[PolyLine]) + } + + test("parse multi array") { + val shape = 
WKTParser.parseAllArray("MULTILINESTRING ((30 10, 10 30, 40 40),(30 10, 10 30, 40 40))") + assert(shape.length === 2) + assert(shape(0).isInstanceOf[PolyLine]) + assert(shape(1).isInstanceOf[PolyLine]) + } + test("perf") { def time[R](block: => R): R = { val t0 = System.nanoTime() - val result = block // call-by-name + val result = block // call-by-name val t1 = System.nanoTime() - println("Elapsed time: " + (t1 - t0)/1E6 + "ms") + println("Elapsed time: " + (t1 - t0) / 1E6 + "ms") result } diff --git a/src/test/scala/magellan/catalyst/WKTSuite.scala b/src/test/scala/magellan/catalyst/WKTSuite.scala index c3b56e8..02c285e 100644 --- a/src/test/scala/magellan/catalyst/WKTSuite.scala +++ b/src/test/scala/magellan/catalyst/WKTSuite.scala @@ -20,21 +20,54 @@ import com.esri.core.geometry.GeometryEngine import magellan.TestingUtils._ import magellan.{Point, Polygon, TestSparkContext} import org.apache.spark.sql.Row +import org.apache.spark.sql.functions._ import org.apache.spark.sql.magellan.dsl.expressions._ import org.scalatest.FunSuite + class WKTSuite extends FunSuite with TestSparkContext { test("convert points to WKT") { val sqlCtx = this.sqlContext import sqlCtx.implicits._ val df = sc.parallelize(Seq( - (1, "POINT (3 15)"), - (2, "POINT (25 5)"), - (3, "POINT (30 10)") - )).toDF("id", "text") + (1, "POINT (3 15)"), + (2, "POINT (25 5)"), + (3, "POINT (30 10)") + )).toDF("id", "text") + + val points = df.withColumn("shape", wkt($"text")).select($"shape" ("point")) + assert(points.count() === 3) + val point = points.first()(0).asInstanceOf[Point] + assert(point.getX() === 3.0) + assert(point.getY() === 15.0) + } + + test("convert points to WKT Array") { + val sqlCtx = this.sqlContext + import sqlCtx.implicits._ + val df = sc.parallelize(Seq( + (1, "POINT (3 15)"), + (2, "POINT (25 5)"), + (3, "POINT (30 10)") + )).toDF("id", "text") - val points = df.withColumn("shape", wkt($"text")).select($"shape"("point")) + val points = df.withColumn("shape", 
explode(wktArray($"text"))).select($"shape" ("point")) + assert(points.count() === 3) + val point = points.first()(0).asInstanceOf[Point] + assert(point.getX() === 3.0) + assert(point.getY() === 15.0) + } + + test("convert multipoints to WKT Array") { + val sqlCtx = this.sqlContext + import sqlCtx.implicits._ + val df = sc.parallelize(Seq( + (1, "MULTIPOINT ((3 15),(25 5))"), + (2, "MULTIPOINT (30 10)") + )).toDF("id", "text") + + val points = df.withColumn("shape", explode(wktArray($"text"))).select($"shape" ("point")) assert(points.count() === 3) val point = points.first()(0).asInstanceOf[Point] assert(point.getX() === 3.0) @@ -69,7 +102,7 @@ class WKTSuite extends FunSuite with TestSparkContext { (polygonId, value, text) }.toDF("polygonId", "value", "text") .withColumn("polygon", wkt($"text")("polygon")) - + val actual = points.join(polygons) .where($"point" within $"polygon") .select($"pointId", $"polygonId") @@ -89,7 +122,7 @@ class WKTSuite extends FunSuite with TestSparkContext { val esriResults = polygons.flatMap { case Row(polygonId: String, value: String, text: String, polygon: Polygon) => val esriPolygon = toESRI(polygon) - esriPoints.map {case (pointId, esriPoint) => + esriPoints.map { case (pointId, esriPoint) => val within = GeometryEngine.contains(esriPolygon, esriPoint, null) (within, pointId, polygonId) }.filter(_._1) From 1a4e758c9fbe31be1a5fd3c880a65e53c82a1b05 Mon Sep 17 00:00:00 2001 From: Robert Lovelock Date: Wed, 14 Feb 2018 09:30:05 +0000 Subject: [PATCH 2/3] Issue #183 - Support Parsing of MultiPolygon in WKT Format (increase test coverage) --- .../scala/magellan/catalyst/WKTSuite.scala | 54 ++++++++++++++++++- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/src/test/scala/magellan/catalyst/WKTSuite.scala b/src/test/scala/magellan/catalyst/WKTSuite.scala index 02c285e..67914a5 100644 --- a/src/test/scala/magellan/catalyst/WKTSuite.scala +++ b/src/test/scala/magellan/catalyst/WKTSuite.scala @@ -18,12 +18,14 @@ package 
magellan.catalyst import com.esri.core.geometry.GeometryEngine import magellan.TestingUtils._ -import magellan.{Point, Polygon, TestSparkContext} +import magellan._ import org.apache.spark.sql.Row import org.apache.spark.sql.functions._ import org.apache.spark.sql.magellan.dsl.expressions._ import org.scalatest.FunSuite +import scala.collection.mutable + class WKTSuite extends FunSuite with TestSparkContext { @@ -59,7 +61,7 @@ class WKTSuite extends FunSuite with TestSparkContext { assert(point.getY() === 15.0) } - test("convert multipoints to WKT Array") { + test("convert multipoints to exploded WKT Array") { val sqlCtx = this.sqlContext import sqlCtx.implicits._ val df = sc.parallelize(Seq( @@ -74,6 +76,54 @@ class WKTSuite extends FunSuite with TestSparkContext { assert(point.getY() === 15.0) } + test("convert multipolygon to exploded WKT Array") { + val sqlCtx = this.sqlContext + import sqlCtx.implicits._ + val df = sc.parallelize(Seq( + (1, "MULTIPOLYGON(((30 10, 40 40, 20 40, 10 20, 30 10)), ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30)))") + )).toDF("id", "text") + + val polygons = df.withColumn("shape", explode(wktArray($"text"))).select($"shape" ("polygon")).collect() + assert(polygons.length === 2) + val polygon1 = polygons(0)(0).asInstanceOf[Polygon] + val polygon2 = polygons(1)(0).asInstanceOf[Polygon] + assert(polygon1.getNumRings() === 1) + assert(polygon2.getNumRings() === 2) + } + + test("convert multilinestring to exploded WKT Array") { + val sqlCtx = this.sqlContext + import sqlCtx.implicits._ + val df = sc.parallelize(Seq( + (1, "MULTILINESTRING((30 10, 10 30, 40 40),(-79.470579 35.442827,-79.469465 35.444889,-79.468907 35.445829,-79.468294 35.446608,-79.46687 35.447893))") + )).toDF("id", "text") + + val lines = df.withColumn("shape", explode(wktArray($"text"))).select($"shape" ("polyline")).collect() + assert(lines.length === 2) + val line1 = lines(0)(0).asInstanceOf[PolyLine] + val line2 = 
lines(1)(0).asInstanceOf[PolyLine] + assert(line1.getVertex(0) === Point(30 ,10)) + assert(line1.getVertex(1) === Point(10, 30)) + assert(line1.getVertex(2) === Point(40 ,40)) + assert(line2.getVertex(0) === Point(-79.470579, 35.442827)) + } + + test("convert multipoints to WKT Array") { + val sqlCtx = this.sqlContext + import sqlCtx.implicits._ + val df = sc.parallelize(Seq( + (1, "MULTIPOINT ((3 15),(25 5))"), + (2, "MULTIPOINT (30 10)") + )).toDF("id", "text") + + val points = df.withColumn("shape", wktArray($"text")).select($"shape" ("point")) + assert(points.count() === 2) + val point = points.first()(0).asInstanceOf[mutable.WrappedArray[Point]] + assert(point.length === 2) + assert(point(0).getX() === 3.0) + assert(point(0).getY() === 15.0) + } + test("ISSUE-108") { val sqlCtx = this.sqlContext From 0f7a1b2c0aaac34fd54170528f85de1eb5a7dcab Mon Sep 17 00:00:00 2001 From: Robert Lovelock Date: Wed, 14 Feb 2018 12:32:45 +0000 Subject: [PATCH 3/3] Issue #183 - Support Parsing of MultiPolygon in WKT Format (add failure tests) --- src/test/scala/magellan/WKTParserSuite.scala | 32 +++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/test/scala/magellan/WKTParserSuite.scala b/src/test/scala/magellan/WKTParserSuite.scala index 4a84a06..b6e073e 100644 --- a/src/test/scala/magellan/WKTParserSuite.scala +++ b/src/test/scala/magellan/WKTParserSuite.scala @@ -45,6 +45,14 @@ class WKTParserSuite extends FunSuite { assert(p.getY() === 10.0) } + test("parse empty point") { + val parsed = WKTParser.pointEmpty.parse("POINT EMPTY") + assert(parsed.index == 11) + val p = parsed.get.value + assert(p === NullShape) + + } + test("parse multipoint, single value") { val parsed = WKTParser.multipoint.parse("MULTIPOINT (30 10)") assert(parsed.index == 18) @@ -168,7 +176,6 @@ class WKTParserSuite extends FunSuite { assert(p(1).getVertex(5) === Point(20.0, 30.0)) } - test("parse") { val shape = WKTParser.parseAll("LINESTRING (30 10, 10 30, 40 40)") 
assert(shape.isInstanceOf[PolyLine]) @@ -187,6 +194,29 @@ class WKTParserSuite extends FunSuite { assert(shape(1).isInstanceOf[PolyLine]) } + test("parse failure") { + val failed = try { + WKTParser.parseAll("LINESTRING (30 10, (10 30), 40 40)") + false + } + catch { + case _: RuntimeException => true // Expected: the parser rejected the malformed input + } + assert(failed) + } + + + test("parse array failure") { + val failed = try { + WKTParser.parseAllArray("MULTILINESTRING ((30 10, 10 30, 40 40),30 10, 10 30, 40 40)") + false + } + catch { + case _: RuntimeException => true // Expected: the parser rejected the malformed input + } + assert(failed) + } + test("perf") { def time[R](block: => R): R = {