Skip to content

Commit

Permalink
PARQUET-2471: Add support for geometry logical type
Browse files Browse the repository at this point in the history
  • Loading branch information
wgtmac committed Jun 19, 2024
1 parent 9d04cf3 commit 4f21474
Show file tree
Hide file tree
Showing 13 changed files with 879 additions and 1 deletion.
5 changes: 5 additions & 0 deletions parquet-column/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@
<artifactId>slf4j-api</artifactId>
<version>${slf4j.version}</version>
</dependency>
<dependency>
<groupId>org.locationtech.jts</groupId>
<artifactId>jts-core</artifactId>
<version>${jts.version}</version>
</dependency>

<dependency>
<groupId>com.carrotsearch</groupId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.column.statistics;

import org.apache.parquet.Preconditions;
import org.apache.parquet.column.statistics.geometry.BoundingBox;
import org.apache.parquet.column.statistics.geometry.Covering;
import org.apache.parquet.column.statistics.geometry.GeometryTypes;
import org.apache.parquet.io.api.Binary;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.io.ParseException;
import org.locationtech.jts.io.WKBReader;

public class GeometryStatistics {

private final BoundingBox boundingBox;
private final Covering covering;
private final GeometryTypes geometryTypes;
private final WKBReader reader = new WKBReader();

public GeometryStatistics(BoundingBox boundingBox, Covering covering, GeometryTypes geometryTypes) {
this.boundingBox = boundingBox;
this.covering = covering;
this.geometryTypes = geometryTypes;
}

public BoundingBox getBoundingBox() {
return boundingBox;
}

public Covering getCovering() {
return covering;
}

public GeometryTypes getGeometryTypes() {
return geometryTypes;
}

public void update(Binary value) {
if (value == null) {
return;
}
try {
Geometry geom = reader.read(value.getBytes());
update(geom);
} catch (ParseException e) {
abort();
}
}

public void update(Geometry geom) {
boundingBox.update(geom);
covering.update(geom);
geometryTypes.update(geom);
}

public void merge(GeometryStatistics other) {
Preconditions.checkArgument(other != null, "Cannot merge with null GeometryStatistics");
boundingBox.merge(other.boundingBox);
covering.merge(other.covering);
geometryTypes.merge(other.geometryTypes);
}

public void reset() {
boundingBox.reset();
covering.reset();
geometryTypes.reset();
}

public void abort() {
boundingBox.abort();
covering.abort();
geometryTypes.abort();
}

@Override
public String toString() {
return "GeometryStatistics{" +
"boundingBox=" + boundingBox +
", covering=" + covering +
", geometryTypes=" + geometryTypes +
'}';
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.column.statistics.geometry;

import org.apache.parquet.Preconditions;
import org.locationtech.jts.geom.Coordinate;
import org.locationtech.jts.geom.Geometry;

public class BoundingBox {

private double xMin = Double.MAX_VALUE;
private double xMax = Double.MIN_VALUE;
private double yMin = Double.MAX_VALUE;
private double yMax = Double.MIN_VALUE;
private double zMin = Double.MAX_VALUE;
private double zMax = Double.MIN_VALUE;
private double mMin = Double.MAX_VALUE;
private double mMax = Double.MIN_VALUE;

public BoundingBox(double xMin, double xMax, double yMin, double yMax, double zMin, double zMax, double mMin,
double mMax) {
this.xMin = xMin;
this.xMax = xMax;
this.yMin = yMin;
this.yMax = yMax;
this.zMin = zMin;
this.zMax = zMax;
this.mMin = mMin;
this.mMax = mMax;
}

public double getXMin() {
return xMin;
}

public double getXMax() {
return xMax;
}

public double getYMin() {
return yMin;
}

public double getYMax() {
return yMax;
}

public double getZMin() {
return zMin;
}

public double getZMax() {
return zMax;
}

public double getMMin() {
return mMin;
}

public double getMMax() {
return mMax;
}

public void update(Geometry geom) {
if (geom == null || geom.isEmpty()) {
return;
}
Coordinate[] coordinates = geom.getCoordinates();
for (Coordinate coordinate : coordinates) {
update(coordinate.getX(), coordinate.getY(), coordinate.getZ(), coordinate.getM());
}
}

public void update(double x, double y, double z, double m) {
xMin = Math.min(xMin, x);
xMax = Math.max(xMax, x);
yMin = Math.min(yMin, y);
yMax = Math.max(yMax, y);
zMin = Math.min(zMin, z);
zMax = Math.max(zMax, z);
mMin = Math.min(mMin, m);
mMax = Math.max(mMax, m);
}

public void merge(BoundingBox other) {
Preconditions.checkArgument(other != null, "Cannot merge with null bounding box");
xMin = Math.min(xMin, other.xMin);
xMax = Math.max(xMax, other.xMax);
yMin = Math.min(yMin, other.yMin);
yMax = Math.max(yMax, other.yMax);
zMin = Math.min(zMin, other.zMin);
zMax = Math.max(zMax, other.zMax);
mMin = Math.min(mMin, other.mMin);
mMax = Math.max(mMax, other.mMax);
}

public void reset() {
xMin = Double.MAX_VALUE;
xMax = Double.MIN_VALUE;
yMin = Double.MAX_VALUE;
yMax = Double.MIN_VALUE;
zMin = Double.MAX_VALUE;
zMax = Double.MIN_VALUE;
mMin = Double.MAX_VALUE;
mMax = Double.MIN_VALUE;
}

public void abort() {
xMin = Double.NaN;
xMax = Double.NaN;
yMin = Double.NaN;
yMax = Double.NaN;
zMin = Double.NaN;
zMax = Double.NaN;
mMin = Double.NaN;
mMax = Double.NaN;
}

@Override
public String toString() {
return "BoundingBox{" +
"xMin=" + xMin +
", xMax=" + xMax +
", yMin=" + yMin +
", yMax=" + yMax +
", zMin=" + zMin +
", zMax=" + zMax +
", mMin=" + mMin +
", mMax=" + mMax +
'}';
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.column.statistics.geometry;

import java.nio.ByteBuffer;
import org.apache.parquet.Preconditions;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.io.ParseException;
import org.locationtech.jts.io.WKBReader;
import org.locationtech.jts.io.WKBWriter;

public class Covering {

protected final LogicalTypeAnnotation.Edges edges;
protected ByteBuffer geometry;

public Covering(ByteBuffer geometry, LogicalTypeAnnotation.Edges edges) {
Preconditions.checkArgument(geometry != null, "Geometry cannot be null");
Preconditions.checkArgument(edges != null, "Edges cannot be null");
this.geometry = geometry;
this.edges = edges;
}

public ByteBuffer getGeometry() {
return geometry;
}

public LogicalTypeAnnotation.Edges getEdges() {
return edges;
}

public void update(Geometry geom) {
geometry = ByteBuffer.wrap(new WKBWriter().write(geom));
}

public void merge(Covering other) {
throw new UnsupportedOperationException("Merge is not supported for " + this.getClass().getSimpleName());
}

public void reset() {
throw new UnsupportedOperationException("Reset is not supported for " + this.getClass().getSimpleName());
}

public void abort() {
throw new UnsupportedOperationException("Abort is not supported for " + this.getClass().getSimpleName());
}

@Override
public String toString() {
String geomText;
try {
geomText = new WKBReader().read(geometry.array()).toText();
} catch (ParseException e) {
geomText = "Invalid Geometry";
}

return "Covering{" +
"geometry=" + geomText +
", edges=" + edges +
'}';
}
}
Loading

0 comments on commit 4f21474

Please sign in to comment.