diff --git a/.run/overture-serve.run.xml b/.run/overture-serve.run.xml new file mode 100644 index 000000000..374bdab4d --- /dev/null +++ b/.run/overture-serve.run.xml @@ -0,0 +1,11 @@ + + + + \ No newline at end of file diff --git a/.run/overture-workflow.run.xml b/.run/overture-workflow.run.xml new file mode 100644 index 000000000..80cccb25f --- /dev/null +++ b/.run/overture-workflow.run.xml @@ -0,0 +1,11 @@ + + + + \ No newline at end of file diff --git a/baremaps-cli/src/main/resources/log4j.properties b/baremaps-cli/src/main/resources/log4j.properties new file mode 100644 index 000000000..9109d40f8 --- /dev/null +++ b/baremaps-cli/src/main/resources/log4j.properties @@ -0,0 +1,8 @@ +# Root logger option +log4j.rootLogger=INFO, stdout + +# Direct log messages to console +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.Target=System.out +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n \ No newline at end of file diff --git a/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetDataTable.java b/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetDataTable.java index 0d05f680b..f8b3cf81a 100644 --- a/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetDataTable.java +++ b/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetDataTable.java @@ -101,9 +101,24 @@ public DataSchema schema() { this.schema = GeoParquetTypeConversion.asSchema(path.toString(), schema); return this.schema; } catch (URISyntaxException e) { - throw new GeoParquetException("Fail toe get the schema.", e); + throw new GeoParquetException("Failed to get the schema.", e); } } return schema; } + + /** + * Returns the SRID of the given geometry column as declared in the GeoParquet metadata. + * + * @param column the name of the geometry column + * @return the SRID of the column + * @throws GeoParquetException if the SRID cannot be read from the metadata + */ + public int srid(String column) { + try { + return reader().getGeoParquetMetadata().getSrid(column); + } catch (Exception e) { + throw new GeoParquetException("Failed to read the 
SRID from the GeoParquet metadata", e); + } + } } diff --git a/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetTypeConversion.java b/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetTypeConversion.java index 78be70624..435effa5e 100644 --- a/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetTypeConversion.java +++ b/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetTypeConversion.java @@ -70,6 +70,10 @@ public static List asRowValues(GeoParquetGroup group) { Schema schema = group.getSchema(); List fields = schema.fields(); for (int i = 0; i < fields.size(); i++) { + if (group.getValues(i).isEmpty()) { + values.add(null); + continue; + } Field field = fields.get(i); switch (field.type()) { case BINARY -> values.add(group.getBinaryValue(i).getBytes()); @@ -92,6 +96,9 @@ public static Map asNested(GeoParquetGroup group) { Schema schema = group.getSchema(); List fields = schema.fields(); for (int i = 0; i < fields.size(); i++) { + if (group.getValues(i).isEmpty()) { + continue; + } Field field = fields.get(i); nested.put(field.name(), switch (field.type()) { case BINARY -> group.getBinaryValue(i).getBytes(); diff --git a/baremaps-core/src/main/java/org/apache/baremaps/workflow/Task.java b/baremaps-core/src/main/java/org/apache/baremaps/workflow/Task.java index 1baedefe0..0a1f156f0 100644 --- a/baremaps-core/src/main/java/org/apache/baremaps/workflow/Task.java +++ b/baremaps-core/src/main/java/org/apache/baremaps/workflow/Task.java @@ -47,6 +47,7 @@ @Type(value = ImportDaylightFeatures.class, name = "ImportDaylightFeatures"), @Type(value = ImportDaylightTranslations.class, name = "ImportDaylightTranslations"), @Type(value = ImportGeoPackage.class, name = "ImportGeoPackage"), + @Type(value = ImportGeoParquet.class, name = "ImportGeoParquet"), @Type(value = ImportOsmOsc.class, name = "ImportOsmOsc"), @Type(value = ImportOsmPbf.class, name = "ImportOsmPbf"), 
@Type(value = ImportShapefile.class, name = "ImportShapefile"), diff --git a/baremaps-core/src/main/java/org/apache/baremaps/workflow/tasks/ImportGeoParquet.java b/baremaps-core/src/main/java/org/apache/baremaps/workflow/tasks/ImportGeoParquet.java new file mode 100644 index 000000000..9224d56ff --- /dev/null +++ b/baremaps-core/src/main/java/org/apache/baremaps/workflow/tasks/ImportGeoParquet.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.baremaps.workflow.tasks; + +import java.net.URI; +import java.util.StringJoiner; +import org.apache.baremaps.data.storage.DataTableGeometryMapper; +import org.apache.baremaps.data.storage.DataTableMapper; +import org.apache.baremaps.openstreetmap.function.ProjectionTransformer; +import org.apache.baremaps.storage.geoparquet.GeoParquetDataStore; +import org.apache.baremaps.storage.geoparquet.GeoParquetDataTable; +import org.apache.baremaps.storage.postgres.PostgresDataStore; +import org.apache.baremaps.workflow.Task; +import org.apache.baremaps.workflow.WorkflowContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Import a GeoParquet into a database. 
+ */ +public class ImportGeoParquet implements Task { + + private static final Logger logger = LoggerFactory.getLogger(ImportGeoParquet.class); + + private URI uri; + private String tableName; + private Object database; + private Integer databaseSrid; + + /** + * Constructs an {@code ImportGeoParquet}. + */ + public ImportGeoParquet() { + + } + + /** + * Constructs an {@code ImportGeoParquet}. + * + * @param uri the GeoParquet uri + * @param tableName the name of the target table + * @param database the database + * @param databaseSrid the target SRID + */ + public ImportGeoParquet(URI uri, String tableName, Object database, Integer databaseSrid) { + this.uri = uri; + this.tableName = tableName; + this.database = database; + this.databaseSrid = databaseSrid; + } + + /** + * {@inheritDoc} + */ + @Override + public void execute(WorkflowContext context) throws Exception { + var geoParquetDataStore = new GeoParquetDataStore(uri); + var dataSource = context.getDataSource(database); + var postgresDataStore = new PostgresDataStore(dataSource); + for (var name : geoParquetDataStore.list()) { + var geoParquetTable = (GeoParquetDataTable) geoParquetDataStore.get(name); + var projectionTransformer = + new ProjectionTransformer(geoParquetTable.srid("geometry"), databaseSrid); + var rowTransformer = + new DataTableGeometryMapper(geoParquetTable, projectionTransformer); + var transformedDataTable = + new DataTableMapper(geoParquetTable, rowTransformer); + postgresDataStore.add(tableName, transformedDataTable); + } + } + + /** + * {@inheritDoc} + */ + @Override + public String toString() { + return new StringJoiner(", ", ImportGeoParquet.class.getSimpleName() + "[", "]") + .add("uri=" + uri) + .add("tableName=" + tableName) + .add("database=" + database) + .add("databaseSrid=" + databaseSrid) + .toString(); + } +} diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java index 4e760d8b1..1eae14172 100644 --- 
a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java +++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java @@ -36,6 +36,8 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider; +import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.parquet.hadoop.ParquetFileReader; import org.apache.parquet.hadoop.ParquetReader; import org.apache.parquet.schema.MessageType; @@ -91,16 +93,14 @@ public Long size() throws URISyntaxException { return files().values().stream().map(FileInfo::recordCount).reduce(0L, Long::sum); } - private synchronized Map files() throws URISyntaxException { + private synchronized Map files() { try { if (files == null) { files = new HashMap<>(); - Path globPath = new Path(uri.getPath()); - URI rootUri = getRootUri(uri); - FileSystem fileSystem = FileSystem.get(rootUri, configuration); + FileSystem fs = FileSystem.get(uri, configuration); + FileStatus[] fileStatuses = fs.globStatus(new Path(uri)); - // Iterate over all the files in the path - for (FileStatus file : fileSystem.globStatus(globPath)) { + for (FileStatus file : fileStatuses) { files.put(file, buildFileInfo(file)); } @@ -115,7 +115,7 @@ private synchronized Map files() throws URISyntaxException } } } catch (IOException e) { - throw new GeoParquetException("IOException while attempting to list files.", e); + throw new GeoParquetException("IOException while attempting to list files.", e); } return files; } @@ -254,31 +254,11 @@ public int characteristics() { } private static Configuration createConfiguration() { - Configuration configuration = new Configuration(); - configuration.set("fs.s3a.aws.credentials.provider", - "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider"); - configuration.setBoolean("fs.s3a.path.style.access", true); - return configuration; + Configuration conf = 
new Configuration(); + conf.set("fs.s3a.endpoint", "s3.us-west-2.amazonaws.com"); + conf.set("fs.s3a.aws.credentials.provider", AnonymousAWSCredentialsProvider.class.getName()); + conf.set("fs.s3a.impl", S3AFileSystem.class.getName()); + conf.set("fs.s3a.path.style.access", "true"); + return conf; } - - private static URI getRootUri(URI uri) throws URISyntaxException { - // TODO: - // This is a quick and dirty way to get the root uri of the path. - // We take everything before the first wildcard in the path. - // This is not a perfect solution, and we should probably look for a better way to do this. - String path = uri.getPath(); - int index = path.indexOf("*"); - if (index != -1) { - path = path.substring(0, path.lastIndexOf("/", index) + 1); - } - return new URI( - uri.getScheme(), - uri.getUserInfo(), - uri.getHost(), - uri.getPort(), - path, - null, - null); - } - } diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/data/GeoParquetGroup.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/data/GeoParquetGroup.java index 5a3b3709b..46e57bb3d 100644 --- a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/data/GeoParquetGroup.java +++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/data/GeoParquetGroup.java @@ -59,6 +59,8 @@ public interface GeoParquetGroup { */ GeoParquetGroup createGroup(int fieldIndex); + List getValues(int fieldIndex); + Binary getBinaryValue(int fieldIndex); List getBinaryValues(int fieldIndex); diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/data/GeoParquetGroupFactory.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/data/GeoParquetGroupFactory.java index f925df50c..5abe77642 100644 --- a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/data/GeoParquetGroupFactory.java +++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/data/GeoParquetGroupFactory.java @@ -69,7 +69,7 @@ else 
if (!field.isPrimitive()) { GeoParquetGroup.Schema geoParquetSchema = createGeoParquetSchema(groupType, metadata); return (Field) new GeoParquetGroup.GroupField( groupType.getName(), - GeoParquetGroup.Cardinality.REQUIRED, + cardinality, geoParquetSchema); } diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/data/GeoParquetGroupImpl.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/data/GeoParquetGroupImpl.java index 9d959ca42..9da6d93d1 100644 --- a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/data/GeoParquetGroupImpl.java +++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/data/GeoParquetGroupImpl.java @@ -278,7 +278,8 @@ private void appendToString(StringBuilder builder, String indent) { } } - private List getValues(int fieldIndex) { + @Override + public List getValues(int fieldIndex) { return (List) data[fieldIndex]; } diff --git a/examples/overture/indexes.sql b/examples/overture/indexes.sql new file mode 100644 index 000000000..b7b7e5dbd --- /dev/null +++ b/examples/overture/indexes.sql @@ -0,0 +1,15 @@ +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to you under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+CREATE INDEX IF NOT EXISTS overture_admins_administrative_boundary_materialized_view_gist ON overture_admins_administrative_boundary_materialized_view USING GIST(geom); \ No newline at end of file diff --git a/examples/overture/style.json b/examples/overture/style.json new file mode 100644 index 000000000..400a7f523 --- /dev/null +++ b/examples/overture/style.json @@ -0,0 +1,24 @@ +{ + "version" : 8, + "sources" : { + "baremaps" : { + "type" : "vector", + "url" : "http://localhost:9000/tiles.json" + } + }, + "layers" : [ { + "id" : "administrative_boundary", + "type" : "line", + "source" : "baremaps", + "source-layer" : "administrative_boundary", + "layout" : { + "visibility" : "visible" + }, + "paint" : { + "line-color": "black", + "line-width": 1 + } + }], + "center" : [ 0, 0 ], + "zoom" : 2 +} \ No newline at end of file diff --git a/examples/overture/tiles.json b/examples/overture/tiles.json new file mode 100644 index 000000000..2d6da8854 --- /dev/null +++ b/examples/overture/tiles.json @@ -0,0 +1,24 @@ +{ + "tilejson": "2.2.0", + "tiles": [ + "http://localhost:9000/tiles/{z}/{x}/{y}.mvt" + ], + "minzoom": 0.0, + "maxzoom": 14.0, + "center": [0, 0], + "bounds": [-180, -85, 180, 85], + "zoom": 2, + "database": "jdbc:postgresql://localhost:5432/baremaps?&user=baremaps&password=baremaps", + "vector_layers": [ + { + "id": "administrative_boundary", + "queries": [ + { + "minzoom": 0, + "maxzoom": 14, + "sql": "SELECT id, tags, geom FROM overture_admins_administrative_boundary_materialized_view" + } + ] + } + ] +} \ No newline at end of file diff --git a/examples/overture/tileset.json b/examples/overture/tileset.json new file mode 100644 index 000000000..fb1dccc0e --- /dev/null +++ b/examples/overture/tileset.json @@ -0,0 +1,24 @@ +{ + "tilejson": "2.2.0", + "minzoom": 0, + "maxzoom": 14, + "center": [0, 0], + "bounds": [-180, -85, 180, 85], + "zoom": 2, + "tiles": [ + "http://localhost:9000/tiles/{z}/{x}/{y}.mvt" + ], + "database": 
"jdbc:postgresql://localhost:5432/baremaps?&user=baremaps&password=baremaps", + "vector_layers": [ + { + "id": "administrative_boundary", + "queries": [ + { + "minzoom": 0, + "maxzoom": 14, + "sql": "SELECT id, tags, geom FROM overture_admins_administrative_boundary_materialized_view" + } + ] + } + ] +} diff --git a/examples/overture/views.sql b/examples/overture/views.sql new file mode 100644 index 000000000..202585300 --- /dev/null +++ b/examples/overture/views.sql @@ -0,0 +1,31 @@ +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to you under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+CREATE MATERIALIZED VIEW IF NOT EXISTS overture_admins_administrative_boundary_materialized_view AS +SELECT + -- Generate a unique id for each row + row_number() OVER () AS id, + + -- Simplify the geometry (tolerance 78270 / 2^2) and expose it as geom + st_simplifypreservetopology(geometry, 78270 / power(2, 2)) AS geom, + + -- Aggregate other fields into a jsonb tags field + jsonb_build_object( + 'admin_level', admin_level, + 'version', version, + 'sources', sources, + 'population', population, + 'names', names + ) AS tags +FROM overture_admins_administrative_boundary; \ No newline at end of file diff --git a/examples/overture/workflow.json b/examples/overture/workflow.json new file mode 100644 index 000000000..3aaa3c1d9 --- /dev/null +++ b/examples/overture/workflow.json @@ -0,0 +1,27 @@ +{ + "steps": [ + { + "id": "overture", + "needs": [], + "tasks": [ + { + "type": "ImportGeoParquet", + "uri": "s3a://overturemaps-us-west-2/release/2024-05-16-beta.0/theme=admins/type=administrative_boundary/*", + "tableName": "overture_admins_administrative_boundary", + "database": "jdbc:postgresql://localhost:5432/baremaps?&user=baremaps&password=baremaps", + "databaseSrid": 3857 + }, + { + "type": "ExecuteSql", + "file": "views.sql", + "database": "jdbc:postgresql://localhost:5432/baremaps?&user=baremaps&password=baremaps" + }, + { + "type": "ExecuteSql", + "file": "indexes.sql", + "database": "jdbc:postgresql://localhost:5432/baremaps?&user=baremaps&password=baremaps" + } + ] + } + ] +} \ No newline at end of file