diff --git a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/OvertureMapsBenchmark.java b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/OvertureMapsBenchmark.java index a230e41da..59da4d153 100644 --- a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/OvertureMapsBenchmark.java +++ b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/OvertureMapsBenchmark.java @@ -83,14 +83,16 @@ public void setup() throws IOException { @SuppressWarnings({"squid:S1481", "squid:S2201"}) @Benchmark public void read() { - GeoParquetReader reader = new GeoParquetReader(directory.toUri()); + var path = new org.apache.hadoop.fs.Path(directory.toUri()); + GeoParquetReader reader = new GeoParquetReader(path); reader.read().count(); } @SuppressWarnings({"squid:S1481", "squid:S2201"}) @Benchmark public void readParallel() { - GeoParquetReader reader = new GeoParquetReader(directory.toUri()); + var path = new org.apache.hadoop.fs.Path(directory.toUri()); + GeoParquetReader reader = new GeoParquetReader(path); reader.readParallel().count(); } } diff --git a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/SmallFileBenchmark.java b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/SmallFileBenchmark.java index 11f468f00..1ae2a7476 100644 --- a/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/SmallFileBenchmark.java +++ b/baremaps-benchmarking/src/main/java/org/apache/baremaps/benchmarking/geoparquet/SmallFileBenchmark.java @@ -61,16 +61,17 @@ public void setup() throws IOException { @SuppressWarnings({"squid:S1481", "squid:S2201"}) @Benchmark public void read() { - GeoParquetReader reader = - new GeoParquetReader(Path.of("baremaps-benchmarking/data/small/*.parquet").toUri()); + var path = new org.apache.hadoop.fs.Path("baremaps-benchmarking/data/small/*.parquet"); + GeoParquetReader reader = new GeoParquetReader(path); reader.read().count(); } @SuppressWarnings({"squid:S1481", "squid:S2201"}) @Benchmark public void readParallel() { + var path = new org.apache.hadoop.fs.Path("baremaps-benchmarking/data/small/*.parquet"); GeoParquetReader reader = - new GeoParquetReader(Path.of("baremaps-benchmarking/data/small/*.parquet").toUri()); + new GeoParquetReader(path); reader.readParallel().count(); } } diff --git a/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetDataTable.java b/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetDataTable.java index 90c78d1a7..b3867cfa6 100644 --- a/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetDataTable.java +++ b/baremaps-core/src/main/java/org/apache/baremaps/storage/geoparquet/GeoParquetDataTable.java @@ -24,6 +24,7 @@ import org.apache.baremaps.data.storage.*; import org.apache.baremaps.geoparquet.GeoParquetException; import org.apache.baremaps.geoparquet.GeoParquetReader; +import org.apache.hadoop.fs.Path; public class GeoParquetDataTable implements DataTable { @@ -35,7 +36,7 @@ public class GeoParquetDataTable implements DataTable { public GeoParquetDataTable(URI path) { this.path = path; - this.reader = new GeoParquetReader(path); + this.reader = new GeoParquetReader(new Path(path)); } @Override diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroup.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroup.java index 38c6dd746..e524a229f 100644 --- a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroup.java +++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetGroup.java @@ -105,7 +105,7 @@ public int getFieldRepetitionCount(int fieldIndex) { } } - private Object getValue(int fieldIndex, int index) { + Object getValue(int fieldIndex, int index) { Object value = data[fieldIndex]; if (value instanceof Listlist) { return list.get(index); diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java index 8b4697aa1..e965d250a 100644 --- a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java +++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetReader.java @@ -20,7 +20,6 @@ import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import java.io.IOException; -import java.net.URI; import java.util.*; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; @@ -53,31 +52,31 @@ public class GeoParquetReader { /** * Constructs a new {@code GeoParquetReader}. * - * @param uri the URI to read from + * @param path the path to read from */ - public GeoParquetReader(URI uri) { - this(uri, null, new Configuration()); + public GeoParquetReader(Path path) { + this(path, null, new Configuration()); } /** * Constructs a new {@code GeoParquetReader}. * - * @param uri the URI to read from + * @param path the path to read from * @param envelope the envelope to filter records */ - public GeoParquetReader(URI uri, Envelope envelope) { - this(uri, envelope, new Configuration()); + public GeoParquetReader(Path path, Envelope envelope) { + this(path, envelope, new Configuration()); } /** * Constructs a new {@code GeoParquetReader}. * - * @param uri the URI to read from + * @param path the path to read from * @param configuration the configuration */ - public GeoParquetReader(URI uri, Envelope envelope, Configuration configuration) { + public GeoParquetReader(Path path, Envelope envelope, Configuration configuration) { this.configuration = configuration; - this.files = initializeFiles(uri, configuration); + this.files = initializeFiles(path, configuration); this.envelope = envelope; } @@ -168,11 +167,10 @@ private FileInfo getFileInfo(FileStatus fileStatus) { } } - private static List initializeFiles(URI uri, Configuration configuration) { + private static List initializeFiles(Path path, Configuration configuration) { try { - Path globPath = new Path(uri.getPath()); - FileSystem fileSystem = FileSystem.get(uri, configuration); - FileStatus[] fileStatuses = fileSystem.globStatus(globPath); + FileSystem fileSystem = FileSystem.get(path.toUri(), configuration); + FileStatus[] fileStatuses = fileSystem.globStatus(path); if (fileStatuses == null) { throw new GeoParquetException("No files found at the specified URI."); } diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetWriteSupport.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetWriteSupport.java new file mode 100644 index 000000000..b6be38165 --- /dev/null +++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetWriteSupport.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.baremaps.geoparquet; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import java.util.HashMap; +import java.util.Map; +import org.apache.hadoop.conf.Configuration; +import org.apache.parquet.hadoop.api.WriteSupport; +import org.apache.parquet.io.api.Binary; +import org.apache.parquet.io.api.RecordConsumer; +import org.apache.parquet.schema.*; + +/** + * WriteSupport implementation for writing GeoParquetGroup instances to Parquet. + */ +public class GeoParquetWriteSupport extends WriteSupport { + + private Configuration configuration; + private final MessageType schema; + private final GeoParquetMetadata metadata; + private RecordConsumer recordConsumer; + private final ObjectMapper objectMapper = new ObjectMapper(); + + /** + * Constructs a new GeoParquetWriteSupport. + * + * @param schema the Parquet schema + * @param metadata the GeoParquet metadata + */ + public GeoParquetWriteSupport(MessageType schema, GeoParquetMetadata metadata) { + this.schema = schema; + this.metadata = metadata; + } + + @Override + public WriteContext init(Configuration configuration) { + Map extraMetadata = new HashMap<>(); + String geoMetadataJson = serializeMetadata(metadata); + extraMetadata.put("geo", geoMetadataJson); + return new WriteContext(schema, extraMetadata); + } + + @Override + public void prepareForWrite(RecordConsumer recordConsumer) { + this.recordConsumer = recordConsumer; + } + + @Override + public void write(GeoParquetGroup group) { + recordConsumer.startMessage(); + writeGroup(group, schema, true); + recordConsumer.endMessage(); + } + + private void writeGroup(GeoParquetGroup group, GroupType groupType, boolean isRoot) { + if (!isRoot) { + recordConsumer.startGroup(); + } + for (int i = 0; i < groupType.getFieldCount(); i++) { + Type fieldType = groupType.getType(i); + String fieldName = fieldType.getName(); + int repetitionCount = group.getFieldRepetitionCount(i); + if (repetitionCount == 0) { + continue; // Skip if no values are present + } + for (int j = 0; j < repetitionCount; j++) { + recordConsumer.startField(fieldName, i); + if (fieldType.isPrimitive()) { + Object value = group.getValue(i, j); + writePrimitive(value, fieldType.asPrimitiveType()); + } else { + GeoParquetGroup childGroup = group.getGroup(i, j); + writeGroup(childGroup, fieldType.asGroupType(), false); + } + recordConsumer.endField(fieldName, i); + } + } + if (!isRoot) { + recordConsumer.endGroup(); + } + } + + private void writePrimitive(Object value, PrimitiveType primitiveType) { + if (value == null) { + // The Parquet format does not support writing null values directly. + // If the field is optional and the value is null, we simply do not write it. + return; + } + switch (primitiveType.getPrimitiveTypeName()) { + case INT32: + recordConsumer.addInteger((Integer) value); + break; + case INT64: + recordConsumer.addLong((Long) value); + break; + case FLOAT: + recordConsumer.addFloat((Float) value); + break; + case DOUBLE: + recordConsumer.addDouble((Double) value); + break; + case BOOLEAN: + recordConsumer.addBoolean((Boolean) value); + break; + case BINARY, FIXED_LEN_BYTE_ARRAY: + recordConsumer.addBinary((Binary) value); + break; + default: + throw new GeoParquetException( + "Unsupported type: " + primitiveType.getPrimitiveTypeName()); + } + } + + private String serializeMetadata(GeoParquetMetadata metadata) { + try { + return objectMapper.writeValueAsString(metadata); + } catch (JsonProcessingException e) { + throw new GeoParquetException("Failed to serialize GeoParquet metadata", e); + } + } +} diff --git a/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetWriter.java b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetWriter.java new file mode 100644 index 000000000..e2e4292a8 --- /dev/null +++ b/baremaps-geoparquet/src/main/java/org/apache/baremaps/geoparquet/GeoParquetWriter.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.baremaps.geoparquet; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.parquet.conf.ParquetConfiguration; +import org.apache.parquet.hadoop.ParquetWriter; +import org.apache.parquet.hadoop.api.WriteSupport; +import org.apache.parquet.schema.MessageType; + +/** + * A writer for GeoParquet files that writes GeoParquetGroup instances to a Parquet file. + */ +public class GeoParquetWriter { + + private GeoParquetWriter() { + // Prevent instantiation + } + + public static Builder builder(Path file) { + return new Builder(file); + } + + public static class Builder + extends ParquetWriter.Builder { + + private MessageType type = null; + + private GeoParquetMetadata metadata = null; + + private Builder(Path file) { + super(file); + } + + /** + * Replace the message type with the specified one. + * + * @param type the message type + * @return the builder + */ + public GeoParquetWriter.Builder withType(MessageType type) { + this.type = type; + return this; + } + + /** + * Replace the metadata with the specified one. + * + * @param metadata the metadata + * @return the builder + */ + public GeoParquetWriter.Builder withGeoParquetMetadata(GeoParquetMetadata metadata) { + this.metadata = metadata; + return this; + } + + /** + * {@inheritDoc} + */ + @Override + protected WriteSupport getWriteSupport(Configuration conf) { + // We don't need access to the hadoop configuration for now + return getWriteSupport((ParquetConfiguration) null); + } + + /** + * {@inheritDoc} + */ + @Override + protected WriteSupport getWriteSupport(ParquetConfiguration conf) { + return new GeoParquetWriteSupport(type, metadata); + } + + /** + * {@inheritDoc} + */ + @Override + protected GeoParquetWriter.Builder self() { + return this; + } + } +} diff --git a/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetReaderTest.java b/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetReaderTest.java index 849720a3e..b7b73a9d5 100644 --- a/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetReaderTest.java +++ b/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetReaderTest.java @@ -19,8 +19,8 @@ import static org.junit.jupiter.api.Assertions.*; -import java.net.URI; import org.apache.baremaps.testing.TestFiles; +import org.apache.hadoop.fs.Path; import org.junit.jupiter.api.Test; import org.locationtech.jts.geom.Envelope; @@ -28,14 +28,14 @@ class GeoParquetReaderTest { @Test void read() { - URI geoParquet = TestFiles.GEOPARQUET.toUri(); + Path geoParquet = new Path(TestFiles.GEOPARQUET.toUri()); GeoParquetReader geoParquetReader = new GeoParquetReader(geoParquet); assertEquals(5, geoParquetReader.read().count()); } @Test void readFiltered() { - URI geoParquet = TestFiles.GEOPARQUET.toUri(); + Path geoParquet = new Path(TestFiles.GEOPARQUET.toUri()); GeoParquetReader geoParquetReader = new GeoParquetReader(geoParquet, new Envelope(-172, -65, 18, 72)); assertEquals(1, geoParquetReader.read().count()); @@ -43,21 +43,21 @@ void readFiltered() { @Test void size() { - URI geoParquet = TestFiles.GEOPARQUET.toUri(); + Path geoParquet = new Path(TestFiles.GEOPARQUET.toUri()); GeoParquetReader geoParquetReader = new GeoParquetReader(geoParquet); assertEquals(5, geoParquetReader.size()); } @Test void count() { - URI geoParquet = TestFiles.GEOPARQUET.toUri(); + Path geoParquet = new Path(TestFiles.GEOPARQUET.toUri()); GeoParquetReader geoParquetReader = new GeoParquetReader(geoParquet); assertEquals(5, geoParquetReader.read().count()); } @Test void validateSchemas() { - URI geoParquet = TestFiles.GEOPARQUET.toUri(); + Path geoParquet = new Path(TestFiles.GEOPARQUET.toUri()); GeoParquetReader geoParquetReader = new GeoParquetReader(geoParquet); assertTrue(geoParquetReader.validateSchemasAreIdentical()); } diff --git a/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetWriterTest.java b/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetWriterTest.java new file mode 100644 index 000000000..f1c577fd7 --- /dev/null +++ b/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/GeoParquetWriterTest.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.baremaps.geoparquet; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import org.apache.baremaps.geoparquet.GeoParquetMetadata.Column; +import org.apache.baremaps.testing.TestFiles; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.parquet.hadoop.ParquetWriter; +import org.apache.parquet.schema.LogicalTypeAnnotation; +import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; +import org.apache.parquet.schema.Types; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.locationtech.jts.geom.Coordinate; +import org.locationtech.jts.geom.GeometryFactory; +import org.locationtech.jts.geom.Point; + +class GeoParquetWriterTest { + + @Test + @Tag("integration") + void testWriteAndReadGeoParquet() throws IOException { + // Create the output file + Configuration conf = new Configuration(); + Path outputPath = new Path("target/test-output/geoparquet-test.parquet"); + + try { + // Define the Parquet schema + MessageType type = Types.buildMessage() + .required(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named("name") + .required(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named("city") + .optional(PrimitiveTypeName.BINARY).named("geometry") + .named("GeoParquetSchema"); + + // Create GeoParquet metadata + Map columns = new HashMap<>(); + columns.put("geometry", new GeoParquetMetadata.Column( + "WKB", + List.of("Point"), + null, + null, + null, + null)); + + GeoParquetMetadata metadata = new GeoParquetMetadata( + "1.0", + "geometry", + columns, + null, + null, + null, + null, + null, + null, + null); + + // Create a Point geometry + GeometryFactory geometryFactory = new GeometryFactory(); + Point point = geometryFactory.createPoint(new Coordinate(1.0, 2.0)); + + // Create the GeoParquetWriter + try (ParquetWriter writer = GeoParquetWriter.builder(outputPath) + .withType(type) + .withGeoParquetMetadata(metadata) + .build()) { + + // Create a GeoParquetGroup and write it + GeoParquetSchema geoParquetSchema = + GeoParquetGroupFactory.createGeoParquetSchema(type, metadata); + GeoParquetGroup group = + new GeoParquetGroup(type.asGroupType(), metadata, geoParquetSchema); + group.add("name", "Test Point"); + group.add("city", "Test City"); + group.add("geometry", point); + + // Write the group + writer.write(group); + } + + // Now read back the file using GeoParquetReader + GeoParquetReader reader = new GeoParquetReader(outputPath, null, conf); + GeoParquetGroup readGroup = reader.read().findFirst().orElse(null); + + assertNotNull(readGroup, "Read group should not be null"); + + // Verify the data + assertEquals("Test Point", readGroup.getStringValue("name")); + assertEquals("Test City", readGroup.getStringValue("city")); + + Point readPoint = (Point) readGroup.getGeometryValue("geometry"); + assertEquals(point.getX(), readPoint.getX(), 0.0001); + assertEquals(point.getY(), readPoint.getY(), 0.0001); + } finally { + outputPath.getFileSystem(conf).delete(outputPath, false); + } + } + + @Test + @Tag("integration") + void copyGeoParquetData() throws IOException { + Path geoParquet = new Path(TestFiles.GEOPARQUET.toUri()); + + Configuration conf = new Configuration(); + Path outputPath = new Path("target/test-output/geoparquet-copy.parquet"); + + try { + // Write the GeoParquet file + GeoParquetReader reader = new GeoParquetReader(geoParquet, null, conf); + GeoParquetWriter.Builder builder = GeoParquetWriter.builder(outputPath); + ParquetWriter writer = builder.withType(reader.getParquetSchema()) + .withGeoParquetMetadata(reader.getGeoParquetMetadata()).build(); + Iterator iterator = reader.read().iterator(); + while (iterator.hasNext()) { + writer.write(iterator.next()); + } + writer.close(); + + // Read the copied file + GeoParquetReader copiedReader = new GeoParquetReader(outputPath, null, conf); + assertEquals(5, copiedReader.read().count()); + } finally { + outputPath.getFileSystem(conf).delete(outputPath, false); + } + + + + } + +} diff --git a/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/OvertureMapsTest.java b/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/OvertureMapsTest.java index 0fb5b9205..206aa9f19 100644 --- a/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/OvertureMapsTest.java +++ b/baremaps-geoparquet/src/test/java/org/apache/baremaps/geoparquet/OvertureMapsTest.java @@ -20,8 +20,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; -import java.net.URI; -import java.net.URISyntaxException; +import org.apache.hadoop.fs.Path; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.locationtech.jts.geom.Envelope; @@ -30,9 +29,9 @@ class OvertureMapsTest { @Disabled("Requires access to the Internet") @Test - void countAddresses() throws URISyntaxException { - URI geoParquet = new URI( - "s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet"); + void countAddressesInSwitzerland() { + Path geoParquet = + new Path("s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet"); Envelope switzerland = new Envelope(6.02260949059, 10.4427014502, 45.7769477403, 47.8308275417); GeoParquetReader geoParquetReader = new GeoParquetReader(geoParquet, switzerland, OvertureMaps.configuration()); @@ -41,20 +40,9 @@ void countAddresses() throws URISyntaxException { @Disabled("Requires access to the Internet") @Test - void countAddressesInSwitzerland() throws URISyntaxException { - URI geoParquet = new URI( - "s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet"); - Envelope switzerland = new Envelope(6.02260949059, 10.4427014502, 45.7769477403, 47.8308275417); - GeoParquetReader geoParquetReader = - new GeoParquetReader(geoParquet, switzerland, OvertureMaps.configuration()); - assertEquals(10397434, geoParquetReader.readParallel().count()); - } - - @Disabled("Requires access to the Internet") - @Test - void validateSchemas() throws URISyntaxException { - URI geoParquet = new URI( - "s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet"); + void validateSchemas() { + Path geoParquet = + new Path("s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet"); GeoParquetReader geoParquetReader = new GeoParquetReader(geoParquet, null, OvertureMaps.configuration()); assertTrue(geoParquetReader.validateSchemasAreIdentical(), "Schemas are identical"); @@ -62,9 +50,9 @@ void validateSchemas() throws URISyntaxException { @Disabled("Requires access to the Internet") @Test - void size() throws URISyntaxException { - URI geoParquet = new URI( - "s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet"); + void size() { + Path geoParquet = + new Path("s3a://overturemaps-us-west-2/release/2024-09-18.0/theme=addresses/**/*.parquet"); GeoParquetReader geoParquetReader = new GeoParquetReader(geoParquet, null, OvertureMaps.configuration()); assertEquals(213535887L, geoParquetReader.size());