From b8920f6f56bef7e23191d9ab56a4f16006bf9dea Mon Sep 17 00:00:00 2001 From: Bertil Chapuis Date: Wed, 6 Dec 2023 00:49:08 +0100 Subject: [PATCH] Enable internal compression in pmtiles --- .../baremaps/tilestore/pmtiles/PMTiles.java | 164 ++++++++++-------- .../tilestore/pmtiles/PMTilesWriter.java | 23 ++- .../tilestore/pmtiles/PMTilesTest.java | 17 +- 3 files changed, 109 insertions(+), 95 deletions(-) diff --git a/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTiles.java b/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTiles.java index e8bf2f304..38e8544ea 100644 --- a/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTiles.java +++ b/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTiles.java @@ -18,10 +18,13 @@ package org.apache.baremaps.tilestore.pmtiles; import com.google.common.io.LittleEndianDataInputStream; -import com.google.common.io.LittleEndianDataOutputStream; import com.google.common.math.LongMath; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.util.ArrayList; import java.util.List; @@ -31,35 +34,35 @@ public static long toNum(long low, long high) { return high * 0x100000000L + low; } - public static long readVarIntRemainder(LittleEndianDataInputStream input, long l) + public static long readVarIntRemainder(InputStream input, long l) throws IOException { long h, b; - b = input.readByte() & 0xff; + b = input.read() & 0xff; h = (b & 0x70) >> 4; if (b < 0x80) { return toNum(l, h); } - b = input.readByte() & 0xff; + b = input.read() & 0xff; h |= (b & 0x7f) << 3; if (b < 0x80) { return toNum(l, h); } - b = input.readByte() & 0xff; + b = input.read() & 0xff; h |= (b & 0x7f) << 10; if (b < 0x80) { return toNum(l, h); } - b = input.readByte() & 0xff; + b = input.read() & 0xff; h |= (b & 0x7f) << 17; if (b < 0x80) { return toNum(l, h); } - b = input.readByte() & 0xff; + b = input.read() & 0xff; h |= (b & 0x7f) << 24; if (b < 0x80) { return toNum(l, h); } - b = input.readByte() & 0xff; + b = input.read() & 0xff; h |= (b & 0x01) << 31; if (b < 0x80) { return toNum(l, h); @@ -67,36 +70,36 @@ public static long readVarIntRemainder(LittleEndianDataInputStream input, long l throw new RuntimeException("Expected varint not more than 10 bytes"); } - public static int writeVarInt(LittleEndianDataOutputStream output, long value) + public static int writeVarInt(OutputStream output, long value) throws IOException { int n = 1; while (value >= 0x80) { - output.writeByte((byte) (value | 0x80)); + output.write((byte) (value | 0x80)); value >>>= 7; n++; } - output.writeByte((byte) value); + output.write((byte) value); return n; } - public static long readVarInt(LittleEndianDataInputStream input) throws IOException { + public static long readVarInt(InputStream input) throws IOException { long val, b; - b = input.readByte() & 0xff; + b = input.read() & 0xff; val = b & 0x7f; if (b < 0x80) { return val; } - b = input.readByte() & 0xff; + b = input.read() & 0xff; val |= (b & 0x7f) << 7; if (b < 0x80) { return val; } - b = input.readByte() & 0xff; + b = input.read() & 0xff; val |= (b & 0x7f) << 14; if (b < 0x80) { return val; } - b = input.readByte() & 0xff; + b = input.read() & 0xff; val |= (b & 0x7f) << 21; if (b < 0x80) { return val; @@ -209,44 +212,47 @@ public static Header deserializeHeader(LittleEndianDataInputStream input) throws (double) input.readInt() / 10000000); } - public static void serializeHeader(LittleEndianDataOutputStream output, Header header) - throws IOException { - output.writeByte((byte) 0x50); - output.writeByte((byte) 0x4D); - output.writeByte((byte) 0x54); - output.writeByte((byte) 0x69); - output.writeByte((byte) 0x6C); - output.writeByte((byte) 0x65); - output.writeByte((byte) 0x73); - output.writeByte((byte) header.getSpecVersion()); - output.writeLong(header.getRootDirectoryOffset()); - output.writeLong(header.getRootDirectoryLength()); - output.writeLong(header.getJsonMetadataOffset()); - output.writeLong(header.getJsonMetadataLength()); - output.writeLong(header.getLeafDirectoryOffset()); - output.writeLong(header.getLeafDirectoryLength()); - output.writeLong(header.getTileDataOffset()); - output.writeLong(header.getTileDataLength()); - output.writeLong(header.getNumAddressedTiles()); - output.writeLong(header.getNumTileEntries()); - output.writeLong(header.getNumTileContents()); - output.writeByte((byte) (header.isClustered() ? 1 : 0)); - output.writeByte((byte) header.getInternalCompression().ordinal()); - output.writeByte((byte) header.getTileCompression().ordinal()); - output.writeByte((byte) header.getTileType().ordinal()); - output.writeByte((byte) header.getMinZoom()); - output.writeByte((byte) header.getMaxZoom()); - output.writeInt((int) (header.getMinLon() * 10000000)); - output.writeInt((int) (header.getMinLat() * 10000000)); - output.writeInt((int) (header.getMaxLon() * 10000000)); - output.writeInt((int) (header.getMaxLat() * 10000000)); - output.writeByte((byte) header.getCenterZoom()); - output.writeInt((int) (header.getCenterLon() * 10000000)); - output.writeInt((int) (header.getCenterLat() * 10000000)); + public static byte[] serializeHeader(Header header) { + var buffer = ByteBuffer.allocate(HEADER_SIZE_BYTES).order(ByteOrder.LITTLE_ENDIAN); + buffer.put((byte) 0x50); + buffer.put((byte) 0x4D); + buffer.put((byte) 0x54); + buffer.put((byte) 0x69); + buffer.put((byte) 0x6C); + buffer.put((byte) 0x65); + buffer.put((byte) 0x73); + buffer.put((byte) header.getSpecVersion()); + buffer.putLong(header.getRootDirectoryOffset()); + buffer.putLong(header.getRootDirectoryLength()); + buffer.putLong(header.getJsonMetadataOffset()); + buffer.putLong(header.getJsonMetadataLength()); + buffer.putLong(header.getLeafDirectoryOffset()); + buffer.putLong(header.getLeafDirectoryLength()); + buffer.putLong(header.getTileDataOffset()); + buffer.putLong(header.getTileDataLength()); + buffer.putLong(header.getNumAddressedTiles()); + buffer.putLong(header.getNumTileEntries()); + buffer.putLong(header.getNumTileContents()); + buffer.put((byte) (header.isClustered() ? 1 : 0)); + buffer.put((byte) header.getInternalCompression().ordinal()); + buffer.put((byte) header.getTileCompression().ordinal()); + buffer.put((byte) header.getTileType().ordinal()); + buffer.put((byte) header.getMinZoom()); + buffer.put((byte) header.getMaxZoom()); + buffer.putInt((int) (header.getMinLon() * 10000000)); + buffer.putInt((int) (header.getMinLat() * 10000000)); + buffer.putInt((int) (header.getMaxLon() * 10000000)); + buffer.putInt((int) (header.getMaxLat() * 10000000)); + buffer.put((byte) header.getCenterZoom()); + buffer.putInt((int) (header.getCenterLon() * 10000000)); + buffer.putInt((int) (header.getCenterLat() * 10000000)); + buffer.flip(); + return buffer.array(); } - public static void serializeEntries(LittleEndianDataOutputStream output, List entries) + public static void serializeEntries(OutputStream output, List entries) throws IOException { + var buffer = ByteBuffer.allocate(entries.size() * 48); writeVarInt(output, entries.size()); long lastId = 0; for (Entry entry : entries) { @@ -268,6 +274,8 @@ public static void serializeEntries(LittleEndianDataOutputStream output, List deserializeEntries(LittleEndianDataInputStream buffer) @@ -329,60 +337,62 @@ public static Entry findTile(List entries, long tileId) { return null; } - public static Directories buildRootLeaves(List entries, int leafSize) throws IOException { + public static Directories buildRootLeaves(List entries, int leafSize, + Compression compression) throws IOException { var rootEntries = new ArrayList(); var numLeaves = 0; byte[] leavesBytes; byte[] rootBytes; - try (var leavesOutput = new ByteArrayOutputStream(); - var leavesDataOutput = new LittleEndianDataOutputStream(leavesOutput)) { + try (var leavesOutput = new ByteArrayOutputStream()) { for (var i = 0; i < entries.size(); i += leafSize) { numLeaves++; var end = i + leafSize; if (i + leafSize > entries.size()) { end = entries.size(); } - var offset = leavesOutput.size(); - serializeEntries(leavesDataOutput, entries.subList(i, end)); - var length = leavesOutput.size(); - rootEntries.add(new Entry(entries.get(i).getTileId(), offset, length, 0)); + try (var leafOutput = new ByteArrayOutputStream()) { + try (var compressedLeafOutput = compression.compress(leafOutput)) { + serializeEntries(compressedLeafOutput, entries.subList(i, end)); + } + var length = leafOutput.size(); + rootEntries.add(new Entry(entries.get(i).getTileId(), offset, length, 0)); + leavesOutput.write(leafOutput.toByteArray()); + } } - leavesBytes = leavesOutput.toByteArray(); } - try (var rootOutput = new ByteArrayOutputStream(); - var rootDataOutput = new LittleEndianDataOutputStream(rootOutput)) { - serializeEntries(rootDataOutput, rootEntries); + try (var rootOutput = new ByteArrayOutputStream()) { + try (var compressedRootOutput = compression.compress(rootOutput)) { + serializeEntries(compressedRootOutput, rootEntries); + } rootBytes = rootOutput.toByteArray(); } return new Directories(rootBytes, leavesBytes, numLeaves); } - public static Directories optimizeDirectories(List entries, int targetRootLenght) + public static Directories optimizeDirectories(List entries, int targetRootLength, + Compression compression) throws IOException { if (entries.size() < 16384) { - byte[] rootBytes; - try (var rootOutput = new ByteArrayOutputStream(); - var rootDataOutput = new LittleEndianDataOutputStream(rootOutput)) { - serializeEntries(rootDataOutput, entries); - rootBytes = rootOutput.toByteArray(); - } - if (rootBytes.length <= targetRootLenght) { - return new Directories(rootBytes, new byte[] {}, 0); + try (var rootOutput = new ByteArrayOutputStream()) { + try (var compressedOutput = compression.compress(rootOutput)) { + serializeEntries(compressedOutput, entries); + } + byte[] rootBytes = rootOutput.toByteArray(); + if (rootBytes.length <= targetRootLength) { + return new Directories(rootBytes, new byte[] {}, 0); + } } } - double leafSize = (double) entries.size() / 3500; - if (leafSize < 4096) { - leafSize = 4096; - } + double leafSize = Math.max((double) entries.size() / 3500, 4096); for (;;) { - var directories = buildRootLeaves(entries, (int) leafSize); - if (directories.getRoot().length <= targetRootLenght) { + var directories = buildRootLeaves(entries, (int) leafSize, compression); + if (directories.getRoot().length <= targetRootLength) { return directories; } leafSize = leafSize * 1.2; diff --git a/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTilesWriter.java b/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTilesWriter.java index f01aaf3ee..892a692d3 100644 --- a/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTilesWriter.java +++ b/baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTilesWriter.java @@ -19,7 +19,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.hash.Hashing; -import com.google.common.io.LittleEndianDataOutputStream; +import java.io.ByteArrayOutputStream; import java.io.FileOutputStream; import java.io.IOException; import java.nio.file.Files; @@ -28,6 +28,8 @@ public class PMTilesWriter { + private Compression compression = Compression.Gzip; + private Path path; private Map metadata = new HashMap<>(); @@ -153,9 +155,16 @@ public void write() throws IOException { entries.sort(Comparator.comparingLong(Entry::getTileId)); } - var metadataBytes = new ObjectMapper().writeValueAsBytes(metadata); + var directories = PMTiles.optimizeDirectories(entries, 16247, compression); + + byte[] metadataBytes; + try (var metadataOutput = new ByteArrayOutputStream()) { + try (var compressedMetadataOutput = compression.compress(metadataOutput)) { + new ObjectMapper().writeValue(compressedMetadataOutput, metadata); + } + metadataBytes = metadataOutput.toByteArray(); + } - var directories = PMTiles.optimizeDirectories(entries, 16247); var rootOffset = 127; var rootLength = directories.getRoot().length; var metadataOffset = rootOffset + rootLength; @@ -172,8 +181,8 @@ public void write() throws IOException { header.setNumTileContents(numTiles); header.setClustered(true); - header.setInternalCompression(Compression.None); - header.setTileCompression(Compression.Gzip); + header.setInternalCompression(compression); + header.setTileCompression(compression); header.setTileType(TileType.mvt); header.setRootOffset(rootOffset); header.setRootLength(rootLength); @@ -194,8 +203,8 @@ public void write() throws IOException { header.setCenterLat(centerLat); header.setCenterLon(centerLon); - try (var output = new LittleEndianDataOutputStream(new FileOutputStream(path.toFile()))) { - PMTiles.serializeHeader(output, header); + try (var output = new FileOutputStream(path.toFile())) { + output.write(PMTiles.serializeHeader(header)); output.write(directories.getRoot()); output.write(metadataBytes); output.write(directories.getLeaves()); diff --git a/baremaps-core/src/test/java/org/apache/baremaps/tilestore/pmtiles/PMTilesTest.java b/baremaps-core/src/test/java/org/apache/baremaps/tilestore/pmtiles/PMTilesTest.java index e078d1d53..e06f8d038 100644 --- a/baremaps-core/src/test/java/org/apache/baremaps/tilestore/pmtiles/PMTilesTest.java +++ b/baremaps-core/src/test/java/org/apache/baremaps/tilestore/pmtiles/PMTilesTest.java @@ -20,7 +20,6 @@ import static org.junit.jupiter.api.Assertions.*; import com.google.common.io.LittleEndianDataInputStream; -import com.google.common.io.LittleEndianDataOutputStream; import com.google.common.math.LongMath; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -59,15 +58,13 @@ void decodeVarInt() throws IOException { void encodeVarInt() throws IOException { for (long i = 0; i < 1000; i++) { var array = new ByteArrayOutputStream(); - var output = new LittleEndianDataOutputStream(array); - PMTiles.writeVarInt(output, i); + PMTiles.writeVarInt(array, i); var input = new LittleEndianDataInputStream(new ByteArrayInputStream(array.toByteArray())); assertEquals(i, PMTiles.readVarInt(input)); } for (long i = Long.MAX_VALUE - 1000; i < Long.MAX_VALUE; i++) { var array = new ByteArrayOutputStream(); - var output = new LittleEndianDataOutputStream(array); - PMTiles.writeVarInt(output, i); + PMTiles.writeVarInt(array, i); var input = new LittleEndianDataInputStream(new ByteArrayInputStream(array.toByteArray())); assertEquals(i, PMTiles.readVarInt(input)); } @@ -190,9 +187,7 @@ void encodeHeader() throws IOException { 0); var array = new ByteArrayOutputStream(); - - var output = new LittleEndianDataOutputStream(array); - PMTiles.serializeHeader(output, header); + array.write(PMTiles.serializeHeader(header)); var input = new LittleEndianDataInputStream(new ByteArrayInputStream(array.toByteArray())); var header2 = PMTiles.deserializeHeader(input); @@ -259,7 +254,7 @@ void leafSearch() { @Test void buildRootLeaves() throws IOException { var entries = List.of(new Entry(100, 1, 1, 0)); - var directories = PMTiles.buildRootLeaves(entries, 1); + var directories = PMTiles.buildRootLeaves(entries, 1, Compression.None); assertEquals(directories.getNumLeaves(), 1); } @@ -269,7 +264,7 @@ void optimizeDirectories() throws IOException { var random = new Random(3857); var entries = new ArrayList(); entries.add(new Entry(0, 0, 100, 1)); - var directories = PMTiles.optimizeDirectories(entries, 100); + var directories = PMTiles.optimizeDirectories(entries, 100, Compression.None); assertFalse(directories.getLeaves().length > 0); assertEquals(0, directories.getNumLeaves()); @@ -280,7 +275,7 @@ void optimizeDirectories() throws IOException { entries.add(new Entry(i, offset, randTileSize, 1)); offset += randTileSize; } - directories = PMTiles.optimizeDirectories(entries, 1024); + directories = PMTiles.optimizeDirectories(entries, 1024, Compression.None); assertFalse(directories.getRoot().length > 1024); assertFalse(directories.getNumLeaves() == 0); assertFalse(directories.getLeaves().length == 0);