Skip to content

Commit

Permalink
Enable internal compression in pmtiles
Browse files Browse the repository at this point in the history
  • Loading branch information
bchapuis committed Dec 5, 2023
1 parent f4b2820 commit b8920f6
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 95 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,13 @@
package org.apache.baremaps.tilestore.pmtiles;

import com.google.common.io.LittleEndianDataInputStream;
import com.google.common.io.LittleEndianDataOutputStream;
import com.google.common.math.LongMath;
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.List;

Expand All @@ -31,72 +34,72 @@ public static long toNum(long low, long high) {
return high * 0x100000000L + low;
}

public static long readVarIntRemainder(LittleEndianDataInputStream input, long l)
public static long readVarIntRemainder(InputStream input, long l)
throws IOException {
long h, b;
b = input.readByte() & 0xff;
b = input.read() & 0xff;
h = (b & 0x70) >> 4;
if (b < 0x80) {
return toNum(l, h);
}
b = input.readByte() & 0xff;
b = input.read() & 0xff;
h |= (b & 0x7f) << 3;
if (b < 0x80) {
return toNum(l, h);
}
b = input.readByte() & 0xff;
b = input.read() & 0xff;
h |= (b & 0x7f) << 10;
if (b < 0x80) {
return toNum(l, h);
}
b = input.readByte() & 0xff;
b = input.read() & 0xff;
h |= (b & 0x7f) << 17;
if (b < 0x80) {
return toNum(l, h);
}
b = input.readByte() & 0xff;
b = input.read() & 0xff;
h |= (b & 0x7f) << 24;
if (b < 0x80) {
return toNum(l, h);
}
b = input.readByte() & 0xff;
b = input.read() & 0xff;
h |= (b & 0x01) << 31;
if (b < 0x80) {
return toNum(l, h);
}
throw new RuntimeException("Expected varint not more than 10 bytes");
}

public static int writeVarInt(LittleEndianDataOutputStream output, long value)
public static int writeVarInt(OutputStream output, long value)
throws IOException {
int n = 1;
while (value >= 0x80) {
output.writeByte((byte) (value | 0x80));
output.write((byte) (value | 0x80));
value >>>= 7;
n++;
}
output.writeByte((byte) value);
output.write((byte) value);
return n;
}

public static long readVarInt(LittleEndianDataInputStream input) throws IOException {
public static long readVarInt(InputStream input) throws IOException {
long val, b;
b = input.readByte() & 0xff;
b = input.read() & 0xff;
val = b & 0x7f;
if (b < 0x80) {
return val;
}
b = input.readByte() & 0xff;
b = input.read() & 0xff;
val |= (b & 0x7f) << 7;
if (b < 0x80) {
return val;
}
b = input.readByte() & 0xff;
b = input.read() & 0xff;
val |= (b & 0x7f) << 14;
if (b < 0x80) {
return val;
}
b = input.readByte() & 0xff;
b = input.read() & 0xff;
val |= (b & 0x7f) << 21;
if (b < 0x80) {
return val;
Expand Down Expand Up @@ -209,44 +212,47 @@ public static Header deserializeHeader(LittleEndianDataInputStream input) throws
(double) input.readInt() / 10000000);
}

public static void serializeHeader(LittleEndianDataOutputStream output, Header header)
throws IOException {
output.writeByte((byte) 0x50);
output.writeByte((byte) 0x4D);
output.writeByte((byte) 0x54);
output.writeByte((byte) 0x69);
output.writeByte((byte) 0x6C);
output.writeByte((byte) 0x65);
output.writeByte((byte) 0x73);
output.writeByte((byte) header.getSpecVersion());
output.writeLong(header.getRootDirectoryOffset());
output.writeLong(header.getRootDirectoryLength());
output.writeLong(header.getJsonMetadataOffset());
output.writeLong(header.getJsonMetadataLength());
output.writeLong(header.getLeafDirectoryOffset());
output.writeLong(header.getLeafDirectoryLength());
output.writeLong(header.getTileDataOffset());
output.writeLong(header.getTileDataLength());
output.writeLong(header.getNumAddressedTiles());
output.writeLong(header.getNumTileEntries());
output.writeLong(header.getNumTileContents());
output.writeByte((byte) (header.isClustered() ? 1 : 0));
output.writeByte((byte) header.getInternalCompression().ordinal());
output.writeByte((byte) header.getTileCompression().ordinal());
output.writeByte((byte) header.getTileType().ordinal());
output.writeByte((byte) header.getMinZoom());
output.writeByte((byte) header.getMaxZoom());
output.writeInt((int) (header.getMinLon() * 10000000));
output.writeInt((int) (header.getMinLat() * 10000000));
output.writeInt((int) (header.getMaxLon() * 10000000));
output.writeInt((int) (header.getMaxLat() * 10000000));
output.writeByte((byte) header.getCenterZoom());
output.writeInt((int) (header.getCenterLon() * 10000000));
output.writeInt((int) (header.getCenterLat() * 10000000));
public static byte[] serializeHeader(Header header) {
var buffer = ByteBuffer.allocate(HEADER_SIZE_BYTES).order(ByteOrder.LITTLE_ENDIAN);
buffer.put((byte) 0x50);
buffer.put((byte) 0x4D);
buffer.put((byte) 0x54);
buffer.put((byte) 0x69);
buffer.put((byte) 0x6C);
buffer.put((byte) 0x65);
buffer.put((byte) 0x73);
buffer.put((byte) header.getSpecVersion());
buffer.putLong(header.getRootDirectoryOffset());
buffer.putLong(header.getRootDirectoryLength());
buffer.putLong(header.getJsonMetadataOffset());
buffer.putLong(header.getJsonMetadataLength());
buffer.putLong(header.getLeafDirectoryOffset());
buffer.putLong(header.getLeafDirectoryLength());
buffer.putLong(header.getTileDataOffset());
buffer.putLong(header.getTileDataLength());
buffer.putLong(header.getNumAddressedTiles());
buffer.putLong(header.getNumTileEntries());
buffer.putLong(header.getNumTileContents());
buffer.put((byte) (header.isClustered() ? 1 : 0));
buffer.put((byte) header.getInternalCompression().ordinal());
buffer.put((byte) header.getTileCompression().ordinal());
buffer.put((byte) header.getTileType().ordinal());
buffer.put((byte) header.getMinZoom());
buffer.put((byte) header.getMaxZoom());
buffer.putInt((int) (header.getMinLon() * 10000000));
buffer.putInt((int) (header.getMinLat() * 10000000));
buffer.putInt((int) (header.getMaxLon() * 10000000));
buffer.putInt((int) (header.getMaxLat() * 10000000));
buffer.put((byte) header.getCenterZoom());
buffer.putInt((int) (header.getCenterLon() * 10000000));
buffer.putInt((int) (header.getCenterLat() * 10000000));
buffer.flip();
return buffer.array();
}

public static void serializeEntries(LittleEndianDataOutputStream output, List<Entry> entries)
public static void serializeEntries(OutputStream output, List<Entry> entries)
throws IOException {
var buffer = ByteBuffer.allocate(entries.size() * 48);
writeVarInt(output, entries.size());
long lastId = 0;
for (Entry entry : entries) {
Expand All @@ -268,6 +274,8 @@ public static void serializeEntries(LittleEndianDataOutputStream output, List<En
writeVarInt(output, entry.getOffset() + 1);
}
}
buffer.flip();
output.write(buffer.array(), 0, buffer.limit());
}

public static List<Entry> deserializeEntries(LittleEndianDataInputStream buffer)
Expand Down Expand Up @@ -329,60 +337,62 @@ public static Entry findTile(List<Entry> entries, long tileId) {
return null;
}

public static Directories buildRootLeaves(List<Entry> entries, int leafSize) throws IOException {
public static Directories buildRootLeaves(List<Entry> entries, int leafSize,
Compression compression) throws IOException {
var rootEntries = new ArrayList<Entry>();
var numLeaves = 0;
byte[] leavesBytes;
byte[] rootBytes;

try (var leavesOutput = new ByteArrayOutputStream();
var leavesDataOutput = new LittleEndianDataOutputStream(leavesOutput)) {
try (var leavesOutput = new ByteArrayOutputStream()) {
for (var i = 0; i < entries.size(); i += leafSize) {
numLeaves++;
var end = i + leafSize;
if (i + leafSize > entries.size()) {
end = entries.size();
}

var offset = leavesOutput.size();
serializeEntries(leavesDataOutput, entries.subList(i, end));
var length = leavesOutput.size();
rootEntries.add(new Entry(entries.get(i).getTileId(), offset, length, 0));
try (var leafOutput = new ByteArrayOutputStream()) {
try (var compressedLeafOutput = compression.compress(leafOutput)) {
serializeEntries(compressedLeafOutput, entries.subList(i, end));
}
var length = leafOutput.size();
rootEntries.add(new Entry(entries.get(i).getTileId(), offset, length, 0));
leavesOutput.write(leafOutput.toByteArray());
}
}

leavesBytes = leavesOutput.toByteArray();
}

try (var rootOutput = new ByteArrayOutputStream();
var rootDataOutput = new LittleEndianDataOutputStream(rootOutput)) {
serializeEntries(rootDataOutput, rootEntries);
try (var rootOutput = new ByteArrayOutputStream()) {
try (var compressedRootOutput = compression.compress(rootOutput)) {
serializeEntries(compressedRootOutput, rootEntries);
}
rootBytes = rootOutput.toByteArray();
}

return new Directories(rootBytes, leavesBytes, numLeaves);
}

public static Directories optimizeDirectories(List<Entry> entries, int targetRootLenght)
public static Directories optimizeDirectories(List<Entry> entries, int targetRootLength,
Compression compression)
throws IOException {
if (entries.size() < 16384) {
byte[] rootBytes;
try (var rootOutput = new ByteArrayOutputStream();
var rootDataOutput = new LittleEndianDataOutputStream(rootOutput)) {
serializeEntries(rootDataOutput, entries);
rootBytes = rootOutput.toByteArray();
}
if (rootBytes.length <= targetRootLenght) {
return new Directories(rootBytes, new byte[] {}, 0);
try (var rootOutput = new ByteArrayOutputStream()) {
try (var compressedOutput = compression.compress(rootOutput)) {
serializeEntries(compressedOutput, entries);
}
byte[] rootBytes = rootOutput.toByteArray();
if (rootBytes.length <= targetRootLength) {
return new Directories(rootBytes, new byte[] {}, 0);
}
}
}

double leafSize = (double) entries.size() / 3500;
if (leafSize < 4096) {
leafSize = 4096;
}
double leafSize = Math.max((double) entries.size() / 3500, 4096);
for (;;) {
var directories = buildRootLeaves(entries, (int) leafSize);
if (directories.getRoot().length <= targetRootLenght) {
var directories = buildRootLeaves(entries, (int) leafSize, compression);
if (directories.getRoot().length <= targetRootLength) {
return directories;
}
leafSize = leafSize * 1.2;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.hash.Hashing;
import com.google.common.io.LittleEndianDataOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
Expand All @@ -28,6 +28,8 @@

public class PMTilesWriter {

private Compression compression = Compression.Gzip;

private Path path;

private Map<String, Object> metadata = new HashMap<>();
Expand Down Expand Up @@ -153,9 +155,16 @@ public void write() throws IOException {
entries.sort(Comparator.comparingLong(Entry::getTileId));
}

var metadataBytes = new ObjectMapper().writeValueAsBytes(metadata);
var directories = PMTiles.optimizeDirectories(entries, 16247, compression);

byte[] metadataBytes;
try (var metadataOutput = new ByteArrayOutputStream()) {
try (var compressedMetadataOutput = compression.compress(metadataOutput)) {
new ObjectMapper().writeValue(compressedMetadataOutput, metadata);
}
metadataBytes = metadataOutput.toByteArray();
}

var directories = PMTiles.optimizeDirectories(entries, 16247);
var rootOffset = 127;
var rootLength = directories.getRoot().length;
var metadataOffset = rootOffset + rootLength;
Expand All @@ -172,8 +181,8 @@ public void write() throws IOException {
header.setNumTileContents(numTiles);
header.setClustered(true);

header.setInternalCompression(Compression.None);
header.setTileCompression(Compression.Gzip);
header.setInternalCompression(compression);
header.setTileCompression(compression);
header.setTileType(TileType.mvt);
header.setRootOffset(rootOffset);
header.setRootLength(rootLength);
Expand All @@ -194,8 +203,8 @@ public void write() throws IOException {
header.setCenterLat(centerLat);
header.setCenterLon(centerLon);

try (var output = new LittleEndianDataOutputStream(new FileOutputStream(path.toFile()))) {
PMTiles.serializeHeader(output, header);
try (var output = new FileOutputStream(path.toFile())) {
output.write(PMTiles.serializeHeader(header));
output.write(directories.getRoot());
output.write(metadataBytes);
output.write(directories.getLeaves());
Expand Down
Loading

0 comments on commit b8920f6

Please sign in to comment.