Enable internal compression in pmtiles (#811)
* Enable internal compression in pmtiles

* Use buffer instead of little endian stream when deserializing headers
bchapuis authored Dec 9, 2023
1 parent f4b2820 commit b56b325
Showing 3 changed files with 143 additions and 124 deletions.
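
This commit threads a Compression argument through the directory-building helpers so the PMTiles root and leaf directories are compressed before they are written, and it swaps Guava's little-endian streams for a plain ByteBuffer when reading and writing the fixed 127-byte header. A minimal sketch of the wrap-then-serialize pattern the diff introduces, using only the compress(OutputStream) call visible in the new code; the class and method names here are hypothetical:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;

class InternalCompressionSketch {

  // Compress an already serialized directory the way buildRootLeaves does below:
  // wrap the destination stream, write through the wrapper, close it, then read the bytes.
  static byte[] compressDirectory(byte[] serializedDirectory, Compression compression)
      throws IOException {
    try (var output = new ByteArrayOutputStream()) {
      try (OutputStream compressedOutput = compression.compress(output)) {
        compressedOutput.write(serializedDirectory);
      }
      return output.toByteArray();
    }
  }
}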
@@ -17,11 +17,13 @@

package org.apache.baremaps.tilestore.pmtiles;

import com.google.common.io.LittleEndianDataInputStream;
import com.google.common.io.LittleEndianDataOutputStream;
import com.google.common.math.LongMath;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.List;

@@ -31,72 +33,72 @@ public static long toNum(long low, long high) {
return high * 0x100000000L + low;
}

public static long readVarIntRemainder(LittleEndianDataInputStream input, long l)
public static long readVarIntRemainder(InputStream input, long l)
throws IOException {
long h, b;
b = input.readByte() & 0xff;
b = input.read() & 0xff;
h = (b & 0x70) >> 4;
if (b < 0x80) {
return toNum(l, h);
}
b = input.readByte() & 0xff;
b = input.read() & 0xff;
h |= (b & 0x7f) << 3;
if (b < 0x80) {
return toNum(l, h);
}
b = input.readByte() & 0xff;
b = input.read() & 0xff;
h |= (b & 0x7f) << 10;
if (b < 0x80) {
return toNum(l, h);
}
b = input.readByte() & 0xff;
b = input.read() & 0xff;
h |= (b & 0x7f) << 17;
if (b < 0x80) {
return toNum(l, h);
}
b = input.readByte() & 0xff;
b = input.read() & 0xff;
h |= (b & 0x7f) << 24;
if (b < 0x80) {
return toNum(l, h);
}
b = input.readByte() & 0xff;
b = input.read() & 0xff;
h |= (b & 0x01) << 31;
if (b < 0x80) {
return toNum(l, h);
}
throw new RuntimeException("Expected varint not more than 10 bytes");
}

public static int writeVarInt(LittleEndianDataOutputStream output, long value)
public static int writeVarInt(OutputStream output, long value)
throws IOException {
int n = 1;
while (value >= 0x80) {
output.writeByte((byte) (value | 0x80));
output.write((byte) (value | 0x80));
value >>>= 7;
n++;
}
output.writeByte((byte) value);
output.write((byte) value);
return n;
}

public static long readVarInt(LittleEndianDataInputStream input) throws IOException {
public static long readVarInt(InputStream input) throws IOException {
long val, b;
b = input.readByte() & 0xff;
b = input.read() & 0xff;
val = b & 0x7f;
if (b < 0x80) {
return val;
}
b = input.readByte() & 0xff;
b = input.read() & 0xff;
val |= (b & 0x7f) << 7;
if (b < 0x80) {
return val;
}
b = input.readByte() & 0xff;
b = input.read() & 0xff;
val |= (b & 0x7f) << 14;
if (b < 0x80) {
return val;
}
b = input.readByte() & 0xff;
b = input.read() & 0xff;
val |= (b & 0x7f) << 21;
if (b < 0x80) {
return val;
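
With the varint helpers now reading from and writing to plain java.io streams, a round trip needs nothing more than byte-array streams. A small sketch, assuming the statics above live in a utility class (called PMTilesUtils here; the class name is not visible in this hunk):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;

class VarIntRoundTrip {
  public static void main(String[] args) throws IOException {
    var output = new ByteArrayOutputStream();
    PMTilesUtils.writeVarInt(output, 300L);   // 300 encodes as two bytes: 0xAC 0x02
    PMTilesUtils.writeVarInt(output, 1L);     // values below 0x80 take a single byte
    var input = new ByteArrayInputStream(output.toByteArray());
    System.out.println(PMTilesUtils.readVarInt(input));   // prints 300
    System.out.println(PMTilesUtils.readVarInt(input));   // prints 1
  }
}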
@@ -179,74 +181,83 @@ public static long[] tileIdToZxy(long i) {

private static final int HEADER_SIZE_BYTES = 127;

public static Header deserializeHeader(LittleEndianDataInputStream input) throws IOException {
input.skipBytes(7);
public static Header deserializeHeader(InputStream input) throws IOException {
byte[] bytes = new byte[HEADER_SIZE_BYTES];
var num = input.read(bytes);
if (num != HEADER_SIZE_BYTES) {
throw new IOException("Invalid header size");
}
var buffer = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN);
buffer.position(7);
return new Header(
input.readByte(),
input.readLong(),
input.readLong(),
input.readLong(),
input.readLong(),
input.readLong(),
input.readLong(),
input.readLong(),
input.readLong(),
input.readLong(),
input.readLong(),
input.readLong(),
input.readByte() == 1,
Compression.values()[input.readByte()],
Compression.values()[input.readByte()],
TileType.values()[input.readByte()],
input.readByte(),
input.readByte(),
(double) input.readInt() / 10000000,
(double) input.readInt() / 10000000,
(double) input.readInt() / 10000000,
(double) input.readInt() / 10000000,
input.readByte(),
(double) input.readInt() / 10000000,
(double) input.readInt() / 10000000);
buffer.get(),
buffer.getLong(),
buffer.getLong(),
buffer.getLong(),
buffer.getLong(),
buffer.getLong(),
buffer.getLong(),
buffer.getLong(),
buffer.getLong(),
buffer.getLong(),
buffer.getLong(),
buffer.getLong(),
buffer.get() == 1,
Compression.values()[buffer.get()],
Compression.values()[buffer.get()],
TileType.values()[buffer.get()],
buffer.get(),
buffer.get(),
(double) buffer.getInt() / 10000000,
(double) buffer.getInt() / 10000000,
(double) buffer.getInt() / 10000000,
(double) buffer.getInt() / 10000000,
buffer.get(),
(double) buffer.getInt() / 10000000,
(double) buffer.getInt() / 10000000);
}

public static void serializeHeader(LittleEndianDataOutputStream output, Header header)
throws IOException {
output.writeByte((byte) 0x50);
output.writeByte((byte) 0x4D);
output.writeByte((byte) 0x54);
output.writeByte((byte) 0x69);
output.writeByte((byte) 0x6C);
output.writeByte((byte) 0x65);
output.writeByte((byte) 0x73);
output.writeByte((byte) header.getSpecVersion());
output.writeLong(header.getRootDirectoryOffset());
output.writeLong(header.getRootDirectoryLength());
output.writeLong(header.getJsonMetadataOffset());
output.writeLong(header.getJsonMetadataLength());
output.writeLong(header.getLeafDirectoryOffset());
output.writeLong(header.getLeafDirectoryLength());
output.writeLong(header.getTileDataOffset());
output.writeLong(header.getTileDataLength());
output.writeLong(header.getNumAddressedTiles());
output.writeLong(header.getNumTileEntries());
output.writeLong(header.getNumTileContents());
output.writeByte((byte) (header.isClustered() ? 1 : 0));
output.writeByte((byte) header.getInternalCompression().ordinal());
output.writeByte((byte) header.getTileCompression().ordinal());
output.writeByte((byte) header.getTileType().ordinal());
output.writeByte((byte) header.getMinZoom());
output.writeByte((byte) header.getMaxZoom());
output.writeInt((int) (header.getMinLon() * 10000000));
output.writeInt((int) (header.getMinLat() * 10000000));
output.writeInt((int) (header.getMaxLon() * 10000000));
output.writeInt((int) (header.getMaxLat() * 10000000));
output.writeByte((byte) header.getCenterZoom());
output.writeInt((int) (header.getCenterLon() * 10000000));
output.writeInt((int) (header.getCenterLat() * 10000000));
public static byte[] serializeHeader(Header header) {
var buffer = ByteBuffer.allocate(HEADER_SIZE_BYTES).order(ByteOrder.LITTLE_ENDIAN);
buffer.put((byte) 0x50);
buffer.put((byte) 0x4D);
buffer.put((byte) 0x54);
buffer.put((byte) 0x69);
buffer.put((byte) 0x6C);
buffer.put((byte) 0x65);
buffer.put((byte) 0x73);
buffer.put((byte) header.getSpecVersion());
buffer.putLong(header.getRootDirectoryOffset());
buffer.putLong(header.getRootDirectoryLength());
buffer.putLong(header.getJsonMetadataOffset());
buffer.putLong(header.getJsonMetadataLength());
buffer.putLong(header.getLeafDirectoryOffset());
buffer.putLong(header.getLeafDirectoryLength());
buffer.putLong(header.getTileDataOffset());
buffer.putLong(header.getTileDataLength());
buffer.putLong(header.getNumAddressedTiles());
buffer.putLong(header.getNumTileEntries());
buffer.putLong(header.getNumTileContents());
buffer.put((byte) (header.isClustered() ? 1 : 0));
buffer.put((byte) header.getInternalCompression().ordinal());
buffer.put((byte) header.getTileCompression().ordinal());
buffer.put((byte) header.getTileType().ordinal());
buffer.put((byte) header.getMinZoom());
buffer.put((byte) header.getMaxZoom());
buffer.putInt((int) (header.getMinLon() * 10000000));
buffer.putInt((int) (header.getMinLat() * 10000000));
buffer.putInt((int) (header.getMaxLon() * 10000000));
buffer.putInt((int) (header.getMaxLat() * 10000000));
buffer.put((byte) header.getCenterZoom());
buffer.putInt((int) (header.getCenterLon() * 10000000));
buffer.putInt((int) (header.getCenterLat() * 10000000));
buffer.flip();
return buffer.array();
}
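
The header now round-trips through a little-endian ByteBuffer: deserializeHeader reads exactly HEADER_SIZE_BYTES from the stream and decodes them in place of the old skipBytes/readLong sequence, while serializeHeader returns the encoded 127 bytes instead of writing to a stream. A usage sketch with the same hypothetical PMTilesUtils class and a placeholder archive path:

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;

class HeaderRoundTrip {
  public static void main(String[] args) throws IOException {
    try (InputStream input = Files.newInputStream(Path.of("tiles.pmtiles"))) {
      var header = PMTilesUtils.deserializeHeader(input);      // consumes the leading 127 bytes
      byte[] encoded = PMTilesUtils.serializeHeader(header);   // re-encodes the same 127 bytes
      System.out.println("root directory: offset " + header.getRootDirectoryOffset()
          + ", length " + header.getRootDirectoryLength()
          + ", header size " + encoded.length);
    }
  }
}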

public static void serializeEntries(LittleEndianDataOutputStream output, List<Entry> entries)
public static void serializeEntries(OutputStream output, List<Entry> entries)
throws IOException {
var buffer = ByteBuffer.allocate(entries.size() * 48);
writeVarInt(output, entries.size());
long lastId = 0;
for (Entry entry : entries) {
@@ -268,9 +279,11 @@ public static void serializeEntries(LittleEndianDataOutputStream output, List<En
writeVarInt(output, entry.getOffset() + 1);
}
}
buffer.flip();
output.write(buffer.array(), 0, buffer.limit());
}
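
serializeEntries writes the directory as varints against a plain OutputStream and deserializeEntries reads it back from a plain InputStream, so the compression wrappers introduced further down can sit directly between the two. A round-trip sketch with the hypothetical PMTilesUtils class; the Entry constructor order (tile id, offset, length, run length) is inferred from the buildRootLeaves call below:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;

class EntriesRoundTrip {
  public static void main(String[] args) throws IOException {
    List<Entry> entries = List.of(
        new Entry(5, 0, 512, 1),     // inferred order: tileId, offset, length, runLength
        new Entry(6, 512, 256, 1));
    try (var output = new ByteArrayOutputStream()) {
      PMTilesUtils.serializeEntries(output, entries);
      var input = new ByteArrayInputStream(output.toByteArray());
      List<Entry> decoded = PMTilesUtils.deserializeEntries(input);
      System.out.println(decoded.size() + " entries, first tile id " + decoded.get(0).getTileId());
    }
  }
}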

public static List<Entry> deserializeEntries(LittleEndianDataInputStream buffer)
public static List<Entry> deserializeEntries(InputStream buffer)
throws IOException {
long numEntries = readVarInt(buffer);
List<Entry> entries = new ArrayList<>((int) numEntries);
@@ -329,60 +342,62 @@ public static Entry findTile(List<Entry> entries, long tileId) {
return null;
}

public static Directories buildRootLeaves(List<Entry> entries, int leafSize) throws IOException {
public static Directories buildRootLeaves(List<Entry> entries, int leafSize,
Compression compression) throws IOException {
var rootEntries = new ArrayList<Entry>();
var numLeaves = 0;
byte[] leavesBytes;
byte[] rootBytes;

try (var leavesOutput = new ByteArrayOutputStream();
var leavesDataOutput = new LittleEndianDataOutputStream(leavesOutput)) {
try (var leavesOutput = new ByteArrayOutputStream()) {
for (var i = 0; i < entries.size(); i += leafSize) {
numLeaves++;
var end = i + leafSize;
if (i + leafSize > entries.size()) {
end = entries.size();
}

var offset = leavesOutput.size();
serializeEntries(leavesDataOutput, entries.subList(i, end));
var length = leavesOutput.size();
rootEntries.add(new Entry(entries.get(i).getTileId(), offset, length, 0));
try (var leafOutput = new ByteArrayOutputStream()) {
try (var compressedLeafOutput = compression.compress(leafOutput)) {
serializeEntries(compressedLeafOutput, entries.subList(i, end));
}
var length = leafOutput.size();
rootEntries.add(new Entry(entries.get(i).getTileId(), offset, length, 0));
leavesOutput.write(leafOutput.toByteArray());
}
}

leavesBytes = leavesOutput.toByteArray();
}

try (var rootOutput = new ByteArrayOutputStream();
var rootDataOutput = new LittleEndianDataOutputStream(rootOutput)) {
serializeEntries(rootDataOutput, rootEntries);
try (var rootOutput = new ByteArrayOutputStream()) {
try (var compressedRootOutput = compression.compress(rootOutput)) {
serializeEntries(compressedRootOutput, rootEntries);
}
rootBytes = rootOutput.toByteArray();
}

return new Directories(rootBytes, leavesBytes, numLeaves);
}
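
buildRootLeaves now compresses each leaf directory and the root directory through the supplied Compression before recording offsets and lengths, and optimizeDirectories (below) gains the same parameter, growing the leaf size until the compressed root fits the target length. A hedged usage sketch, again with the hypothetical PMTilesUtils class; Compression.GZIP is assumed to be one of the enum's constants, and only the getRoot() accessor that appears below is used:

import java.io.IOException;
import java.util.ArrayList;

class DirectoriesSketch {
  public static void main(String[] args) throws IOException {
    var entries = new ArrayList<Entry>();
    for (long tileId = 0; tileId < 100_000; tileId++) {
      entries.add(new Entry(tileId, tileId * 256, 256, 1));   // contiguous 256-byte tiles
    }
    // The PMTiles spec reserves the first 16384 bytes for the header plus the root directory.
    var directories = PMTilesUtils.optimizeDirectories(entries, 16384 - 127, Compression.GZIP);
    System.out.println("compressed root directory: " + directories.getRoot().length + " bytes");
  }
}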

public static Directories optimizeDirectories(List<Entry> entries, int targetRootLenght)
public static Directories optimizeDirectories(List<Entry> entries, int targetRootLength,
Compression compression)
throws IOException {
if (entries.size() < 16384) {
byte[] rootBytes;
try (var rootOutput = new ByteArrayOutputStream();
var rootDataOutput = new LittleEndianDataOutputStream(rootOutput)) {
serializeEntries(rootDataOutput, entries);
rootBytes = rootOutput.toByteArray();
}
if (rootBytes.length <= targetRootLenght) {
return new Directories(rootBytes, new byte[] {}, 0);
try (var rootOutput = new ByteArrayOutputStream()) {
try (var compressedOutput = compression.compress(rootOutput)) {
serializeEntries(compressedOutput, entries);
}
byte[] rootBytes = rootOutput.toByteArray();
if (rootBytes.length <= targetRootLength) {
return new Directories(rootBytes, new byte[] {}, 0);
}
}
}

double leafSize = (double) entries.size() / 3500;
if (leafSize < 4096) {
leafSize = 4096;
}
double leafSize = Math.max((double) entries.size() / 3500, 4096);
for (;;) {
var directories = buildRootLeaves(entries, (int) leafSize);
if (directories.getRoot().length <= targetRootLenght) {
var directories = buildRootLeaves(entries, (int) leafSize, compression);
if (directories.getRoot().length <= targetRootLength) {
return directories;
}
leafSize = leafSize * 1.2;