Skip to content

Commit

Permalink
fix for mbtiles apply-patch with raw bindiff (#1384)
Browse files Browse the repository at this point in the history
When applying raw (no gzip) patches, `mbtiles` was trying to un-gzip
them first. Now handles it properly.  Also adds a number of tests to catch these cases.
  • Loading branch information
nyurik authored Jun 26, 2024
1 parent 5d52ca2 commit b71c846
Show file tree
Hide file tree
Showing 25 changed files with 754 additions and 125 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ lambda-web = { version = "0.2.1", features = ["actix4"] }
libsqlite3-sys = { version = ">=0.27", features = ["bundled"] }
log = "0.4"
martin-tile-utils = { path = "./martin-tile-utils", version = "0.5.0" }
mbtiles = { path = "./mbtiles", version = "0.10.0" }
mbtiles = { path = "./mbtiles", version = "0.11.0" }
md5 = "0.7.0"
moka = { version = "0.12", features = ["future"] }
num_cpus = "1"
Expand Down
2 changes: 1 addition & 1 deletion mbtiles/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ lints.workspace = true

[package]
name = "mbtiles"
version = "0.10.0"
version = "0.11.0"
authors = ["Yuri Astrakhan <[email protected]>", "MapLibre contributors"]
description = "A simple low-level MbTiles access and processing library, with some tile format detection and other relevant heuristics."
keywords = ["mbtiles", "maps", "tiles", "mvt", "tilejson"]
Expand Down
17 changes: 8 additions & 9 deletions mbtiles/src/bin/mbtiles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use clap::{Parser, Subcommand};
use log::error;
use mbtiles::{
apply_patch, AggHashType, CopyDuplicateMode, CopyType, IntegrityCheckType, MbtResult,
MbtTypeCli, Mbtiles, MbtilesCopier, PatchType, UpdateZoomType,
MbtTypeCli, Mbtiles, MbtilesCopier, PatchTypeCli, UpdateZoomType,
};
use tilejson::Bounds;

Expand Down Expand Up @@ -116,8 +116,8 @@ pub struct CopyArgs {
#[arg(long, conflicts_with("diff_with_file"))]
apply_patch: Option<PathBuf>,
/// Specify the type of patch file to generate.
#[arg(long, requires("diff_with_file"), default_value_t=PatchType::default())]
patch_type: PatchType,
#[arg(long, requires("diff_with_file"), default_value_t=PatchTypeCli::default())]
patch_type: PatchTypeCli,
}

#[allow(clippy::doc_markdown)]
Expand All @@ -130,8 +130,8 @@ pub struct DiffArgs {
/// Output file to write the resulting difference to
diff: PathBuf,
/// Specify the type of patch file to generate.
#[arg(long, default_value_t=PatchType::default())]
patch_type: PatchType,
#[arg(long, default_value_t=PatchTypeCli::default())]
patch_type: PatchTypeCli,

#[command(flatten)]
pub options: SharedCopyOpts,
Expand Down Expand Up @@ -181,12 +181,12 @@ impl SharedCopyOpts {
dst_file: PathBuf,
diff_with_file: Option<PathBuf>,
apply_patch: Option<PathBuf>,
patch_type: PatchType,
patch_type: PatchTypeCli,
) -> MbtilesCopier {
MbtilesCopier {
src_file,
dst_file,
diff_with_file: diff_with_file.map(|p| (p, patch_type)),
diff_with_file: diff_with_file.map(|p| (p, patch_type.into())),
apply_patch,
// Shared
copy: self.copy,
Expand Down Expand Up @@ -329,7 +329,6 @@ mod tests {
use clap::error::ErrorKind;
use clap::Parser;
use mbtiles::CopyDuplicateMode;
use mbtiles::PatchType::Whole;

use super::*;
use crate::Commands::{ApplyPatch, Copy, Diff, MetaGetValue, MetaSetValue, Validate};
Expand Down Expand Up @@ -540,7 +539,7 @@ mod tests {
file1: PathBuf::from("file1.mbtiles"),
file2: PathBuf::from("file2.mbtiles"),
diff: PathBuf::from("../delta.mbtiles"),
patch_type: Whole,
patch_type: PatchTypeCli::Whole,
options: SharedCopyOpts {
on_duplicate: Some(CopyDuplicateMode::Override),
..Default::default()
Expand Down
74 changes: 58 additions & 16 deletions mbtiles/src/bindiff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,51 @@ use std::sync::atomic::Ordering::Relaxed;
use std::sync::Arc;
use std::time::Instant;

use enum_display::EnumDisplay;
use flume::{bounded, Receiver, Sender};
use futures::TryStreamExt;
use log::{debug, error, info};
use martin_tile_utils::{decode_brotli, decode_gzip, encode_brotli, encode_gzip, TileCoord};
use serde::{Deserialize, Serialize};
use sqlite_compressions::{BsdiffRawDiffer, Differ as _};
use sqlx::{query, Executor, Row, SqliteConnection};
use xxhash_rust::xxh3::xxh3_64;

use crate::MbtType::{Flat, FlatWithHash, Normalized};
use crate::PatchType::Whole;
use crate::{
create_bsdiffraw_tables, get_bsdiff_tbl_name, MbtError, MbtResult, MbtType, Mbtiles, PatchType,
};
use crate::PatchType::{BinDiffGz, BinDiffRaw};
use crate::{create_bsdiffraw_tables, get_bsdiff_tbl_name, MbtError, MbtResult, MbtType, Mbtiles};

#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, EnumDisplay)]
#[enum_display(case = "Kebab")]
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
pub enum PatchTypeCli {
/// Patch file will contain the entire tile if it is different from the source
#[default]
Whole,
/// Use bin-diff to store only the bytes changed between two versions of each tile. Treats content as gzipped blobs, decoding them before diffing.
BinDiffGz,
/// Use bin-diff to store only the bytes changed between two versions of each tile. Treats content as blobs without any special encoding.
BinDiffRaw,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, EnumDisplay)]
#[enum_display(case = "Kebab")]
pub enum PatchType {
/// Use bin-diff to store only the bytes changed between two versions of each tile. Treats content as gzipped blobs, decoding them before diffing.
BinDiffGz,
/// Use bin-diff to store only the bytes changed between two versions of each tile. Treats content as blobs without any special encoding.
BinDiffRaw,
}

impl From<PatchTypeCli> for Option<PatchType> {
fn from(cli: PatchTypeCli) -> Self {
match cli {
PatchTypeCli::Whole => None,
PatchTypeCli::BinDiffGz => Some(BinDiffGz),
PatchTypeCli::BinDiffRaw => Some(BinDiffRaw),
}
}
}

pub trait BinDiffer<S: Send + 'static, T: Send + 'static>: Sized + Send + Sync + 'static {
fn query(
Expand Down Expand Up @@ -153,7 +185,6 @@ impl BinDiffDiffer {
dif_type: MbtType,
patch_type: PatchType,
) -> Self {
assert_ne!(patch_type, Whole, "Invalid for BinDiffDiffer");
let insert_sql = format!(
"INSERT INTO {}(zoom_level, tile_column, tile_row, patch_data, tile_xxh3_64_hash) VALUES (?, ?, ?, ?, ?)",
get_bsdiff_tbl_name(patch_type));
Expand Down Expand Up @@ -219,7 +250,7 @@ impl BinDiffer<DifferBefore, DifferAfter> for BinDiffDiffer {
fn process(&self, value: DifferBefore) -> MbtResult<DifferAfter> {
let mut old_tile = value.old_tile_data;
let mut new_tile = value.new_tile_data;
if self.patch_type == PatchType::BinDiffGz {
if self.patch_type == BinDiffGz {
old_tile = decode_gzip(&old_tile).inspect_err(|e| {
error!("Unable to gzip-decode source tile {:?}: {e}", value.coord);
})?;
Expand Down Expand Up @@ -258,14 +289,14 @@ impl BinDiffer<DifferBefore, DifferAfter> for BinDiffDiffer {

pub struct ApplierBefore {
coord: TileCoord,
tile_data: Vec<u8>,
old_tile: Vec<u8>,
patch_data: Vec<u8>,
uncompressed_tile_hash: u64,
}

pub struct ApplierAfter {
coord: TileCoord,
data: Vec<u8>,
new_tile: Vec<u8>,
new_tile_hash: String,
}

Expand Down Expand Up @@ -322,7 +353,7 @@ impl BinDiffer<ApplierBefore, ApplierAfter> for BinDiffPatcher {
x: row.get(1),
y: row.get(2),
},
tile_data: row.get(3),
old_tile: row.get(3),
patch_data: row.get(4),
#[allow(clippy::cast_sign_loss)]
uncompressed_tile_hash: row.get::<i64, _>(5) as u64,
Expand All @@ -336,12 +367,21 @@ impl BinDiffer<ApplierBefore, ApplierAfter> for BinDiffPatcher {
}

fn process(&self, value: ApplierBefore) -> MbtResult<ApplierAfter> {
let tile_data = decode_gzip(&value.tile_data)
.inspect_err(|e| error!("Unable to gzip-decode source tile {:?}: {e}", value.coord))?;
let old_tile = if self.patch_type == BinDiffGz {
decode_gzip(&value.old_tile).inspect_err(|e| {
error!("Unable to gzip-decode source tile {:?}: {e}", value.coord);
})?
} else {
value.old_tile
};

let patch_data = decode_brotli(&value.patch_data)
.inspect_err(|e| error!("Unable to brotli-decode patch data {:?}: {e}", value.coord))?;
let new_tile = BsdiffRawDiffer::patch(&tile_data, &patch_data)

let mut new_tile = BsdiffRawDiffer::patch(&old_tile, &patch_data)
.inspect_err(|e| error!("Unable to patch tile {:?}: {e}", value.coord))?;

// Verify the hash of the patched tile is what we expect
let new_tile_hash = xxh3_64(&new_tile);
if new_tile_hash != value.uncompressed_tile_hash {
return Err(MbtError::BinDiffIncorrectTileHash(
Expand All @@ -351,16 +391,18 @@ impl BinDiffer<ApplierBefore, ApplierAfter> for BinDiffPatcher {
));
}

let data = encode_gzip(&new_tile)?;
if self.patch_type == BinDiffGz {
new_tile = encode_gzip(&new_tile)?;
};

Ok(ApplierAfter {
coord: value.coord,
new_tile_hash: if self.dst_type == FlatWithHash {
format!("{:X}", md5::compute(&data))
format!("{:X}", md5::compute(&new_tile))
} else {
String::default() // This is a fast noop, no memory alloc is performed
},
data,
new_tile,
})
}

Expand All @@ -378,7 +420,7 @@ impl BinDiffer<ApplierBefore, ApplierAfter> for BinDiffPatcher {
.bind(value.coord.z)
.bind(value.coord.x)
.bind(value.coord.y)
.bind(value.data);
.bind(value.new_tile);

if self.dst_type == FlatWithHash {
q = q.bind(value.new_tile_hash);
Expand Down
Loading

0 comments on commit b71c846

Please sign in to comment.