Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add integrity and global hash check to validation #851

Merged
merged 2 commits into from
Aug 31, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ serde = { version = "1", features = ["derive"] }
serde_json = "1"
serde_yaml = "0.9"
spreet = { version = "0.8", default-features = false }
sqlite-hashes = "0.2"
sqlite-hashes = "0.3"
sqlx = { version = "0.7", features = ["sqlite"] }
subst = { version = "0.2", features = ["yaml"] }
thiserror = "1"
Expand Down
33 changes: 27 additions & 6 deletions martin-mbtiles/src/bin/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ use std::path::{Path, PathBuf};

use anyhow::Result;
use clap::{Parser, Subcommand};
use martin_mbtiles::{apply_mbtiles_diff, copy_mbtiles_file, Mbtiles, TileCopierOptions};
use martin_mbtiles::{
apply_mbtiles_diff, copy_mbtiles_file, IntegrityCheck, Mbtiles, TileCopierOptions,
};
use sqlx::sqlite::SqliteConnectOptions;
use sqlx::{Connection, SqliteConnection};

Expand Down Expand Up @@ -62,6 +64,12 @@ enum Commands {
Validate {
/// MBTiles file to validate
file: PathBuf,
/// Value to specify the extent of the SQLite integrity check performed
#[arg(long, value_enum, default_value_t=IntegrityCheck::default())]
integrity_check: IntegrityCheck,
/// Generate a hash of the tile data hashes and store it under the 'global_hash' key in metadata
#[arg(long)]
generate_global_hash: bool,
},
}

Expand All @@ -85,8 +93,12 @@ async fn main() -> Result<()> {
} => {
apply_mbtiles_diff(src_file, diff_file).await?;
}
Commands::Validate { file } => {
validate_mbtiles(file.as_path()).await?;
Commands::Validate {
file,
integrity_check,
generate_global_hash,
} => {
validate_mbtiles(file.as_path(), integrity_check, generate_global_hash).await?;
}
}

Expand All @@ -111,11 +123,18 @@ async fn meta_set_value(file: &Path, key: &str, value: Option<String>) -> Result
Ok(())
}

async fn validate_mbtiles(file: &Path) -> Result<()> {
async fn validate_mbtiles(
file: &Path,
integrity_check: IntegrityCheck,
generate_global_hash: bool,
) -> Result<()> {
let mbt = Mbtiles::new(file)?;
let opt = SqliteConnectOptions::new().filename(file).read_only(true);
let mut conn = SqliteConnection::connect_with(&opt).await?;
mbt.validate_mbtiles(&mut conn).await?;
mbt.validate_mbtiles(integrity_check, &mut conn).await?;
if generate_global_hash {
mbt.generate_global_hash(&mut conn).await?;
}
Ok(())
}

Expand All @@ -127,8 +146,8 @@ mod tests {
use clap::Parser;
use martin_mbtiles::{CopyDuplicateMode, TileCopierOptions};

use crate::Args;
use crate::Commands::{ApplyDiff, Copy, MetaGetValue, MetaSetValue, Validate};
use crate::{Args, IntegrityCheck};

#[test]
fn test_copy_no_arguments() {
Expand Down Expand Up @@ -407,6 +426,8 @@ mod tests {
verbose: false,
command: Validate {
file: PathBuf::from("src_file"),
integrity_check: IntegrityCheck::Quick,
generate_global_hash: false
}
}
);
Expand Down
8 changes: 3 additions & 5 deletions martin-mbtiles/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ use std::path::PathBuf;
use martin_tile_utils::TileInfo;
use sqlite_hashes::rusqlite;

use crate::mbtiles::MbtType;

#[derive(thiserror::Error, Debug)]
pub enum MbtError {
#[error("SQL Error {0}")]
Expand All @@ -22,12 +20,12 @@ pub enum MbtError {
#[error("Invalid data format for MBTile file {0}")]
InvalidDataFormat(String),

#[error("Integrity check failed for MBTile file {0}")]
FailedIntegrityCheck(String),

#[error("Invalid tile data for MBTile file {0}")]
InvalidTileData(String),

#[error("Incorrect data format for MBTile file {0}; expected {1:?} and got {2:?}")]
IncorrectDataFormat(String, &'static [MbtType], MbtType),

#[error(r#"Filename "{0}" passed to SQLite must be valid UTF-8"#)]
InvalidFilenameType(PathBuf),

Expand Down
2 changes: 1 addition & 1 deletion martin-mbtiles/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ mod mbtiles_queries;
mod tile_copier;

pub use errors::MbtError;
pub use mbtiles::{Mbtiles, Metadata};
pub use mbtiles::{IntegrityCheck, Mbtiles, Metadata};
pub use mbtiles_pool::MbtilesPool;
pub use tile_copier::{
apply_mbtiles_diff, copy_mbtiles_file, CopyDuplicateMode, TileCopierOptions,
Expand Down
127 changes: 110 additions & 17 deletions martin-mbtiles/src/mbtiles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,11 @@ use sqlx::{query, Row, SqliteExecutor};
use tilejson::{tilejson, Bounds, Center, TileJSON};

use crate::errors::{MbtError, MbtResult};
use crate::mbtiles::MbtType::Flat;
use crate::mbtiles_queries::{
is_flat_tables_type, is_flat_with_hash_tables_type, is_normalized_tables_type,
};
use crate::MbtError::{IncorrectDataFormat, InvalidTileData};
use crate::MbtError::{FailedIntegrityCheck, InvalidTileData};

#[derive(Clone, Debug, PartialEq)]
pub struct Metadata {
Expand All @@ -42,6 +43,15 @@ pub enum MbtType {
Normalized,
}

/// Selects how much SQLite-level integrity checking `validate_mbtiles` performs
/// before it moves on to the hash-based validation steps.
///
/// * `Full` - run SQLite's `PRAGMA integrity_check`.
/// * `Quick` - the default level; presumably a cheaper check than `Full`
///   (NOTE(review): confirm which pragma `validate_mbtiles` issues for it).
/// * `Off` - skip the SQLite-level check entirely.
///
/// The enum is field-less, so it is `Copy` and can be passed around by value
/// without explicit clones.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[cfg_attr(feature = "cli", derive(ValueEnum))]
pub enum IntegrityCheck {
    Full,
    #[default]
    Quick,
    Off,
}

#[derive(Clone, Debug)]
pub struct Mbtiles {
filepath: String,
Expand Down Expand Up @@ -378,32 +388,102 @@ impl Mbtiles {
Err(MbtError::NoUniquenessConstraint(self.filepath.clone()))
}

pub async fn validate_mbtiles<T>(&self, conn: &mut T) -> MbtResult<()>
pub async fn generate_global_hash<T>(&self, conn: &mut T) -> MbtResult<()>
where
for<'e> &'e mut T: SqliteExecutor<'e>,
{
let mbttype = self.detect_type(&mut *conn).await?;

let sql = match mbttype {
MbtType::Flat => {
return Err(IncorrectDataFormat(
self.filepath().to_string(),
&[MbtType::FlatWithHash, MbtType::Normalized],
MbtType::Flat,
));
}
MbtType::FlatWithHash => {
"SELECT * FROM tiles_with_hash WHERE tile_hash!=hex(md5(tile_data)) LIMIT 1;"
let select_from = match mbttype {
Flat => {
println!("Cannot generate global hash, no hash column in flat table format. Skipping global_hash generation...");
return Ok(());
}
MbtType::Normalized => {
"SELECT * FROM images WHERE tile_id!=hex(md5(tile_data)) LIMIT 1;"
MbtType::FlatWithHash => "SELECT 'global_hash', hex(md5_concat(tile_hash)) FROM tiles_with_hash ORDER BY zoom_level, tile_column, tile_row;",
MbtType::Normalized => "SELECT 'global_hash', hex(md5_concat(images.tile_id)) FROM images JOIN map ON images.tile_id=map.tile_id ORDER BY zoom_level, tile_column, tile_row;"
upsicleclown marked this conversation as resolved.
Show resolved Hide resolved

}.to_string();

let rusqlite_conn = RusqliteConnection::open(Path::new(&self.filepath()))?;
register_md5_function(&rusqlite_conn)?;
rusqlite_conn.execute(
format!("INSERT OR REPLACE INTO metadata(name, value) {select_from}").as_str(),
upsicleclown marked this conversation as resolved.
Show resolved Hide resolved
[],
)?;
Ok(())
}

pub async fn validate_mbtiles<T>(
&self,
integrity_check: IntegrityCheck,
conn: &mut T,
) -> MbtResult<()>
where
for<'e> &'e mut T: SqliteExecutor<'e>,
{
// SQLite Integrity check
if "ok"
!= match integrity_check {
IntegrityCheck::Full => query("PRAGMA integrity_check;")
.fetch_one(&mut *conn)
.await?
.get::<String, _>(0),
IntegrityCheck::Quick => query("PRAGMA integrity_check;")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
IntegrityCheck::Quick => query("PRAGMA integrity_check;")
IntegrityCheck::Quick => query("PRAGMA quick_check;")

.fetch_one(&mut *conn)
.await?
.get::<String, _>(0),
IntegrityCheck::Off => "ok".to_string(),
}
{
return Err(FailedIntegrityCheck(self.filepath().to_string()));
upsicleclown marked this conversation as resolved.
Show resolved Hide resolved
}

let mbttype = self.detect_type(&mut *conn).await?;

if mbttype == MbtType::Flat {
println!(
"No hash column in flat table format, skipping hash-based validation steps..."
);
return Ok(());
}
.to_string();

let rusqlite_conn = RusqliteConnection::open(Path::new(self.filepath()))?;
register_md5_function(&rusqlite_conn)?;

// Global hash check
if self
.get_metadata_value(&mut *conn, "global_hash")
.await?
.is_none()
{
println!(
upsicleclown marked this conversation as resolved.
Show resolved Hide resolved
"No value for 'global_hash' key found in metadata, skipping global hash validation step..."
);
} else {
let sql = if mbttype == MbtType::FlatWithHash {
upsicleclown marked this conversation as resolved.
Show resolved Hide resolved
"SELECT * FROM metadata
WHERE name='global_hash'
AND value!=(SELECT hex(md5_concat(tile_hash)) FROM tiles_with_hash ORDER BY zoom_level, tile_column, tile_row);"
} else {
"SELECT * FROM metadata
WHERE name='global_hash'
AND value!=(hex(md5_concat(images.tile_id)) FROM images JOIN map ON images.tile_id=map.tile_id ORDER BY zoom_level, tile_column, tile_row);"
}
.to_string();

if rusqlite_conn.prepare(&sql)?.exists(())? {
return Err(InvalidTileData(self.filepath().to_string()));
}
}

// Per-tile hash check
let sql = if mbttype == MbtType::FlatWithHash {
"SELECT * FROM tiles_with_hash WHERE tile_hash!=hex(md5(tile_data)) LIMIT 1;"
upsicleclown marked this conversation as resolved.
Show resolved Hide resolved
} else {
"SELECT * FROM images WHERE tile_id!=hex(md5(tile_data)) LIMIT 1;"
upsicleclown marked this conversation as resolved.
Show resolved Hide resolved
}
.to_string();
upsicleclown marked this conversation as resolved.
Show resolved Hide resolved

if rusqlite_conn.prepare(&sql)?.exists(())? {
return Err(InvalidTileData(self.filepath().to_string()));
}
Expand Down Expand Up @@ -569,7 +649,9 @@ mod tests {
async fn validate_valid_file() {
let (mut conn, mbt) = open("../tests/fixtures/files/zoomed_world_cities.mbtiles").await;

mbt.validate_mbtiles(&mut conn).await.unwrap();
mbt.validate_mbtiles(IntegrityCheck::Quick, &mut conn)
.await
.unwrap();
}

#[actix_rt::test]
Expand All @@ -578,8 +660,19 @@ mod tests {
open("../tests/fixtures/files/invalid_zoomed_world_cities.mbtiles").await;

assert!(matches!(
mbt.validate_mbtiles(&mut conn).await.unwrap_err(),
mbt.validate_mbtiles(IntegrityCheck::Quick, &mut conn)
.await
.unwrap_err(),
MbtError::InvalidTileData(..)
));
}

#[actix_rt::test]
async fn validate_file_with_global_hash() {
    // NOTE(review): presumably this fixture carries a 'global_hash' metadata entry so
    // that the global-hash branch of validation is exercised - confirm against the file.
    let fixture = "../tests/fixtures/files/zoomed_world_cities.mbtiles";
    let (mut conn, mbt) = open(fixture).await;

    // Validation must succeed; any error fails the test via `unwrap`.
    let outcome = mbt.validate_mbtiles(IntegrityCheck::Quick, &mut conn).await;
    outcome.unwrap();
}
}
4 changes: 2 additions & 2 deletions martin-mbtiles/src/tile_copier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ pub struct TileCopierOptions {
src_file: PathBuf,
/// MBTiles file to write to
dst_file: PathBuf,
/// TODO: add documentation Output format of the destination file, ignored if the file exists. if not specified, defaults to the type of source
/// Output format of the destination file, ignored if the file exists. If not specified, defaults to the type of the source file
upsicleclown marked this conversation as resolved.
Show resolved Hide resolved
#[cfg_attr(feature = "cli", arg(long, value_enum))]
dst_mbttype: Option<MbtType>,
/// Specify copying behaviour when tiles with duplicate (zoom_level, tile_column, tile_row) values are found
#[cfg_attr(feature = "cli", arg(long, value_enum, default_value_t = CopyDuplicateMode::Override))]
#[cfg_attr(feature = "cli", arg(long, value_enum, default_value_t = CopyDuplicateMode::default()))]
on_duplicate: CopyDuplicateMode,
/// Minimum zoom level to copy
#[cfg_attr(feature = "cli", arg(long, conflicts_with("zoom_levels")))]
Expand Down
Binary file modified tests/fixtures/files/zoomed_world_cities.mbtiles
Binary file not shown.
Loading