Skip to content

Commit

Permalink
checksum: add FxHash and GxHash based checksum (#58)
Browse files Browse the repository at this point in the history
* checksum: add fxhash

This is just to test out the performance compared to the xxhash we've been
using until now.  Early measurements with 4M blobs have shown that it could be
worth experimenting with fxhash, which is used in the Rust compiler.

* checksum: structure module

* checksum: fix typo

* storage_pool: remove obsolete in_memory module

Noticed this while grepping for XxHash; it seems to have evaded the cleanup
process some generations ago.

* superblock: remove dependency on XxHash

* checksum: add gxhash

This commit required modifying the build context to allow for the AES
optimizations of GxHash. This should not be an issue on the systems we
use (x86-64 and possibly ARM64), which I tested before this commit.

* checksum: remove potential ambiguous module ref

* betree: move .cargo config to workspace level

* checksum: correct module description

* database: make GxHash the default checksum
  • Loading branch information
jwuensche authored Apr 16, 2024
1 parent 2ce688e commit 585b5fa
Show file tree
Hide file tree
Showing 10 changed files with 297 additions and 259 deletions.
2 changes: 2 additions & 0 deletions .cargo/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[build]
# Compile for the instruction set of the build host. According to the commit
# message this is required for the AES optimizations of GxHash.
# NOTE(review): binaries built with `target-cpu=native` may not run on CPUs
# that lack the build host's features — confirm this is acceptable for
# every machine the binaries are deployed to.
rustflags = ["-C","target-cpu=native"]
2 changes: 2 additions & 0 deletions betree/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ lfu_cache = { git = "https://github.com/parcio/lfu-cache", rev = "haura-v5" }
rand = { version = "0.8", features = ["std_rng"] }

pmdk = { path = "./pmdk", optional = true }
rustc-hash = "1.1.0"
gxhash = "3.1.1"

[dev-dependencies]
rand_xorshift = "0.3"
Expand Down
124 changes: 0 additions & 124 deletions betree/src/checksum.rs

This file was deleted.

67 changes: 67 additions & 0 deletions betree/src/checksum/fxhash.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
//! Implementation of the `Checksum` trait backed by FxHash.
use super::{Builder, Checksum, ChecksumError, State};
use crate::size::StaticSize;
use rustc_hash::FxHasher;
use serde::{Deserialize, Serialize};
use std::hash::Hasher;

/// A 64-bit checksum computed with `FxHasher` from the `rustc-hash` crate,
/// the hash implementation used by rustc (originally from Firefox).
///
/// The wrapped `u64` is the finished hasher output; two checksums are equal
/// exactly when these values are equal.
#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)]
pub struct FxHash(u64);

impl StaticSize for FxHash {
    /// Returns the size in bytes of the wrapped `u64` hash value.
    fn static_size() -> usize {
        std::mem::size_of::<u64>()
    }
}

impl Checksum for FxHash {
    type Builder = FxHashBuilder;

    /// Recomputes the checksum over `data` and compares it with `self`.
    ///
    /// Every slice yielded by `data` is ingested, in iteration order, into a
    /// freshly built state.
    ///
    /// # Errors
    ///
    /// Returns `ChecksumError` if the recomputed checksum differs from `self`.
    fn verify_buffer<I: IntoIterator<Item = T>, T: AsRef<[u8]>>(
        &self,
        data: I,
    ) -> Result<(), ChecksumError> {
        let mut hasher = Self::builder().build();
        data.into_iter()
            .for_each(|chunk| hasher.ingest(chunk.as_ref()));
        if hasher.finish() != *self {
            return Err(ChecksumError);
        }
        Ok(())
    }

    /// Returns the unit-struct builder that creates hashing states for `FxHash`.
    fn builder() -> Self::Builder {
        FxHashBuilder
    }
}

/// The corresponding `Builder` for `FxHash`.
///
/// A field-less unit struct: it carries no configuration, and every state it
/// builds starts from `FxHasher::default()`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct FxHashBuilder;

impl Builder<FxHash> for FxHashBuilder {
    type State = FxHashState;

    /// Starts a new checksum computation from a default-initialised `FxHasher`.
    fn build(&self) -> Self::State {
        let hasher = FxHasher::default();
        FxHashState(hasher)
    }
}

/// The internal state of `FxHash`: an in-progress `FxHasher` that accumulates
/// ingested bytes until it is finished into a checksum.
pub struct FxHashState(FxHasher);

impl State for FxHashState {
    type Checksum = FxHash;

    /// Feeds `data` into the wrapped hasher.
    fn ingest(&mut self, data: &[u8]) {
        let Self(hasher) = self;
        hasher.write(data);
    }

    /// Consumes the state and wraps the hasher's 64-bit result in an `FxHash`.
    fn finish(self) -> Self::Checksum {
        let Self(hasher) = self;
        let digest = hasher.finish();
        FxHash(digest)
    }
}
69 changes: 69 additions & 0 deletions betree/src/checksum/gxhash.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
//! Implementation of the `Checksum` trait backed by GxHash.
use super::{Builder, Checksum, ChecksumError, State};
use crate::size::StaticSize;
use gxhash::GxHasher;
use serde::{Deserialize, Serialize};
use std::hash::Hasher;

/// A 64-bit checksum created by `GxHasher` from the `gxhash` crate.
///
/// The wrapped `u64` is the finished hasher output; two checksums are equal
/// exactly when these values are equal.
#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)]
pub struct GxHash(u64);

impl StaticSize for GxHash {
    /// Returns the size in bytes of the wrapped `u64` hash value.
    fn static_size() -> usize {
        std::mem::size_of::<u64>()
    }
}

impl Checksum for GxHash {
    type Builder = GxHashBuilder;

    /// Recomputes the checksum over `data` and compares it with `self`.
    ///
    /// Every slice yielded by `data` is ingested, in iteration order, into a
    /// freshly built state.
    ///
    /// # Errors
    ///
    /// Returns `ChecksumError` if the recomputed checksum differs from `self`.
    fn verify_buffer<I: IntoIterator<Item = T>, T: AsRef<[u8]>>(
        &self,
        data: I,
    ) -> Result<(), ChecksumError> {
        let mut hasher = Self::builder().build();
        data.into_iter()
            .for_each(|chunk| hasher.ingest(chunk.as_ref()));
        if hasher.finish() != *self {
            return Err(ChecksumError);
        }
        Ok(())
    }

    /// Returns the unit-struct builder that creates hashing states for `GxHash`.
    fn builder() -> Self::Builder {
        GxHashBuilder
    }
}

/// The corresponding `Builder` for `GxHash`.
///
/// A field-less unit struct: it carries no configuration, and every state it
/// builds is seeded with the same fixed seed.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct GxHashBuilder;

impl Builder<GxHash> for GxHashBuilder {
    type State = GxHashState;

    /// Starts a new checksum computation with a fixed seed.
    fn build(&self) -> Self::State {
        // The default `GxHasher` is randomized for security reasons. Stored
        // checksums must be reproducible, so the seed is pinned instead.
        let hasher = GxHasher::with_seed(0);
        GxHashState(hasher)
    }
}

/// The internal state of `GxHash`: an in-progress `GxHasher` that accumulates
/// ingested bytes until it is finished into a checksum.
pub struct GxHashState(GxHasher);

impl State for GxHashState {
    type Checksum = GxHash;

    /// Feeds `data` into the wrapped hasher.
    fn ingest(&mut self, data: &[u8]) {
        let Self(hasher) = self;
        hasher.write(data);
    }

    /// Consumes the state and wraps the hasher's 64-bit result in a `GxHash`.
    fn finish(self) -> Self::Checksum {
        let Self(hasher) = self;
        let digest = hasher.finish();
        GxHash(digest)
    }
}
76 changes: 76 additions & 0 deletions betree/src/checksum/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
//! This module provides a `Checksum` trait and implementors for verifying data
//! integrity.
use crate::size::Size;
use serde::{de::DeserializeOwned, Serialize};
use std::{error::Error, fmt, iter::once};

mod fxhash;
mod gxhash;
mod xxhash;

pub use self::gxhash::{GxHash, GxHashBuilder};
pub use fxhash::{FxHash, FxHashBuilder};
pub use xxhash::{XxHash, XxHashBuilder};

/// A checksum value that can be checked against data to verify its integrity.
pub trait Checksum:
    Serialize + DeserializeOwned + Size + Clone + Send + Sync + fmt::Debug + 'static
{
    /// The builder type that creates hashing states for this checksum.
    type Builder: Builder<Self>;

    /// Verifies the contents of a buffer given as a sequence of `u8` slices.
    ///
    /// # Errors
    ///
    /// Implementations return `ChecksumError` when the data does not match
    /// this checksum.
    fn verify_buffer<I, T>(&self, data: I) -> Result<(), ChecksumError>
    where
        I: IntoIterator<Item = T>,
        T: AsRef<[u8]>;

    /// Verifies the contents of a single contiguous buffer.
    ///
    /// Forwards to `verify_buffer` with `data` as the only slice.
    fn verify(&self, data: &[u8]) -> Result<(), ChecksumError> {
        let single_slice = once(data);
        self.verify_buffer(single_slice)
    }

    /// Creates a valid empty builder for this checksum type.
    fn builder() -> Self::Builder;
}

/// A checksum builder: a serializable, cloneable factory for the hashing
/// states that produce checksums of type `C`.
pub trait Builder<C: Checksum>:
Serialize + DeserializeOwned + Clone + Send + Sync + fmt::Debug + 'static
{
/// The internal state of the checksum; finishing it yields a `C`.
type State: State<Checksum = C>;

/// Create a new state to build a checksum.
fn build(&self) -> Self::State;
}

/// Holds a state for building a new `Checksum`.
///
/// Data is fed in with `ingest`; `finish` consumes the state and produces
/// the checksum.
pub trait State {
/// The resulting `Checksum`.
type Checksum: Checksum;

/// Ingests the given data into the state.
fn ingest(&mut self, data: &[u8]);

/// Builds the actual `Checksum`, consuming the state.
fn finish(self) -> Self::Checksum;
}

/// This is the error that will be returned when a `Checksum` does not match.
#[derive(Debug)]
pub struct ChecksumError;

impl fmt::Display for ChecksumError {
    /// Writes a fixed, single-line message describing the failure.
    ///
    /// No trailing newline is emitted, so the message can be embedded in
    /// larger messages or log lines; the caller decides how to end the line.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("Failed to verify the integrity")
    }
}

impl Error for ChecksumError {
    // Kept for callers that still rely on the deprecated
    // `Error::description`; new code should use the `Display` output.
    fn description(&self) -> &str {
        "a checksum error occurred"
    }
}
Loading

0 comments on commit 585b5fa

Please sign in to comment.