-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
checksum: add FxHash and GxHash based checksum (#58)
* checksum: add fxhash This is just to test out the performance compared to the xxhash we've been using until now. Early measurements with 4M blobs have shown that it could be worth experimenting with fxhash which is used in the rust compiler. * checksum: structure module * checksum: fix typo * storage_pool: remove obsolete in_memory module Noticed this while grepping for XxHash, seemed to have evaded the cleaning process some generations ago. * superblock: remove dependency on XxHash * checksum: add gxhash This commit required modifying the build context to allow for the AES optimizations of GxHash. It should not prove to be an issue on the system we use (x86-64 and maybe ARM64) which I've tested before this commit. * checksum: remove potential ambiguous module ref * betree: move .cargo config to workspace level * checksum: correct module description * database: make GxHash the default checksum
- Loading branch information
jwuensche
authored
Apr 16, 2024
1 parent
2ce688e
commit 585b5fa
Showing
10 changed files
with
297 additions
and
259 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
[build] | ||
rustflags = ["-C","target-cpu=native"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
//! `Checksum` implementation backed by FxHash. | ||
use super::{Builder, Checksum, ChecksumError, State}; | ||
use crate::size::StaticSize; | ||
use rustc_hash::FxHasher; | ||
use serde::{Deserialize, Serialize}; | ||
use std::hash::Hasher; | ||
|
||
/// The rustc own hash impl originally from Firefox. | ||
#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)] | ||
pub struct FxHash(u64); | ||
|
||
impl StaticSize for FxHash { | ||
fn static_size() -> usize { | ||
8 | ||
} | ||
} | ||
|
||
impl Checksum for FxHash { | ||
type Builder = FxHashBuilder; | ||
|
||
fn verify_buffer<I: IntoIterator<Item = T>, T: AsRef<[u8]>>( | ||
&self, | ||
data: I, | ||
) -> Result<(), ChecksumError> { | ||
let mut state = FxHashBuilder.build(); | ||
for x in data { | ||
state.ingest(x.as_ref()); | ||
} | ||
let other = state.finish(); | ||
if *self == other { | ||
Ok(()) | ||
} else { | ||
Err(ChecksumError) | ||
} | ||
} | ||
|
||
fn builder() -> Self::Builder { | ||
FxHashBuilder | ||
} | ||
} | ||
|
||
/// The corresponding `Builder` for `FxHash`. | ||
#[derive(Clone, Debug, Serialize, Deserialize)] | ||
pub struct FxHashBuilder; | ||
|
||
impl Builder<FxHash> for FxHashBuilder { | ||
type State = FxHashState; | ||
|
||
fn build(&self) -> Self::State { | ||
FxHashState(FxHasher::default()) | ||
} | ||
} | ||
|
||
/// The internal state of `FxHash`. | ||
pub struct FxHashState(FxHasher); | ||
|
||
impl State for FxHashState { | ||
type Checksum = FxHash; | ||
|
||
fn ingest(&mut self, data: &[u8]) { | ||
self.0.write(data); | ||
} | ||
|
||
fn finish(self) -> Self::Checksum { | ||
FxHash(self.0.finish()) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
//! `Checksum` implementation backed by GxHash. | ||
use super::{Builder, Checksum, ChecksumError, State}; | ||
use crate::size::StaticSize; | ||
use gxhash::GxHasher; | ||
use serde::{Deserialize, Serialize}; | ||
use std::hash::Hasher; | ||
|
||
/// A checksum created by `GxHash`. | ||
#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)] | ||
pub struct GxHash(u64); | ||
|
||
impl StaticSize for GxHash { | ||
fn static_size() -> usize { | ||
8 | ||
} | ||
} | ||
|
||
impl Checksum for GxHash { | ||
type Builder = GxHashBuilder; | ||
|
||
fn verify_buffer<I: IntoIterator<Item = T>, T: AsRef<[u8]>>( | ||
&self, | ||
data: I, | ||
) -> Result<(), ChecksumError> { | ||
let mut state = GxHashBuilder.build(); | ||
for x in data { | ||
state.ingest(x.as_ref()); | ||
} | ||
let other = state.finish(); | ||
if *self == other { | ||
Ok(()) | ||
} else { | ||
Err(ChecksumError) | ||
} | ||
} | ||
|
||
fn builder() -> Self::Builder { | ||
GxHashBuilder | ||
} | ||
} | ||
|
||
/// The corresponding `Builder` for `GxHash`. | ||
#[derive(Clone, Debug, Serialize, Deserialize)] | ||
pub struct GxHashBuilder; | ||
|
||
impl Builder<GxHash> for GxHashBuilder { | ||
type State = GxHashState; | ||
|
||
fn build(&self) -> Self::State { | ||
// Due to security concerns the default `GxHasher` is randomized, which | ||
// does not work for us, therefore, use pinned seed. | ||
GxHashState(GxHasher::with_seed(0)) | ||
} | ||
} | ||
|
||
/// The internal state of `GxHash`. | ||
pub struct GxHashState(GxHasher); | ||
|
||
impl State for GxHashState { | ||
type Checksum = GxHash; | ||
|
||
fn ingest(&mut self, data: &[u8]) { | ||
self.0.write(data); | ||
} | ||
|
||
fn finish(self) -> Self::Checksum { | ||
GxHash(self.0.finish()) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
//! This module provides a `Checksum` trait and implementors for verifying data | ||
//! integrity. | ||
use crate::size::Size; | ||
use serde::{de::DeserializeOwned, Serialize}; | ||
use std::{error::Error, fmt, iter::once}; | ||
|
||
mod fxhash; | ||
mod gxhash; | ||
mod xxhash; | ||
|
||
pub use self::gxhash::{GxHash, GxHashBuilder}; | ||
pub use fxhash::{FxHash, FxHashBuilder}; | ||
pub use xxhash::{XxHash, XxHashBuilder}; | ||
|
||
/// A checksum to verify data integrity. | ||
pub trait Checksum: | ||
Serialize + DeserializeOwned + Size + Clone + Send + Sync + fmt::Debug + 'static | ||
{ | ||
/// Builds a new `Checksum`. | ||
type Builder: Builder<Self>; | ||
|
||
/// Verifies the contents of the given buffer which consists of multiple | ||
/// `u8` slices. | ||
fn verify_buffer<I: IntoIterator<Item = T>, T: AsRef<[u8]>>( | ||
&self, | ||
data: I, | ||
) -> Result<(), ChecksumError>; | ||
|
||
/// Verifies the contents of the given buffer. | ||
fn verify(&self, data: &[u8]) -> Result<(), ChecksumError> { | ||
self.verify_buffer(once(data)) | ||
} | ||
|
||
/// Create a valid empty builder for this checksum type. | ||
fn builder() -> Self::Builder; | ||
} | ||
|
||
/// A checksum builder | ||
pub trait Builder<C: Checksum>: | ||
Serialize + DeserializeOwned + Clone + Send + Sync + fmt::Debug + 'static | ||
{ | ||
/// The internal state of the checksum. | ||
type State: State<Checksum = C>; | ||
|
||
/// Create a new state to build a checksum. | ||
fn build(&self) -> Self::State; | ||
} | ||
|
||
/// Holds a state for building a new `Checksum`. | ||
pub trait State { | ||
/// The resulting `Checksum`. | ||
type Checksum: Checksum; | ||
|
||
/// Ingests the given data into the state. | ||
fn ingest(&mut self, data: &[u8]); | ||
|
||
/// Builds the actual `Checksum`. | ||
fn finish(self) -> Self::Checksum; | ||
} | ||
|
||
/// The error returned when data does not match its `Checksum`.
#[derive(Debug)]
pub struct ChecksumError;

impl fmt::Display for ChecksumError {
    /// Writes the human-readable failure message.
    ///
    /// The message carries no trailing newline: line termination is the
    /// caller's job (`println!`, loggers, ...). Embedding one here would
    /// produce stray blank lines and a `to_string()` ending in `'\n'`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("Failed to verify the integrity")
    }
}

impl Error for ChecksumError {
    // `Error::description` is deprecated in favour of `Display`, but the
    // override is kept so callers that still use it see the same text.
    fn description(&self) -> &str {
        "a checksum error occurred"
    }
}
Oops, something went wrong.