From 585b5fadaa5d1447ea9ccc38859a15347d5cf402 Mon Sep 17 00:00:00 2001
From: jwuensche
Date: Tue, 16 Apr 2024 15:25:31 +0200
Subject: [PATCH] checksum: add FxHash and GxHash based checksum (#58)

* checksum: add fxhash

This is just to test out the performance compared to the xxhash we've
been using until now. Early measurements with 4M blobs have shown that
it could be worth experimenting with fxhash, which is used in the Rust
compiler.

* checksum: structure module

* checksum: fix typo

* storage_pool: remove obsolete in_memory module

Noticed this while grepping for XxHash; it seems to have evaded the
cleaning process some generations ago.

* superblock: remove dependency on XxHash

* checksum: add gxhash

This commit required modifying the build context to allow for the AES
optimizations of GxHash. It should not prove to be an issue on the
systems we use (x86-64 and maybe ARM64), which I've tested before this
commit.

* checksum: remove potentially ambiguous module ref

* betree: move .cargo config to workspace level

* checksum: correct module description

* database: make GxHash the default checksum
---
 .cargo/config.toml                   |   2 +
 betree/Cargo.toml                    |   2 +
 betree/src/checksum.rs               | 124 ---------------------------
 betree/src/checksum/fxhash.rs        |  67 +++++++++++++++
 betree/src/checksum/gxhash.rs        |  69 +++++++++++++++
 betree/src/checksum/mod.rs           |  76 ++++++++++++++++
 betree/src/checksum/xxhash.rs        |  68 +++++++++++++++
 betree/src/database/mod.rs           |  12 +--
 betree/src/database/superblock.rs    |  12 +--
 betree/src/storage_pool/in_memory.rs | 124 ---------------------------
 10 files changed, 297 insertions(+), 259 deletions(-)
 create mode 100644 .cargo/config.toml
 delete mode 100644 betree/src/checksum.rs
 create mode 100644 betree/src/checksum/fxhash.rs
 create mode 100644 betree/src/checksum/gxhash.rs
 create mode 100644 betree/src/checksum/mod.rs
 create mode 100644 betree/src/checksum/xxhash.rs
 delete mode 100644 betree/src/storage_pool/in_memory.rs

diff --git a/.cargo/config.toml b/.cargo/config.toml
new file mode 100644
index 00000000..e6ac8df3
--- /dev/null
+++ b/.cargo/config.toml
@@ -0,0 +1,2 @@
+[build]
+rustflags = ["-C","target-cpu=native"]
diff --git a/betree/Cargo.toml b/betree/Cargo.toml
index e0b8a029..58806262 100644
--- a/betree/Cargo.toml
+++ b/betree/Cargo.toml
@@ -59,6 +59,8 @@
 lfu_cache = { git = "https://github.com/parcio/lfu-cache", rev = "haura-v5" }
 rand = { version = "0.8", features = ["std_rng"] }
 pmdk = { path = "./pmdk", optional = true }
+rustc-hash = "1.1.0"
+gxhash = "3.1.1"
 
 [dev-dependencies]
 rand_xorshift = "0.3"
diff --git a/betree/src/checksum.rs b/betree/src/checksum.rs
deleted file mode 100644
index fadc9a4a..00000000
--- a/betree/src/checksum.rs
+++ /dev/null
@@ -1,124 +0,0 @@
-//! This module provides a `Checksum` trait for verifying data integrity.
-
-use crate::size::{Size, StaticSize};
-use serde::{de::DeserializeOwned, Deserialize, Serialize};
-use std::{error::Error, fmt, hash::Hasher, iter::once};
-use twox_hash;
-
-/// A checksum to verify data integrity.
-pub trait Checksum:
-    Serialize + DeserializeOwned + Size + Clone + Send + Sync + fmt::Debug + 'static
-{
-    /// Builds a new `Checksum`.
-    type Builder: Builder;
-
-    /// Verifies the contents of the given buffer which consists of multiple
-    /// `u8` slices.
-    fn verify_buffer<I: IntoIterator<Item = T>, T: AsRef<[u8]>>(
-        &self,
-        data: I,
-    ) -> Result<(), ChecksumError>;
-
-    /// Verifies the contents of the given buffer.
-    fn verify(&self, data: &[u8]) -> Result<(), ChecksumError> {
-        self.verify_buffer(once(data))
-    }
-}
-
-/// A checksum builder
-pub trait Builder:
-    Serialize + DeserializeOwned + Clone + Send + Sync + fmt::Debug + 'static
-{
-    /// The internal state of the checksum.
-    type State: State;
-
-    /// Create a new state to build a checksum.
-    fn build(&self) -> Self::State;
-}
-
-/// Holds a state for building a new `Checksum`.
-pub trait State {
-    /// The resulting `Checksum`.
-    type Checksum: Checksum;
-
-    /// Ingests the given data into the state.
-    fn ingest(&mut self, data: &[u8]);
-
-    /// Builds the actual `Checksum`.
-    fn finish(self) -> Self::Checksum;
-}
-
-/// This is the error that will be returned when a `Checksum` does not match.
-#[derive(Debug)]
-pub struct ChecksumError;
-
-impl fmt::Display for ChecksumError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        writeln!(f, "Failed to verify the integrity")
-    }
-}
-
-impl Error for ChecksumError {
-    fn description(&self) -> &str {
-        "a checksum error occurred"
-    }
-}
-
-/// `XxHash` contains a digest of `xxHash`
-/// which is an "extremely fast non-cryptographic hash algorithm"
-/// ()
-#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)]
-pub struct XxHash(u64);
-
-impl StaticSize for XxHash {
-    fn static_size() -> usize {
-        8
-    }
-}
-
-impl Checksum for XxHash {
-    type Builder = XxHashBuilder;
-
-    fn verify_buffer<I: IntoIterator<Item = T>, T: AsRef<[u8]>>(
-        &self,
-        data: I,
-    ) -> Result<(), ChecksumError> {
-        let mut state = XxHashBuilder.build();
-        for x in data {
-            state.ingest(x.as_ref());
-        }
-        let other = state.finish();
-        if *self == other {
-            Ok(())
-        } else {
-            Err(ChecksumError)
-        }
-    }
-}
-
-/// The corresponding `Builder` for `XxHash`.
-#[derive(Clone, Debug, Serialize, Deserialize)]
-pub struct XxHashBuilder;
-
-impl Builder for XxHashBuilder {
-    type State = XxHashState;
-
-    fn build(&self) -> Self::State {
-        XxHashState(twox_hash::XxHash::with_seed(0))
-    }
-}
-
-/// The internal state of `XxHash`.
-pub struct XxHashState(twox_hash::XxHash);
-
-impl State for XxHashState {
-    type Checksum = XxHash;
-
-    fn ingest(&mut self, data: &[u8]) {
-        self.0.write(data);
-    }
-
-    fn finish(self) -> Self::Checksum {
-        XxHash(self.0.finish())
-    }
-}
diff --git a/betree/src/checksum/fxhash.rs b/betree/src/checksum/fxhash.rs
new file mode 100644
index 00000000..3837d947
--- /dev/null
+++ b/betree/src/checksum/fxhash.rs
@@ -0,0 +1,67 @@
+/// Impl Checksum with FxHash.
+use super::{Builder, Checksum, ChecksumError, State};
+use crate::size::StaticSize;
+use rustc_hash::FxHasher;
+use serde::{Deserialize, Serialize};
+use std::hash::Hasher;
+
+/// The hash implementation used by rustc itself, originally from Firefox.
+#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)]
+pub struct FxHash(u64);
+
+impl StaticSize for FxHash {
+    fn static_size() -> usize {
+        8
+    }
+}
+
+impl Checksum for FxHash {
+    type Builder = FxHashBuilder;
+
+    fn verify_buffer<I: IntoIterator<Item = T>, T: AsRef<[u8]>>(
+        &self,
+        data: I,
+    ) -> Result<(), ChecksumError> {
+        let mut state = FxHashBuilder.build();
+        for x in data {
+            state.ingest(x.as_ref());
+        }
+        let other = state.finish();
+        if *self == other {
+            Ok(())
+        } else {
+            Err(ChecksumError)
+        }
+    }
+
+    fn builder() -> Self::Builder {
+        FxHashBuilder
+    }
+}
+
+/// The corresponding `Builder` for `FxHash`.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct FxHashBuilder;
+
+impl Builder for FxHashBuilder {
+    type State = FxHashState;
+
+    fn build(&self) -> Self::State {
+        FxHashState(FxHasher::default())
+    }
+}
+
+/// The internal state of `FxHash`.
+pub struct FxHashState(FxHasher);
+
+impl State for FxHashState {
+    type Checksum = FxHash;
+
+    fn ingest(&mut self, data: &[u8]) {
+        self.0.write(data);
+    }
+
+    fn finish(self) -> Self::Checksum {
+        FxHash(self.0.finish())
+    }
+}
diff --git a/betree/src/checksum/gxhash.rs b/betree/src/checksum/gxhash.rs
new file mode 100644
index 00000000..f0ce0a1c
--- /dev/null
+++ b/betree/src/checksum/gxhash.rs
@@ -0,0 +1,69 @@
+/// Impl Checksum with GxHash.
+use super::{Builder, Checksum, ChecksumError, State};
+use crate::size::StaticSize;
+use gxhash::GxHasher;
+use serde::{Deserialize, Serialize};
+use std::hash::Hasher;
+
+/// A checksum created by `GxHash`.
+#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)]
+pub struct GxHash(u64);
+
+impl StaticSize for GxHash {
+    fn static_size() -> usize {
+        8
+    }
+}
+
+impl Checksum for GxHash {
+    type Builder = GxHashBuilder;
+
+    fn verify_buffer<I: IntoIterator<Item = T>, T: AsRef<[u8]>>(
+        &self,
+        data: I,
+    ) -> Result<(), ChecksumError> {
+        let mut state = GxHashBuilder.build();
+        for x in data {
+            state.ingest(x.as_ref());
+        }
+        let other = state.finish();
+        if *self == other {
+            Ok(())
+        } else {
+            Err(ChecksumError)
+        }
+    }
+
+    fn builder() -> Self::Builder {
+        GxHashBuilder
+    }
+}
+
+/// The corresponding `Builder` for `GxHash`.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct GxHashBuilder;
+
+impl Builder for GxHashBuilder {
+    type State = GxHashState;
+
+    fn build(&self) -> Self::State {
+        // Due to security concerns the default `GxHasher` is randomized, which
+        // does not work for us; therefore, use a pinned seed.
+        GxHashState(GxHasher::with_seed(0))
+    }
+}
+
+/// The internal state of `GxHash`.
+pub struct GxHashState(GxHasher);
+
+impl State for GxHashState {
+    type Checksum = GxHash;
+
+    fn ingest(&mut self, data: &[u8]) {
+        self.0.write(data);
+    }
+
+    fn finish(self) -> Self::Checksum {
+        GxHash(self.0.finish())
+    }
+}
diff --git a/betree/src/checksum/mod.rs b/betree/src/checksum/mod.rs
new file mode 100644
index 00000000..755cc2ea
--- /dev/null
+++ b/betree/src/checksum/mod.rs
@@ -0,0 +1,76 @@
+//! This module provides a `Checksum` trait and implementors for verifying data
+//! integrity.
+
+use crate::size::Size;
+use serde::{de::DeserializeOwned, Serialize};
+use std::{error::Error, fmt, iter::once};
+
+mod fxhash;
+mod gxhash;
+mod xxhash;
+
+pub use self::gxhash::{GxHash, GxHashBuilder};
+pub use fxhash::{FxHash, FxHashBuilder};
+pub use xxhash::{XxHash, XxHashBuilder};
+
+/// A checksum to verify data integrity.
+pub trait Checksum:
+    Serialize + DeserializeOwned + Size + Clone + Send + Sync + fmt::Debug + 'static
+{
+    /// Builds a new `Checksum`.
+    type Builder: Builder;
+
+    /// Verifies the contents of the given buffer which consists of multiple
+    /// `u8` slices.
+    fn verify_buffer<I: IntoIterator<Item = T>, T: AsRef<[u8]>>(
+        &self,
+        data: I,
+    ) -> Result<(), ChecksumError>;
+
+    /// Verifies the contents of the given buffer.
+    fn verify(&self, data: &[u8]) -> Result<(), ChecksumError> {
+        self.verify_buffer(once(data))
+    }
+
+    /// Create a valid empty builder for this checksum type.
+    fn builder() -> Self::Builder;
+}
+
+/// A checksum builder
+pub trait Builder:
+    Serialize + DeserializeOwned + Clone + Send + Sync + fmt::Debug + 'static
+{
+    /// The internal state of the checksum.
+    type State: State;
+
+    /// Create a new state to build a checksum.
+    fn build(&self) -> Self::State;
+}
+
+/// Holds a state for building a new `Checksum`.
+pub trait State {
+    /// The resulting `Checksum`.
+    type Checksum: Checksum;
+
+    /// Ingests the given data into the state.
+    fn ingest(&mut self, data: &[u8]);
+
+    /// Builds the actual `Checksum`.
+    fn finish(self) -> Self::Checksum;
+}
+
+/// This is the error that will be returned when a `Checksum` does not match.
+#[derive(Debug)]
+pub struct ChecksumError;
+
+impl fmt::Display for ChecksumError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        writeln!(f, "Failed to verify the integrity")
+    }
+}
+
+impl Error for ChecksumError {
+    fn description(&self) -> &str {
+        "a checksum error occurred"
+    }
+}
diff --git a/betree/src/checksum/xxhash.rs b/betree/src/checksum/xxhash.rs
new file mode 100644
index 00000000..839c0795
--- /dev/null
+++ b/betree/src/checksum/xxhash.rs
@@ -0,0 +1,68 @@
+/// `XxHash` contains a digest of `xxHash`
+/// which is an "extremely fast non-cryptographic hash algorithm"
+/// ()
+use super::{Builder, Checksum, ChecksumError, State};
+use crate::size::StaticSize;
+use serde::{Deserialize, Serialize};
+use std::hash::Hasher;
+
+/// A checksum created by `XxHash`.
+#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)]
+pub struct XxHash(u64);
+
+impl StaticSize for XxHash {
+    fn static_size() -> usize {
+        8
+    }
+}
+
+impl Checksum for XxHash {
+    type Builder = XxHashBuilder;
+
+    fn verify_buffer<I: IntoIterator<Item = T>, T: AsRef<[u8]>>(
+        &self,
+        data: I,
+    ) -> Result<(), ChecksumError> {
+        let mut state = XxHashBuilder.build();
+        for x in data {
+            state.ingest(x.as_ref());
+        }
+        let other = state.finish();
+        if *self == other {
+            Ok(())
+        } else {
+            Err(ChecksumError)
+        }
+    }
+
+    fn builder() -> Self::Builder {
+        XxHashBuilder
+    }
+}
+
+/// The corresponding `Builder` for `XxHash`.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct XxHashBuilder;
+
+impl Builder for XxHashBuilder {
+    type State = XxHashState;
+
+    fn build(&self) -> Self::State {
+        XxHashState(twox_hash::XxHash::with_seed(0))
+    }
+}
+
+/// The internal state of `XxHash`.
+pub struct XxHashState(twox_hash::XxHash);
+
+impl State for XxHashState {
+    type Checksum = XxHash;
+
+    fn ingest(&mut self, data: &[u8]) {
+        self.0.write(data);
+    }
+
+    fn finish(self) -> Self::Checksum {
+        XxHash(self.0.finish())
+    }
+}
diff --git a/betree/src/database/mod.rs b/betree/src/database/mod.rs
index 50ae4f0c..399d4c8c 100644
--- a/betree/src/database/mod.rs
+++ b/betree/src/database/mod.rs
@@ -2,7 +2,7 @@
 use crate::{
     atomic_option::AtomicOption,
     cache::ClockCache,
-    checksum::{XxHash, XxHashBuilder},
+    checksum::GxHash,
     compression::CompressionConfiguration,
     cow_bytes::SlicedCowBytes,
     data_management::{
@@ -67,14 +67,16 @@ const ROOT_TREE_STORAGE_PREFERENCE: StoragePreference = StoragePreference::FASTE
 const DEFAULT_CACHE_SIZE: usize = 256 * 1024 * 1024;
 const DEFAULT_SYNC_INTERVAL_MS: u64 = 1000;
 
-type Checksum = XxHash;
+// This is the checksum used throughout the entire database. To reconfigure it,
+// this type has to be changed and the crate recompiled.
+type Checksum = GxHash;
 
 type ObjectPointer = data_management::ObjectPointer;
 pub(crate) type ObjectRef = data_management::impls::ObjRef;
 pub(crate) type Object = Node;
 type DbHandler = Handler;
 
-pub(crate) type RootSpu = StoragePoolUnit;
+pub(crate) type RootSpu = StoragePoolUnit;
 pub(crate) type RootDmu = Dmu<
     ClockCache<
         data_management::impls::ObjectKey,
@@ -179,7 +181,7 @@ impl DatabaseConfiguration {
 
 impl DatabaseConfiguration {
     pub fn new_spu(&self) -> Result {
-        Ok(StoragePoolUnit::::new(&self.storage)?)
+        Ok(StoragePoolUnit::::new(&self.storage)?)
     }
 
     pub fn new_handler(&self, spu: &RootSpu) -> DbHandler {
@@ -229,7 +231,7 @@ impl DatabaseConfiguration {
         Dmu::new(
             self.compression.to_builder(),
-            XxHashBuilder,
+            ::builder(),
             self.default_storage_class,
             spu,
             strategy,
diff --git a/betree/src/database/superblock.rs b/betree/src/database/superblock.rs
index 3fc8ea73..8adab350 100644
--- a/betree/src/database/superblock.rs
+++ b/betree/src/database/superblock.rs
@@ -1,7 +1,7 @@
-use super::{errors::*, StorageInfo};
+use super::{errors::*, Checksum as DbChecksum, StorageInfo};
 use crate::{
     buffer::{Buf, BufWrite},
-    checksum::{Builder, State, XxHash, XxHashBuilder},
+    checksum::{Builder, Checksum, State},
     size::StaticSize,
     storage_pool::{StoragePoolLayer, NUM_STORAGE_CLASSES},
     vdev::{Block, BLOCK_SIZE},
@@ -21,8 +21,8 @@ pub struct Superblock {
     pub(crate) tiers: [StorageInfo; NUM_STORAGE_CLASSES],
 }
 
-fn checksum(b: &[u8]) -> XxHash {
-    let mut state = XxHashBuilder.build();
+fn checksum(b: &[u8]) -> DbChecksum {
+    let mut state = DbChecksum::builder().build();
     state.ingest(b);
     state.finish()
 }
@@ -34,7 +34,7 @@ impl Superblock {
     /// this sequence is explicitly not part of the stability guarantees),
     /// or the contained checksum doesn't match the actual checksum of the superblock.
     pub fn unpack(b: &[u8]) -> Result> {
-        let checksum_size = XxHash::static_size();
+        let checksum_size = DbChecksum::static_size();
         let correct_checksum = checksum(&b[..b.len() - checksum_size]);
         let actual_checksum = deserialize(&b[b.len() - checksum_size..])?;
         if correct_checksum != actual_checksum {
@@ -100,7 +100,7 @@ impl Superblock {
             this.magic.copy_from_slice(MAGIC);
             serialize_into(&mut data, &this)?;
         }
-        let checksum_size = XxHash::static_size();
+        let checksum_size = DbChecksum::static_size();
         data.seek(io::SeekFrom::End(-i64::from(checksum_size as u32)))?;
         let checksum = checksum(&data.as_ref()[..BLOCK_SIZE - checksum_size]);
         serialize_into(&mut data, &checksum)?;
diff --git a/betree/src/storage_pool/in_memory.rs b/betree/src/storage_pool/in_memory.rs
deleted file mode 100644
index 17bd9470..00000000
--- a/betree/src/storage_pool/in_memory.rs
+++ /dev/null
@@ -1,124 +0,0 @@
-use crate::{
-    checksum::{Checksum, XxHash},
-    storage_pool::{DiskOffset, StoragePoolLayer},
-    vdev::{Block, Error as VdevError},
-};
-use futures::{executor::block_on, prelude::*};
-
-use std::{
-    io,
-    pin::Pin,
-    sync::{Arc, Mutex},
-};
-
-#[derive(Clone)]
-pub struct InMemory {
-    data: Arc<Mutex<Vec<u8>>>,
-}
-
-impl StoragePoolLayer for InMemory {
-    type Checksum = XxHash;
-    type Configuration = u64;
-
-    fn new(configuration: &Self::Configuration) -> Result {
-        Ok(InMemory {
-            data: Arc::new(Mutex::new(vec![0; *configuration as usize])),
-        })
-    }
-
-    /// Reads `size` blocks from the given `offset`.
-    fn read(
-        &self,
-        size: Block,
-        offset: DiskOffset,
-        checksum: Self::Checksum,
-    ) -> Result<Box<[u8]>, VdevError> {
-        block_on(self.read_async(size, offset, checksum)?.into_future())
-    }
-
-    /// Future returned by `read_async`.
-    type ReadAsync = Pin<Box<dyn Future<Output = Result<Box<[u8]>, VdevError>> + Send>>;
-
-    /// Reads `size` blocks asynchronously from the given `offset`.
-    fn read_async(
-        &self,
-        size: Block,
-        offset: DiskOffset,
-        checksum: Self::Checksum,
-    ) -> Result {
-        Ok(Box::pin(future::ok({
-            if offset.disk_id() != 0 {
-                Vec::new().into_boxed_slice()
-            } else {
-                let offset = offset.block_offset().to_bytes() as usize;
-                self.data.lock().unwrap()[offset..offset + size.to_bytes() as usize]
-                    .to_vec()
-                    .into_boxed_slice()
-            }
-        })))
-    }
-
-    /// Issues a write request that might happen in the background.
-    fn begin_write(&self, data: Box<[u8]>, offset: DiskOffset) -> Result<(), VdevError> {
-        if offset.disk_id() != 0 {
-            return Ok(());
-        }
-        self.write_raw(data, offset.block_offset())
-    }
-
-    /// Writes the given `data` at `offset` for every `LeafVdev`.
-    fn write_raw(&self, data: Box<[u8]>, offset: Block) -> Result<(), VdevError> {
-        let offset = offset.to_bytes() as usize;
-        self.data.lock().unwrap()[offset..offset + data.len()].copy_from_slice(&data);
-        Ok(())
-    }
-
-    /// Reads `size` blocks from the given `offset` for every `LeafVdev`.
-    fn read_raw(&self, size: Block, offset: Block) -> Vec<Box<[u8]>> {
-        let data = self.data.lock().unwrap();
-        let offset = offset.to_bytes() as usize;
-        let range = offset..offset + size.to_bytes() as usize;
-
-        vec![data[range].to_vec().into_boxed_slice()]
-    }
-
-    /// Returns the actual size of a data block for a specific `Vdev`
-    /// which may be larger due to parity data.
-    fn actual_size(&self, disk_id: u16, size: Block) -> Block {
-        size
-    }
-
-    /// Returns the size for a specific `Vdev`.
-    fn size_in_blocks(&self, disk_id: u16) -> Block {
-        Block::from_bytes(self.data.lock().unwrap().len() as u64)
-    }
-
-    /// Return the number of leaf vdevs for a specific `Vdev`.
-    fn num_disks(&self, disk_id: u16) -> usize {
-        if disk_id == 0 {
-            1
-        } else {
-            0
-        }
-    }
-
-    /// Returns the effective free size for a specific `Vdev`.
-    fn effective_free_size(&self, disk_id: u16, free_size: Block) -> Block {
-        // NOTE: Is this correct?
-        if disk_id == 0 {
-            self.size_in_blocks(0)
-        } else {
-            Block::from_bytes(0)
-        }
-    }
-
-    /// Returns the number of `Vdev`s.
-    fn disk_count(&self) -> u16 {
-        1
-    }
-
-    /// Flushes the write-back queue and the underlying storage backend.
-    fn flush(&self) -> Result<(), VdevError> {
-        Ok(())
-    }
-}
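
For readers of this patch, the sketch below illustrates how the reworked `Checksum`/`Builder`/`State` API with the new `builder()` associated function is meant to be driven, using the `GxHash` default. It is an illustration only, not part of the change: the crate name `betree_storage_stack` and the public re-export of the `checksum` items are assumptions here, and the function is a made-up example, not an API added by this commit.

// Minimal sketch, assuming the betree crate is importable as
// `betree_storage_stack` and that the checksum module is public.
use betree_storage_stack::checksum::{Builder, Checksum, GxHash, State};

fn main() {
    let (part_a, part_b): (&[u8], &[u8]) = (b"hello ", b"world");

    // Obtain a builder via the `builder()` function introduced in this patch,
    // then feed the data into the hashing state slice by slice.
    let mut state = GxHash::builder().build();
    state.ingest(part_a);
    state.ingest(part_b);
    let sum: GxHash = state.finish();

    // Verification accepts a single slice or any iterator of byte slices.
    assert!(sum.verify_buffer([part_a, part_b]).is_ok());
    // A checksum computed over different data fails to verify.
    assert!(sum.verify(b"something else").is_err());
}

Note that, because the `gxhash` crate relies on AES/SIMD instructions, such an example only builds with the `target-cpu=native` rustflags added in .cargo/config.toml above (or an equivalent target-feature configuration).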