From f2098e2565d0a1495c5bdea700b3d424be1c8c16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20W=C3=BCnsche?= Date: Thu, 11 Apr 2024 17:30:46 +0200 Subject: [PATCH 01/10] checksum: add fxhash This is just to test out the performance compared to the xxhash we've been using until now. Early measurements with 4M blobs have shown that it could be worth experimenting with fxhash which is used in the rust compiler. --- betree/src/checksum.rs | 70 ++++++++++++++++++++++++++++++++++++++ betree/src/database/mod.rs | 10 +++--- 2 files changed, 76 insertions(+), 4 deletions(-) diff --git a/betree/src/checksum.rs b/betree/src/checksum.rs index fadc9a4a..ee194c03 100644 --- a/betree/src/checksum.rs +++ b/betree/src/checksum.rs @@ -23,6 +23,9 @@ pub trait Checksum: fn verify(&self, data: &[u8]) -> Result<(), ChecksumError> { self.verify_buffer(once(data)) } + + /// Create a valid empty builder for this checksum type. + fn builder() -> Self::Builder; } /// A checksum builder @@ -94,6 +97,10 @@ impl Checksum for XxHash { Err(ChecksumError) } } + + fn builder() -> Self::Builder { + XxHashBuilder + } } /// The corresponding `Builder` for `XxHash`. @@ -122,3 +129,66 @@ impl State for XxHashState { XxHash(self.0.finish()) } } + +/// The rustc own hash impl originally from Firefox. +#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)] +pub struct FxHash(u64); + +impl StaticSize for FxHash { + fn static_size() -> usize { + 8 + } +} + +impl Checksum for FxHash { + type Builder = FxHashBuilder; + + fn verify_buffer, T: AsRef<[u8]>>( + &self, + data: I, + ) -> Result<(), ChecksumError> { + let mut state = FxHashBuilder.build(); + for x in data { + state.ingest(x.as_ref()); + } + let other = state.finish(); + if *self == other { + Ok(()) + } else { + Err(ChecksumError) + } + } + + fn builder() -> Self::Builder { + FxHashBuilder + } +} + +/// The corresponding `Builder` for `XxHash`. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct FxHashBuilder; + +impl Builder for FxHashBuilder { + type State = FxHashState; + + fn build(&self) -> Self::State { + FxHashState(FxHasher::default()) + } +} + +use rustc_hash::FxHasher; + +/// The internal state of `XxHash`. +pub struct FxHashState(FxHasher); + +impl State for FxHashState { + type Checksum = FxHash; + + fn ingest(&mut self, data: &[u8]) { + self.0.write(data); + } + + fn finish(self) -> Self::Checksum { + FxHash(self.0.finish()) + } +} diff --git a/betree/src/database/mod.rs b/betree/src/database/mod.rs index 50ae4f0c..02fa4fe5 100644 --- a/betree/src/database/mod.rs +++ b/betree/src/database/mod.rs @@ -2,7 +2,7 @@ use crate::{ atomic_option::AtomicOption, cache::ClockCache, - checksum::{XxHash, XxHashBuilder}, + checksum::{FxHash, FxHashBuilder, XxHash, XxHashBuilder}, compression::CompressionConfiguration, cow_bytes::SlicedCowBytes, data_management::{ @@ -67,6 +67,8 @@ const ROOT_TREE_STORAGE_PREFERENCE: StoragePreference = StoragePreference::FASTE const DEFAULT_CACHE_SIZE: usize = 256 * 1024 * 1024; const DEFAULT_SYNC_INTERVAL_MS: u64 = 1000; +// This is the hash used overall in the entire database. For reconfiguration +// recompilation is necessary and this type changed. type Checksum = XxHash; type ObjectPointer = data_management::ObjectPointer; @@ -74,7 +76,7 @@ pub(crate) type ObjectRef = data_management::impls::ObjRef; pub(crate) type Object = Node; type DbHandler = Handler; -pub(crate) type RootSpu = StoragePoolUnit; +pub(crate) type RootSpu = StoragePoolUnit; pub(crate) type RootDmu = Dmu< ClockCache< data_management::impls::ObjectKey, @@ -179,7 +181,7 @@ impl DatabaseConfiguration { impl DatabaseConfiguration { pub fn new_spu(&self) -> Result { - Ok(StoragePoolUnit::::new(&self.storage)?) + Ok(StoragePoolUnit::::new(&self.storage)?) } pub fn new_handler(&self, spu: &RootSpu) -> DbHandler { @@ -229,7 +231,7 @@ impl DatabaseConfiguration { Dmu::new( self.compression.to_builder(), - XxHashBuilder, + ::builder(), self.default_storage_class, spu, strategy, From eef9c7d24214c323f46b2a95731a0e0efcad8347 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20W=C3=BCnsche?= Date: Fri, 12 Apr 2024 12:13:22 +0200 Subject: [PATCH 02/10] checksum: structure module --- betree/Cargo.toml | 1 + betree/src/checksum.rs | 194 ---------------------------------- betree/src/checksum/fxhash.rs | 67 ++++++++++++ betree/src/checksum/mod.rs | 73 +++++++++++++ betree/src/checksum/xxhash.rs | 67 ++++++++++++ 5 files changed, 208 insertions(+), 194 deletions(-) delete mode 100644 betree/src/checksum.rs create mode 100644 betree/src/checksum/fxhash.rs create mode 100644 betree/src/checksum/mod.rs create mode 100644 betree/src/checksum/xxhash.rs diff --git a/betree/Cargo.toml b/betree/Cargo.toml index e0b8a029..63507ced 100644 --- a/betree/Cargo.toml +++ b/betree/Cargo.toml @@ -59,6 +59,7 @@ lfu_cache = { git = "https://github.com/parcio/lfu-cache", rev = "haura-v5" } rand = { version = "0.8", features = ["std_rng"] } pmdk = { path = "./pmdk", optional = true } +rustc-hash = "1.1.0" [dev-dependencies] rand_xorshift = "0.3" diff --git a/betree/src/checksum.rs b/betree/src/checksum.rs deleted file mode 100644 index ee194c03..00000000 --- a/betree/src/checksum.rs +++ /dev/null @@ -1,194 +0,0 @@ -//! This module provides a `Checksum` trait for verifying data integrity. - -use crate::size::{Size, StaticSize}; -use serde::{de::DeserializeOwned, Deserialize, Serialize}; -use std::{error::Error, fmt, hash::Hasher, iter::once}; -use twox_hash; - -/// A checksum to verify data integrity. -pub trait Checksum: - Serialize + DeserializeOwned + Size + Clone + Send + Sync + fmt::Debug + 'static -{ - /// Builds a new `Checksum`. - type Builder: Builder; - - /// Verifies the contents of the given buffer which consists of multiple - /// `u8` slices. - fn verify_buffer, T: AsRef<[u8]>>( - &self, - data: I, - ) -> Result<(), ChecksumError>; - - /// Verifies the contents of the given buffer. - fn verify(&self, data: &[u8]) -> Result<(), ChecksumError> { - self.verify_buffer(once(data)) - } - - /// Create a valid empty builder for this checksum type. - fn builder() -> Self::Builder; -} - -/// A checksum builder -pub trait Builder: - Serialize + DeserializeOwned + Clone + Send + Sync + fmt::Debug + 'static -{ - /// The internal state of the checksum. - type State: State; - - /// Create a new state to build a checksum. - fn build(&self) -> Self::State; -} - -/// Holds a state for building a new `Checksum`. -pub trait State { - /// The resulting `Checksum`. - type Checksum: Checksum; - - /// Ingests the given data into the state. - fn ingest(&mut self, data: &[u8]); - - /// Builds the actual `Checksum`. - fn finish(self) -> Self::Checksum; -} - -/// This is the error that will be returned when a `Checksum` does not match. -#[derive(Debug)] -pub struct ChecksumError; - -impl fmt::Display for ChecksumError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - writeln!(f, "Failed to verify the integrity") - } -} - -impl Error for ChecksumError { - fn description(&self) -> &str { - "a checksum error occurred" - } -} - -/// `XxHash` contains a digest of `xxHash` -/// which is an "extremely fast non-cryptographic hash algorithm" -/// () -#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)] -pub struct XxHash(u64); - -impl StaticSize for XxHash { - fn static_size() -> usize { - 8 - } -} - -impl Checksum for XxHash { - type Builder = XxHashBuilder; - - fn verify_buffer, T: AsRef<[u8]>>( - &self, - data: I, - ) -> Result<(), ChecksumError> { - let mut state = XxHashBuilder.build(); - for x in data { - state.ingest(x.as_ref()); - } - let other = state.finish(); - if *self == other { - Ok(()) - } else { - Err(ChecksumError) - } - } - - fn builder() -> Self::Builder { - XxHashBuilder - } -} - -/// The corresponding `Builder` for `XxHash`. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct XxHashBuilder; - -impl Builder for XxHashBuilder { - type State = XxHashState; - - fn build(&self) -> Self::State { - XxHashState(twox_hash::XxHash::with_seed(0)) - } -} - -/// The internal state of `XxHash`. -pub struct XxHashState(twox_hash::XxHash); - -impl State for XxHashState { - type Checksum = XxHash; - - fn ingest(&mut self, data: &[u8]) { - self.0.write(data); - } - - fn finish(self) -> Self::Checksum { - XxHash(self.0.finish()) - } -} - -/// The rustc own hash impl originally from Firefox. -#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)] -pub struct FxHash(u64); - -impl StaticSize for FxHash { - fn static_size() -> usize { - 8 - } -} - -impl Checksum for FxHash { - type Builder = FxHashBuilder; - - fn verify_buffer, T: AsRef<[u8]>>( - &self, - data: I, - ) -> Result<(), ChecksumError> { - let mut state = FxHashBuilder.build(); - for x in data { - state.ingest(x.as_ref()); - } - let other = state.finish(); - if *self == other { - Ok(()) - } else { - Err(ChecksumError) - } - } - - fn builder() -> Self::Builder { - FxHashBuilder - } -} - -/// The corresponding `Builder` for `XxHash`. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct FxHashBuilder; - -impl Builder for FxHashBuilder { - type State = FxHashState; - - fn build(&self) -> Self::State { - FxHashState(FxHasher::default()) - } -} - -use rustc_hash::FxHasher; - -/// The internal state of `XxHash`. -pub struct FxHashState(FxHasher); - -impl State for FxHashState { - type Checksum = FxHash; - - fn ingest(&mut self, data: &[u8]) { - self.0.write(data); - } - - fn finish(self) -> Self::Checksum { - FxHash(self.0.finish()) - } -} diff --git a/betree/src/checksum/fxhash.rs b/betree/src/checksum/fxhash.rs new file mode 100644 index 00000000..dd56c985 --- /dev/null +++ b/betree/src/checksum/fxhash.rs @@ -0,0 +1,67 @@ +/// Impl for Checksum for FxHash. +use super::{Builder, Checksum, ChecksumError, State}; +use crate::size::StaticSize; +use rustc_hash::FxHasher; +use serde::{Deserialize, Serialize}; +use std::hash::Hasher; + +/// The rustc own hash impl originally from Firefox. +#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)] +pub struct FxHash(u64); + +impl StaticSize for FxHash { + fn static_size() -> usize { + 8 + } +} + +impl Checksum for FxHash { + type Builder = FxHashBuilder; + + fn verify_buffer, T: AsRef<[u8]>>( + &self, + data: I, + ) -> Result<(), ChecksumError> { + let mut state = FxHashBuilder.build(); + for x in data { + state.ingest(x.as_ref()); + } + let other = state.finish(); + if *self == other { + Ok(()) + } else { + Err(ChecksumError) + } + } + + fn builder() -> Self::Builder { + FxHashBuilder + } +} + +/// The corresponding `Builder` for `FxHash`. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct FxHashBuilder; + +impl Builder for FxHashBuilder { + type State = FxHashState; + + fn build(&self) -> Self::State { + FxHashState(FxHasher::default()) + } +} + +/// The internal state of `XxHash`. +pub struct FxHashState(FxHasher); + +impl State for FxHashState { + type Checksum = FxHash; + + fn ingest(&mut self, data: &[u8]) { + self.0.write(data); + } + + fn finish(self) -> Self::Checksum { + FxHash(self.0.finish()) + } +} diff --git a/betree/src/checksum/mod.rs b/betree/src/checksum/mod.rs new file mode 100644 index 00000000..c4d5c0c4 --- /dev/null +++ b/betree/src/checksum/mod.rs @@ -0,0 +1,73 @@ +//! This module provides a `Checksum` trait for verifying data integrity. + +use crate::size::Size; +use serde::{de::DeserializeOwned, Serialize}; +use std::{error::Error, fmt, iter::once}; + +mod fxhash; +mod xxhash; + +pub use fxhash::{FxHash, FxHashBuilder}; +pub use xxhash::{XxHash, XxHashBuilder}; + +/// A checksum to verify data integrity. +pub trait Checksum: + Serialize + DeserializeOwned + Size + Clone + Send + Sync + fmt::Debug + 'static +{ + /// Builds a new `Checksum`. + type Builder: Builder; + + /// Verifies the contents of the given buffer which consists of multiple + /// `u8` slices. + fn verify_buffer, T: AsRef<[u8]>>( + &self, + data: I, + ) -> Result<(), ChecksumError>; + + /// Verifies the contents of the given buffer. + fn verify(&self, data: &[u8]) -> Result<(), ChecksumError> { + self.verify_buffer(once(data)) + } + + /// Create a valid empty builder for this checksum type. + fn builder() -> Self::Builder; +} + +/// A checksum builder +pub trait Builder: + Serialize + DeserializeOwned + Clone + Send + Sync + fmt::Debug + 'static +{ + /// The internal state of the checksum. + type State: State; + + /// Create a new state to build a checksum. + fn build(&self) -> Self::State; +} + +/// Holds a state for building a new `Checksum`. +pub trait State { + /// The resulting `Checksum`. + type Checksum: Checksum; + + /// Ingests the given data into the state. + fn ingest(&mut self, data: &[u8]); + + /// Builds the actual `Checksum`. + fn finish(self) -> Self::Checksum; +} + +/// This is the error that will be returned when a `Checksum` does not match. +#[derive(Debug)] +pub struct ChecksumError; + +impl fmt::Display for ChecksumError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + writeln!(f, "Failed to verify the integrity") + } +} + +impl Error for ChecksumError { + fn description(&self) -> &str { + "a checksum error occurred" + } +} diff --git a/betree/src/checksum/xxhash.rs b/betree/src/checksum/xxhash.rs new file mode 100644 index 00000000..de0c0afd --- /dev/null +++ b/betree/src/checksum/xxhash.rs @@ -0,0 +1,67 @@ +/// `XxHash` contains a digest of `xxHash` +/// which is an "extremely fast non-cryptographic hash algorithm" +/// () +use super::{Builder, Checksum, ChecksumError, State}; +use crate::size::StaticSize; +use serde::{Deserialize, Serialize}; +use std::hash::Hasher; + +#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)] +pub struct XxHash(u64); + +impl StaticSize for XxHash { + fn static_size() -> usize { + 8 + } +} + +impl Checksum for XxHash { + type Builder = XxHashBuilder; + + fn verify_buffer, T: AsRef<[u8]>>( + &self, + data: I, + ) -> Result<(), ChecksumError> { + let mut state = XxHashBuilder.build(); + for x in data { + state.ingest(x.as_ref()); + } + let other = state.finish(); + if *self == other { + Ok(()) + } else { + Err(ChecksumError) + } + } + + fn builder() -> Self::Builder { + XxHashBuilder + } +} + +/// The corresponding `Builder` for `XxHash`. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct XxHashBuilder; + +impl Builder for XxHashBuilder { + type State = XxHashState; + + fn build(&self) -> Self::State { + XxHashState(twox_hash::XxHash::with_seed(0)) + } +} + +/// The internal state of `XxHash`. +pub struct XxHashState(twox_hash::XxHash); + +impl State for XxHashState { + type Checksum = XxHash; + + fn ingest(&mut self, data: &[u8]) { + self.0.write(data); + } + + fn finish(self) -> Self::Checksum { + XxHash(self.0.finish()) + } +} From 61eefd9a40b05d21ca10a441a75e2b3f6c63f9de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20W=C3=BCnsche?= Date: Fri, 12 Apr 2024 16:56:27 +0200 Subject: [PATCH 03/10] checksum: fix typo --- betree/src/checksum/fxhash.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/betree/src/checksum/fxhash.rs b/betree/src/checksum/fxhash.rs index dd56c985..f70bbe1e 100644 --- a/betree/src/checksum/fxhash.rs +++ b/betree/src/checksum/fxhash.rs @@ -51,7 +51,7 @@ impl Builder for FxHashBuilder { } } -/// The internal state of `XxHash`. +/// The internal state of `FxHash`. pub struct FxHashState(FxHasher); impl State for FxHashState { From 0b27542c7e80de58b612dfbad444925b1424cf1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20W=C3=BCnsche?= Date: Fri, 12 Apr 2024 17:08:26 +0200 Subject: [PATCH 04/10] storage_pool: remove obsolete in_memory module Noticed this while grepping for XxHash, seemed to have evaded the cleaning process some generations ago. --- betree/src/storage_pool/in_memory.rs | 124 --------------------------- 1 file changed, 124 deletions(-) delete mode 100644 betree/src/storage_pool/in_memory.rs diff --git a/betree/src/storage_pool/in_memory.rs b/betree/src/storage_pool/in_memory.rs deleted file mode 100644 index 17bd9470..00000000 --- a/betree/src/storage_pool/in_memory.rs +++ /dev/null @@ -1,124 +0,0 @@ -use crate::{ - checksum::{Checksum, XxHash}, - storage_pool::{DiskOffset, StoragePoolLayer}, - vdev::{Block, Error as VdevError}, -}; -use futures::{executor::block_on, prelude::*}; - -use std::{ - io, - pin::Pin, - sync::{Arc, Mutex}, -}; - -#[derive(Clone)] -pub struct InMemory { - data: Arc>>, -} - -impl StoragePoolLayer for InMemory { - type Checksum = XxHash; - type Configuration = u64; - - fn new(configuration: &Self::Configuration) -> Result { - Ok(InMemory { - data: Arc::new(Mutex::new(vec![0; *configuration as usize])), - }) - } - - /// Reads `size` blocks from the given `offset`. - fn read( - &self, - size: Block, - offset: DiskOffset, - checksum: Self::Checksum, - ) -> Result, VdevError> { - block_on(self.read_async(size, offset, checksum)?.into_future()) - } - - /// Future returned by `read_async`. - type ReadAsync = Pin, VdevError>> + Send>>; - - /// Reads `size` blocks asynchronously from the given `offset`. - fn read_async( - &self, - size: Block, - offset: DiskOffset, - checksum: Self::Checksum, - ) -> Result { - Ok(Box::pin(future::ok({ - if offset.disk_id() != 0 { - Vec::new().into_boxed_slice() - } else { - let offset = offset.block_offset().to_bytes() as usize; - self.data.lock().unwrap()[offset..offset + size.to_bytes() as usize] - .to_vec() - .into_boxed_slice() - } - }))) - } - - /// Issues a write request that might happen in the background. - fn begin_write(&self, data: Box<[u8]>, offset: DiskOffset) -> Result<(), VdevError> { - if offset.disk_id() != 0 { - return Ok(()); - } - self.write_raw(data, offset.block_offset()) - } - - /// Writes the given `data` at `offset` for every `LeafVdev`. - fn write_raw(&self, data: Box<[u8]>, offset: Block) -> Result<(), VdevError> { - let offset = offset.to_bytes() as usize; - self.data.lock().unwrap()[offset..offset + data.len()].copy_from_slice(&data); - Ok(()) - } - - /// Reads `size` blocks from the given `offset` for every `LeafVdev`. - fn read_raw(&self, size: Block, offset: Block) -> Vec> { - let data = self.data.lock().unwrap(); - let offset = offset.to_bytes() as usize; - let range = offset..offset + size.to_bytes() as usize; - - vec![data[range].to_vec().into_boxed_slice()] - } - - /// Returns the actual size of a data block for a specific `Vdev` - /// which may be larger due to parity data. - fn actual_size(&self, disk_id: u16, size: Block) -> Block { - size - } - - /// Returns the size for a specific `Vdev`. - fn size_in_blocks(&self, disk_id: u16) -> Block { - Block::from_bytes(self.data.lock().unwrap().len() as u64) - } - - /// Return the number of leaf vdevs for a specific `Vdev`. - fn num_disks(&self, disk_id: u16) -> usize { - if disk_id == 0 { - 1 - } else { - 0 - } - } - - /// Returns the effective free size for a specific `Vdev`. - fn effective_free_size(&self, disk_id: u16, free_size: Block) -> Block { - // NOTE: Is this correct? - if disk_id == 0 { - self.size_in_blocks(0) - } else { - Block::from_bytes(0) - } - } - - /// Returns the number of `Vdev`s. - fn disk_count(&self) -> u16 { - 1 - } - - /// Flushes the write-back queue and the underlying storage backend. - fn flush(&self) -> Result<(), VdevError> { - Ok(()) - } -} From 57bd6be66e29d32a8fa23797d78007b000736a35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20W=C3=BCnsche?= Date: Fri, 12 Apr 2024 17:11:06 +0200 Subject: [PATCH 05/10] superblock: remove dependency on XxHash --- betree/src/database/superblock.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/betree/src/database/superblock.rs b/betree/src/database/superblock.rs index 3fc8ea73..8adab350 100644 --- a/betree/src/database/superblock.rs +++ b/betree/src/database/superblock.rs @@ -1,7 +1,7 @@ -use super::{errors::*, StorageInfo}; +use super::{errors::*, Checksum as DbChecksum, StorageInfo}; use crate::{ buffer::{Buf, BufWrite}, - checksum::{Builder, State, XxHash, XxHashBuilder}, + checksum::{Builder, Checksum, State}, size::StaticSize, storage_pool::{StoragePoolLayer, NUM_STORAGE_CLASSES}, vdev::{Block, BLOCK_SIZE}, @@ -21,8 +21,8 @@ pub struct Superblock

{ pub(crate) tiers: [StorageInfo; NUM_STORAGE_CLASSES], } -fn checksum(b: &[u8]) -> XxHash { - let mut state = XxHashBuilder.build(); +fn checksum(b: &[u8]) -> DbChecksum { + let mut state = DbChecksum::builder().build(); state.ingest(b); state.finish() } @@ -34,7 +34,7 @@ impl Superblock

{ /// this sequence is explicitly not part of the stability guarantees), /// or the contained checksum doesn't match the actual checksum of the superblock. pub fn unpack(b: &[u8]) -> Result> { - let checksum_size = XxHash::static_size(); + let checksum_size = DbChecksum::static_size(); let correct_checksum = checksum(&b[..b.len() - checksum_size]); let actual_checksum = deserialize(&b[b.len() - checksum_size..])?; if correct_checksum != actual_checksum { @@ -100,7 +100,7 @@ impl Superblock

{ this.magic.copy_from_slice(MAGIC); serialize_into(&mut data, &this)?; } - let checksum_size = XxHash::static_size(); + let checksum_size = DbChecksum::static_size(); data.seek(io::SeekFrom::End(-i64::from(checksum_size as u32)))?; let checksum = checksum(&data.as_ref()[..BLOCK_SIZE - checksum_size]); serialize_into(&mut data, &checksum)?; From 9192b435571e66b652339d8e7f08197fc39ee903 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20W=C3=BCnsche?= Date: Mon, 15 Apr 2024 11:58:24 +0200 Subject: [PATCH 06/10] checksum: add gxhash This commit required modifying the build context to allow for the AES optimizations of GxHash. It should not prove to be an issue on the system we use (x86-64 and maybe ARM64) which I've tested before this commit. --- betree/.cargo/config.toml | 2 + betree/Cargo.toml | 1 + betree/src/checksum/gxhash.rs | 69 +++++++++++++++++++++++++++++++++++ betree/src/checksum/mod.rs | 2 + betree/src/checksum/xxhash.rs | 1 + betree/src/database/mod.rs | 2 +- 6 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 betree/.cargo/config.toml create mode 100644 betree/src/checksum/gxhash.rs diff --git a/betree/.cargo/config.toml b/betree/.cargo/config.toml new file mode 100644 index 00000000..e6ac8df3 --- /dev/null +++ b/betree/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustflags = ["-C","target-cpu=native"] diff --git a/betree/Cargo.toml b/betree/Cargo.toml index 63507ced..58806262 100644 --- a/betree/Cargo.toml +++ b/betree/Cargo.toml @@ -60,6 +60,7 @@ rand = { version = "0.8", features = ["std_rng"] } pmdk = { path = "./pmdk", optional = true } rustc-hash = "1.1.0" +gxhash = "3.1.1" [dev-dependencies] rand_xorshift = "0.3" diff --git a/betree/src/checksum/gxhash.rs b/betree/src/checksum/gxhash.rs new file mode 100644 index 00000000..5d6b4f1b --- /dev/null +++ b/betree/src/checksum/gxhash.rs @@ -0,0 +1,69 @@ +/// Impl for Checksum for FxHashw. +use super::{Builder, Checksum, ChecksumError, State}; +use crate::size::StaticSize; +use gxhash::GxHasher; +use serde::{Deserialize, Serialize}; +use std::hash::Hasher; + +/// A checksum created by `GxHash`. +#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)] +pub struct GxHash(u64); + +impl StaticSize for GxHash { + fn static_size() -> usize { + 8 + } +} + +impl Checksum for GxHash { + type Builder = GxHashBuilder; + + fn verify_buffer, T: AsRef<[u8]>>( + &self, + data: I, + ) -> Result<(), ChecksumError> { + let mut state = GxHashBuilder.build(); + for x in data { + state.ingest(x.as_ref()); + } + let other = state.finish(); + if *self == other { + Ok(()) + } else { + Err(ChecksumError) + } + } + + fn builder() -> Self::Builder { + GxHashBuilder + } +} + +/// The corresponding `Builder` for `GxHash`. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct GxHashBuilder; + +impl Builder for GxHashBuilder { + type State = GxHashState; + + fn build(&self) -> Self::State { + // Due to security concerns the default `GxHasher` is randomized, which + // does not work for us, therefore, use pinned seed. + GxHashState(GxHasher::with_seed(0)) + } +} + +/// The internal state of `GxHash`. +pub struct GxHashState(GxHasher); + +impl State for GxHashState { + type Checksum = GxHash; + + fn ingest(&mut self, data: &[u8]) { + self.0.write(data); + } + + fn finish(self) -> Self::Checksum { + GxHash(self.0.finish()) + } +} diff --git a/betree/src/checksum/mod.rs b/betree/src/checksum/mod.rs index c4d5c0c4..0a72bb4d 100644 --- a/betree/src/checksum/mod.rs +++ b/betree/src/checksum/mod.rs @@ -5,9 +5,11 @@ use serde::{de::DeserializeOwned, Serialize}; use std::{error::Error, fmt, iter::once}; mod fxhash; +mod gxhash; mod xxhash; pub use fxhash::{FxHash, FxHashBuilder}; +pub use gxhash::{GxHash, GxHashBuilder}; pub use xxhash::{XxHash, XxHashBuilder}; /// A checksum to verify data integrity. diff --git a/betree/src/checksum/xxhash.rs b/betree/src/checksum/xxhash.rs index de0c0afd..839c0795 100644 --- a/betree/src/checksum/xxhash.rs +++ b/betree/src/checksum/xxhash.rs @@ -6,6 +6,7 @@ use crate::size::StaticSize; use serde::{Deserialize, Serialize}; use std::hash::Hasher; +/// A checksum created by `XxHash`. #[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)] pub struct XxHash(u64); diff --git a/betree/src/database/mod.rs b/betree/src/database/mod.rs index 02fa4fe5..c6071fbe 100644 --- a/betree/src/database/mod.rs +++ b/betree/src/database/mod.rs @@ -2,7 +2,7 @@ use crate::{ atomic_option::AtomicOption, cache::ClockCache, - checksum::{FxHash, FxHashBuilder, XxHash, XxHashBuilder}, + checksum::XxHash, compression::CompressionConfiguration, cow_bytes::SlicedCowBytes, data_management::{ From 5712976d3856f75ff8f081bb84bae4ceebc47591 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20W=C3=BCnsche?= Date: Mon, 15 Apr 2024 12:12:37 +0200 Subject: [PATCH 07/10] checksum: remove potential ambiguous module ref --- betree/src/checksum/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/betree/src/checksum/mod.rs b/betree/src/checksum/mod.rs index 0a72bb4d..8b0e0aee 100644 --- a/betree/src/checksum/mod.rs +++ b/betree/src/checksum/mod.rs @@ -8,8 +8,8 @@ mod fxhash; mod gxhash; mod xxhash; +pub use self::gxhash::{GxHash, GxHashBuilder}; pub use fxhash::{FxHash, FxHashBuilder}; -pub use gxhash::{GxHash, GxHashBuilder}; pub use xxhash::{XxHash, XxHashBuilder}; /// A checksum to verify data integrity. From e01b43d411ec483ad079535a75d6211b3890750e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20W=C3=BCnsche?= Date: Mon, 15 Apr 2024 12:15:17 +0200 Subject: [PATCH 08/10] betree: move .cargo config to workspace level --- {betree/.cargo => .cargo}/config.toml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {betree/.cargo => .cargo}/config.toml (100%) diff --git a/betree/.cargo/config.toml b/.cargo/config.toml similarity index 100% rename from betree/.cargo/config.toml rename to .cargo/config.toml From 0810a216ce349aa2d8332fa986831937fadc9c1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20W=C3=BCnsche?= Date: Mon, 15 Apr 2024 12:49:25 +0200 Subject: [PATCH 09/10] checksum: correct module description --- betree/src/checksum/fxhash.rs | 2 +- betree/src/checksum/gxhash.rs | 2 +- betree/src/checksum/mod.rs | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/betree/src/checksum/fxhash.rs b/betree/src/checksum/fxhash.rs index f70bbe1e..3837d947 100644 --- a/betree/src/checksum/fxhash.rs +++ b/betree/src/checksum/fxhash.rs @@ -1,4 +1,4 @@ -/// Impl for Checksum for FxHash. +/// Impl Checksum with FxHash. use super::{Builder, Checksum, ChecksumError, State}; use crate::size::StaticSize; use rustc_hash::FxHasher; diff --git a/betree/src/checksum/gxhash.rs b/betree/src/checksum/gxhash.rs index 5d6b4f1b..f0ce0a1c 100644 --- a/betree/src/checksum/gxhash.rs +++ b/betree/src/checksum/gxhash.rs @@ -1,4 +1,4 @@ -/// Impl for Checksum for FxHashw. +/// Impl Checksum with GxHash. use super::{Builder, Checksum, ChecksumError, State}; use crate::size::StaticSize; use gxhash::GxHasher; diff --git a/betree/src/checksum/mod.rs b/betree/src/checksum/mod.rs index 8b0e0aee..755cc2ea 100644 --- a/betree/src/checksum/mod.rs +++ b/betree/src/checksum/mod.rs @@ -1,4 +1,5 @@ -//! This module provides a `Checksum` trait for verifying data integrity. +//! This module provides a `Checksum` trait and implementors for verifying data +//! integrity. use crate::size::Size; use serde::{de::DeserializeOwned, Serialize}; From b244798cd969cf19cba2ebc82c20881171a24dcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20W=C3=BCnsche?= Date: Tue, 16 Apr 2024 14:55:43 +0200 Subject: [PATCH 10/10] database: make GxHash the default checksum --- betree/src/database/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/betree/src/database/mod.rs b/betree/src/database/mod.rs index c6071fbe..399d4c8c 100644 --- a/betree/src/database/mod.rs +++ b/betree/src/database/mod.rs @@ -2,7 +2,7 @@ use crate::{ atomic_option::AtomicOption, cache::ClockCache, - checksum::XxHash, + checksum::GxHash, compression::CompressionConfiguration, cow_bytes::SlicedCowBytes, data_management::{ @@ -69,7 +69,7 @@ const DEFAULT_SYNC_INTERVAL_MS: u64 = 1000; // This is the hash used overall in the entire database. For reconfiguration // recompilation is necessary and this type changed. -type Checksum = XxHash; +type Checksum = GxHash; type ObjectPointer = data_management::ObjectPointer; pub(crate) type ObjectRef = data_management::impls::ObjRef;