From 9320451721924431d8396126151c7a4e6ad21630 Mon Sep 17 00:00:00 2001 From: Li Zhanhui Date: Wed, 15 Nov 2023 14:38:49 +0800 Subject: [PATCH 1/5] Calculate CRC32C using SIMD acceleration Signed-off-by: Li Zhanhui --- Cargo.toml | 8 ++++++- benches/benchmark.rs | 50 ++++++++++++++++++++++++++++++++++++++++++++ src/segment.rs | 47 +++++++++++++++++++++++++++++++++-------- 3 files changed, 95 insertions(+), 10 deletions(-) create mode 100644 benches/benchmark.rs diff --git a/Cargo.toml b/Cargo.toml index dd16d95..6fdb501 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ edition = "2021" [dependencies] byteorder = "1.4" -crc = "3" +crc32c = "0.6.4" log = "0.4" memmap2 = "0.9.0" rand = "0.8.5" @@ -31,8 +31,14 @@ env_logger = "0.10" serde = { version = "1", features = ["derive"] } [dev-dependencies] +crc = "3" hdrhistogram = "7.5.2" quickcheck = "1.0.3" regex = "1.8.1" tempfile = "3.5.0" chrono = "0.4.31" +criterion = "0.5.1" + +[[bench]] +name = "benchmark" +harness = false diff --git a/benches/benchmark.rs b/benches/benchmark.rs new file mode 100644 index 0000000..e606bf1 --- /dev/null +++ b/benches/benchmark.rs @@ -0,0 +1,50 @@ +use crc::{Crc, CRC_32_ISCSI}; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use rand::{rngs::OsRng, RngCore}; + +pub const CASTAGNOLI: Crc<u32> = Crc::<u32>::new(&CRC_32_ISCSI); + +pub fn criterion_benchmark_4k(c: &mut Criterion) { + let mut buffer = [0u8; 8192]; + OsRng.fill_bytes(&mut buffer); + + let mut group = c.benchmark_group("8k"); + group.throughput(criterion::Throughput::Bytes(8192)); + group.bench_function("crc", |b| { + b.iter(|| { + let mut digest = CASTAGNOLI.digest(); + digest.update(&buffer); + black_box(digest.finalize()); + }) + }); + + group.bench_function("crc32c", |b| { + b.iter(|| { + black_box(crc32c::crc32c(&buffer)); + }) + }); +} + +pub fn criterion_benchmark_1024k(c: &mut Criterion) { + let mut buffer = [0u8; 1048576]; + OsRng.fill_bytes(&mut buffer); + + let mut group = 
c.benchmark_group("1M"); + group.throughput(criterion::Throughput::Bytes(1048576)); + group.bench_function("crc", |b| { + b.iter(|| { + let mut digest = CASTAGNOLI.digest(); + digest.update(&buffer); + black_box(digest.finalize()); + }) + }); + + group.bench_function("crc32c", |b| { + b.iter(|| { + black_box(crc32c::crc32c(&buffer)); + }) + }); +} + +criterion_group!(benches, criterion_benchmark_4k, criterion_benchmark_1024k); +criterion_main!(benches); diff --git a/src/segment.rs b/src/segment.rs index 3a8e6f6..adb01d8 100644 --- a/src/segment.rs +++ b/src/segment.rs @@ -2,6 +2,7 @@ use log::{debug, error, log_enabled, trace}; use std::cmp::Ordering; use std::fmt; use std::fs::{self, OpenOptions}; +use std::hash::Hasher; use std::io::{Error, ErrorKind, Result}; use std::mem; use std::ops::Deref; @@ -13,7 +14,6 @@ use std::time::Duration; use crate::mmap_view_sync::MmapViewSync; use byteorder::{ByteOrder, LittleEndian}; -use crc::{Crc, CRC_32_ISCSI}; #[cfg(not(unix))] use fs4::FileExt; @@ -27,8 +27,6 @@ const HEADER_LEN: usize = 8; /// The length of a CRC value. 
const CRC_LEN: usize = 4; -pub const CASTAGNOLI: Crc<u32> = Crc::<u32>::new(&CRC_32_ISCSI); - pub struct Entry { view: MmapViewSync, } @@ -264,9 +262,9 @@ impl Segment { if offset + HEADER_LEN + padded_len + CRC_LEN > capacity { break; } - let mut digest = CASTAGNOLI.digest_with_initial(crc); - digest.update(&segment[offset..offset + HEADER_LEN + padded_len]); - let entry_crc = digest.finalize(); + let mut digest = crc32c::Crc32cHasher::new(crc); + digest.write(&segment[offset..offset + HEADER_LEN + padded_len]); + let entry_crc = digest.finish() as u32; let stored_crc = LittleEndian::read_u32(&segment[offset + HEADER_LEN + padded_len..]); if entry_crc != stored_crc { @@ -340,7 +338,7 @@ impl Segment { let offset = self.size(); let mut crc = self.crc; - let mut digest = CASTAGNOLI.digest_with_initial(crc); + let mut digest = crc32c::Crc32cHasher::new(crc); LittleEndian::write_u64(&mut self.as_mut_slice()[offset..], entry.len() as u64); copy_memory( @@ -355,8 +353,8 @@ impl Segment { &mut self.as_mut_slice()[offset + HEADER_LEN + entry.len()..], ); } - digest.update(&self.as_slice()[offset..offset + HEADER_LEN + padded_len]); - crc = digest.finalize(); + digest.write(&self.as_slice()[offset..offset + HEADER_LEN + padded_len]); + crc = digest.finish() as u32; LittleEndian::write_u32( &mut self.as_mut_slice()[offset + HEADER_LEN + padded_len..], @@ -875,4 +873,35 @@ mod test { Segment::open(&path).unwrap_err().kind() ); } + + use rand::{rngs::OsRng, RngCore}; + use std::hash::Hasher; + + #[test] + fn test_crc32c() { + let message = b"123456789"; + let crc = crc32c::crc32c(message); + assert_eq!(crc, crc::CRC_32_ISCSI.check); + + let mut hasher = crc32c::Crc32cHasher::default(); + hasher.write(message); + assert_eq!(hasher.finish() as u32, crc::CRC_32_ISCSI.check); + } + + #[test] + fn test_crc32c_accuracy() { + let mut buffer = [0u8; 8192]; + let castagnoli = crc::Crc::<u32>::new(&crc::CRC_32_ISCSI); + + (0..1024).for_each(|_| { + OsRng.fill_bytes(&mut buffer); + let mut digest = 
castagnoli.digest(); + digest.update(&buffer); + let crc1 = digest.finalize(); + + let crc2 = crc32c::crc32c(&buffer); + + assert_eq!(crc1, crc2); + }); + } } From c7a5b244779bc66f4d86783917bb4e4a69b83c0a Mon Sep 17 00:00:00 2001 From: lizhanhui Date: Fri, 17 Nov 2023 22:33:15 +0800 Subject: [PATCH 2/5] experiment: use features to enable/disable hardware acceleration Signed-off-by: lizhanhui --- Cargo.toml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6fdb501..199772a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ edition = "2021" [dependencies] byteorder = "1.4" -crc32c = "0.6.4" +crc32c = { git = "https://github.com/lizhanhui/crc32c.git", branch = "master", default-features = false } log = "0.4" memmap2 = "0.9.0" rand = "0.8.5" @@ -42,3 +42,7 @@ criterion = "0.5.1" [[bench]] name = "benchmark" harness = false + +[features] +default = ["simd"] +simd = ["crc32c/x86_64", "crc32c/aarch64"] From 181547a5ebf47b5bf9913d6989c1d0f097089ae4 Mon Sep 17 00:00:00 2001 From: lizhanhui Date: Sat, 18 Nov 2023 18:38:43 +0800 Subject: [PATCH 3/5] Revert "experiment: use features to enable/disable hardware acceleration" This reverts commit c7a5b244779bc66f4d86783917bb4e4a69b83c0a. 
--- Cargo.toml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 199772a..6fdb501 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ edition = "2021" [dependencies] byteorder = "1.4" -crc32c = { git = "https://github.com/lizhanhui/crc32c.git", branch = "master", default-features = false } +crc32c = "0.6.4" log = "0.4" memmap2 = "0.9.0" rand = "0.8.5" @@ -42,7 +42,3 @@ criterion = "0.5.1" [[bench]] name = "benchmark" harness = false - -[features] -default = ["simd"] -simd = ["crc32c/x86_64", "crc32c/aarch64"] From 52835c22f37949f6d53ebef8adaab88d93657d9b Mon Sep 17 00:00:00 2001 From: lizhanhui Date: Sat, 18 Nov 2023 20:55:19 +0800 Subject: [PATCH 4/5] fix: ensure checksum is consistent with crc crate with arbitrary seed value Signed-off-by: lizhanhui --- src/segment.rs | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/src/segment.rs b/src/segment.rs index adb01d8..6099103 100644 --- a/src/segment.rs +++ b/src/segment.rs @@ -262,9 +262,10 @@ impl Segment { if offset + HEADER_LEN + padded_len + CRC_LEN > capacity { break; } - let mut digest = crc32c::Crc32cHasher::new(crc); - digest.write(&segment[offset..offset + HEADER_LEN + padded_len]); - let entry_crc = digest.finish() as u32; + let entry_crc = crc32c::crc32c_append( + !crc.reverse_bits(), + &segment[offset..offset + HEADER_LEN + padded_len], + ); let stored_crc = LittleEndian::read_u32(&segment[offset + HEADER_LEN + padded_len..]); if entry_crc != stored_crc { @@ -338,7 +339,6 @@ impl Segment { let offset = self.size(); let mut crc = self.crc; - let mut digest = crc32c::Crc32cHasher::new(crc); LittleEndian::write_u64(&mut self.as_mut_slice()[offset..], entry.len() as u64); copy_memory( @@ -353,8 +353,10 @@ impl Segment { &mut self.as_mut_slice()[offset + HEADER_LEN + entry.len()..], ); } - digest.write(&self.as_slice()[offset..offset + HEADER_LEN + padded_len]); - crc = digest.finish() as u32; + crc = 
crc32c::crc32c_append( + !crc.reverse_bits(), + &self.as_slice()[offset..offset + HEADER_LEN + padded_len], + ); LittleEndian::write_u32( &mut self.as_mut_slice()[offset + HEADER_LEN + padded_len..], @@ -898,9 +900,23 @@ mod test { let mut digest = castagnoli.digest(); digest.update(&buffer); let crc1 = digest.finalize(); - let crc2 = crc32c::crc32c(&buffer); + assert_eq!(crc1, crc2); + }); + } + + #[test] + fn test_crc32c_with_arbitrary_initial_value() { + let mut buffer = [0u8; 8192]; + let castagnoli = crc::Crc::::new(&crc::CRC_32_ISCSI); + + (0..1024).for_each(|seed| { + OsRng.fill_bytes(&mut buffer); + let mut digest = castagnoli.digest_with_initial(seed); + digest.update(&buffer); + let crc1 = digest.finalize(); + let crc2 = crc32c::crc32c_append(!seed.reverse_bits(), &buffer); assert_eq!(crc1, crc2); }); } From 951531c40435768c8a38f7a0cfe15e95e7766e00 Mon Sep 17 00:00:00 2001 From: lizhanhui Date: Sat, 18 Nov 2023 21:19:47 +0800 Subject: [PATCH 5/5] fix: remove unused trait Signed-off-by: lizhanhui --- src/segment.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/segment.rs b/src/segment.rs index 6099103..9590aa5 100644 --- a/src/segment.rs +++ b/src/segment.rs @@ -2,7 +2,6 @@ use log::{debug, error, log_enabled, trace}; use std::cmp::Ordering; use std::fmt; use std::fs::{self, OpenOptions}; -use std::hash::Hasher; use std::io::{Error, ErrorKind, Result}; use std::mem; use std::ops::Deref;