Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Calculate CRC32C using SIMD acceleration #66

Merged
merged 5 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ edition = "2021"

[dependencies]
byteorder = "1.4"
crc = "3"
crc32c = "0.6.4"
log = "0.4"
memmap2 = "0.9.0"
rand = "0.8.5"
Expand All @@ -31,8 +31,14 @@ env_logger = "0.10"
serde = { version = "1", features = ["derive"] }

[dev-dependencies]
crc = "3"
hdrhistogram = "7.5.2"
quickcheck = "1.0.3"
regex = "1.8.1"
tempfile = "3.5.0"
chrono = "0.4.31"
criterion = "0.5.1"

[[bench]]
name = "benchmark"
harness = false
50 changes: 50 additions & 0 deletions benches/benchmark.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
use crc::{Crc, CRC_32_ISCSI};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rand::{rngs::OsRng, RngCore};

/// CRC-32 instance from the `crc` crate configured with the `CRC_32_ISCSI` parameters.
///
/// Serves as the baseline implementation that the `crc32c` crate is benchmarked against.
pub const CASTAGNOLI: Crc<u32> = Crc::<u32>::new(&CRC_32_ISCSI);

/// Benchmarks checksum throughput of the `crc` and `crc32c` crates over an 8 KiB buffer
/// filled with random bytes.
///
/// NOTE: despite the `_4k` suffix, the buffer is 8192 bytes and the group is reported as
/// "8k". The function name is kept as-is because `criterion_group!` refers to it.
pub fn criterion_benchmark_4k(c: &mut Criterion) {
    // Single source of truth for the buffer size and the reported throughput.
    const BUFFER_LEN: usize = 8192;

    let mut buffer = [0u8; BUFFER_LEN];
    OsRng.fill_bytes(&mut buffer);

    let mut group = c.benchmark_group("8k");
    group.throughput(criterion::Throughput::Bytes(BUFFER_LEN as u64));

    // Baseline: `crc` crate digest.
    group.bench_function("crc", |b| {
        b.iter(|| {
            let mut digest = CASTAGNOLI.digest();
            digest.update(&buffer);
            black_box(digest.finalize());
        })
    });

    // Candidate: `crc32c` crate one-shot function.
    group.bench_function("crc32c", |b| {
        b.iter(|| {
            black_box(crc32c::crc32c(&buffer));
        })
    });

    // Finish explicitly rather than relying on the group being dropped.
    group.finish();
}

/// Benchmarks checksum throughput of the `crc` and `crc32c` crates over a 1 MiB buffer
/// filled with random bytes. Results are reported under the "1M" group.
pub fn criterion_benchmark_1024k(c: &mut Criterion) {
    // Single source of truth for the buffer size and the reported throughput.
    const BUFFER_LEN: usize = 1024 * 1024;

    // Heap-allocate the buffer: a 1 MiB array on the stack can overflow threads with a
    // small stack (the benchmark closures below only need a byte slice anyway).
    let mut buffer = vec![0u8; BUFFER_LEN];
    OsRng.fill_bytes(&mut buffer);

    let mut group = c.benchmark_group("1M");
    group.throughput(criterion::Throughput::Bytes(BUFFER_LEN as u64));

    // Baseline: `crc` crate digest.
    group.bench_function("crc", |b| {
        b.iter(|| {
            let mut digest = CASTAGNOLI.digest();
            digest.update(&buffer);
            black_box(digest.finalize());
        })
    });

    // Candidate: `crc32c` crate one-shot function.
    group.bench_function("crc32c", |b| {
        b.iter(|| {
            black_box(crc32c::crc32c(&buffer));
        })
    });

    // Finish explicitly rather than relying on the group being dropped.
    group.finish();
}

// Register both benchmark functions under the `benches` group and hand that group to
// criterion's entry-point macro (the `[[bench]]` target is declared with `harness = false`).
criterion_group!(benches, criterion_benchmark_4k, criterion_benchmark_1024k);
criterion_main!(benches);
47 changes: 38 additions & 9 deletions src/segment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use log::{debug, error, log_enabled, trace};
use std::cmp::Ordering;
use std::fmt;
use std::fs::{self, OpenOptions};
use std::hash::Hasher;
use std::io::{Error, ErrorKind, Result};
use std::mem;
use std::ops::Deref;
Expand All @@ -13,7 +14,6 @@ use std::time::Duration;

use crate::mmap_view_sync::MmapViewSync;
use byteorder::{ByteOrder, LittleEndian};
use crc::{Crc, CRC_32_ISCSI};
#[cfg(not(unix))]
use fs4::FileExt;

Expand All @@ -27,8 +27,6 @@ const HEADER_LEN: usize = 8;
/// The length of a CRC value.
const CRC_LEN: usize = 4;

pub const CASTAGNOLI: Crc<u32> = Crc::<u32>::new(&CRC_32_ISCSI);

pub struct Entry {
view: MmapViewSync,
}
Expand Down Expand Up @@ -264,9 +262,9 @@ impl Segment {
if offset + HEADER_LEN + padded_len + CRC_LEN > capacity {
break;
}
let mut digest = CASTAGNOLI.digest_with_initial(crc);
digest.update(&segment[offset..offset + HEADER_LEN + padded_len]);
let entry_crc = digest.finalize();
let mut digest = crc32c::Crc32cHasher::new(crc);
digest.write(&segment[offset..offset + HEADER_LEN + padded_len]);
let entry_crc = digest.finish() as u32;
let stored_crc =
LittleEndian::read_u32(&segment[offset + HEADER_LEN + padded_len..]);
if entry_crc != stored_crc {
Expand Down Expand Up @@ -340,7 +338,7 @@ impl Segment {
let offset = self.size();

let mut crc = self.crc;
let mut digest = CASTAGNOLI.digest_with_initial(crc);
let mut digest = crc32c::Crc32cHasher::new(crc);

LittleEndian::write_u64(&mut self.as_mut_slice()[offset..], entry.len() as u64);
copy_memory(
Expand All @@ -355,8 +353,8 @@ impl Segment {
&mut self.as_mut_slice()[offset + HEADER_LEN + entry.len()..],
);
}
digest.update(&self.as_slice()[offset..offset + HEADER_LEN + padded_len]);
crc = digest.finalize();
digest.write(&self.as_slice()[offset..offset + HEADER_LEN + padded_len]);
crc = digest.finish() as u32;

LittleEndian::write_u32(
&mut self.as_mut_slice()[offset + HEADER_LEN + padded_len..],
Expand Down Expand Up @@ -875,4 +873,35 @@ mod test {
Segment::open(&path).unwrap_err().kind()
);
}

use rand::{rngs::OsRng, RngCore};
use std::hash::Hasher;

/// Checks that both `crc32c` entry points (the one-shot function and the streaming
/// `Hasher`) reproduce the check value the `crc` crate publishes for `CRC_32_ISCSI`.
#[test]
fn test_crc32c() {
    let input = b"123456789";

    // One-shot API.
    let one_shot = crc32c::crc32c(input);
    assert_eq!(one_shot, crc::CRC_32_ISCSI.check);

    // Streaming API; `finish` returns a `u64`, so narrow it back to the 32-bit checksum.
    let mut streaming = crc32c::Crc32cHasher::default();
    streaming.write(input);
    let streamed = streaming.finish() as u32;
    assert_eq!(streamed, crc::CRC_32_ISCSI.check);
}

/// Cross-checks `crc32c::crc32c` against the `crc` crate's `CRC_32_ISCSI` digest on
/// 1024 freshly randomised 8 KiB buffers.
#[test]
fn test_crc32c_accuracy() {
    let reference = crc::Crc::<u32>::new(&crc::CRC_32_ISCSI);
    let mut buffer = [0u8; 8192];

    for _ in 0..1024 {
        // New random payload for every round.
        OsRng.fill_bytes(&mut buffer);

        let expected = {
            let mut digest = reference.digest();
            digest.update(&buffer);
            digest.finalize()
        };
        let actual = crc32c::crc32c(&buffer);

        assert_eq!(expected, actual);
    }
}
}