diff --git a/zlib-rs/Cargo.toml b/zlib-rs/Cargo.toml
index acbe84b7..103d665b 100644
--- a/zlib-rs/Cargo.toml
+++ b/zlib-rs/Cargo.toml
@@ -18,3 +18,5 @@ libc.workspace = true
 [dev-dependencies]
 libloading = "0.8.1"
 libz-ng-sys = "1.1.12"
+crc32fast = "1.3.2"
+quickcheck = "1.0.3"
diff --git a/zlib-rs/src/crc32.rs b/zlib-rs/src/crc32.rs
new file mode 100644
index 00000000..74e050c2
--- /dev/null
+++ b/zlib-rs/src/crc32.rs
@@ -0,0 +1,238 @@
+// Several implementations of CRC-32:
+// * A naive byte-granularity approach
+// * A word-sized approach that processes a usize word at a time
+// * A "braid" implementation that processes a block of N words
+//   at a time, based on the algorithm in section 4.11 from
+//   https://github.com/zlib-ng/zlib-ng/blob/develop/doc/crc-doc.1.0.pdf.
+
+// The binary encoding of the CRC-32 polynomial.
+// Every implementation below consumes its input LSB-first, so we need
+// to use the "reversed" value from e.g.
+// https://en.wikipedia.org/wiki/Cyclic_redundancy_check#Polynomial_representations.
+const CRC32_LSB_POLY: usize = 0xedb8_8320usize;
+
+// Number of bytes in a machine word.
+const W: usize = core::mem::size_of::<usize>();
+
+// The logic assumes that W >= sizeof(u32).
+// In Rust, this is generally true.
+const _: () = assert!(W >= core::mem::size_of::<u32>());
+
+// Pre-computed tables for the CRC32 algorithm.
+// CRC32_BYTE_TABLE corresponds to MulByXPowD from the paper.
+static CRC32_BYTE_TABLE: [[u32; 256]; 1] = build_crc32_table(1);
+// CRC32_WORD_TABLE is MulWordByXpowD.
+static CRC32_WORD_TABLE: [[u32; 256]; W] = build_crc32_table(1);
+
+// Lookup table for `crc32_braid::<N>`. As an associated const it is evaluated
+// at compile time for each `N` in use, instead of being rebuilt on every call.
+struct BraidTable<const N: usize>;
+
+impl<const N: usize> BraidTable<N> {
+    const TABLE: [[u32; 256]; W] = {
+        assert!(N > 0, "crc32_braid needs at least one word per block");
+        build_crc32_table::<256, W>(N)
+    };
+}
+
+// Build the CRC32 tables using a more efficient and simpler approach
+// than the combination of Multiply and XpowN (which implement polynomial
+// multiplication and exponentiation, respectively) from the paper,
+// but with identical results. This function is const, so it should be
+// fully evaluated at compile time.
+//
+// Entry `[i][j]` is `j` after `8 * (ROWS * braid - i)` single-bit steps of
+// the LSB-first shift register.
+const fn build_crc32_table<const COLS: usize, const ROWS: usize>(
+    braid: usize,
+) -> [[u32; COLS]; ROWS] {
+    let mut table = [[0u32; COLS]; ROWS];
+    let mut i = 0;
+    while i < ROWS {
+        let steps = 8 * (ROWS * braid - i);
+        let mut j = 0;
+        while j < COLS {
+            let mut c = j;
+            let mut k = 0;
+            while k < steps {
+                c = if c & 1 != 0 {
+                    CRC32_LSB_POLY ^ (c >> 1)
+                } else {
+                    c >> 1
+                };
+                k += 1;
+            }
+            // For the 256-entry rows used here `c` always fits in 32 bits.
+            table[i][j] = c as u32;
+            j += 1;
+        }
+        i += 1;
+    }
+    table
+}
+
+// Byte-at-a-time update of a raw (already inverted) CRC state.
+fn crc32_naive_inner(data: &[u8], start: u32) -> u32 {
+    data.iter().fold(start, |crc, val| {
+        let crc32_lsb = crc.to_le_bytes()[0];
+        CRC32_BYTE_TABLE[0][usize::from(crc32_lsb ^ *val)] ^ (crc >> 8)
+    })
+}
+
+// Word-at-a-time update of a raw CRC state.
+//
+// `per_word_crcs[i]`, where present, is XORed into the state right before
+// word `i` is consumed; missing entries count as zero. `crc32_braid` uses
+// this to merge its partial CRCs back into a single value.
+fn crc32_words_inner(words: &[usize], start: u32, per_word_crcs: &[u32]) -> u32 {
+    words.iter().enumerate().fold(start, |crc, (i, word)| {
+        let extra = per_word_crcs.get(i).copied().unwrap_or(0);
+        // `words` is a native-endian view of the input bytes. Normalise each
+        // word so that its first input byte is the least significant one,
+        // which is where the CRC has to be folded in. This is a no-op on
+        // little-endian targets.
+        let value = usize::from_le(*word) ^ (crc ^ extra) as usize;
+        value
+            .to_le_bytes()
+            .into_iter()
+            // Borrow the rows; zipping the table by value copies all of it.
+            .zip(&CRC32_WORD_TABLE)
+            .fold(0u32, |crc, (b, tab)| crc ^ tab[usize::from(b)])
+    })
+}
+
+/// Computes the CRC-32 of `data` one byte at a time.
+///
+/// `start` is the CRC-32 of the data preceding `data`, or 0 when `data`
+/// begins a new checksum.
+pub fn crc32_naive(data: &[u8], start: u32) -> u32 {
+    let crc = !start;
+    let crc = crc32_naive_inner(data, crc);
+    !crc
+}
+
+/// Computes the CRC-32 of `data` one machine word at a time.
+///
+/// The bytes before and after the word-aligned middle of `data` are handled
+/// one at a time. `start` is the CRC-32 of the data preceding `data`, or 0
+/// when `data` begins a new checksum.
+pub fn crc32_words(data: &[u8], start: u32) -> u32 {
+    // Get a word-aligned sub-slice of the input data.
+    // SAFETY: every bit pattern is a valid `usize`, so viewing initialized
+    // bytes as words is sound.
+    let (prefix, words, suffix) = unsafe { data.align_to::<usize>() };
+    let crc = !start;
+    let crc = crc32_naive_inner(prefix, crc);
+    let crc = crc32_words_inner(words, crc, &[]);
+    let crc = crc32_naive_inner(suffix, crc);
+    !crc
+}
+
+/// Computes the CRC-32 of `data` with `N` interleaved ("braided") CRCs.
+///
+/// The word-aligned middle of `data` is consumed in blocks of `N` words, each
+/// word of a block feeding its own partial CRC. `N` must be at least 1; this
+/// is checked at compile time. `start` is the CRC-32 of the data preceding
+/// `data`, or 0 when `data` begins a new checksum.
+pub fn crc32_braid<const N: usize>(data: &[u8], start: u32) -> u32 {
+    let table = &BraidTable::<N>::TABLE;
+
+    // Get a word-aligned sub-slice of the input data.
+    // SAFETY: every bit pattern is a valid `usize`, so viewing initialized
+    // bytes as words is sound.
+    let (prefix, words, suffix) = unsafe { data.align_to::<usize>() };
+    let crc = !start;
+    let crc = crc32_naive_inner(prefix, crc);
+
+    let mut crcs = [0u32; N];
+    crcs[0] = crc;
+
+    // Braid every full block except the last one. The last full block (and
+    // any words after it) goes to crc32_words_inner instead, because we
+    // accumulate partial crcs in the array and need to roll those into the
+    // final value; its per_word_crcs argument does that for us.
+    let blocks = (words.len() / N).saturating_sub(1);
+    let (braided, rest) = words.split_at(blocks * N);
+    for block in braided.chunks_exact(N) {
+        // Load the next N words. As in crc32_words_inner, they are
+        // normalised so that the first input byte is the least significant.
+        let mut buffer: [usize; N] =
+            core::array::from_fn(|j| usize::from_le(block[j]) ^ crcs[j] as usize);
+
+        crcs.fill(0);
+        for row in table {
+            for (lane, word) in crcs.iter_mut().zip(buffer.iter_mut()) {
+                *lane ^= row[*word & 0xff];
+                *word >>= 8;
+            }
+        }
+    }
+
+    let crc = core::mem::take(&mut crcs[0]);
+    let crc = crc32_words_inner(rest, crc, &crcs);
+    let crc = crc32_naive_inner(suffix, crc);
+    !crc
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    quickcheck::quickcheck! {
+        fn naive_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
+            let mut h = crc32fast::Hasher::new_with_initial(start);
+            h.update(&v[..]);
+            crc32_naive(&v[..], start) == h.finalize()
+        }
+
+        fn words_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
+            let mut h = crc32fast::Hasher::new_with_initial(start);
+            h.update(&v[..]);
+            crc32_words(&v[..], start) == h.finalize()
+        }
+
+        fn braid_4_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
+            let mut h = crc32fast::Hasher::new_with_initial(start);
+            h.update(&v[..]);
+            crc32_braid::<4>(&v[..], start) == h.finalize()
+        }
+
+        fn braid_5_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
+            let mut h = crc32fast::Hasher::new_with_initial(start);
+            h.update(&v[..]);
+            crc32_braid::<5>(&v[..], start) == h.finalize()
+        }
+
+        fn braid_6_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
+            let mut h = crc32fast::Hasher::new_with_initial(start);
+            h.update(&v[..]);
+            crc32_braid::<6>(&v[..], start) == h.finalize()
+        }
+    }
+
+    // The standard CRC-32 check value for the ASCII digits "123456789".
+    #[test]
+    fn check_value() {
+        let digits = b"123456789";
+        assert_eq!(crc32_naive(digits, 0), 0xcbf4_3926);
+        assert_eq!(crc32_words(digits, 0), 0xcbf4_3926);
+        assert_eq!(crc32_braid::<4>(digits, 0), 0xcbf4_3926);
+    }
+
+    // Inputs long enough to reach the braided main loop at every alignment.
+    #[test]
+    fn long_inputs_agree_with_naive() {
+        let data: Vec<u8> = (0..1024u32).map(|i| (i * 7 + (i >> 5)) as u8).collect();
+        for offset in 0..W {
+            for end in [offset, data.len() / 2, data.len()] {
+                let input = &data[offset..end];
+                let expected = crc32_naive(input, 1);
+                assert_eq!(crc32_words(input, 1), expected);
+                assert_eq!(crc32_braid::<1>(input, 1), expected);
+                assert_eq!(crc32_braid::<4>(input, 1), expected);
+                assert_eq!(crc32_braid::<5>(input, 1), expected);
+                assert_eq!(crc32_braid::<6>(input, 1), expected);
+            }
+        }
+    }
+}
diff --git a/zlib-rs/src/lib.rs b/zlib-rs/src/lib.rs
index 68d29907..667dc5f5 100644
--- a/zlib-rs/src/lib.rs
+++ b/zlib-rs/src/lib.rs
@@ -1,6 +1,7 @@
 mod adler32;
 pub mod allocate;
 pub mod c_api;
+mod crc32;
 pub mod deflate;
 pub mod inflate;
 