CRC32 implementation for issue trifectatechfoundation#14

Add a naive implementation of CRC-32 based on the C code from the PNG specification. Add an optimized word-size implementation based on Kadatch and Jenkins. Add an optimized interleaved implementation from section 4.11 of the same paper.
immunant · Feb 13, 2024 · 390d0e1 · 390d0e1
1 parent fc8e11e
commit 390d0e1
Show file tree

Hide file tree

Showing 3 changed files with 136 additions and 0 deletions.
diff --git a/zlib-rs/Cargo.toml b/zlib-rs/Cargo.toml
@@ -18,3 +18,5 @@ libc.workspace = true
 [dev-dependencies]
 libloading = "0.8.1"
 libz-ng-sys = "1.1.12"
+crc32fast = "1.3.2"
+quickcheck = "1.0.3"
diff --git a/zlib-rs/src/crc32.rs b/zlib-rs/src/crc32.rs
@@ -0,0 +1,133 @@
+/// CRC-32 polynomial in least-significant-bit-first form.
+///
+/// Typed as `usize` because `build_crc32_table` XORs it into a `usize` register.
+const CRC32_LSB_POLY: usize = 0xedb8_8320;
+/// Register value every checksum starts from.
+const START_CHECKSUM: u32 = 0xffff_ffff;
+/// Mask XORed into the register to obtain the reported checksum.
+const FINAL_XOR: u32 = 0xffff_ffff;
+
+/// Size of a machine word (`usize`) in bytes.
+const W: usize = std::mem::size_of::<usize>();
+/// Number of words per block (one CRC lane per word) in `crc32_interleaved`.
+const N: usize = 5;
+
+/// One row: entry `b` is the value `b` after 8 polynomial bit steps.
+static CRC32_BYTE_TABLE: [[u32; 256]; 1] = build_crc32_table(1);
+/// Row `i`, entry `b`: the value `b` after `8 * (W - i)` polynomial bit steps.
+static CRC32_WORD_TABLE: [[u32; 256]; W] = build_crc32_table(1);
+/// Row `i`, entry `b`: the value `b` after `8 * (W * N - i)` polynomial bit steps.
+static CRC32_STRIDE_TABLE: [[u32; 256]; W] = build_crc32_table(N);
+
+/// Builds `W` lookup rows of `T` entries each during constant evaluation.
+///
+/// Entry `[row][col]` is the value `col` after `8 * (W * stride - row)` bit steps.
+/// One bit step shifts the register right by one and XORs in `CRC32_LSB_POLY`
+/// when the bit shifted out was set.
+///
+/// Note that the const generic `W` shadows the module-level constant `W` inside
+/// this function; here it is simply the number of rows requested by the caller.
+///
+/// Evaluation fails with an arithmetic overflow if `W * stride` is smaller than
+/// the index of a row that has entries (for example when `stride` is 0).
+const fn build_crc32_table<const T: usize, const W: usize>(stride: usize) -> [[u32; T]; W] {
+    let mut table = [[0u32; T]; W];
+
+    // `for` loops and iterators are not usable in a `const fn`, hence `while`.
+    let mut row = 0;
+    while row < W {
+        let mut col = 0;
+        while col < T {
+            let mut register = col;
+            let mut steps_left = 8 * (W * stride - row);
+            while steps_left > 0 {
+                let shifted_out = register & 1;
+                register >>= 1;
+                if shifted_out == 1 {
+                    register ^= CRC32_LSB_POLY;
+                }
+                steps_left -= 1;
+            }
+            // The register starts from a table index and is only ever XORed with
+            // the polynomial constant, so the cast keeps the meaningful low bits.
+            table[row][col] = register as u32;
+            col += 1;
+        }
+        row += 1;
+    }
+
+    table
+}
+
+/// Advances the raw CRC register `start` over `data`, one byte per table lookup.
+///
+/// Neither the initial value nor the final XOR is applied here, so calls can be
+/// chained by feeding one call's result in as the next call's `start`.
+fn crc32_naive_inner(data: &[u8], start: u32) -> u32 {
+    let mut crc = start;
+    for &byte in data {
+        // Truncation is intended: only the low byte of the register is mixed
+        // with the incoming byte to select the table entry.
+        let index = usize::from((crc as u8) ^ byte);
+        crc = (crc >> 8) ^ CRC32_BYTE_TABLE[0][index];
+    }
+    crc
+}
+
+/// Computes the CRC-32 checksum of `data` one byte at a time.
+///
+/// The register starts at `START_CHECKSUM` and the result is XORed with
+/// `FINAL_XOR` before being returned.
+pub fn crc32_naive(data: &[u8]) -> u32 {
+    let register = crc32_naive_inner(data, START_CHECKSUM);
+    FINAL_XOR ^ register
+}
+
+/// Advances the raw CRC register `start` over `words`, one machine word at a time.
+///
+/// `words` is expected to hold input bytes reinterpreted as native-endian `usize`
+/// values, which is how the callers in this file obtain it. For the word at
+/// index `i`, `per_word_crcs[i]` (or 0 when the slice is shorter) is XORed in
+/// together with the running register; `crc32_interleaved` uses this to merge
+/// its per-lane registers, while `crc32_words` passes an empty slice.
+///
+/// Neither the initial value nor the final XOR is applied here.
+fn crc32_words_inner(words: &[usize], start: u32, per_word_crcs: &[u32]) -> u32 {
+    words.iter().enumerate().fold(start, |crc, (i, word)| {
+        let lane_crc = per_word_crcs.get(i).copied().unwrap_or(0);
+
+        // Convert to little-endian order *before* mixing in the register, so the
+        // register always lines up with the first input bytes of the word. On
+        // little-endian targets `from_le` is a no-op; on big-endian targets it
+        // swaps the bytes, which XORing first and splitting with `to_le_bytes`
+        // afterwards would get wrong.
+        let value = usize::from_le(*word) ^ (crc ^ lane_crc) as usize;
+
+        let word_crc = value
+            .to_le_bytes()
+            .into_iter()
+            .enumerate()
+            .fold(0u32, |acc, (pos, byte)| {
+                acc ^ CRC32_WORD_TABLE[pos][usize::from(byte)]
+            });
+
+        // Per Listing 4 in Kadatch and Jenkins: when a word is narrower than the
+        // 32-bit register, the register bits that did not fit into the word are
+        // carried over, shifted down by the word width. `checked_shr` yields
+        // `None` once the shift reaches 32 bits, i.e. whenever the whole
+        // register fit into the word and nothing is left to carry.
+        let carry = crc.checked_shr((8 * W) as u32).unwrap_or(0);
+        carry ^ word_crc
+    })
+}
+
+/// Computes the CRC-32 checksum of `data`, processing the word-aligned middle of
+/// the buffer one machine word at a time.
+///
+/// Bytes before and after the aligned region go through the byte-at-a-time
+/// routine.
+pub fn crc32_words(data: &[u8]) -> u32 {
+    // SAFETY: every bit pattern is a valid `usize`, so viewing the aligned part
+    // of a byte slice as `usize` words cannot produce an invalid value.
+    let (head, body, tail) = unsafe { data.align_to::<usize>() };
+
+    let after_head = crc32_naive_inner(head, START_CHECKSUM);
+    let after_body = crc32_words_inner(body, after_head, &[]);
+    let after_tail = crc32_naive_inner(tail, after_body);
+
+    FINAL_XOR ^ after_tail
+}
+
+/// Computes the CRC-32 checksum of `data` with `N` interleaved CRC lanes.
+///
+/// The word-aligned middle of the buffer is split into blocks of `N` words.
+/// Lane `k` owns word `k` of every block; `CRC32_STRIDE_TABLE` advances each lane
+/// by a whole block per step, so the lanes do not depend on one another inside
+/// the loop. The last full block and any leftover words are handed to
+/// `crc32_words_inner`, which folds the lane registers back into a single one.
+/// Unaligned bytes at either end go through the byte-at-a-time routine.
+pub fn crc32_interleaved(data: &[u8]) -> u32 {
+    // SAFETY: every bit pattern is a valid `usize`, so viewing the aligned part
+    // of a byte slice as `usize` words cannot produce an invalid value.
+    let (prefix, words, suffix) = unsafe { data.align_to::<usize>() };
+
+    // One register per lane. The register is 32 bits wide and `CRC32_LSB_POLY`
+    // only fits a `usize` of at least 32 bits, so a lane register always fits
+    // into the word it is XORed with and no overflow lane is needed.
+    let mut crcs = [0u32; N];
+    crcs[0] = crc32_naive_inner(prefix, START_CHECKSUM);
+
+    // Keep the last full block out of the strided loop: it is processed by
+    // `crc32_words_inner` below, which is where the lanes get merged.
+    let stride_blocks = (words.len() / N).saturating_sub(1);
+    let (strided, tail) = words.split_at(stride_blocks * N);
+
+    for block in strided.chunks_exact(N) {
+        // Convert each word to little-endian order *before* mixing in its lane
+        // register, so the register lines up with the first input bytes of the
+        // word on big-endian targets as well.
+        let mut buffer: [usize; N] =
+            std::array::from_fn(|lane| usize::from_le(block[lane]) ^ crcs[lane] as usize);
+
+        // The old registers now live inside `buffer`; start the lanes afresh.
+        crcs = [0; N];
+
+        // Row `j` of the table handles the byte at little-endian position `j`.
+        for row in CRC32_STRIDE_TABLE.iter() {
+            for (lane_crc, lane_word) in crcs.iter_mut().zip(buffer.iter_mut()) {
+                *lane_crc ^= row[*lane_word & 0xff];
+                *lane_word >>= 8;
+            }
+        }
+    }
+
+    // Lane 0 becomes the running register; the remaining lanes are mixed in
+    // word by word while the final block is processed.
+    let crc = std::mem::take(&mut crcs[0]);
+    let crc = crc32_words_inner(tail, crc, &crcs);
+    crc32_naive_inner(suffix, crc) ^ FINAL_XOR
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    /// Standard CRC-32 check input: the ASCII digits "123456789".
+    const CHECK_INPUT: &[u8] = b"123456789";
+    /// CRC-32 of `CHECK_INPUT`.
+    const CHECK_VALUE: u32 = 0xcbf4_3926;
+
+    /// Pins all three implementations to a published known answer, independent
+    /// of any reference crate.
+    #[test]
+    fn check_value() {
+        assert_eq!(crc32_naive(CHECK_INPUT), CHECK_VALUE);
+        assert_eq!(crc32_words(CHECK_INPUT), CHECK_VALUE);
+        assert_eq!(crc32_interleaved(CHECK_INPUT), CHECK_VALUE);
+    }
+
+    /// With no input the start value and the final XOR cancel out.
+    #[test]
+    fn empty_input() {
+        assert_eq!(crc32_naive(&[]), 0);
+        assert_eq!(crc32_words(&[]), 0);
+        assert_eq!(crc32_interleaved(&[]), 0);
+    }
+
+    /// Deterministic sweep over start offsets and lengths.
+    ///
+    /// The stride loop in `crc32_interleaved` only runs once at least two full
+    /// blocks (`2 * N` aligned words) are available, so this covers lengths well
+    /// beyond that, starting at every alignment within two words.
+    /// NOTE(review): the randomized properties below presumably generate fairly
+    /// short vectors by default - confirm against the quickcheck configuration.
+    #[test]
+    fn long_and_misaligned_inputs_match_crc32fast() {
+        // Wrapping to `u8` is intended; this is just a non-constant byte pattern.
+        let data: Vec<u8> = (0..1024u32).map(|i| (i * 7 + 3) as u8).collect();
+        let lengths = [0, 1, W, N * W, 2 * N * W, 3 * N * W + 1, 900];
+
+        for start in 0..2 * W {
+            for len in lengths {
+                let slice = &data[start..start + len];
+                let expected = crc32fast::hash(slice);
+                assert_eq!(crc32_naive(slice), expected, "naive: start {start}, len {len}");
+                assert_eq!(crc32_words(slice), expected, "words: start {start}, len {len}");
+                assert_eq!(
+                    crc32_interleaved(slice),
+                    expected,
+                    "interleaved: start {start}, len {len}"
+                );
+            }
+        }
+    }
+
+    // Randomized comparison of each implementation against `crc32fast`.
+    quickcheck::quickcheck! {
+        fn naive_is_crc32fast(v: Vec<u8>) -> bool {
+            crc32_naive(&v[..]) == crc32fast::hash(&v[..])
+        }
+
+        fn words_is_crc32fast(v: Vec<u8>) -> bool {
+            crc32_words(&v[..]) == crc32fast::hash(&v[..])
+        }
+
+        fn interleaved_is_crc32fast(v: Vec<u8>) -> bool {
+            crc32_interleaved(&v[..]) == crc32fast::hash(&v[..])
+        }
+    }
+}
diff --git a/zlib-rs/src/lib.rs b/zlib-rs/src/lib.rs
@@ -1,6 +1,7 @@
 mod adler32;
 pub mod allocate;
 pub mod c_api;
+mod crc32;
 pub mod deflate;
 pub mod inflate;