Skip to content

Commit

Permalink
fix(file source): fix regression in fingerprint calculations
Browse files Browse the repository at this point in the history
0.14.0 included an upgrade to the `crc` crate, with associated updates
to use the new API; however, the new version appears to calculate different checksums.

I put a comment here asking about how to calculate an equivalent
checksum using the new API:
mrhooray/crc-rs#62 (comment)

This also adds a serde alias (`first_line_checksum`) so that checkpoints
written by 0.14.0 can still be read by 0.15.0 once it is released.

Fixes: #8182

Signed-off-by: Jesse Szwedko <[email protected]>
  • Loading branch information
jszwedko committed Jul 8, 2021
1 parent 3629646 commit 5229d39
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 15 deletions.
19 changes: 17 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion lib/file-source/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ libc = "0.2"
winapi = { version = "0.3", features = ["winioctl"] }

[dependencies]
crc = "2.0.0"
crc = "1.8.1"
glob = "0.3.0"
scan_fmt = "0.2.6"

Expand Down
31 changes: 19 additions & 12 deletions lib/file-source/src/fingerprinter.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use crate::{metadata_ext::PortableFileExt, FileSourceInternalEvents};
use crc::Crc;
use serde::{Deserialize, Serialize};
use std::{
collections::HashSet,
Expand All @@ -9,8 +8,6 @@ use std::{
};
use tracing::trace_span;

const FINGERPRINT_CRC: Crc<u64> = Crc::<u64>::new(&crc::CRC_64_ECMA_182);

#[derive(Clone)]
pub struct Fingerprinter {
pub strategy: FingerprintStrategy,
Expand All @@ -37,6 +34,7 @@ pub enum FingerprintStrategy {
pub enum FileFingerprint {
#[serde(rename = "checksum")]
BytesChecksum(u64),
#[serde(alias = "first_line_checksum")]
FirstLinesChecksum(u64),
DevInode(u64, u64),
Unknown(u64),
Expand All @@ -53,7 +51,7 @@ impl FileFingerprint {
let mut buf = Vec::with_capacity(std::mem::size_of_val(dev) * 2);
buf.write_all(&dev.to_be_bytes()).expect("writing to array");
buf.write_all(&ino.to_be_bytes()).expect("writing to array");
FINGERPRINT_CRC.checksum(&buf[..])
crc::crc64::checksum_ecma(&buf[..])
}
Unknown(c) => *c,
}
Expand Down Expand Up @@ -94,7 +92,7 @@ impl Fingerprinter {
let mut fp = fs::File::open(path)?;
fp.seek(SeekFrom::Start(ignored_header_bytes as u64))?;
fingerprinter_read_until(fp, b'\n', lines, buffer)?;
let fingerprint = FINGERPRINT_CRC.checksum(&buffer[..]);
let fingerprint = crc::crc64::checksum_ecma(&buffer[..]);
Ok(FirstLinesChecksum(fingerprint))
}
}
Expand Down Expand Up @@ -151,7 +149,7 @@ impl Fingerprinter {
let mut fp = fs::File::open(path)?;
fp.seek(io::SeekFrom::Start(ignored_header_bytes as u64))?;
fp.read_exact(&mut buffer[..bytes])?;
let fingerprint = FINGERPRINT_CRC.checksum(&buffer[..]);
let fingerprint = crc::crc64::checksum_ecma(&buffer[..]);
Ok(Some(FileFingerprint::BytesChecksum(fingerprint)))
}
_ => Ok(None),
Expand Down Expand Up @@ -193,7 +191,7 @@ fn fingerprinter_read_until(

#[cfg(test)]
mod test {
use super::{FileSourceInternalEvents, FingerprintStrategy, Fingerprinter};
use super::{FileFingerprint, FileSourceInternalEvents, FingerprintStrategy, Fingerprinter};
use std::{collections::HashSet, fs, io::Error, path::Path, time::Duration};
use tempfile::tempdir;

Expand Down Expand Up @@ -226,9 +224,12 @@ mod test {
assert!(fingerprinter
.get_fingerprint_of_file(&empty_path, &mut buf)
.is_err());
assert!(fingerprinter
.get_fingerprint_of_file(&full_line_path, &mut buf)
.is_ok());
assert_eq!(
fingerprinter
.get_fingerprint_of_file(&full_line_path, &mut buf)
.unwrap(),
FileFingerprint::FirstLinesChecksum(8302183670541403209),
);
assert!(fingerprinter
.get_fingerprint_of_file(&not_full_line_path, &mut buf)
.is_err());
Expand Down Expand Up @@ -298,7 +299,10 @@ mod test {
assert!(run(&incomlete_line).is_err());
assert!(run(&incomplete_under_max_line_length_by_one).is_err());

assert!(run(&one_line).is_ok());
assert_eq!(
run(&one_line).unwrap(),
FileFingerprint::FirstLinesChecksum(12790833211255586118)
);
assert!(run(&one_line_duplicate).is_ok());
assert!(run(&one_line_continued).is_ok());
assert!(run(&different_two_lines).is_ok());
Expand Down Expand Up @@ -356,7 +360,10 @@ mod test {

assert!(run(&incomlete_lines).is_err());

assert!(run(&two_lines).is_ok());
assert_eq!(
run(&two_lines).unwrap(),
FileFingerprint::FirstLinesChecksum(8288549968916239272)
);
assert!(run(&two_lines_duplicate).is_ok());
assert!(run(&two_lines_continued).is_ok());
assert!(run(&different_three_lines).is_ok());
Expand Down

0 comments on commit 5229d39

Please sign in to comment.