diff --git a/.gitignore b/.gitignore index 2f220a1836..d42fcdc7e7 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,6 @@ Cargo.lock pkg/ wasm-pack.log .hypothesis + +# IDEs +.vscode/ diff --git a/include/sourmash.h b/include/sourmash.h index e187005126..401c6b3407 100644 --- a/include/sourmash.h +++ b/include/sourmash.h @@ -174,14 +174,12 @@ SourmashStr kmerminhash_md5sum(const SourmashKmerMinHash *ptr); void kmerminhash_merge(SourmashKmerMinHash *ptr, const SourmashKmerMinHash *other); -SourmashKmerMinHash *kmerminhash_new(uint32_t n, +SourmashKmerMinHash *kmerminhash_new(uint64_t scaled, uint32_t k, - bool prot, - bool dayhoff, - bool hp, + HashFunctions hash_function, uint64_t seed, - uint64_t mx, - bool track_abundance); + bool track_abundance, + uint32_t n); uint32_t kmerminhash_num(const SourmashKmerMinHash *ptr); diff --git a/sourmash/minhash.py b/sourmash/minhash.py index 114e147bc0..56c271f5e4 100644 --- a/sourmash/minhash.py +++ b/sourmash/minhash.py @@ -37,14 +37,20 @@ def _get_max_hash_for_scaled(scaled): elif scaled == 1: return get_minhash_max_hash() - return int(round(get_minhash_max_hash() / scaled, 0)) + return min( + int(round(get_minhash_max_hash() / scaled, 0)), + MINHASH_MAX_HASH + ) def _get_scaled_for_max_hash(max_hash): "Convert a 'max_hash' value into a 'scaled' value." if max_hash == 0: return 0 - return int(round(get_minhash_max_hash() / max_hash, 0)) + return min( + int(round(get_minhash_max_hash() / max_hash, 0)), + MINHASH_MAX_HASH + ) def to_bytes(s): @@ -179,10 +185,17 @@ def __init__( if dayhoff or hp: is_protein = False - # ok, for Rust API, go from scaled back to max_hash - max_hash = _get_max_hash_for_scaled(scaled) + if dayhoff: + hash_function = lib.HASH_FUNCTIONS_MURMUR64_DAYHOFF + elif hp: + hash_function = lib.HASH_FUNCTIONS_MURMUR64_HP + elif is_protein: + hash_function = lib.HASH_FUNCTIONS_MURMUR64_PROTEIN + else: + hash_function = lib.HASH_FUNCTIONS_MURMUR64_DNA + self._objptr = lib.kmerminhash_new( - n, ksize, is_protein, dayhoff, hp, seed, int(max_hash), track_abundance + scaled, ksize, hash_function, seed, track_abundance, n ) if mins: @@ -227,8 +240,17 @@ def __setstate__(self, tup): max_hash, seed) = tup self.__del__() + + hash_function = ( + lib.HASH_FUNCTIONS_MURMUR64_DAYHOFF if dayhoff else + lib.HASH_FUNCTIONS_MURMUR64_HP if hp else + lib.HASH_FUNCTIONS_MURMUR64_PROTEIN if is_protein else + lib.HASH_FUNCTIONS_MURMUR64_DNA + ) + + scaled = _get_scaled_for_max_hash(max_hash) self._objptr = lib.kmerminhash_new( - n, ksize, is_protein, dayhoff, hp, seed, max_hash, track_abundance + scaled, ksize, hash_function, seed, track_abundance, n ) if track_abundance: self.set_abundances(mins) diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml index 808b1194fc..36087c5f07 100644 --- a/src/core/Cargo.toml +++ b/src/core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sourmash" -version = "0.9.0" +version = "0.10.0" authors = ["Luiz Irber "] description = "MinHash sketches for genomic data" repository = "https://github.com/dib-lab/sourmash" @@ -24,7 +24,7 @@ parallel = ["rayon"] [dependencies] backtrace = "=0.3.46" # later versions require rust 1.40 byteorder = "1.3.4" -cfg-if = "0.1.10" +cfg-if = "1.0" failure = "0.1.8" # can remove after .backtrace() is available in std::error::Error finch = { version = "0.3.0", optional = true } fixedbitset = "0.3.0" diff --git a/src/core/src/cmd.rs b/src/core/src/cmd.rs index 223ff763fa..7a7f5c33e2 100644 --- a/src/core/src/cmd.rs +++ b/src/core/src/cmd.rs @@ -150,7 +150,7 @@ impl Default for ComputeParameters { } pub fn build_template(params: &ComputeParameters) -> Vec { - let max_hash = max_hash_for_scaled(params.scaled).unwrap_or(0); + let max_hash = max_hash_for_scaled(params.scaled); params .ksizes diff --git a/src/core/src/ffi/minhash.rs b/src/core/src/ffi/minhash.rs index 55f74cf9ca..980cb7b258 100644 --- a/src/core/src/ffi/minhash.rs +++ b/src/core/src/ffi/minhash.rs @@ -16,28 +16,14 @@ impl ForeignObject for SourmashKmerMinHash { #[no_mangle] pub unsafe extern "C" fn kmerminhash_new( - n: u32, + scaled: u64, k: u32, - prot: bool, - dayhoff: bool, - hp: bool, + hash_function: HashFunctions, seed: u64, - mx: u64, track_abundance: bool, + n: u32, ) -> *mut SourmashKmerMinHash { - // TODO: at most one of (prot, dayhoff, hp) should be true - - let hash_function = if dayhoff { - HashFunctions::murmur64_dayhoff - } else if hp { - HashFunctions::murmur64_hp - } else if prot { - HashFunctions::murmur64_protein - } else { - HashFunctions::murmur64_DNA - }; - - let mh = KmerMinHash::new(n, k, hash_function, seed, mx, track_abundance); + let mh = KmerMinHash::new(scaled, k, hash_function, seed, track_abundance, n); SourmashKmerMinHash::from_rust(mh) } @@ -105,7 +91,11 @@ pub unsafe extern "C" fn kmerminhash_add_hash(ptr: *mut SourmashKmerMinHash, h: } #[no_mangle] -pub unsafe extern "C" fn kmerminhash_add_hash_with_abundance(ptr: *mut SourmashKmerMinHash, h: u64, abundance: u64) { +pub unsafe extern "C" fn kmerminhash_add_hash_with_abundance( + ptr: *mut SourmashKmerMinHash, + h: u64, + abundance: u64, +) { let mh = SourmashKmerMinHash::as_rust_mut(ptr); mh.add_hash_with_abundance(h, abundance); @@ -259,7 +249,7 @@ unsafe fn kmerminhash_set_abundances( }; let mut pairs: Vec<_> = hashes.iter().cloned().zip(abunds.iter().cloned()).collect(); - pairs.sort(); + pairs.sort_unstable(); // Reset the minhash if clear { diff --git a/src/core/src/from.rs b/src/core/src/from.rs index 3abe2553ba..e2405f3594 100644 --- a/src/core/src/from.rs +++ b/src/core/src/from.rs @@ -14,12 +14,12 @@ impl From for KmerMinHash { let values = other.to_vec(); let mut new_mh = KmerMinHash::new( - values.len() as u32, + 0, values.get(0).unwrap().kmer.len() as u32, HashFunctions::murmur64_DNA, 42, - 0, true, + values.len() as u32, ); let hash_with_abunds: Vec<(u64, u64)> = values @@ -52,7 +52,7 @@ mod test { #[test] fn finch_behavior() { - let mut a = KmerMinHash::new(20, 10, HashFunctions::murmur64_DNA, 42, 0, true); + let mut a = KmerMinHash::new(0, 10, HashFunctions::murmur64_DNA, 42, true, 20); let mut b = MashSketcher::new(20, 10, 42); let seq = b"TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA"; @@ -89,7 +89,7 @@ mod test { #[test] fn from_finch() { - let mut a = KmerMinHash::new(20, 10, HashFunctions::murmur64_DNA, 42, 0, true); + let mut a = KmerMinHash::new(0, 10, HashFunctions::murmur64_DNA, 42, true, 20); let mut b = MashSketcher::new(20, 10, 42); let seq = b"TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA"; diff --git a/src/core/src/sketch/minhash.rs b/src/core/src/sketch/minhash.rs index 73afd7a4c7..04626210b2 100644 --- a/src/core/src/sketch/minhash.rs +++ b/src/core/src/sketch/minhash.rs @@ -60,18 +60,18 @@ impl TryFrom<&str> for HashFunctions { } } -pub fn max_hash_for_scaled(scaled: u64) -> Option { +pub fn max_hash_for_scaled(scaled: u64) -> u64 { match scaled { - 0 => None, - 1 => Some(u64::max_value()), - _ => Some((u64::max_value() as f64 / scaled as f64) as u64), + 0 => 0, + 1 => u64::max_value(), + _ => (u64::max_value() as f64 / scaled as f64) as u64, } } pub fn scaled_for_max_hash(max_hash: u64) -> u64 { match max_hash { 0 => 0, - _ => u64::max_value() / max_hash, + _ => (u64::max_value() as f64 / max_hash as f64) as u64, } } @@ -203,7 +203,7 @@ impl<'de> Deserialize<'de> for KmerMinHash { (mins, Some(abunds)) } else { let mut values: Vec<_> = tmpsig.mins.into_iter().collect(); - values.sort(); + values.sort_unstable(); (values, None) }; @@ -222,12 +222,12 @@ impl<'de> Deserialize<'de> for KmerMinHash { impl KmerMinHash { pub fn new( - num: u32, + scaled: u64, ksize: u32, hash_function: HashFunctions, seed: u64, - max_hash: u64, track_abundance: bool, + num: u32, ) -> KmerMinHash { let mins: Vec; let abunds: Option>; @@ -244,6 +244,8 @@ impl KmerMinHash { abunds = None } + let max_hash = max_hash_for_scaled(scaled); + KmerMinHash { num, ksize, @@ -276,6 +278,10 @@ impl KmerMinHash { self.max_hash } + pub fn scaled(&self) -> u64 { + scaled_for_max_hash(self.max_hash) + } + pub fn clear(&mut self) { self.mins.clear(); if let Some(ref mut abunds) = self.abunds { @@ -622,12 +628,12 @@ impl KmerMinHash { self.check_compatible(other)?; let mut combined_mh = KmerMinHash::new( - self.num, + self.scaled(), self.ksize, self.hash_function, self.seed, - self.max_hash, self.abunds.is_some(), + self.num, ); combined_mh.merge(&self)?; @@ -648,12 +654,12 @@ impl KmerMinHash { self.check_compatible(other)?; let mut combined_mh = KmerMinHash::new( - self.num, + self.scaled(), self.ksize, self.hash_function, self.seed, - self.max_hash, self.abunds.is_some(), + self.num, ); combined_mh.merge(&self)?; @@ -776,13 +782,15 @@ impl KmerMinHash { // create a downsampled copy of self pub fn downsample_max_hash(&self, max_hash: u64) -> Result { + let scaled = scaled_for_max_hash(max_hash); + let mut new_mh = KmerMinHash::new( - self.num, + scaled, self.ksize, self.hash_function, self.seed, - max_hash, // old max_hash => max_hash arg self.abunds.is_some(), + self.num, ); if self.abunds.is_some() { new_mh.add_many_with_abund(&self.to_vec_abunds())?; @@ -1453,12 +1461,12 @@ impl<'de> Deserialize<'de> for KmerMinHashBTree { impl KmerMinHashBTree { pub fn new( - num: u32, + scaled: u64, ksize: u32, hash_function: HashFunctions, seed: u64, - max_hash: u64, track_abundance: bool, + num: u32, ) -> KmerMinHashBTree { let mins = Default::default(); @@ -1468,6 +1476,8 @@ impl KmerMinHashBTree { None }; + let max_hash = max_hash_for_scaled(scaled); + KmerMinHashBTree { num, ksize, @@ -1501,6 +1511,10 @@ impl KmerMinHashBTree { self.max_hash } + pub fn scaled(&self) -> u64 { + scaled_for_max_hash(self.max_hash) + } + pub fn clear(&mut self) { self.mins.clear(); if let Some(ref mut abunds) = self.abunds { @@ -1734,12 +1748,12 @@ impl KmerMinHashBTree { self.check_compatible(other)?; let mut combined_mh = KmerMinHashBTree::new( - self.num, + self.scaled(), self.ksize, self.hash_function, self.seed, - self.max_hash, self.abunds.is_some(), + self.num, ); combined_mh.merge(&self)?; @@ -1761,12 +1775,12 @@ impl KmerMinHashBTree { self.check_compatible(other)?; let mut combined_mh = KmerMinHashBTree::new( - self.num, + self.scaled(), self.ksize, self.hash_function, self.seed, - self.max_hash, self.abunds.is_some(), + self.num, ); combined_mh.merge(&self)?; @@ -1878,13 +1892,15 @@ impl KmerMinHashBTree { // create a downsampled copy of self pub fn downsample_max_hash(&self, max_hash: u64) -> Result { + let scaled = scaled_for_max_hash(max_hash); + let mut new_mh = KmerMinHashBTree::new( - self.num, + scaled, self.ksize, self.hash_function, self.seed, - max_hash, // old max_hash => max_hash arg self.abunds.is_some(), + self.num, ); if self.abunds.is_some() { new_mh.add_many_with_abund(&self.to_vec_abunds())?; @@ -2066,12 +2082,12 @@ impl SigsTrait for KmerMinHashBTree { impl From for KmerMinHash { fn from(other: KmerMinHashBTree) -> KmerMinHash { let mut new_mh = KmerMinHash::new( - other.num(), + other.scaled(), other.ksize() as u32, other.hash_function(), other.seed(), - other.max_hash(), other.track_abundance(), + other.num(), ); let mins = other.mins.into_iter().collect(); @@ -2091,12 +2107,12 @@ impl From for KmerMinHash { impl From for KmerMinHashBTree { fn from(other: KmerMinHash) -> KmerMinHashBTree { let mut new_mh = KmerMinHashBTree::new( - other.num(), + other.scaled(), other.ksize() as u32, other.hash_function(), other.seed(), - other.max_hash(), other.track_abundance(), + other.num(), ); let mins: BTreeSet = other.mins.into_iter().collect(); diff --git a/src/core/src/wasm.rs b/src/core/src/wasm.rs index c469a028b2..bfd5e86065 100644 --- a/src/core/src/wasm.rs +++ b/src/core/src/wasm.rs @@ -4,7 +4,7 @@ use serde_json; use crate::cmd::ComputeParameters; use crate::signature::{Signature, SigsTrait}; -use crate::sketch::minhash::{max_hash_for_scaled, HashFunctions, KmerMinHash}; +use crate::sketch::minhash::{HashFunctions, KmerMinHash}; #[wasm_bindgen] impl KmerMinHash { @@ -19,12 +19,6 @@ impl KmerMinHash { scaled: u32, track_abundance: bool, ) -> KmerMinHash { - let max_hash = if num != 0 { - 0 - } else { - max_hash_for_scaled(scaled as u64).unwrap() - }; - // TODO: at most one of (prot, dayhoff, hp) should be true let hash_function = if dayhoff { @@ -38,12 +32,12 @@ impl KmerMinHash { }; KmerMinHash::new( - num, + scaled as u64, ksize, hash_function, seed as u64, - max_hash, track_abundance, + num ) } diff --git a/src/core/tests/minhash.rs b/src/core/tests/minhash.rs index 5b3083354b..a6a1cc4d9e 100644 --- a/src/core/tests/minhash.rs +++ b/src/core/tests/minhash.rs @@ -17,7 +17,7 @@ const EPSILON: f64 = 0.01; #[test] fn throws_error() { - let mut mh = KmerMinHash::new(1, 4, HashFunctions::murmur64_DNA, 42, 0, false); + let mut mh = KmerMinHash::new(0, 4, HashFunctions::murmur64_DNA, 42, false, 1); assert!( mh.add_sequence(b"ATGR", false).is_err(), @@ -27,8 +27,8 @@ fn throws_error() { #[test] fn merge() { - let mut a = KmerMinHash::new(20, 10, HashFunctions::murmur64_DNA, 42, 0, false); - let mut b = KmerMinHash::new(20, 10, HashFunctions::murmur64_DNA, 42, 0, false); + let mut a = KmerMinHash::new(0, 10, HashFunctions::murmur64_DNA, 42, false, 20); + let mut b = KmerMinHash::new(0, 10, HashFunctions::murmur64_DNA, 42, false, 20); a.add_sequence(b"TGCCGCCCAGCA", false).unwrap(); b.add_sequence(b"TGCCGCCCAGCA", false).unwrap(); @@ -54,20 +54,20 @@ fn merge() { #[test] fn invalid_dna() { - let mut a = KmerMinHash::new(20, 3, HashFunctions::murmur64_DNA, 42, 0, false); + let mut a = KmerMinHash::new(0, 3, HashFunctions::murmur64_DNA, 42, false, 20); a.add_sequence(b"AAANNCCCTN", true).unwrap(); assert_eq!(a.mins().len(), 3); - let mut b = KmerMinHash::new(20, 3, HashFunctions::murmur64_DNA, 42, 0, false); + let mut b = KmerMinHash::new(0, 3, HashFunctions::murmur64_DNA, 42, false, 20); b.add_sequence(b"NAAA", true).unwrap(); assert_eq!(b.mins().len(), 1); } #[test] fn similarity() -> Result<(), Box> { - let mut a = KmerMinHash::new(5, 20, HashFunctions::murmur64_hp, 42, 0, true); - let mut b = KmerMinHash::new(5, 20, HashFunctions::murmur64_hp, 42, 0, true); + let mut a = KmerMinHash::new(0, 20, HashFunctions::murmur64_hp, 42, true, 5); + let mut b = KmerMinHash::new(0, 20, HashFunctions::murmur64_hp, 42, true, 5); a.add_hash(1); b.add_hash(1); @@ -81,8 +81,8 @@ fn similarity() -> Result<(), Box> { #[test] fn similarity_2() -> Result<(), Box> { - let mut a = KmerMinHash::new(5, 5, HashFunctions::murmur64_DNA, 42, 0, true); - let mut b = KmerMinHash::new(5, 5, HashFunctions::murmur64_DNA, 42, 0, true); + let mut a = KmerMinHash::new(0, 5, HashFunctions::murmur64_DNA, 42, true, 5); + let mut b = KmerMinHash::new(0, 5, HashFunctions::murmur64_DNA, 42, true, 5); a.add_sequence(b"ATGGA", false)?; a.add_sequence(b"GGACA", false)?; @@ -101,8 +101,8 @@ fn similarity_2() -> Result<(), Box> { #[test] fn similarity_3() -> Result<(), Box> { - let mut a = KmerMinHash::new(5, 20, HashFunctions::murmur64_dayhoff, 42, 0, true); - let mut b = KmerMinHash::new(5, 20, HashFunctions::murmur64_dayhoff, 42, 0, true); + let mut a = KmerMinHash::new(0, 20, HashFunctions::murmur64_dayhoff, 42, true, 5); + let mut b = KmerMinHash::new(0, 20, HashFunctions::murmur64_dayhoff, 42, true, 5); a.add_hash(1); a.add_hash(1); @@ -125,8 +125,8 @@ fn similarity_3() -> Result<(), Box> { #[test] fn dayhoff() { - let mut a = KmerMinHash::new(10, 6, HashFunctions::murmur64_dayhoff, 42, 0, false); - let mut b = KmerMinHash::new(10, 6, HashFunctions::murmur64_protein, 42, 0, false); + let mut a = KmerMinHash::new(0, 6, HashFunctions::murmur64_dayhoff, 42, false, 10); + let mut b = KmerMinHash::new(0, 6, HashFunctions::murmur64_protein, 42, false, 10); a.add_sequence(b"ACTGAC", false).unwrap(); b.add_sequence(b"ACTGAC", false).unwrap(); @@ -137,8 +137,8 @@ fn dayhoff() { #[test] fn hp() { - let mut a = KmerMinHash::new(10, 6, HashFunctions::murmur64_hp, 42, 0, false); - let mut b = KmerMinHash::new(10, 6, HashFunctions::murmur64_protein, 42, 0, false); + let mut a = KmerMinHash::new(0, 6, HashFunctions::murmur64_hp, 42, false, 10); + let mut b = KmerMinHash::new(0, 6, HashFunctions::murmur64_protein, 42, false, 10); a.add_sequence(b"ACTGAC", false).unwrap(); b.add_sequence(b"ACTGAC", false).unwrap(); @@ -149,14 +149,14 @@ fn hp() { #[test] fn max_for_scaled() { - assert_eq!(max_hash_for_scaled(100), Some(184467440737095520)); + assert_eq!(max_hash_for_scaled(100), 184467440737095520); } proptest! { #[test] fn oracle_mins(hashes in vec(u64::ANY, 1..10000)) { - let mut a = KmerMinHash::new(1000, 21, HashFunctions::murmur64_protein, 42, 0, true); - let mut b = KmerMinHashBTree::new(1000, 21, HashFunctions::murmur64_protein, 42, 0, true); + let mut a = KmerMinHash::new(0, 21, HashFunctions::murmur64_protein, 42, true, 1000); + let mut b = KmerMinHashBTree::new(0, 21, HashFunctions::murmur64_protein, 42, true, 1000); let mut c: KmerMinHash = Default::default(); c.set_hash_function(HashFunctions::murmur64_protein).unwrap(); @@ -198,12 +198,12 @@ fn oracle_mins(hashes in vec(u64::ANY, 1..10000)) { proptest! { #[test] fn oracle_mins_scaled(hashes in vec(u64::ANY, 1..10000)) { - let max_hash = max_hash_for_scaled(100).unwrap(); - let mut a = KmerMinHash::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true); - let mut b = KmerMinHashBTree::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true); + let scaled = 100; + let mut a = KmerMinHash::new(scaled, 6, HashFunctions::murmur64_DNA, 42, true, 0); + let mut b = KmerMinHashBTree::new(scaled, 6, HashFunctions::murmur64_DNA, 42, true, 0); - let mut c = KmerMinHash::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true); - let mut d = KmerMinHashBTree::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true); + let mut c = KmerMinHash::new(scaled, 6, HashFunctions::murmur64_DNA, 42, true, 0); + let mut d = KmerMinHashBTree::new(scaled, 6, HashFunctions::murmur64_DNA, 42, true, 0); let mut to_remove = vec![]; for hash in &hashes { @@ -234,7 +234,7 @@ fn oracle_mins_scaled(hashes in vec(u64::ANY, 1..10000)) { assert_eq!(a.num(), b.num()); assert_eq!(a.seed(), b.seed()); assert_eq!(a.ksize(), b.ksize()); - assert_eq!(a.max_hash(), b.max_hash()); + assert_eq!(a.scaled(), b.scaled()); assert_eq!(a.track_abundance(), b.track_abundance()); assert_eq!(a.hash_function(), b.hash_function()); @@ -299,12 +299,12 @@ fn oracle_mins_scaled(hashes in vec(u64::ANY, 1..10000)) { proptest! { #[test] fn prop_merge(seq1 in "[ACGT]{6,100}", seq2 in "[ACGT]{6,200}") { - let max_hash = max_hash_for_scaled(10).unwrap(); - let mut a = KmerMinHash::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true); - let mut b = KmerMinHashBTree::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true); + let scaled: u64 = 10; + let mut a = KmerMinHash::new(scaled, 6, HashFunctions::murmur64_DNA, 42, true, 0); + let mut b = KmerMinHashBTree::new(scaled, 6, HashFunctions::murmur64_DNA, 42, true, 0); - let mut c = KmerMinHash::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true); - let mut d = KmerMinHashBTree::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true); + let mut c = KmerMinHash::new(scaled, 6, HashFunctions::murmur64_DNA, 42, true, 0); + let mut d = KmerMinHashBTree::new(scaled, 6, HashFunctions::murmur64_DNA, 42, true, 0); a.add_sequence(seq1.as_bytes(), false).unwrap(); b.add_sequence(seq1.as_bytes(), false).unwrap(); @@ -551,12 +551,12 @@ fn load_save_minhash_sketches_abund() { #[test] fn merge_empty_scaled() { - let max_hash = max_hash_for_scaled(10).unwrap(); - let mut a = KmerMinHash::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true); - let mut b = KmerMinHashBTree::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true); + let scaled = 10; + let mut a = KmerMinHash::new(scaled, 6, HashFunctions::murmur64_DNA, 42, true, 0); + let mut b = KmerMinHashBTree::new(scaled, 6, HashFunctions::murmur64_DNA, 42, true, 0); - let c = KmerMinHash::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true); - let d = KmerMinHashBTree::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true); + let c = KmerMinHash::new(scaled, 6, HashFunctions::murmur64_DNA, 42, true, 0); + let d = KmerMinHashBTree::new(scaled, 6, HashFunctions::murmur64_DNA, 42, true, 0); a.merge(&c).unwrap(); b.merge(&d).unwrap(); @@ -577,9 +577,9 @@ fn merge_empty_scaled() { #[test] fn check_errors() { - let max_hash = max_hash_for_scaled(10).unwrap(); - let mut a = KmerMinHash::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, false); - let mut b = KmerMinHashBTree::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, false); + let scaled = 10; + let mut a = KmerMinHash::new(scaled, 6, HashFunctions::murmur64_DNA, 42, false, 0); + let mut b = KmerMinHashBTree::new(scaled, 6, HashFunctions::murmur64_DNA, 42, false, 0); // sequence too short: OK assert!(a.add_sequence(b"AC", false).is_ok()); @@ -604,22 +604,22 @@ fn check_errors() { assert!(a.set_hash_function(HashFunctions::murmur64_DNA).is_ok()); assert!(b.set_hash_function(HashFunctions::murmur64_DNA).is_ok()); - let c = KmerMinHash::new(0, 7, HashFunctions::murmur64_DNA, 42, max_hash, true); - let d = KmerMinHashBTree::new(0, 7, HashFunctions::murmur64_DNA, 42, max_hash, true); + let c = KmerMinHash::new(scaled, 7, HashFunctions::murmur64_DNA, 42, true, 0); + let d = KmerMinHashBTree::new(scaled, 7, HashFunctions::murmur64_DNA, 42, true, 0); // different ksize assert!(a.check_compatible(&c).is_err()); assert!(b.check_compatible(&d).is_err()); - let c = KmerMinHash::new(0, 6, HashFunctions::murmur64_protein, 42, max_hash, true); - let d = KmerMinHashBTree::new(0, 6, HashFunctions::murmur64_protein, 42, max_hash, true); + let c = KmerMinHash::new(scaled, 6, HashFunctions::murmur64_protein, 42, true, 0); + let d = KmerMinHashBTree::new(scaled, 6, HashFunctions::murmur64_protein, 42, true, 0); // different hash_function assert!(a.check_compatible(&c).is_err()); assert!(b.check_compatible(&d).is_err()); - let c = KmerMinHash::new(0, 6, HashFunctions::murmur64_DNA, 31, max_hash, true); - let d = KmerMinHashBTree::new(0, 6, HashFunctions::murmur64_DNA, 31, max_hash, true); + let c = KmerMinHash::new(scaled, 6, HashFunctions::murmur64_DNA, 31, true, 0); + let d = KmerMinHashBTree::new(scaled, 6, HashFunctions::murmur64_DNA, 31, true, 0); // different seed assert!(a.check_compatible(&c).is_err()); @@ -631,9 +631,9 @@ fn check_errors() { proptest! { #[test] fn load_save_minhash_dayhoff(seq in "FLYS*CWLPGQRMTHINKVADER{0,1000}") { - let max_hash = max_hash_for_scaled(10).unwrap(); - let mut a = KmerMinHash::new(0, 3, HashFunctions::murmur64_dayhoff, 42, max_hash, true); - let mut b = KmerMinHashBTree::new(0, 3, HashFunctions::murmur64_dayhoff, 42, max_hash, true); + let scaled = 10; + let mut a = KmerMinHash::new(scaled, 3, HashFunctions::murmur64_dayhoff, 42, true, 0); + let mut b = KmerMinHashBTree::new(scaled, 3, HashFunctions::murmur64_dayhoff, 42, true, 0); a.add_protein(seq.as_bytes()).unwrap(); b.add_protein(seq.as_bytes()).unwrap(); @@ -659,9 +659,9 @@ fn load_save_minhash_dayhoff(seq in "FLYS*CWLPGQRMTHINKVADER{0,1000}") { proptest! { #[test] fn load_save_minhash_hp(seq in "FLYS*CWLPGQRMTHINKVADER{0,1000}") { - let max_hash = max_hash_for_scaled(10).unwrap(); - let mut a = KmerMinHash::new(0, 3, HashFunctions::murmur64_hp, 42, max_hash, true); - let mut b = KmerMinHashBTree::new(0, 3, HashFunctions::murmur64_hp, 42, max_hash, true); + let scaled = 10; + let mut a = KmerMinHash::new(scaled, 3, HashFunctions::murmur64_hp, 42, true, 0); + let mut b = KmerMinHashBTree::new(scaled, 3, HashFunctions::murmur64_hp, 42, true, 0); a.add_protein(seq.as_bytes()).unwrap(); b.add_protein(seq.as_bytes()).unwrap(); @@ -687,9 +687,9 @@ fn load_save_minhash_hp(seq in "FLYS*CWLPGQRMTHINKVADER{0,1000}") { proptest! { #[test] fn load_save_minhash_dna(seq in "ACGTN{0,1000}") { - let max_hash = max_hash_for_scaled(10).unwrap(); - let mut a = KmerMinHash::new(0, 21, HashFunctions::murmur64_DNA, 42, max_hash, true); - let mut b = KmerMinHashBTree::new(0, 21, HashFunctions::murmur64_DNA, 42, max_hash, true); + let scaled = 10; + let mut a = KmerMinHash::new(scaled, 21, HashFunctions::murmur64_DNA, 42, true, 0); + let mut b = KmerMinHashBTree::new(scaled, 21, HashFunctions::murmur64_DNA, 42, true, 0); a.add_sequence(seq.as_bytes(), true).unwrap(); b.add_sequence(seq.as_bytes(), true).unwrap();