Skip to content

Commit

Permalink
MRG: adjust how ANI is calculated in the revindex code. (#3218)
Browse files Browse the repository at this point in the history
Calculate the ANI of matches against the original query using `f_orig_query`
and `f_match_orig`, instead of using `f_unique_to_query` and `f_match`.

This fixes the ANI differences between `sourmash gather` and RocksDB
branchwater gather for the columns `query_containment_ani`,
`match_containment_ani`, `max_containment_ani`, and
`average_containment_ani`.

Refs:
* Used by sourmash-bio/sourmash_plugin_branchwater#361
* Fixes RocksDB-based calculations for sourmash-bio/sourmash_plugin_branchwater#331
  • Loading branch information
ctb authored Jun 19, 2024
1 parent 5bde7dc commit 3009725
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ md5 = "0.7.0"
memmap2 = "0.9.4"
murmurhash3 = "0.0.5"
needletail = { version = "0.5.1", default-features = false }
niffler = { version = "2.6.0", default-features = false, features = [ "gz" ] }
niffler = { version = "2.4.0", default-features = false, features = [ "gz" ] }
nohash-hasher = "0.2.0"
num-iter = "0.1.45"
once_cell = "1.18.0"
Expand Down
4 changes: 2 additions & 2 deletions src/core/src/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,8 +244,8 @@ pub fn calculate_gather_stats(

// get ANI values
let ksize = match_mh.ksize() as f64;
let query_containment_ani = ani_from_containment(f_unique_to_query, ksize);
let match_containment_ani = ani_from_containment(f_match, ksize);
let query_containment_ani = ani_from_containment(f_orig_query, ksize);
let match_containment_ani = ani_from_containment(f_match_orig, ksize);
let mut query_containment_ani_ci_low = None;
let mut query_containment_ani_ci_high = None;
let mut match_containment_ani_ci_low = None;
Expand Down
3 changes: 3 additions & 0 deletions src/core/src/index/revindex/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,7 @@ mod test {
assert_eq!(round5(match_.f_unique_to_query()), round5(0.13096862));
assert_eq!(match_.unique_intersect_bp, 1920000);
assert_eq!(match_.remaining_bp, 12740000);
assert_eq!(round5(match_.query_containment_ani()), round5(0.90773763));

let match_ = &matches[1];
let names: Vec<&str> = match_.name().split(' ').take(1).collect();
Expand All @@ -822,6 +823,7 @@ mod test {
assert_eq!(round5(match_.f_unique_to_query()), round5(0.115279));
assert_eq!(match_.unique_intersect_bp, 1690000);
assert_eq!(match_.remaining_bp, 11050000);
assert_eq!(round5(match_.query_containment_ani()), round5(0.9068280));

let match_ = &matches[2];
dbg!(match_);
Expand All @@ -831,6 +833,7 @@ mod test {
assert_eq!(round5(match_.f_unique_to_query()), round5(0.0627557));
assert_eq!(match_.unique_intersect_bp, 920000);
assert_eq!(match_.remaining_bp, 10130000);
assert_eq!(round5(match_.query_containment_ani()), round5(0.90728512));

Ok(())
}
Expand Down

0 comments on commit 3009725

Please sign in to comment.