From 24efa2cd84b0c752480d1eb35a8bcb9dbe9ab56d Mon Sep 17 00:00:00 2001 From: mholt Date: Wed, 25 Sep 2024 06:06:49 -0700 Subject: [PATCH] changes to patch bug in bam tag removal --- CHANGELOG.md | 4 ++++ Cargo.lock | 2 +- Cargo.toml | 2 +- LICENSE-THIRDPARTY.json | 2 +- src/writers/ordered_bam_writer.rs | 30 ++++++++++++++---------------- 5 files changed, 21 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8094f60..286dca3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# v1.4.5 +## Fixed +* Fixed an error where BAM phase tags were not always properly removed prior to re-tagging, leading to a run-time error and exit + # v1.4.4 ## Fixed * Fixed an error where phasing information that was present in input files would be copied through to output files if it was not overwritten by HiPhase phasing results. HiPhase will now automatically remove this phasing information to prevent accidental mixing of phase results. diff --git a/Cargo.lock b/Cargo.lock index 01de4e5..7e770b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -526,7 +526,7 @@ dependencies = [ [[package]] name = "hiphase" -version = "1.4.4" +version = "1.4.5" dependencies = [ "bio", "bit-vec", diff --git a/Cargo.toml b/Cargo.toml index a20516e..089e236 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "hiphase" -version = "1.4.4" +version = "1.4.5" authors = ["J. Matthew Holt "] description = "A tool for jointly phasing small, structural, and tandem repeat variants for PacBio sequencing data" edition = "2021" diff --git a/LICENSE-THIRDPARTY.json b/LICENSE-THIRDPARTY.json index cf1a9f9..b8b1fa4 100644 --- a/LICENSE-THIRDPARTY.json +++ b/LICENSE-THIRDPARTY.json @@ -496,7 +496,7 @@ }, { "name": "hiphase", - "version": "1.4.4", + "version": "1.4.5", "authors": "J. Matthew Holt ", "repository": null, "license": null, diff --git a/src/writers/ordered_bam_writer.rs b/src/writers/ordered_bam_writer.rs index dac3ef0..cde9f61 100644 --- a/src/writers/ordered_bam_writer.rs +++ b/src/writers/ordered_bam_writer.rs @@ -206,26 +206,25 @@ impl OrderedBamWriter { continue; } - // this may need to be <=, hard to tell yet + // quick sanity check assert!(record_pos <= end_pos as i64); + // no matter what, we need to strip any existing phase information from the record + strip_record_phasing(&mut record)?; + // now check if the read name has a lookup let read_name = std::str::from_utf8(record.qname()).unwrap(); - match read_block_lookup.get(read_name) { - Some((phase_block_id, haplotag)) => { - // we have a match, modify phase info - // phase_block_id is 0-based, so add 1 to it - record.push_aux("PS".as_bytes(), bam::record::Aux::I32((phase_block_id + 1).try_into()?))?; - // haplotag is 0/1 and we want 1/2 in the BAM, so add 1 to it - record.push_aux("HP".as_bytes(), bam::record::Aux::U8((haplotag + 1).try_into()?))?; - bam_writer.write(&record)?; - }, - None => { - // no match, so just copy the read over after stripping any phase information - strip_record_phasing(&mut record)?; - bam_writer.write(&record)?; - } + + if let Some((phase_block_id, haplotag)) = read_block_lookup.get(read_name) { + // we have a match, modify phase info + // phase_block_id is 0-based, so add 1 to it + record.push_aux("PS".as_bytes(), bam::record::Aux::I32((phase_block_id + 1).try_into()?))?; + // haplotag is 0/1 and we want 1/2 in the BAM, so add 1 to it + record.push_aux("HP".as_bytes(), bam::record::Aux::U8((haplotag + 1).try_into()?))?; + } else { + // no haplotag information for this read, the record was already stripped; no-op }; + bam_writer.write(&record)?; } }, Err(e) => { @@ -233,7 +232,6 @@ impl OrderedBamWriter { warn!("Empty problem block received, no read mappings on chromosome {}", chrom_result); } else { warn!("Received \'{}\', while seeking to {}:{}-{} in bam #{}, likely no reads in region", e, chrom_result, start_pos, end_pos, bam_index); - //return Err(e); } } };