From d7b8d86ac0a64937d7bc51e07a4536c688e4f231 Mon Sep 17 00:00:00 2001 From: Chris Saunders Date: Fri, 20 Jun 2025 23:49:23 -0700 Subject: [PATCH 1/3] typo --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 36b9995bf..3f8c1b1d7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,7 +20,7 @@ //! ``` //! //! We can reproduce that with Rust-Htslib. Reading BAM files and printing the header -//! to the the screen is as easy as +//! to the screen is as easy as //! //! ``` //! use rust_htslib::{bam, bam::Read}; From 49d82ea0188f1be8c5c35a5152d69fd2f86fb876 Mon Sep 17 00:00:00 2001 From: Chris Saunders Date: Sat, 21 Jun 2025 00:26:41 -0700 Subject: [PATCH 2/3] Add new set_cigar method --- src/bam/record.rs | 58 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/src/bam/record.rs b/src/bam/record.rs index 0751dcf45..7e29c4e0c 100644 --- a/src/bam/record.rs +++ b/src/bam/record.rs @@ -473,6 +473,64 @@ impl Record { self.inner_mut().core.l_extranul = extranul as u8; } + /// Replace current cigar with a new one. 
+ pub fn set_cigar(&mut self, new_cigar: Option<&CigarString>) { + self.cigar = None; + + let qname_data_len = self.qname_capacity(); + let old_cigar_data_len = self.cigar_len() * 4; + + // Length of data after cigar + let other_data_len = self.inner_mut().l_data - (qname_data_len + old_cigar_data_len) as i32; + + let new_cigar_len = match new_cigar { + Some(x) => x.len(), + None => 0, + }; + let new_cigar_data_len = new_cigar_len * 4; + + if new_cigar_data_len < old_cigar_data_len { + self.inner_mut().l_data -= (old_cigar_data_len - new_cigar_data_len) as i32; + } else if new_cigar_data_len > old_cigar_data_len { + self.inner_mut().l_data += (new_cigar_data_len - old_cigar_data_len) as i32; + + // Reallocate if necessary + if (self.inner().m_data as i32) < self.inner().l_data { + // Verbosity due to lexical borrowing + let l_data = self.inner().l_data; + self.realloc_var_data(l_data as usize); + } + } + + if new_cigar_data_len != old_cigar_data_len { + // Move other data to new location + unsafe { + let data = slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize); + + ::libc::memmove( + data.as_mut_ptr().add(new_cigar_data_len) as *mut ::libc::c_void, + data.as_mut_ptr().add(old_cigar_data_len) as *mut ::libc::c_void, + other_data_len as usize, + ); + } + } + + // Copy cigar data + if let Some(cigar_string) = new_cigar { + let cigar_data = unsafe { + #[allow(clippy::cast_ptr_alignment)] + slice::from_raw_parts_mut( + self.inner.data.add(qname_data_len) as *mut u32, + cigar_string.len(), + ) + }; + for (i, c) in cigar_string.iter().enumerate() { + cigar_data[i] = c.encode(); + } + } + self.inner_mut().core.n_cigar = new_cigar_len as u32; + } + fn realloc_var_data(&mut self, new_len: usize) { // pad request let new_len = new_len as u32; From 811e6d5fdeba9795a224bf9199aa4c4c565ce263 Mon Sep 17 00:00:00 2001 From: Chris Saunders Date: Sat, 21 Jun 2025 01:32:41 -0700 Subject: [PATCH 3/3] Add test and fix for set_cigar --- src/bam/mod.rs | 61 
+++++++++++++++++++++++++++++++++++++++++++++++ src/bam/record.rs | 6 ++--- 2 files changed, 63 insertions(+), 4 deletions(-) diff --git a/src/bam/mod.rs b/src/bam/mod.rs index 4e5d68bd9..9b9d3aa02 100644 --- a/src/bam/mod.rs +++ b/src/bam/mod.rs @@ -1933,6 +1933,67 @@ CCCCCCCCCCCCCCCCCCC"[..], assert_eq!(rec.qname(), b"r0"); } + #[test] + fn test_set_cigar() { + let (names, _, seqs, quals, cigars) = gold(); + + assert!(names[0] != names[1]); + + for i in 0..names.len() { + let mut rec = record::Record::new(); + rec.set(names[i], Some(&cigars[i]), seqs[i], quals[i]); + rec.push_aux(b"NM", Aux::I32(15)).unwrap(); + + assert_eq!(rec.qname(), names[i]); + assert_eq!(*rec.cigar(), cigars[i]); + assert_eq!(rec.seq().as_bytes(), seqs[i]); + assert_eq!(rec.qual(), quals[i]); + assert_eq!(rec.aux(b"NM").unwrap(), Aux::I32(15)); + + // boring cigar + let new_cigar = CigarString(vec![Cigar::Match(rec.seq_len() as u32)]); + assert_ne!(*rec.cigar(), new_cigar); + rec.set_cigar(Some(&new_cigar)); + assert_eq!(*rec.cigar(), new_cigar); + + assert_eq!(rec.qname(), names[i]); + assert_eq!(rec.seq().as_bytes(), seqs[i]); + assert_eq!(rec.qual(), quals[i]); + assert_eq!(rec.aux(b"NM").unwrap(), Aux::I32(15)); + + // bizarre cigar + let new_cigar = (0..rec.seq_len()) + .map(|i| { + if i % 2 == 0 { + Cigar::Match(1) + } else { + Cigar::Ins(1) + } + }) + .collect::<Vec<_>>(); + let new_cigar = CigarString(new_cigar); + assert_ne!(*rec.cigar(), new_cigar); + rec.set_cigar(Some(&new_cigar)); + assert_eq!(*rec.cigar(), new_cigar); + + assert_eq!(rec.qname(), names[i]); + assert_eq!(rec.seq().as_bytes(), seqs[i]); + assert_eq!(rec.qual(), quals[i]); + assert_eq!(rec.aux(b"NM").unwrap(), Aux::I32(15)); + + // empty cigar + let new_cigar = CigarString(Vec::new()); + assert_ne!(*rec.cigar(), new_cigar); + rec.set_cigar(None); + assert_eq!(*rec.cigar(), new_cigar); + + assert_eq!(rec.qname(), names[i]); + assert_eq!(rec.seq().as_bytes(), seqs[i]); + assert_eq!(rec.qual(), quals[i]); + 
assert_eq!(rec.aux(b"NM").unwrap(), Aux::I32(15)); + } + } + #[test] fn test_remove_aux() { let mut bam = Reader::from_path(Path::new("test/test.bam")).expect("Error opening file."); diff --git a/src/bam/record.rs b/src/bam/record.rs index 7e29c4e0c..f63e87797 100644 --- a/src/bam/record.rs +++ b/src/bam/record.rs @@ -505,11 +505,9 @@ impl Record { if new_cigar_data_len != old_cigar_data_len { // Move other data to new location unsafe { - let data = slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize); - ::libc::memmove( - data.as_mut_ptr().add(new_cigar_data_len) as *mut ::libc::c_void, - data.as_mut_ptr().add(old_cigar_data_len) as *mut ::libc::c_void, + self.inner.data.add(qname_data_len + new_cigar_data_len) as *mut ::libc::c_void, + self.inner.data.add(qname_data_len + old_cigar_data_len) as *mut ::libc::c_void, other_data_len as usize, ); }