diff --git a/d4-hts/src/alignment/bamfile.rs b/d4-hts/src/alignment/bamfile.rs index e78de82..8d4f535 100644 --- a/d4-hts/src/alignment/bamfile.rs +++ b/d4-hts/src/alignment/bamfile.rs @@ -150,7 +150,12 @@ impl BamFile { self.mp_free.replace(cur_list); } - pub fn range(&mut self, chrom: &str, from: usize, to: usize) -> Result, AlignmentError> { + pub fn range( + &mut self, + chrom: &str, + from: usize, + to: usize, + ) -> Result, AlignmentError> { if self.idx.is_null() { self.idx = unsafe { let path_buf = CString::new(self.path.as_path().as_os_str().as_bytes()).unwrap(); diff --git a/d4/src/d4file/track.rs b/d4/src/d4file/track.rs index d9232b1..6b843c1 100644 --- a/d4/src/d4file/track.rs +++ b/d4/src/d4file/track.rs @@ -97,7 +97,7 @@ where /// Code that used to scan a multi-track D4 file pub trait DataScanner + ExactSizeIterator> { #[inline(always)] - fn init(&mut self){} + fn init(&mut self) {} /// Get the range this data scanner want to scan. Please note all the data scanner doesn't across the chromosome boundary /// so we don't specify the chromosome, as it's implied by "current chromosome", which is defined by the MultiTrackPartitionReader fn get_range(&self) -> (u32, u32); diff --git a/d4/src/index/data_index/mod.rs b/d4/src/index/data_index/mod.rs index fc5e9bd..460ef30 100644 --- a/d4/src/index/data_index/mod.rs +++ b/d4/src/index/data_index/mod.rs @@ -3,7 +3,14 @@ mod data; use d4_framefile::{Blob, Directory}; pub use data::{DataSummary, Sum}; -use std::{collections::HashMap, fmt::Debug, fs::File, io::{Read, Result, Seek}, marker::PhantomData, ops::{Deref, DerefMut}}; +use std::{ + collections::HashMap, + fmt::Debug, + fs::File, + io::{Read, Result, Seek}, + marker::PhantomData, + ops::{Deref, DerefMut}, +}; use crate::{ssio::D4TrackReader as StreamD4Reader, Chrom, D4TrackReader}; @@ -74,17 +81,17 @@ impl<'a, T: DataSummary> DataIndexQueryResult<'a, T> { } impl DataIndexRef { - pub fn print_index(&self) - where - T: Debug + pub fn print_index(&self) + where + T: Debug, { let granularity = self.header.granularity; - let mut chroms:Vec<_> = self.offset_table.iter().collect(); + let mut chroms: Vec<_> = self.offset_table.iter().collect(); chroms.sort_unstable_by_key(|(_, (start, _))| *start); for (chr, (begin_idx, chrom_size)) in chroms { let mut begin = 0; for item in &self.pre_computed_data[*begin_idx..] { - let end = (begin + granularity).min(*chrom_size as u32); + let end = (begin + granularity).min(*chrom_size as u32); println!("{}\t{}\t{}\t{:.5?}", chr, begin, end, item); begin += granularity; if begin as usize > *chrom_size { diff --git a/d4/src/lib.rs b/d4/src/lib.rs index 937481e..752200c 100644 --- a/d4/src/lib.rs +++ b/d4/src/lib.rs @@ -17,8 +17,8 @@ pub use chrom::Chrom; #[cfg(all(feature = "mapped_io", not(target_arch = "wasm32")))] pub use d4file::{ - find_tracks, find_tracks_in_file, D4FileBuilder, D4FileMerger, D4FileWriter, D4MatrixReader, - D4TrackReader, MultiTrackReader, D4FileWriterExt + find_tracks, find_tracks_in_file, D4FileBuilder, D4FileMerger, D4FileWriter, D4FileWriterExt, + D4MatrixReader, D4TrackReader, MultiTrackReader, }; pub use dict::Dictionary; diff --git a/d4/src/ptab/bit_array.rs b/d4/src/ptab/bit_array.rs index 7520884..aaba232 100644 --- a/d4/src/ptab/bit_array.rs +++ b/d4/src/ptab/bit_array.rs @@ -214,10 +214,7 @@ impl DecoderParameter { fn new(decoders: &[PrimaryTableCodec]) -> Self { let pointers = decoders .iter() - .map(|enc| { - - &enc.memory[0] as *const u8 - }) + .map(|enc| &enc.memory[0] as *const u8) .collect::>(); let mut shift = smallvec![]; diff --git a/d4/src/ssio/mod.rs b/d4/src/ssio/mod.rs index 72e3c51..661fab6 100644 --- a/d4/src/ssio/mod.rs +++ b/d4/src/ssio/mod.rs @@ -13,4 +13,4 @@ mod reader; mod table; mod view; -pub use reader::{D4TrackReader, D4MatrixReader}; +pub use reader::{D4MatrixReader, D4TrackReader}; diff --git a/d4/src/ssio/reader.rs b/d4/src/ssio/reader.rs index ce756d1..34a3629 100644 --- a/d4/src/ssio/reader.rs +++ b/d4/src/ssio/reader.rs @@ -1,11 +1,20 @@ -use std::{io::{Error, ErrorKind, Read, Result, Seek}, path::{PathBuf, Path}}; +use std::{ + io::{Error, ErrorKind, Read, Result, Seek}, + path::{Path, PathBuf}, +}; use d4_framefile::{Blob, Directory, OpenResult}; use reqwest::IntoUrl; -use crate::{Chrom, Header, d4file::validate_header, index::{D4IndexCollection, DataIndexRef, DataSummary, SecondaryFrameIndex}, ptab::PRIMARY_TABLE_NAME, stab::{CompressionMethod, RecordBlockParsingState, SECONDARY_TABLE_NAME}}; +use crate::{ + d4file::validate_header, + index::{D4IndexCollection, DataIndexRef, DataSummary, SecondaryFrameIndex}, + ptab::PRIMARY_TABLE_NAME, + stab::{CompressionMethod, RecordBlockParsingState, SECONDARY_TABLE_NAME}, + Chrom, Header, +}; -use super::{table::SecondaryTableRef, view::D4TrackView, http::HttpReader}; +use super::{http::HttpReader, table::SecondaryTableRef, view::D4TrackView}; pub struct D4TrackReader { header: Header, @@ -20,22 +29,36 @@ pub struct D4MatrixReader { } impl D4MatrixReader { - pub fn open_tracks) -> bool>(url: U, pat: Pat) -> Result> { + pub fn open_tracks) -> bool>( + url: U, + pat: Pat, + ) -> Result> { let mut track_to_open = vec![]; let reader = HttpReader::new(url.clone())?; crate::d4file::find_tracks(reader, pat, &mut track_to_open)?; Ok(Self { - tracks: track_to_open.into_iter().map(|path| { - D4TrackReader::from_url_and_track_name(url.clone(), path.to_str()).unwrap() - }).collect() + tracks: track_to_open + .into_iter() + .map(|path| { + D4TrackReader::from_url_and_track_name(url.clone(), path.to_str()).unwrap() + }) + .collect(), }) } } -impl D4MatrixReader { - pub fn get_view(&mut self, chrom: &str, begin: u32, end: u32, buf: &mut Vec>) -> Result<()> { - for view in self.tracks.iter_mut().map(|x| { - x.get_view(chrom, begin, end) - }) { +impl D4MatrixReader { + pub fn get_view( + &mut self, + chrom: &str, + begin: u32, + end: u32, + buf: &mut Vec>, + ) -> Result<()> { + for view in self + .tracks + .iter_mut() + .map(|x| x.get_view(chrom, begin, end)) + { let view = view?; buf.push(view); } @@ -76,7 +99,7 @@ impl D4TrackReader { pub fn get_view(&mut self, chrom: &str, begin: u32, end: u32) -> Result> { let primary_offset = self.header.primary_table_offset_of_chrom(chrom); let primary_size = self.header.primary_table_size_of_chrom(chrom); - if primary_size == 0 && self.header.dictionary().bit_width() != 0{ + if primary_size == 0 && self.header.dictionary().bit_width() != 0 { return Err(Error::new(ErrorKind::Other, "chrom name not found")); } diff --git a/d4/src/ssio/view.rs b/d4/src/ssio/view.rs index d439966..702665f 100644 --- a/d4/src/ssio/view.rs +++ b/d4/src/ssio/view.rs @@ -69,14 +69,14 @@ impl D4TrackView { let mut buf = vec![0; size + 4]; let mut buf_cursor = 0; if let Some((prev_start, prev_buf)) = self.primary_table_buffer.as_ref() { - let prev_start = (prev_start - prev_start % 8) as usize - * self.dictionary.bit_width() / 8; + let prev_start = + (prev_start - prev_start % 8) as usize * self.dictionary.bit_width() / 8; let prev_end = prev_start + prev_buf.len() - 4; let overlap_start = prev_start.max(start_byte); let overlap_end = prev_end.min(end_byte); if overlap_start == start_byte && overlap_start < overlap_end { buf[..overlap_end - overlap_start].copy_from_slice( - &prev_buf[overlap_start - prev_start..overlap_end - prev_start] + &prev_buf[overlap_start - prev_start..overlap_end - prev_start], ); buf_cursor = overlap_end - overlap_start; } diff --git a/d4/src/stab/sparse_array/reader.rs b/d4/src/stab/sparse_array/reader.rs index cddd019..16f2ec1 100644 --- a/d4/src/stab/sparse_array/reader.rs +++ b/d4/src/stab/sparse_array/reader.rs @@ -13,9 +13,7 @@ pub(crate) fn assemble_incomplete_records<'a, R: Record>( if !incomplete_data.is_empty() { let bytes_needed = R::SIZE - incomplete_data.len(); incomplete_data.extend_from_slice(&extra[..bytes_needed]); - let record = *unsafe { - std::mem::transmute::<_, &R>(&incomplete_data[0]) - }; + let record = *unsafe { std::mem::transmute::<_, &R>(&incomplete_data[0]) }; buffer.push(RecordBlock::Record(record)); incomplete_data.clear(); return &extra[bytes_needed..]; @@ -176,11 +174,15 @@ mod mapped_io { let metadata = self .s_table_root .open_stream(SECONDARY_TABLE_METADATA_NAME)?; - let metadata = String::from_utf8_lossy({ - let mut buf = Vec::new(); - metadata.copy_content(&mut buf); - buf - }.as_ref()).to_string(); + let metadata = String::from_utf8_lossy( + { + let mut buf = Vec::new(); + metadata.copy_content(&mut buf); + buf + } + .as_ref(), + ) + .to_string(); let actual_data = metadata.trim_end_matches(|c| c == '\0'); serde_json::from_str(actual_data).ok() } diff --git a/d4/src/stab/sparse_array/writer.rs b/d4/src/stab/sparse_array/writer.rs index f936a59..ef490cb 100644 --- a/d4/src/stab/sparse_array/writer.rs +++ b/d4/src/stab/sparse_array/writer.rs @@ -52,7 +52,7 @@ impl SecondaryTableWriter for SparseArrayWriter { let mut metadata_stream = self.0.create_stream(SECONDARY_TABLE_METADATA_NAME, 512)?; metadata_stream.write_with_alloc_callback( serde_json::to_string(&metadata).unwrap().as_bytes(), - |s| s.set_frame_size(65536) + |s| s.set_frame_size(65536), )?; let compression = self.1; Ok(partitions diff --git a/d4/src/task/histogram.rs b/d4/src/task/histogram.rs index d9b127a..13160dc 100644 --- a/d4/src/task/histogram.rs +++ b/d4/src/task/histogram.rs @@ -42,7 +42,7 @@ impl TaskPartition> for Partition { #[inline(always)] fn init(&mut self) { - self.histogram = Some(vec![0;self.range]); + self.histogram = Some(vec![0; self.range]); } #[inline(always)] @@ -92,4 +92,4 @@ impl Task> for Histogram { } (below, histogram, above) } -} \ No newline at end of file +} diff --git a/d4/src/task/mod.rs b/d4/src/task/mod.rs index ed03036..698d0ea 100644 --- a/d4/src/task/mod.rs +++ b/d4/src/task/mod.rs @@ -142,7 +142,7 @@ pub trait TaskPartition + ExactSizeIterator>: Send type ResultType: Send + Clone; /// The type for a single row fn new(left: u32, right: u32, parent: &Self::ParentType) -> Self; - /// Initlize the task + /// Initlize the task #[inline(always)] fn init(&mut self) {} /// Feed one value to the task diff --git a/d4tools/src/create/main.rs b/d4tools/src/create/main.rs index a403b6a..ef9decd 100644 --- a/d4tools/src/create/main.rs +++ b/d4tools/src/create/main.rs @@ -46,8 +46,8 @@ fn main_impl(matches: ArgMatches<'_>) -> Result<(), Box> let mut enable_compression = false; - if (!matches.is_present("dict_range") && !matches.is_present("dict-file")) - || matches.is_present("dict-auto") + if (!matches.is_present("dict_range") && !matches.is_present("dict-file")) + || matches.is_present("dict-auto") { match input_type { InputType::Alignment => { @@ -57,12 +57,12 @@ fn main_impl(matches: ArgMatches<'_>) -> Result<(), Box> matches.value_of("ref"), min_mq, )?); - }, + } InputType::BiwWig => { let fp = std::fs::metadata(input_path)?; let bw_file = d4_bigwig::BigWigFile::open(input_path)?; - let genome_size : u64 = bw_file.chroms().into_iter().map(|(_, sz)| sz as u64).sum(); + let genome_size: u64 = bw_file.chroms().into_iter().map(|(_, sz)| sz as u64).sum(); let file_size = fp.len(); @@ -70,14 +70,14 @@ fn main_impl(matches: ArgMatches<'_>) -> Result<(), Box> d4_builder.set_dictionary(Dictionary::new_simple_range_dict(0, 1)?); enable_compression = true; } - }, + } InputType::BedGraph => { let genomes = parse_genome_file( matches .value_of("genome") .expect("Genome file is required for text file format"), )?; - let genome_size : u64 = genomes.into_iter().map(|chr| chr.size as u64).sum(); + let genome_size: u64 = genomes.into_iter().map(|chr| chr.size as u64).sum(); let fp = std::fs::metadata(input_path)?; let file_size = fp.len(); @@ -86,7 +86,7 @@ fn main_impl(matches: ArgMatches<'_>) -> Result<(), Box> d4_builder.set_dictionary(Dictionary::new_simple_range_dict(0, 1)?); enable_compression = true; } - }, + } _ => { panic!("Unsupported input type") } diff --git a/d4tools/src/show/main.rs b/d4tools/src/show/main.rs index 4a01ece..a5606c6 100644 --- a/d4tools/src/show/main.rs +++ b/d4tools/src/show/main.rs @@ -11,7 +11,7 @@ use std::{ borrow::{Borrow, Cow}, collections::HashMap, fs::File, - io::{Error, ErrorKind, Read, Result as IOResult, Seek, Write, BufReader, BufRead}, + io::{BufRead, BufReader, Error, ErrorKind, Read, Result as IOResult, Seek, Write}, path::Path, }; fn write_bed_record_fast( @@ -210,16 +210,19 @@ fn show_impl>( for path in path_buf.iter() { let track_root = match file_root.open(path)? { OpenResult::SubDir(track_root) => track_root, - _ => return Err(Error::new( - ErrorKind::Other, - format!("Unable to open track {}", path.to_string_lossy()), - ).into()), + _ => { + return Err(Error::new( + ErrorKind::Other, + format!("Unable to open track {}", path.to_string_lossy()), + ) + .into()) + } }; if print_header { - print!("\t{}", - path - .file_name() + print!( + "\t{}", + path.file_name() .map(|x| x.to_string_lossy().to_string()) .unwrap_or_else(|| "".to_string()) ); @@ -237,7 +240,8 @@ fn show_impl>( return Err(Error::new( ErrorKind::Other, "Inconsistent reference genome".to_string(), - ).into()) + ) + .into()); } if show_genome { @@ -288,7 +292,7 @@ pub fn entry_point(args: Vec) -> Result<(), Box> continue; } let mut splitted = buf.trim().split('\t'); - let (raw_chr, raw_beg, raw_end) = (splitted.next(),splitted.next(), splitted.next()); + let (raw_chr, raw_beg, raw_end) = (splitted.next(), splitted.next(), splitted.next()); if raw_chr.is_some() && raw_beg.is_some() && raw_end.is_some() { if let Ok(begin) = raw_beg.unwrap().parse::() { if let Ok(end) = raw_end.unwrap().parse::() { @@ -302,7 +306,9 @@ pub fn entry_point(args: Vec) -> Result<(), Box> } Some(region_list.into_iter()) } else { - matches.values_of("regions").map(|x| x.map(|y| y.to_owned()).collect::>().into_iter()) + matches + .values_of("regions") + .map(|x| x.map(|y| y.to_owned()).collect::>().into_iter()) }; if input_filename.starts_with("http://") || input_filename.starts_with("https://") { diff --git a/d4tools/src/stat/main.rs b/d4tools/src/stat/main.rs index 8a8d27d..4c8301e 100644 --- a/d4tools/src/stat/main.rs +++ b/d4tools/src/stat/main.rs @@ -139,7 +139,10 @@ where }) } -fn percentile_stat(matches: ArgMatches<'_>, percentile: f64) -> Result<(), Box> { +fn percentile_stat( + matches: ArgMatches<'_>, + percentile: f64, +) -> Result<(), Box> { let histograms = run_task::(matches)?; for OwnedOutput { chrom: chr, diff --git a/pyd4/src/builder.rs b/pyd4/src/builder.rs index 8fa4426..088c263 100644 --- a/pyd4/src/builder.rs +++ b/pyd4/src/builder.rs @@ -1,4 +1,7 @@ -use d4::{Chrom, D4FileBuilder, D4FileWriter, D4FileWriterExt, Dictionary, ptab::PTablePartitionWriter, stab::SecondaryTablePartWriter, index::D4IndexCollection, D4FileMerger}; +use d4::{ + index::D4IndexCollection, ptab::PTablePartitionWriter, stab::SecondaryTablePartWriter, Chrom, + D4FileBuilder, D4FileMerger, D4FileWriter, D4FileWriterExt, Dictionary, +}; use pyo3::{exceptions::PyKeyError, prelude::*}; use rayon::prelude::*; @@ -42,11 +45,11 @@ impl WriterPartHandle { Ok(()) } unsafe fn get_data_view( - &self, - chr: &str, - data_begin: u32, - data: *const i32, - count: usize + &self, + chr: &str, + data_begin: u32, + data: *const i32, + count: usize, ) -> Option<(u32, u64, usize)> { if self.chrom != chr { return None; @@ -57,9 +60,9 @@ impl WriterPartHandle { return None; } Some(( - begin, - data.offset((begin - data_begin) as isize) as u64, - (end - begin) as usize + begin, + data.offset((begin - data_begin) as isize) as u64, + (end - begin) as usize, )) } } @@ -77,7 +80,7 @@ pub struct D4Merger { impl D4Merger { /// __init__(output) /// -- - /// + /// /// Create a new D4Merger class #[new] fn new(out: &str) -> PyResult { @@ -86,8 +89,8 @@ impl D4Merger { }) } /// add_track(tag, path) - /// -- - /// + /// -- + /// /// Add tagged data tracks to the output file fn add_tagged_track(&mut self, tag: &str, path: &str) { self.inner = Some(self.inner.take().unwrap().add_input_with_tag(path, tag)) @@ -95,9 +98,9 @@ impl D4Merger { /// merge() /// -- - /// + /// /// Do actual file merging - fn merge(&mut self) -> PyResult<()>{ + fn merge(&mut self) -> PyResult<()> { if let Some(inner) = self.inner.take() { inner.merge()?; } @@ -116,53 +119,50 @@ pub struct D4Writer { impl D4Writer { /// close() /// -- - /// - /// Finalize the writer. - /// Note: This method will be implictly called when the writer object is deleted. - /// The output may be incompleted until this method gets called. + /// + /// Finalize the writer. + /// Note: This method will be implictly called when the writer object is deleted. + /// The output may be incompleted until this method gets called. /// If you want to make sure the output is completed, you can explicitly call this function. fn close(&mut self) -> PyResult<()> { let parts = std::mem::replace(&mut self.parts, Vec::new()); - parts.into_par_iter().for_each(|mut part| part.flush().unwrap()); + parts + .into_par_iter() + .for_each(|mut part| part.flush().unwrap()); self.writer_obj.take(); match &self.index_option { IndexFlavor::Sum(path) => { - let mut ic = D4IndexCollection::open_for_write(path)?; - ic.create_secondary_frame_index()?; - ic.create_sum_index()?; - }, + let mut ic = D4IndexCollection::open_for_write(path)?; + ic.create_secondary_frame_index()?; + ic.create_sum_index()?; + } IndexFlavor::NoIndex => (), } Ok(()) } - + fn write(&mut self, chr: &str, start_pos: u32, data_addr: i64, count: usize) -> PyResult<()> { - let active_parts: Vec<_> = self.parts + let active_parts: Vec<_> = self + .parts .iter_mut() .filter_map(|part| unsafe { - part - .get_data_view(chr, start_pos, data_addr as *const i32, count) + part.get_data_view(chr, start_pos, data_addr as *const i32, count) .and_then(|view| Some((part, view))) }) .collect(); - active_parts - .into_par_iter() - .for_each(|(part, view)| { - let data = unsafe { - std::slice::from_raw_parts(view.1 as *const i32, view.2) - }; - part.encode(view.0, data).unwrap(); - }); + active_parts.into_par_iter().for_each(|(part, view)| { + let data = unsafe { std::slice::from_raw_parts(view.1 as *const i32, view.2) }; + part.encode(view.0, data).unwrap(); + }); Ok(()) } } - #[pymethods] impl D4Builder { /// new() /// -- - /// + /// /// Create a builder to construct a D4 file. #[new] fn new() -> PyResult { @@ -175,8 +175,8 @@ impl D4Builder { } /// dict_range(low, high) /// -- - /// - /// Set the primary table dictionary that encodes a range of value. + /// + /// Set the primary table dictionary that encodes a range of value. /// Note: the size of the range should be a power of 2. fn dict_range(&mut self, low: i32, high: i32) -> PyResult<()> { self.dictionary = Dictionary::new_simple_range_dict(low, high)?; @@ -184,8 +184,8 @@ impl D4Builder { } /// add_seq(name, size) /// -- - /// - /// Add a new chromosome in the newly created file. + /// + /// Add a new chromosome in the newly created file. fn add_seq(&mut self, name: &str, size: usize) -> PyResult<()> { if self.genome_size.iter().any(|(x, _)| name == x) { return Err(PyKeyError::new_err("Sequence is already defined")); @@ -195,7 +195,7 @@ impl D4Builder { } /// dup_dict(d4_file) /// -- - /// + /// /// Copy the exact same dictionary definition from existing D4 file fn dup_dict(&mut self, that: &super::D4File) -> PyResult<()> { let reader = that.open()?.into_local_reader()?; @@ -204,11 +204,12 @@ impl D4Builder { } /// dup_seqs(d4_file) /// -- - /// + /// /// Copy the exact same chromosome definition from existing D4 file fn dup_seqs(&mut self, that: &super::D4File) -> PyResult<()> { let reader = that.open()?.into_local_reader()?; - self.genome_size = reader.header() + self.genome_size = reader + .header() .chrom_list() .iter() .map(|chrom| (chrom.name.clone(), chrom.size)) @@ -217,7 +218,7 @@ impl D4Builder { } /// set_compression(level) /// -- - /// + /// /// Set the compression level of the secondary table fn set_compression(&mut self, level: i32) -> PyResult<()> { if level < 0 { @@ -229,22 +230,25 @@ impl D4Builder { } /// into_writer(path) /// -- - /// + /// /// Build the D4 file from the writer class. fn into_writer(&mut self, path: &str, flavor: &str) -> PyResult { - let mut writer : D4FileWriter = D4FileBuilder::new(path) + let mut writer: D4FileWriter = D4FileBuilder::new(path) .set_dictionary(self.dictionary.clone()) - .append_chrom(self.genome_size.iter().map(|(name, size)| { - Chrom { name: name.to_string(), size: *size } + .append_chrom(self.genome_size.iter().map(|(name, size)| Chrom { + name: name.to_string(), + size: *size, })) .create()?; - + if let Some(level) = self.compression { writer.enable_secondary_table_compression(level); } - let parts = writer.parallel_parts(Some(100_0000))?.into_iter() - .map(|(p,s)| { + let parts = writer + .parallel_parts(Some(100_0000))? + .into_iter() + .map(|(p, s)| { let (chr, begin, end) = p.region(); let frontier = begin; WriterPartHandle { @@ -255,7 +259,7 @@ impl D4Builder { } }) .collect(); - Ok(D4Writer{ + Ok(D4Writer { writer_obj: Some(writer), parts, index_option: if flavor == "sum" { @@ -265,4 +269,4 @@ impl D4Builder { }, }) } -} \ No newline at end of file +} diff --git a/pyd4/src/d4file.rs b/pyd4/src/d4file.rs index e63b001..8efb3f9 100644 --- a/pyd4/src/d4file.rs +++ b/pyd4/src/d4file.rs @@ -1,3 +1,4 @@ +use crate::ReaderWrapper; use d4::ptab::{DecodeResult, Decoder}; use d4::stab::SecondaryTablePartReader; use d4::task::{Histogram, Mean, Task, TaskContext}; @@ -5,7 +6,6 @@ use d4::Chrom; use pyo3::prelude::*; use pyo3::types::{PyInt, PyList, PyString, PyTuple}; use rayon::prelude::*; -use crate::ReaderWrapper; use super::D4Iter; @@ -20,10 +20,7 @@ impl D4File { ReaderWrapper::open(self.path.as_str()) } - fn parse_range_spec( - chroms: &[Chrom], - regions: &PyList, - ) -> PyResult> { + fn parse_range_spec(chroms: &[Chrom], regions: &PyList) -> PyResult> { let mut spec = vec![]; for item in regions.iter() { let (chr, begin, end) = if let Ok(chr) = item.downcast::() { @@ -66,7 +63,7 @@ impl D4File { impl D4File { /// new(path) /// -- - /// + /// /// Open a new D4 file for read /// /// Path: path to the D4 file @@ -81,9 +78,9 @@ impl D4File { /// list_tracks() /// -- - /// + /// /// List all the tracks living in this file. - pub fn list_tracks(&self) -> PyResult> { + pub fn list_tracks(&self) -> PyResult> { let mut tracks = Vec::new(); if self.path.starts_with("http://") || self.path.starts_with("https://") { let path = if let Some(sep) = self.path.rfind('#') { @@ -104,34 +101,36 @@ impl D4File { /// is_remote_file() /// -- - /// + /// /// Check if the file is on remote server or local disk pub fn is_remote_file(&self) -> PyResult { Ok(self.path.starts_with("http://") || self.path.starts_with("https://")) } pub fn get_track_specifier(&self, track: &str) -> PyResult { - Ok(if self.path.starts_with("http://") || self.path.starts_with("https://") { - format!("{}#{}", self.path, track) - } else { - format!("{}:{}", self.path, track) - }) + Ok( + if self.path.starts_with("http://") || self.path.starts_with("https://") { + format!("{}#{}", self.path, track) + } else { + format!("{}:{}", self.path, track) + }, + ) } /// open_track(name) /// -- - /// + /// /// Open a track with the specified name. pub fn open_track(&self, track: &str) -> PyResult { let path = self.get_track_specifier(track)?; - let ret = Self{ path }; + let ret = Self { path }; ret.open()?; Ok(ret) } /// chroms() /// -- - /// + /// /// Returns a list of chromosomes defined in the D4 file pub fn chroms(&self) -> PyResult> { Ok(self @@ -144,7 +143,7 @@ impl D4File { /// histogram(regions, min, max) /// -- - /// + /// /// Returns the hisgoram of values in the given regions /// /// regions: The list of regions we are asking @@ -180,7 +179,7 @@ impl D4File { /// mean(regions) /// -- - /// + /// /// Compute the mean dpeth for the given region pub fn mean(&self, regions: &pyo3::types::PyList) -> PyResult> { let mut input = self.open()?; @@ -235,7 +234,8 @@ impl D4File { } let target = unsafe { std::slice::from_raw_parts_mut( - ((buf as u64) + std::mem::size_of::() as u64 * ((from - left) as u64)) + ((buf as u64) + + std::mem::size_of::() as u64 * ((from - left) as u64)) as *mut i32, (to - from) as usize, ) @@ -258,10 +258,7 @@ impl D4File { let mut remote = reader.into_remote_reader()?; let view = remote.get_view(chr, left, right)?; let target = unsafe { - std::slice::from_raw_parts_mut( - buf as u64 as *mut i32, - (right - left) as usize, - ) + std::slice::from_raw_parts_mut(buf as u64 as *mut i32, (right - left) as usize) }; for value in view { let (pos, idx) = value?; @@ -273,7 +270,7 @@ impl D4File { /// value_iter() /// -- - /// + /// /// Returns a value iterator that iterates over the given region pub fn value_iter(&self, chr: &str, left: u32, right: u32) -> PyResult { if self.is_remote_file()? { diff --git a/pyd4/src/iter.rs b/pyd4/src/iter.rs index 90e7a92..9c01b14 100644 --- a/pyd4/src/iter.rs +++ b/pyd4/src/iter.rs @@ -1,9 +1,9 @@ use d4::ptab::DecodeResult; +use d4::ssio::{http::HttpReader, D4TrackReader as RemoteReader}; use d4::stab::SecondaryTablePartReader; use d4::D4TrackReader; -use d4::ssio::{D4TrackReader as RemoteReader, http::HttpReader}; use pyo3::iter::IterNextOutput; -use pyo3::{PyIterProtocol, prelude::*}; +use pyo3::{prelude::*, PyIterProtocol}; use std::io::Result; /// Value iterator for D4 file @@ -14,19 +14,29 @@ pub struct D4Iter { } impl D4Iter { - pub(crate) fn from_remote_reader(mut inner: RemoteReader, chr: &str, left: u32, right: u32) -> PyResult { - Ok(Self{ + pub(crate) fn from_remote_reader( + mut inner: RemoteReader, + chr: &str, + left: u32, + right: u32, + ) -> PyResult { + Ok(Self { _inner: None, - iter: Box::new(inner.get_view(chr, left, right)?.map(|res|{ + iter: Box::new(inner.get_view(chr, left, right)?.map(|res| { if let Ok((_, value)) = res { value } else { 0 } - })) + })), }) } - pub(crate) fn from_local_reader(mut inner: D4TrackReader, chr: &str, left: u32, right: u32) -> PyResult { + pub(crate) fn from_local_reader( + mut inner: D4TrackReader, + chr: &str, + left: u32, + right: u32, + ) -> PyResult { let partition = inner.split(None)?; let chr = chr.to_string(); @@ -73,4 +83,4 @@ impl PyIterProtocol for D4Iter { IterNextOutput::Return("Ended") } } -} \ No newline at end of file +} diff --git a/pyd4/src/lib.rs b/pyd4/src/lib.rs index 5263691..e100d24 100644 --- a/pyd4/src/lib.rs +++ b/pyd4/src/lib.rs @@ -1,30 +1,30 @@ +mod builder; mod d4file; mod iter; -mod builder; +use builder::{D4Builder, D4Merger, D4Writer}; use d4::Chrom; -use pyo3::prelude::*; use d4file::D4File; use iter::D4Iter; -use builder::{D4Builder, D4Writer, D4Merger}; +use pyo3::prelude::*; enum ReaderWrapper { LocalReader(d4::D4TrackReader), - RemoteReader(d4::ssio::D4TrackReader) + RemoteReader(d4::ssio::D4TrackReader), } impl ReaderWrapper { fn open(path: &str) -> PyResult { if path.starts_with("http://") || path.starts_with("https://") { let (path, track) = if let Some(split_pos) = path.rfind('#') { - (&path[..split_pos], &path[split_pos + 1..]) + (&path[..split_pos], &path[split_pos + 1..]) } else { (path, "") }; let conn = d4::ssio::http::HttpReader::new(path)?; let reader = d4::ssio::D4TrackReader::from_reader( - conn, - if track == "" { None } else { Some(track) } + conn, + if track == "" { None } else { Some(track) }, )?; Ok(Self::RemoteReader(reader)) } else { @@ -43,10 +43,11 @@ impl ReaderWrapper { Self::LocalReader(what) => Ok(what), _ => { return Err(std::io::Error::new( - std::io::ErrorKind::Other, - "Operation only supports local D4 file" - ).into()); - } + std::io::ErrorKind::Other, + "Operation only supports local D4 file", + ) + .into()); + } } } fn into_local_reader(self) -> PyResult { @@ -54,10 +55,11 @@ impl ReaderWrapper { Self::LocalReader(what) => Ok(what), _ => { return Err(std::io::Error::new( - std::io::ErrorKind::Other, - "Operation only supports local D4 file" - ).into()); - } + std::io::ErrorKind::Other, + "Operation only supports local D4 file", + ) + .into()); + } } } fn into_remote_reader(self) -> PyResult> { @@ -65,10 +67,11 @@ impl ReaderWrapper { Self::RemoteReader(what) => Ok(what), _ => { return Err(std::io::Error::new( - std::io::ErrorKind::Other, - "Operation only supports remote D4 file" - ).into()); - } + std::io::ErrorKind::Other, + "Operation only supports remote D4 file", + ) + .into()); + } } } }