Commit

38 committed Jan 25, 2022
1 parent ed6427f commit 5ab3096
Showing 19 changed files with 225 additions and 168 deletions.
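All of the hunks below are formatting-only: long signatures reflowed across multiple lines, import lists split and sorted, trailing commas added, and whitespace normalized, with no behavioral change. The shape of the changes is consistent with a `cargo fmt` pass over the workspace (an inference — the commit message itself is not shown).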
7 changes: 6 additions & 1 deletion d4-hts/src/alignment/bamfile.rs
@@ -150,7 +150,12 @@ impl BamFile {
         self.mp_free.replace(cur_list);
     }

-    pub fn range(&mut self, chrom: &str, from: usize, to: usize) -> Result<Ranged<'_>, AlignmentError> {
+    pub fn range(
+        &mut self,
+        chrom: &str,
+        from: usize,
+        to: usize,
+    ) -> Result<Ranged<'_>, AlignmentError> {
         if self.idx.is_null() {
             self.idx = unsafe {
                 let path_buf = CString::new(self.path.as_path().as_os_str().as_bytes()).unwrap();
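A hypothetical call site for the reflowed `range` signature — `BamFile::open` and the `d4_hts::alignment` re-export are assumptions; only `range` itself appears in this hunk:

use d4_hts::alignment::{AlignmentError, BamFile};

fn open_window(path: &str) -> Result<(), AlignmentError> {
    // `BamFile::open` is assumed; the diff only shows `range`.
    let mut bam = BamFile::open(path)?;
    // Request alignments overlapping chr1:1,000,000-1,001,000.
    let _window = bam.range("chr1", 1_000_000, 1_001_000)?;
    Ok(())
}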
2 changes: 1 addition & 1 deletion d4/src/d4file/track.rs
@@ -97,7 +97,7 @@ where
 /// Code that used to scan a multi-track D4 file
 pub trait DataScanner<RowType: Iterator<Item = i32> + ExactSizeIterator> {
     #[inline(always)]
-    fn init(&mut self){}
+    fn init(&mut self) {}
     /// Get the range this data scanner want to scan. Please note all the data scanner doesn't across the chromosome boundary
     /// so we don't specify the chromosome, as it's implied by "current chromosome", which is defined by the MultiTrackPartitionReader
     fn get_range(&self) -> (u32, u32);
19 changes: 13 additions & 6 deletions d4/src/index/data_index/mod.rs
@@ -3,7 +3,14 @@ mod data;
 use d4_framefile::{Blob, Directory};
 pub use data::{DataSummary, Sum};

-use std::{collections::HashMap, fmt::Debug, fs::File, io::{Read, Result, Seek}, marker::PhantomData, ops::{Deref, DerefMut}};
+use std::{
+    collections::HashMap,
+    fmt::Debug,
+    fs::File,
+    io::{Read, Result, Seek},
+    marker::PhantomData,
+    ops::{Deref, DerefMut},
+};

 use crate::{ssio::D4TrackReader as StreamD4Reader, Chrom, D4TrackReader};

@@ -74,17 +81,17 @@ impl<'a, T: DataSummary> DataIndexQueryResult<'a, T> {
 }

 impl<T: DataSummary> DataIndexRef<T> {
-    pub fn print_index(&self)
-    where
-        T: Debug
+    pub fn print_index(&self)
+    where
+        T: Debug,
     {
         let granularity = self.header.granularity;
-        let mut chroms:Vec<_> = self.offset_table.iter().collect();
+        let mut chroms: Vec<_> = self.offset_table.iter().collect();
         chroms.sort_unstable_by_key(|(_, (start, _))| *start);
         for (chr, (begin_idx, chrom_size)) in chroms {
             let mut begin = 0;
             for item in &self.pre_computed_data[*begin_idx..] {
-                let end = (begin + granularity).min(*chrom_size as u32);
+                let end = (begin + granularity).min(*chrom_size as u32);
                 println!("{}\t{}\t{}\t{:.5?}", chr, begin, end, item);
                 begin += granularity;
                 if begin as usize > *chrom_size {
4 changes: 2 additions & 2 deletions d4/src/lib.rs
@@ -17,8 +17,8 @@ pub use chrom::Chrom;

 #[cfg(all(feature = "mapped_io", not(target_arch = "wasm32")))]
 pub use d4file::{
-    find_tracks, find_tracks_in_file, D4FileBuilder, D4FileMerger, D4FileWriter, D4MatrixReader,
-    D4TrackReader, MultiTrackReader, D4FileWriterExt
+    find_tracks, find_tracks_in_file, D4FileBuilder, D4FileMerger, D4FileWriter, D4FileWriterExt,
+    D4MatrixReader, D4TrackReader, MultiTrackReader,
 };

 pub use dict::Dictionary;
5 changes: 1 addition & 4 deletions d4/src/ptab/bit_array.rs
@@ -214,10 +214,7 @@ impl DecoderParameter {
     fn new(decoders: &[PrimaryTableCodec<Reader>]) -> Self {
         let pointers = decoders
             .iter()
-            .map(|enc| {
-
-                &enc.memory[0] as *const u8
-            })
+            .map(|enc| &enc.memory[0] as *const u8)
             .collect::<SmallVec<_>>();

         let mut shift = smallvec![];
2 changes: 1 addition & 1 deletion d4/src/ssio/mod.rs
@@ -13,4 +13,4 @@ mod reader;
 mod table;
 mod view;

-pub use reader::{D4TrackReader, D4MatrixReader};
+pub use reader::{D4MatrixReader, D4TrackReader};
49 changes: 36 additions & 13 deletions d4/src/ssio/reader.rs
@@ -1,11 +1,20 @@
-use std::{io::{Error, ErrorKind, Read, Result, Seek}, path::{PathBuf, Path}};
+use std::{
+    io::{Error, ErrorKind, Read, Result, Seek},
+    path::{Path, PathBuf},
+};

 use d4_framefile::{Blob, Directory, OpenResult};
 use reqwest::IntoUrl;

-use crate::{Chrom, Header, d4file::validate_header, index::{D4IndexCollection, DataIndexRef, DataSummary, SecondaryFrameIndex}, ptab::PRIMARY_TABLE_NAME, stab::{CompressionMethod, RecordBlockParsingState, SECONDARY_TABLE_NAME}};
+use crate::{
+    d4file::validate_header,
+    index::{D4IndexCollection, DataIndexRef, DataSummary, SecondaryFrameIndex},
+    ptab::PRIMARY_TABLE_NAME,
+    stab::{CompressionMethod, RecordBlockParsingState, SECONDARY_TABLE_NAME},
+    Chrom, Header,
+};

-use super::{table::SecondaryTableRef, view::D4TrackView, http::HttpReader};
+use super::{http::HttpReader, table::SecondaryTableRef, view::D4TrackView};

 pub struct D4TrackReader<R: Read + Seek> {
     header: Header,
@@ -20,22 +29,36 @@ pub struct D4MatrixReader<R: Read + Seek> {
 }

 impl D4MatrixReader<HttpReader> {
-    pub fn open_tracks<U: IntoUrl + Clone, Pat: FnMut(Option<&Path>) -> bool>(url: U, pat: Pat) -> Result<D4MatrixReader<HttpReader>> {
+    pub fn open_tracks<U: IntoUrl + Clone, Pat: FnMut(Option<&Path>) -> bool>(
+        url: U,
+        pat: Pat,
+    ) -> Result<D4MatrixReader<HttpReader>> {
         let mut track_to_open = vec![];
         let reader = HttpReader::new(url.clone())?;
         crate::d4file::find_tracks(reader, pat, &mut track_to_open)?;
         Ok(Self {
-            tracks: track_to_open.into_iter().map(|path| {
-                D4TrackReader::from_url_and_track_name(url.clone(), path.to_str()).unwrap()
-            }).collect()
+            tracks: track_to_open
+                .into_iter()
+                .map(|path| {
+                    D4TrackReader::from_url_and_track_name(url.clone(), path.to_str()).unwrap()
+                })
+                .collect(),
         })
     }
 }
-impl <R: Read + Seek> D4MatrixReader<R> {
-    pub fn get_view(&mut self, chrom: &str, begin: u32, end: u32, buf: &mut Vec<D4TrackView<R>>) -> Result<()> {
-        for view in self.tracks.iter_mut().map(|x| {
-            x.get_view(chrom, begin, end)
-        }) {
+impl<R: Read + Seek> D4MatrixReader<R> {
+    pub fn get_view(
+        &mut self,
+        chrom: &str,
+        begin: u32,
+        end: u32,
+        buf: &mut Vec<D4TrackView<R>>,
+    ) -> Result<()> {
+        for view in self
+            .tracks
+            .iter_mut()
+            .map(|x| x.get_view(chrom, begin, end))
+        {
             let view = view?;
             buf.push(view);
         }
@@ -76,7 +99,7 @@ impl<R: Read + Seek> D4TrackReader<R> {
     pub fn get_view(&mut self, chrom: &str, begin: u32, end: u32) -> Result<D4TrackView<R>> {
         let primary_offset = self.header.primary_table_offset_of_chrom(chrom);
         let primary_size = self.header.primary_table_size_of_chrom(chrom);
-        if primary_size == 0 && self.header.dictionary().bit_width() != 0{
+        if primary_size == 0 && self.header.dictionary().bit_width() != 0 {
            return Err(Error::new(ErrorKind::Other, "chrom name not found"));
        }
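A short usage sketch of the two matrix-reader methods reflowed above. The `d4::ssio` path is taken from the file layout (`d4/src/ssio/`) and assumed to be public, and the URL is a placeholder; the signatures themselves are exactly those shown in this diff:

use d4::ssio::D4MatrixReader;

// Open every track in a remote D4 file (the predicate accepts all track
// paths), then fetch one view per track for a 10 kb window on chr1.
fn open_window(url: &str) -> std::io::Result<usize> {
    let mut reader = D4MatrixReader::open_tracks(url, |_| true)?;
    let mut views = Vec::new();
    reader.get_view("chr1", 0, 10_000, &mut views)?;
    Ok(views.len()) // one D4TrackView<HttpReader> per track
}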
6 changes: 3 additions & 3 deletions d4/src/ssio/view.rs
@@ -69,14 +69,14 @@ impl<R: Read + Seek> D4TrackView<R> {
         let mut buf = vec![0; size + 4];
         let mut buf_cursor = 0;
         if let Some((prev_start, prev_buf)) = self.primary_table_buffer.as_ref() {
-            let prev_start = (prev_start - prev_start % 8) as usize
-                * self.dictionary.bit_width() / 8;
+            let prev_start =
+                (prev_start - prev_start % 8) as usize * self.dictionary.bit_width() / 8;
             let prev_end = prev_start + prev_buf.len() - 4;
             let overlap_start = prev_start.max(start_byte);
             let overlap_end = prev_end.min(end_byte);
             if overlap_start == start_byte && overlap_start < overlap_end {
                 buf[..overlap_end - overlap_start].copy_from_slice(
-                    &prev_buf[overlap_start - prev_start..overlap_end - prev_start]
+                    &prev_buf[overlap_start - prev_start..overlap_end - prev_start],
                 );
                 buf_cursor = overlap_end - overlap_start;
             }
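The hunk above reuses any overlap between the previously fetched primary-table bytes and the newly requested range, so a scan over adjacent windows avoids re-reading. A simplified, self-contained sketch of the same idea, without the bit-width scaling or the 4-byte slack the real code carries:

// Copy the overlapping prefix from the previous buffer into `out`; return how
// many bytes were filled so the caller only reads the remainder from storage.
fn reuse_overlap(prev_start: usize, prev_buf: &[u8], start: usize, out: &mut [u8]) -> usize {
    let prev_end = prev_start + prev_buf.len();
    let end = start + out.len();
    let overlap_start = prev_start.max(start);
    let overlap_end = prev_end.min(end);
    if overlap_start == start && overlap_start < overlap_end {
        out[..overlap_end - overlap_start]
            .copy_from_slice(&prev_buf[overlap_start - prev_start..overlap_end - prev_start]);
        overlap_end - overlap_start
    } else {
        0
    }
}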
18 changes: 10 additions & 8 deletions d4/src/stab/sparse_array/reader.rs
@@ -13,9 +13,7 @@ pub(crate) fn assemble_incomplete_records<'a, R: Record>(
     if !incomplete_data.is_empty() {
         let bytes_needed = R::SIZE - incomplete_data.len();
         incomplete_data.extend_from_slice(&extra[..bytes_needed]);
-        let record = *unsafe {
-            std::mem::transmute::<_, &R>(&incomplete_data[0])
-        };
+        let record = *unsafe { std::mem::transmute::<_, &R>(&incomplete_data[0]) };
         buffer.push(RecordBlock::Record(record));
         incomplete_data.clear();
         return &extra[bytes_needed..];
@@ -176,11 +174,15 @@ mod mapped_io {
            let metadata = self
                .s_table_root
                .open_stream(SECONDARY_TABLE_METADATA_NAME)?;
-            let metadata = String::from_utf8_lossy({
-                let mut buf = Vec::new();
-                metadata.copy_content(&mut buf);
-                buf
-            }.as_ref()).to_string();
+            let metadata = String::from_utf8_lossy(
+                {
+                    let mut buf = Vec::new();
+                    metadata.copy_content(&mut buf);
+                    buf
+                }
+                .as_ref(),
+            )
+            .to_string();
            let actual_data = metadata.trim_end_matches(|c| c == '\0');
            serde_json::from_str(actual_data).ok()
        }
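The metadata stream read above is NUL-padded out to its frame size, so the reader strips the padding before handing the text to serde. The same parse in isolation (`serde_json::Value` stands in for the real metadata type):

// Decode a NUL-padded JSON blob: lossily convert to UTF-8, trim the trailing
// padding, then parse; returns None on malformed JSON, as the reader does.
fn parse_padded_json(raw: &[u8]) -> Option<serde_json::Value> {
    let text = String::from_utf8_lossy(raw);
    let trimmed = text.trim_end_matches('\0');
    serde_json::from_str(trimmed).ok()
}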
2 changes: 1 addition & 1 deletion d4/src/stab/sparse_array/writer.rs
@@ -52,7 +52,7 @@ impl<R: Record> SecondaryTableWriter for SparseArrayWriter<R> {
         let mut metadata_stream = self.0.create_stream(SECONDARY_TABLE_METADATA_NAME, 512)?;
         metadata_stream.write_with_alloc_callback(
             serde_json::to_string(&metadata).unwrap().as_bytes(),
-            |s| s.set_frame_size(65536)
+            |s| s.set_frame_size(65536),
         )?;
         let compression = self.1;
         Ok(partitions
4 changes: 2 additions & 2 deletions d4/src/task/histogram.rs
@@ -42,7 +42,7 @@ impl TaskPartition<Once<i32>> for Partition {

     #[inline(always)]
     fn init(&mut self) {
-        self.histogram = Some(vec![0;self.range]);
+        self.histogram = Some(vec![0; self.range]);
     }

     #[inline(always)]
@@ -92,4 +92,4 @@ impl Task<std::iter::Once<i32>> for Histogram {
         }
         (below, histogram, above)
     }
-}
\ No newline at end of file
+}
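The `(below, histogram, above)` tuple returned above separates out-of-range counts from the in-range buckets. A self-contained illustration of that convention over a plain slice (not the `Task` API itself):

// Count values below `min`, at or above `max`, and per-value in between.
fn bucket(values: &[i32], min: i32, max: i32) -> (usize, Vec<usize>, usize) {
    let mut histogram = vec![0usize; (max - min) as usize];
    let (mut below, mut above) = (0, 0);
    for &v in values {
        if v < min {
            below += 1;
        } else if v >= max {
            above += 1;
        } else {
            histogram[(v - min) as usize] += 1;
        }
    }
    (below, histogram, above)
}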
2 changes: 1 addition & 1 deletion d4/src/task/mod.rs
@@ -142,7 +142,7 @@ pub trait TaskPartition<RowType: Iterator<Item = i32> + ExactSizeIterator>: Send
     type ResultType: Send + Clone;
     /// The type for a single row
     fn new(left: u32, right: u32, parent: &Self::ParentType) -> Self;
-    /// Initlize the task
+    /// Initlize the task
     #[inline(always)]
     fn init(&mut self) {}
     /// Feed one value to the task
14 changes: 7 additions & 7 deletions d4tools/src/create/main.rs
@@ -46,8 +46,8 @@ fn main_impl(matches: ArgMatches<'_>) -> Result<(), Box<dyn std::error::Error>>

     let mut enable_compression = false;

-    if (!matches.is_present("dict_range") && !matches.is_present("dict-file"))
-        || matches.is_present("dict-auto")
+    if (!matches.is_present("dict_range") && !matches.is_present("dict-file"))
+        || matches.is_present("dict-auto")
     {
         match input_type {
             InputType::Alignment => {
@@ -57,27 +57,27 @@ fn main_impl(matches: ArgMatches<'_>) -> Result<(), Box<dyn std::error::Error>>
                     matches.value_of("ref"),
                     min_mq,
                 )?);
-            },
+            }
             InputType::BiwWig => {
                 let fp = std::fs::metadata(input_path)?;
                 let bw_file = d4_bigwig::BigWigFile::open(input_path)?;

-                let genome_size : u64 = bw_file.chroms().into_iter().map(|(_, sz)| sz as u64).sum();
+                let genome_size: u64 = bw_file.chroms().into_iter().map(|(_, sz)| sz as u64).sum();

                 let file_size = fp.len();

                 if file_size < genome_size / 8 {
                     d4_builder.set_dictionary(Dictionary::new_simple_range_dict(0, 1)?);
                     enable_compression = true;
                 }
-            },
+            }
             InputType::BedGraph => {
                 let genomes = parse_genome_file(
                     matches
                         .value_of("genome")
                         .expect("Genome file is required for text file format"),
                 )?;
-                let genome_size : u64 = genomes.into_iter().map(|chr| chr.size as u64).sum();
+                let genome_size: u64 = genomes.into_iter().map(|chr| chr.size as u64).sum();

                 let fp = std::fs::metadata(input_path)?;
                 let file_size = fp.len();
@@ -86,7 +86,7 @@ fn main_impl(matches: ArgMatches<'_>) -> Result<(), Box<dyn std::error::Error>>
                     d4_builder.set_dictionary(Dictionary::new_simple_range_dict(0, 1)?);
                     enable_compression = true;
                 }
-            },
+            }
             _ => {
                 panic!("Unsupported input type")
             }
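Both the BigWig and BedGraph branches above apply the same heuristic: a dense 1-bit-per-base primary table costs about `genome_size / 8` bytes, so if the input file is already smaller than that, the signal must be sparse, and a 0..1 range dictionary plus secondary-table compression is the cheaper encoding. Stated on its own (a restatement of the check, not code from this commit):

// True when the input is smaller than a 1-bit-per-base primary table would
// be, i.e. when a sparse, compressed layout is expected to win.
fn prefer_sparse_layout(file_size: u64, genome_size: u64) -> bool {
    file_size < genome_size / 8
}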
28 changes: 17 additions & 11 deletions d4tools/src/show/main.rs
@@ -11,7 +11,7 @@ use std::{
     borrow::{Borrow, Cow},
     collections::HashMap,
     fs::File,
-    io::{Error, ErrorKind, Read, Result as IOResult, Seek, Write, BufReader, BufRead},
+    io::{BufRead, BufReader, Error, ErrorKind, Read, Result as IOResult, Seek, Write},
     path::Path,
 };
 fn write_bed_record_fast<W: Write>(
@@ -210,16 +210,19 @@ fn show_impl<R: Read + Seek, I: Iterator<Item = String>>(
     for path in path_buf.iter() {
         let track_root = match file_root.open(path)? {
             OpenResult::SubDir(track_root) => track_root,
-            _ => return Err(Error::new(
-                ErrorKind::Other,
-                format!("Unable to open track {}", path.to_string_lossy()),
-            ).into()),
+            _ => {
+                return Err(Error::new(
+                    ErrorKind::Other,
+                    format!("Unable to open track {}", path.to_string_lossy()),
+                )
+                .into())
+            }
         };

         if print_header {
-            print!("\t{}",
-                path
-                    .file_name()
+            print!(
+                "\t{}",
+                path.file_name()
                     .map(|x| x.to_string_lossy().to_string())
                     .unwrap_or_else(|| "<null>".to_string())
             );
@@ -237,7 +240,8 @@ fn show_impl<R: Read + Seek, I: Iterator<Item = String>>(
         return Err(Error::new(
             ErrorKind::Other,
             "Inconsistent reference genome".to_string(),
-        ).into())
+        )
+        .into());
     }

     if show_genome {
@@ -288,7 +292,7 @@ pub fn entry_point(args: Vec<String>) -> Result<(), Box<dyn std::error::Error>>
                 continue;
             }
             let mut splitted = buf.trim().split('\t');
-            let (raw_chr, raw_beg, raw_end) = (splitted.next(),splitted.next(), splitted.next());
+            let (raw_chr, raw_beg, raw_end) = (splitted.next(), splitted.next(), splitted.next());
             if raw_chr.is_some() && raw_beg.is_some() && raw_end.is_some() {
                 if let Ok(begin) = raw_beg.unwrap().parse::<u32>() {
                     if let Ok(end) = raw_end.unwrap().parse::<u32>() {
@@ -302,7 +306,9 @@ pub fn entry_point(args: Vec<String>) -> Result<(), Box<dyn std::error::Error>>
         }
         Some(region_list.into_iter())
     } else {
-        matches.values_of("regions").map(|x| x.map(|y| y.to_owned()).collect::<Vec<_>>().into_iter())
+        matches
+            .values_of("regions")
+            .map(|x| x.map(|y| y.to_owned()).collect::<Vec<_>>().into_iter())
     };

     if input_filename.starts_with("http://") || input_filename.starts_with("https://") {
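The region-file branch above accepts `chrom<TAB>begin<TAB>end` lines and silently skips anything that fails to parse. The same parse in isolation (a sketch, not the tool's own code):

// Parse one "chrom\tbegin\tend" region line; None if a field is missing or a
// coordinate is not a valid u32, mirroring the skip-on-error behavior above.
fn parse_region(line: &str) -> Option<(&str, u32, u32)> {
    let mut fields = line.trim().split('\t');
    let chr = fields.next()?;
    let begin = fields.next()?.parse().ok()?;
    let end = fields.next()?.parse().ok()?;
    Some((chr, begin, end))
}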
5 changes: 4 additions & 1 deletion d4tools/src/stat/main.rs
@@ -139,7 +139,10 @@ where
     })
 }

-fn percentile_stat(matches: ArgMatches<'_>, percentile: f64) -> Result<(), Box<dyn std::error::Error>> {
+fn percentile_stat(
+    matches: ArgMatches<'_>,
+    percentile: f64,
+) -> Result<(), Box<dyn std::error::Error>> {
     let histograms = run_task::<Histogram>(matches)?;
     for OwnedOutput {
         chrom: chr,
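The body of `percentile_stat` is not shown past its signature, but the underlying technique — reading a percentile off a histogram — is a cumulative walk over the buckets. A generic sketch, assuming a percentile expressed as a fraction in [0, 1] (the real function's convention is not visible here):

// Walk the histogram until the running count reaches percentile * total;
// the bucket index where that happens is the percentile value.
fn percentile_from_hist(hist: &[u64], percentile: f64) -> usize {
    let total: u64 = hist.iter().sum();
    let target = (total as f64 * percentile).ceil() as u64;
    let mut seen = 0u64;
    for (value, &count) in hist.iter().enumerate() {
        seen += count;
        if seen >= target {
            return value;
        }
    }
    hist.len().saturating_sub(1)
}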
