Skip to content

Commit

Permalink
Merge pull request #28 from MannLabs/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
sander-willems-bruker authored Aug 28, 2024
2 parents 21efdf9 + 4e108c1 commit 96716e5
Show file tree
Hide file tree
Showing 48 changed files with 2,040 additions and 1,253 deletions.
686 changes: 295 additions & 391 deletions Cargo.lock

Large diffs are not rendered by default.

18 changes: 11 additions & 7 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "timsrust"
version = "0.3.0"
version = "0.4.0"
edition = "2021"
description = "A crate to read Bruker timsTOF data"
license = "Apache-2.0"
Expand All @@ -14,18 +14,22 @@ keywords = ["MS", "LC-TIMS-TOF", "PASEF"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
byteorder = "1.4.3"
zstd = "0.12.3"
rusqlite = { version = "0.29.0", features = ["bundled"] }
rayon = "1.5"
zstd = "0.13.2"
rayon = "1.10.0"
linreg = "0.2.0"
bytemuck = "1.13.1"
parquet = "42.0.0"
thiserror = "1.0.0"
memmap2 = "0.9.3"
rusqlite = { version = "0.31.0", features = ["bundled"], optional = true}
parquet = { version = "42.0.0", optional = true }

[features]
tdf = ["rusqlite"]
minitdf = ["parquet"]
default = ["tdf", "minitdf"]

[dev-dependencies]
criterion = { version = "0.4", features = ["html_reports"] }
criterion = { version = "0.5.1", features = ["html_reports"] }

[[bench]]
name = "speed_performance"
Expand Down
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

A crate to read Bruker timsTOF data.

## Stability

**NOTE**: TimsRust does not yet have a stable version! Use with caution.

## Installation

Add this crate to your `Cargo.toml`:
Expand Down Expand Up @@ -43,3 +47,14 @@ Two file formats are supported:
## Python bindings

The [timsrust_pyo3](https://github.com/jspaezp/timsrust_pyo3) package is an example of how the performance of TimsRust can be utilized in Python.

## Planned changes for future versions
TODO
* Improve docs
* Improve tests
* Parse CompressionType1
* Make Path of TimsTOF data into special type
* Single access point for all readers?
* Few unchecked unwraps left
* Queryable data in all dimensions
* ...
76 changes: 48 additions & 28 deletions benches/speed_performance.rs
Original file line number Diff line number Diff line change
@@ -1,93 +1,113 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use timsrust::FileReader;
use rayon::iter::ParallelIterator;
#[cfg(feature = "tdf")]
use timsrust::readers::FrameReader;
use timsrust::readers::{SpectrumReader, SpectrumReaderConfig};

const DDA_TEST: &str =
"/mnt/c/Users/Sander.Willems/Documents/data/tims05_300SPD/20230505_TIMS05_PaSk_MA_HeLa_6min_ddaP_S1-C10_1_2323.d/";
"/mnt/d/data/mpib/tims05_300SPD/20230505_TIMS05_PaSk_MA_HeLa_6min_ddaP_S1-C10_1_2323.d/";
const DIA_TEST: &str =
"/mnt/c/Users/Sander.Willems/Documents/data/20230505_TIMS05_PaSk_SA_HeLa_6min_diaP_8scans_S1-D3_1_2329.d/";
const SYP_TEST: &str =
"/mnt/c/Users/Sander.Willems/Documents/data/20230505_TIMS05_PaSk_SA_HeLa_6min_syP_5scans_30Da_S1-D4_1_2330.d/";

fn read_all_frames(file_reader: &FileReader) {
file_reader.read_all_frames();
#[cfg(feature = "tdf")]
fn read_all_frames(frame_reader: &FrameReader) {
frame_reader.get_all();
}

fn read_all_ms1_frames(file_reader: &FileReader) {
file_reader.read_all_ms1_frames();
#[cfg(feature = "tdf")]
fn read_all_ms1_frames(frame_reader: &FrameReader) {
frame_reader.get_all_ms1();
}

fn read_all_ms2_frames(file_reader: &FileReader) {
file_reader.read_all_ms2_frames();
#[cfg(feature = "tdf")]
fn read_all_ms2_frames(frame_reader: &FrameReader) {
frame_reader.get_all_ms2();
}

fn read_all_spectra(file_reader: &FileReader) {
file_reader.read_all_spectra();
fn read_all_spectra(spectrum_reader: &SpectrumReader) {
spectrum_reader.get_all();
}

fn criterion_benchmark_dda(c: &mut Criterion) {
#[cfg(feature = "tdf")]
fn criterion_benchmark_dda_frames(c: &mut Criterion) {
// c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20))));
let mut group = c.benchmark_group("sample-size-example");
group.significance_level(0.001).sample_size(10);
let d_folder_name: &str = DDA_TEST;
let file_reader: FileReader =
FileReader::new(d_folder_name.to_string()).unwrap();
let frame_reader = FrameReader::new(d_folder_name).unwrap();
group.bench_function("DDA read_all_frames 6m", |b| {
b.iter(|| read_all_frames(black_box(&file_reader)))
b.iter(|| read_all_frames(black_box(&frame_reader)))
});
group.bench_function("DDA read_all_ms1_frames 6m", |b| {
b.iter(|| read_all_ms1_frames(black_box(&file_reader)))
b.iter(|| read_all_ms1_frames(black_box(&frame_reader)))
});
group.bench_function("DDA read_all_ms2_frames 6m", |b| {
b.iter(|| read_all_ms2_frames(black_box(&file_reader)))
b.iter(|| read_all_ms2_frames(black_box(&frame_reader)))
});
group.finish();
}

#[cfg(feature = "tdf")]
fn criterion_benchmark_dda_spectra(c: &mut Criterion) {
// c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20))));
let mut group = c.benchmark_group("sample-size-example");
group.significance_level(0.001).sample_size(10);
let d_folder_name: &str = DDA_TEST;
let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap();
group.bench_function("DDA read_all_spectra 6m", |b| {
b.iter(|| read_all_spectra(black_box(&file_reader)))
b.iter(|| read_all_spectra(black_box(&spectrum_reader)))
});
group.finish();
}

#[cfg(feature = "tdf")]
fn criterion_benchmark_dia(c: &mut Criterion) {
// c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20))));
let mut group = c.benchmark_group("sample-size-example");
group.significance_level(0.001).sample_size(10);
let d_folder_name: &str = DIA_TEST;
let file_reader: FileReader =
FileReader::new(d_folder_name.to_string()).unwrap();
let frame_reader = FrameReader::new(d_folder_name).unwrap();
let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap();
group.bench_function("DIA read_all_frames 6m", |b| {
b.iter(|| read_all_frames(black_box(&file_reader)))
b.iter(|| read_all_frames(black_box(&frame_reader)))
});
group.bench_function("DIA read_all_ms1_frames 6m", |b| {
b.iter(|| read_all_ms1_frames(black_box(&file_reader)))
b.iter(|| read_all_ms1_frames(black_box(&frame_reader)))
});
group.bench_function("DIA read_all_ms2_frames 6m", |b| {
b.iter(|| read_all_ms2_frames(black_box(&file_reader)))
b.iter(|| read_all_ms2_frames(black_box(&frame_reader)))
});
group.finish();
}

#[cfg(feature = "tdf")]
fn criterion_benchmark_syp(c: &mut Criterion) {
// c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20))));
let mut group = c.benchmark_group("sample-size-example");
group.significance_level(0.001).sample_size(10);
let d_folder_name: &str = SYP_TEST;
let file_reader: FileReader =
FileReader::new(d_folder_name.to_string()).unwrap();
let frame_reader = FrameReader::new(d_folder_name).unwrap();
let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap();
group.bench_function("SYP read_all_frames 6m", |b| {
b.iter(|| read_all_frames(black_box(&file_reader)))
b.iter(|| read_all_frames(black_box(&frame_reader)))
});
group.bench_function("SYP read_all_ms1_frames 6m", |b| {
b.iter(|| read_all_ms1_frames(black_box(&file_reader)))
b.iter(|| read_all_ms1_frames(black_box(&frame_reader)))
});
group.bench_function("SYP read_all_ms2_frames 6m", |b| {
b.iter(|| read_all_ms2_frames(black_box(&file_reader)))
b.iter(|| read_all_ms2_frames(black_box(&frame_reader)))
});
group.finish();
}

#[cfg(feature = "tdf")]
criterion_group!(
benches,
criterion_benchmark_dda,
criterion_benchmark_dda_spectra,
// criterion_benchmark_dia,
// criterion_benchmark_syp
);
#[cfg(feature = "tdf")]
criterion_main!(benches);
2 changes: 2 additions & 0 deletions src/domain_converters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,6 @@ pub use tof_to_mz::Tof2MzConverter;
/// Convert from one domain (e.g. Time of Flight) to another (m/z).
///
/// Implementors provide the forward mapping via `convert` and the reverse
/// mapping via `invert`. Both accept any `Copy` value that can be turned
/// into an `f64` and return the result as an `f64`.
pub trait ConvertableDomain {
/// Maps `value` from the source domain into the target domain.
fn convert<T: Into<f64> + Copy>(&self, value: T) -> f64;

/// Maps `value` from the target domain back into the source domain,
/// i.e. the reverse direction of `convert`.
fn invert<T: Into<f64> + Copy>(&self, value: T) -> f64;
}
18 changes: 17 additions & 1 deletion src/domain_converters/frame_to_rt.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/// A converter from Frame -> retention time.
#[derive(Debug, Clone)]
#[derive(Debug, Default, Clone, PartialEq)]
pub struct Frame2RtConverter {
rt_values: Vec<f64>,
}
Expand All @@ -16,4 +16,20 @@ impl super::ConvertableDomain for Frame2RtConverter {
let upper_value: f64 = self.rt_values[value.into().ceil() as usize];
(lower_value + upper_value) / 2.
}
/// Converts a retention time back into a (fractional) frame index.
///
/// The stored retention times are located with a binary search, so they
/// are assumed to be sorted in ascending order.
///
/// - An exact match returns the index of the matching entry.
/// - A value strictly between two neighbouring entries is linearly
///   interpolated between the indices of those two entries.
/// - A value below the first entry returns `0`; a value above the last
///   entry returns the number of stored entries.
///
/// # Panics
///
/// Panics with "Cannot handle NaNs" if a comparison made during the search
/// involves a NaN.
fn invert<T: Into<f64> + Copy>(&self, value: T) -> f64 {
    let rt_value: f64 = value.into();
    let search = self.rt_values.binary_search_by(|probe| {
        probe.partial_cmp(&rt_value).expect("Cannot handle NaNs")
    });
    match search {
        Ok(index) => index as f64,
        Err(index) if index > 0 && index < self.rt_values.len() => {
            // `index` is the insertion point, so the value lies strictly
            // between entries `index - 1` and `index`. Interpolation must
            // therefore start from `index - 1`; starting from `index` would
            // overshoot by one frame and be discontinuous with the exact
            // match branch above.
            let start = self.rt_values[index - 1];
            let end = self.rt_values[index];
            (index - 1) as f64 + (rt_value - start) / (end - start)
        },
        // Out of range on either side (or no stored values): return the
        // insertion point unchanged.
        Err(index) => index as f64,
    }
}
}
11 changes: 8 additions & 3 deletions src/domain_converters/scan_to_im.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/// A converter from Scan -> (inversed) ion mobility.
#[derive(Debug, Clone)]
#[derive(Debug, Default, Clone, Copy, PartialEq)]
pub struct Scan2ImConverter {
scan_intercept: f64,
scan_slope: f64,
Expand All @@ -22,7 +22,12 @@ impl Scan2ImConverter {

impl super::ConvertableDomain for Scan2ImConverter {
fn convert<T: Into<f64> + Copy>(&self, value: T) -> f64 {
let scan_index_f64: f64 = value.into();
self.scan_intercept + self.scan_slope * scan_index_f64
let scan_index: f64 = value.into();
self.scan_intercept + self.scan_slope * scan_index
}

/// Maps an ion mobility value back to its (fractional) scan index.
///
/// Solves `im = scan_intercept + scan_slope * scan` for `scan`, which is
/// the algebraic reverse of `convert`.
fn invert<T: Into<f64> + Copy>(&self, value: T) -> f64 {
    let offset_from_intercept = Into::<f64>::into(value) - self.scan_intercept;
    offset_from_intercept / self.scan_slope
}
}
12 changes: 8 additions & 4 deletions src/domain_converters/tof_to_mz.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use linreg::linear_regression;

/// A converter from TOF -> m/z.
#[derive(Debug, Clone)]
#[derive(Debug, Default, Clone, Copy, PartialEq)]
pub struct Tof2MzConverter {
tof_intercept: f64,
tof_slope: f64,
Expand All @@ -22,7 +22,7 @@ impl Tof2MzConverter {
}
}

pub fn from_pairs(data: &Vec<(f64, u32)>) -> Self {
pub fn regress_from_pairs(data: &Vec<(f64, u32)>) -> Self {
let x: Vec<u32> = data.iter().map(|(_, x_val)| *x_val).collect();
let y: Vec<f64> =
data.iter().map(|(y_val, _)| (*y_val).sqrt()).collect();
Expand All @@ -36,7 +36,11 @@ impl Tof2MzConverter {

impl super::ConvertableDomain for Tof2MzConverter {
fn convert<T: Into<f64> + Copy>(&self, value: T) -> f64 {
let tof_index_f64: f64 = value.into();
(self.tof_intercept + self.tof_slope * tof_index_f64).powi(2)
let tof_index: f64 = value.into();
(self.tof_intercept + self.tof_slope * tof_index).powi(2)
}
/// Maps an m/z value back to its (fractional) TOF index.
///
/// Solves `mz = (tof_intercept + tof_slope * tof)^2` for `tof` by taking
/// the square root of the m/z value and undoing the linear part.
fn invert<T: Into<f64> + Copy>(&self, value: T) -> f64 {
    let sqrt_mz = Into::<f64>::into(value).sqrt();
    (sqrt_mz - self.tof_intercept) / self.tof_slope
}
}
29 changes: 18 additions & 11 deletions src/errors.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
use crate::{
file_readers,
// io::readers::common::{sql_reader::SqlError, tdf_blobs::TdfBlobError},
#[cfg(feature = "tdf")]
use crate::io::readers::{
FrameReaderError, MetadataReaderError, QuadrupoleSettingsReaderError,
};
use crate::io::readers::{PrecursorReaderError, SpectrumReaderError};

/// An error that is produced by timsrust (uses [thiserror]).
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// An error to indicate a path is not a Bruker File Format.
#[error("FileFormatError: {0}")]
FileFormatError(#[from] file_readers::FileFormatError),
// #[error("SqlError: {0}")]
// SqlError(#[from] SqlError),
// #[error("BinError: {0}")]
// BinError(#[from] TdfBlobError),
pub enum TimsRustError {
#[cfg(feature = "tdf")]
#[error("{0}")]
FrameReaderError(#[from] FrameReaderError),
#[error("{0}")]
SpectrumReaderError(#[from] SpectrumReaderError),
#[cfg(feature = "tdf")]
#[error("{0}")]
MetadataReaderError(#[from] MetadataReaderError),
#[error("{0}")]
PrecursorReaderError(#[from] PrecursorReaderError),
#[cfg(feature = "tdf")]
#[error("{0}")]
QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError),
}
Loading

0 comments on commit 96716e5

Please sign in to comment.