From c9e31f915d9a713944061083c41ff0432d8174a5 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 5 Apr 2024 14:45:53 +0200 Subject: [PATCH 001/109] CHORE: version update to 0.2.5 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c6b75c8..aa7d4d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1227,7 +1227,7 @@ dependencies = [ [[package]] name = "timsrust" -version = "0.2.4" +version = "0.2.5" dependencies = [ "bytemuck", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index dc11952..a05990a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "timsrust" -version = "0.2.4" +version = "0.2.5" edition = "2021" description = "A crate to read Bruker timsTOF data" license = "Apache-2.0" From 0211d139fbdf097d30ff2bf30f29c4eb6109133e Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 8 Apr 2024 14:25:46 +0200 Subject: [PATCH 002/109] CHORE: refactored converters to be more explicit --- src/calibration.rs | 2 +- src/converters.rs | 95 ++----------------- src/converters/frame_to_rt.rs | 19 ++++ src/converters/scan_to_im.rs | 28 ++++++ src/converters/tof_to_mz.rs | 42 ++++++++ .../common/sql_reader/metadata.rs | 4 +- src/file_readers/frame_readers/tdf_reader.rs | 4 +- .../spectrum_readers/dda_reader.rs | 2 +- .../spectrum_readers/dda_reader/precursors.rs | 2 +- src/lib.rs | 2 +- src/spectra.rs | 2 +- 11 files changed, 107 insertions(+), 95 deletions(-) create mode 100644 src/converters/frame_to_rt.rs create mode 100644 src/converters/scan_to_im.rs create mode 100644 src/converters/tof_to_mz.rs diff --git a/src/calibration.rs b/src/calibration.rs index 7b89913..de0cb22 100644 --- a/src/calibration.rs +++ b/src/calibration.rs @@ -1,5 +1,5 @@ use crate::{ - converters::{ConvertableIndex, Tof2MzConverter}, + converters::{ConvertableDomain, Tof2MzConverter}, spectra::RawSpectrum, Precursor, }; diff --git a/src/converters.rs b/src/converters.rs index fab7f40..dd3652c 100644 --- a/src/converters.rs +++ b/src/converters.rs @@ -1,89 +1,12 @@ -use linreg::linear_regression; +mod frame_to_rt; +mod scan_to_im; +mod tof_to_mz; -/// Converting from an index domain (e.g. Time of Flight) to a continuous domain (m/z). -pub trait ConvertableIndex { - /// Convert any index (even fractional) to a continuous value. - fn convert + Copy>(&self, index: T) -> f64; -} - -/// A converter from TOF -> m/z. -#[derive(Debug, Copy, Clone)] -pub struct Tof2MzConverter { - tof_intercept: f64, - tof_slope: f64, -} - -impl Tof2MzConverter { - pub fn new(mz_min: f64, mz_max: f64, tof_max_index: u32) -> Self { - let tof_intercept: f64 = mz_min.sqrt(); - let tof_slope: f64 = - (mz_max.sqrt() - tof_intercept) / tof_max_index as f64; - Self { - tof_intercept, - tof_slope, - } - } - - pub fn from_unfragmented_precursors(data: &Vec<(f64, u32)>) -> Self { - let x: Vec = data.iter().map(|(_, x_val)| *x_val).collect(); - let y: Vec = - data.iter().map(|(y_val, _)| (*y_val).sqrt()).collect(); - let (tof_slope, tof_intercept) = linear_regression(&x, &y).unwrap(); - Self { - tof_intercept, - tof_slope, - } - } -} - -impl ConvertableIndex for Tof2MzConverter { - fn convert + Copy>(&self, index: T) -> f64 { - let tof_index_f64: f64 = index.into(); - (self.tof_intercept + self.tof_slope * tof_index_f64).powi(2) - } -} - -/// A converter from Scan -> ion mobility. -#[derive(Debug, Copy, Clone)] -pub struct Scan2ImConverter { - scan_intercept: f64, - scan_slope: f64, -} - -impl Scan2ImConverter { - pub fn new(im_min: f64, im_max: f64, scan_max_index: u32) -> Self { - let scan_intercept: f64 = im_max; - let scan_slope: f64 = (im_min - scan_intercept) / scan_max_index as f64; - Self { - scan_intercept, - scan_slope, - } - } -} - -impl ConvertableIndex for Scan2ImConverter { - fn convert + Copy>(&self, index: T) -> f64 { - let scan_index_f64: f64 = index.into(); - self.scan_intercept + self.scan_slope * scan_index_f64 - } -} - -/// A converter from Frame -> retention time. -#[derive(Debug, Clone)] -pub struct Frame2RtConverter { - rt_values: Vec, -} - -impl Frame2RtConverter { - pub fn new(rt_values: Vec) -> Self { - Self { rt_values } - } -} +pub use frame_to_rt::Frame2RtConverter; +pub use scan_to_im::Scan2ImConverter; +pub use tof_to_mz::Tof2MzConverter; -impl ConvertableIndex for Frame2RtConverter { - fn convert + Copy>(&self, index: T) -> f64 { - let lower_value: f64 = self.rt_values[index.into().floor() as usize]; - let upper_value: f64 = self.rt_values[index.into().ceil() as usize]; - (lower_value + upper_value) / 2. - } +/// Convert from one domain (e.g. Time of Flight) to a another (m/z). +pub trait ConvertableDomain { + fn convert + Copy>(&self, value: T) -> f64; } diff --git a/src/converters/frame_to_rt.rs b/src/converters/frame_to_rt.rs new file mode 100644 index 0000000..5c83388 --- /dev/null +++ b/src/converters/frame_to_rt.rs @@ -0,0 +1,19 @@ +/// A converter from Frame -> retention time. +#[derive(Debug, Clone)] +pub struct Frame2RtConverter { + rt_values: Vec, +} + +impl Frame2RtConverter { + pub fn from_values(rt_values: Vec) -> Self { + Self { rt_values } + } +} + +impl super::ConvertableDomain for Frame2RtConverter { + fn convert + Copy>(&self, value: T) -> f64 { + let lower_value: f64 = self.rt_values[value.into().floor() as usize]; + let upper_value: f64 = self.rt_values[value.into().ceil() as usize]; + (lower_value + upper_value) / 2. + } +} diff --git a/src/converters/scan_to_im.rs b/src/converters/scan_to_im.rs new file mode 100644 index 0000000..5968a79 --- /dev/null +++ b/src/converters/scan_to_im.rs @@ -0,0 +1,28 @@ +/// A converter from Scan -> (inversed) ion mobility. +#[derive(Debug, Copy, Clone)] +pub struct Scan2ImConverter { + scan_intercept: f64, + scan_slope: f64, +} + +impl Scan2ImConverter { + pub fn from_boundaries( + im_min: f64, + im_max: f64, + scan_max_index: u32, + ) -> Self { + let scan_intercept: f64 = im_max; + let scan_slope: f64 = (im_min - scan_intercept) / scan_max_index as f64; + Self { + scan_intercept, + scan_slope, + } + } +} + +impl super::ConvertableDomain for Scan2ImConverter { + fn convert + Copy>(&self, value: T) -> f64 { + let scan_index_f64: f64 = value.into(); + self.scan_intercept + self.scan_slope * scan_index_f64 + } +} diff --git a/src/converters/tof_to_mz.rs b/src/converters/tof_to_mz.rs new file mode 100644 index 0000000..3613914 --- /dev/null +++ b/src/converters/tof_to_mz.rs @@ -0,0 +1,42 @@ +use linreg::linear_regression; + +/// A converter from TOF -> m/z. +#[derive(Debug, Copy, Clone)] +pub struct Tof2MzConverter { + tof_intercept: f64, + tof_slope: f64, +} + +impl Tof2MzConverter { + pub fn from_boundaries( + mz_min: f64, + mz_max: f64, + tof_max_index: u32, + ) -> Self { + let tof_intercept: f64 = mz_min.sqrt(); + let tof_slope: f64 = + (mz_max.sqrt() - tof_intercept) / tof_max_index as f64; + Self { + tof_intercept, + tof_slope, + } + } + + pub fn from_pairs(data: &Vec<(f64, u32)>) -> Self { + let x: Vec = data.iter().map(|(_, x_val)| *x_val).collect(); + let y: Vec = + data.iter().map(|(y_val, _)| (*y_val).sqrt()).collect(); + let (tof_slope, tof_intercept) = linear_regression(&x, &y).unwrap(); + Self { + tof_intercept, + tof_slope, + } + } +} + +impl super::ConvertableDomain for Tof2MzConverter { + fn convert + Copy>(&self, value: T) -> f64 { + let tof_index_f64: f64 = value.into(); + (self.tof_intercept + self.tof_slope * tof_index_f64).powi(2) + } +} diff --git a/src/file_readers/common/sql_reader/metadata.rs b/src/file_readers/common/sql_reader/metadata.rs index 6e1e29b..5a00201 100644 --- a/src/file_readers/common/sql_reader/metadata.rs +++ b/src/file_readers/common/sql_reader/metadata.rs @@ -77,13 +77,13 @@ impl SqlReader { impl ReadableFromSql for Tof2MzConverter { fn from_sql(sql_reader: &SqlReader) -> Self { let (tof_max_index, mz_min, mz_max) = sql_reader.read_mz_information(); - Tof2MzConverter::new(mz_min, mz_max, tof_max_index) + Tof2MzConverter::from_boundaries(mz_min, mz_max, tof_max_index) } } impl ReadableFromSql for Scan2ImConverter { fn from_sql(sql_reader: &SqlReader) -> Self { let (scan_max_index, im_min, im_max) = sql_reader.read_im_information(); - Scan2ImConverter::new(im_min, im_max, scan_max_index) + Scan2ImConverter::from_boundaries(im_min, im_max, scan_max_index) } } diff --git a/src/file_readers/frame_readers/tdf_reader.rs b/src/file_readers/frame_readers/tdf_reader.rs index 1c7a264..3fae8ae 100644 --- a/src/file_readers/frame_readers/tdf_reader.rs +++ b/src/file_readers/frame_readers/tdf_reader.rs @@ -2,7 +2,7 @@ use { crate::{ acquisition::AcquisitionType, converters::{ - ConvertableIndex, Frame2RtConverter, Scan2ImConverter, + ConvertableDomain, Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, }, file_readers::{ @@ -68,7 +68,7 @@ impl TDFReader { fn get_rt_converter(frame_table: &FrameTable) -> Frame2RtConverter { let retention_times: Vec = frame_table.rt.clone(); - Frame2RtConverter::new(retention_times) + Frame2RtConverter::from_values(retention_times) } } diff --git a/src/file_readers/spectrum_readers/dda_reader.rs b/src/file_readers/spectrum_readers/dda_reader.rs index e0d545f..cca3527 100644 --- a/src/file_readers/spectrum_readers/dda_reader.rs +++ b/src/file_readers/spectrum_readers/dda_reader.rs @@ -120,7 +120,7 @@ impl ReadableSpectra for DDASpectrumReader { ); let mz_reader: Tof2MzConverter; if hits.len() >= 2 { - mz_reader = Tof2MzConverter::from_unfragmented_precursors(&hits); + mz_reader = Tof2MzConverter::from_pairs(&hits); } else { mz_reader = self.mz_reader } diff --git a/src/file_readers/spectrum_readers/dda_reader/precursors.rs b/src/file_readers/spectrum_readers/dda_reader/precursors.rs index 482ede4..ccdc489 100644 --- a/src/file_readers/spectrum_readers/dda_reader/precursors.rs +++ b/src/file_readers/spectrum_readers/dda_reader/precursors.rs @@ -1,7 +1,7 @@ use rayon::prelude::*; use crate::{ - converters::{ConvertableIndex, Scan2ImConverter}, + converters::{ConvertableDomain, Scan2ImConverter}, file_readers::{ common::sql_reader::{ PasefFrameMsMsTable, PrecursorTable, ReadableFromSql, diff --git a/src/lib.rs b/src/lib.rs index c3645c1..e268b52 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,7 +34,7 @@ mod vec_utils; pub use crate::{ acquisition::AcquisitionType, converters::{ - ConvertableIndex, Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, + ConvertableDomain, Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, }, errors::*, file_readers::FileReader, diff --git a/src/spectra.rs b/src/spectra.rs index 9f9eadf..cacdefc 100644 --- a/src/spectra.rs +++ b/src/spectra.rs @@ -1,5 +1,5 @@ use crate::{ - converters::{ConvertableIndex, Tof2MzConverter}, + converters::{ConvertableDomain, Tof2MzConverter}, precursors::QuadrupoleEvent, vec_utils::{filter_with_mask, find_sparse_local_maxima_mask}, Precursor, From 8c39a9e1ec91d8732a2d1f4e0adcfa05d2bbac95 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 8 Apr 2024 14:37:15 +0200 Subject: [PATCH 003/109] CHORE: grouped ms data together --- src/calibration.rs | 2 +- src/data.rs | 9 +++++++++ src/{ => data}/acquisition.rs | 1 + src/{ => data}/frames.rs | 2 +- src/{ => data}/precursors.rs | 0 src/{ => data}/spectra.rs | 3 +-- src/file_readers/frame_readers/tdf_reader.rs | 3 +-- src/file_readers/spectrum_readers/dda_reader.rs | 7 +++---- src/file_readers/spectrum_readers/mini_tdf_reader.rs | 3 +-- src/lib.rs | 12 ++++-------- 10 files changed, 22 insertions(+), 20 deletions(-) create mode 100644 src/data.rs rename src/{ => data}/acquisition.rs (91%) rename src/{ => data}/frames.rs (92%) rename src/{ => data}/precursors.rs (100%) rename src/{ => data}/spectra.rs (98%) diff --git a/src/calibration.rs b/src/calibration.rs index de0cb22..12f9c91 100644 --- a/src/calibration.rs +++ b/src/calibration.rs @@ -1,6 +1,6 @@ use crate::{ converters::{ConvertableDomain, Tof2MzConverter}, - spectra::RawSpectrum, + data::spectra::RawSpectrum, Precursor, }; diff --git a/src/data.rs b/src/data.rs new file mode 100644 index 0000000..d0560d0 --- /dev/null +++ b/src/data.rs @@ -0,0 +1,9 @@ +pub mod acquisition; +pub mod frames; +pub mod precursors; +pub mod spectra; + +pub use acquisition::AcquisitionType; +pub use frames::{Frame, FrameType}; +pub use precursors::{Precursor, QuadrupoleEvent}; +pub use spectra::Spectrum; diff --git a/src/acquisition.rs b/src/data/acquisition.rs similarity index 91% rename from src/acquisition.rs rename to src/data/acquisition.rs index 18f0500..2826e09 100644 --- a/src/acquisition.rs +++ b/src/data/acquisition.rs @@ -3,5 +3,6 @@ pub enum AcquisitionType { DDAPASEF, DIAPASEF, + PRMPASEF, Unknown, } diff --git a/src/frames.rs b/src/data/frames.rs similarity index 92% rename from src/frames.rs rename to src/data/frames.rs index ad7df8d..5f4455d 100644 --- a/src/frames.rs +++ b/src/data/frames.rs @@ -1,4 +1,4 @@ -use crate::acquisition::AcquisitionType; +use crate::AcquisitionType; /// A frame with all unprocessed data as it was acquired. #[derive(Debug, PartialEq, Default)] diff --git a/src/precursors.rs b/src/data/precursors.rs similarity index 100% rename from src/precursors.rs rename to src/data/precursors.rs diff --git a/src/spectra.rs b/src/data/spectra.rs similarity index 98% rename from src/spectra.rs rename to src/data/spectra.rs index cacdefc..a32f84b 100644 --- a/src/spectra.rs +++ b/src/data/spectra.rs @@ -1,8 +1,7 @@ use crate::{ converters::{ConvertableDomain, Tof2MzConverter}, - precursors::QuadrupoleEvent, vec_utils::{filter_with_mask, find_sparse_local_maxima_mask}, - Precursor, + Precursor, QuadrupoleEvent, }; pub struct RawSpectrumProcessor { diff --git a/src/file_readers/frame_readers/tdf_reader.rs b/src/file_readers/frame_readers/tdf_reader.rs index 3fae8ae..f8744f6 100644 --- a/src/file_readers/frame_readers/tdf_reader.rs +++ b/src/file_readers/frame_readers/tdf_reader.rs @@ -1,6 +1,5 @@ use { crate::{ - acquisition::AcquisitionType, converters::{ ConvertableDomain, Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, @@ -12,7 +11,7 @@ use { }, ReadableFrames, }, - Frame, FrameType, + AcquisitionType, Frame, FrameType, }, rayon::prelude::*, std::path::Path, diff --git a/src/file_readers/spectrum_readers/dda_reader.rs b/src/file_readers/spectrum_readers/dda_reader.rs index cca3527..51a874d 100644 --- a/src/file_readers/spectrum_readers/dda_reader.rs +++ b/src/file_readers/spectrum_readers/dda_reader.rs @@ -3,15 +3,14 @@ mod precursors; use crate::{ calibration::Tof2MzCalibrator, converters::Tof2MzConverter, + data::spectra::RawSpectrum, + data::spectra::{self, RawSpectrumProcessor}, file_readers::{ frame_readers::{tdf_reader::TDFReader, ReadableFrames}, ReadableSpectra, }, - frames::Frame, - spectra::RawSpectrum, - spectra::{self, RawSpectrumProcessor}, vec_utils::group_and_sum, - Spectrum, + Frame, Spectrum, }; use rayon::prelude::*; diff --git a/src/file_readers/spectrum_readers/mini_tdf_reader.rs b/src/file_readers/spectrum_readers/mini_tdf_reader.rs index 8ba343d..4eb7686 100644 --- a/src/file_readers/spectrum_readers/mini_tdf_reader.rs +++ b/src/file_readers/spectrum_readers/mini_tdf_reader.rs @@ -1,4 +1,4 @@ -use crate::file_readers::FileFormatError; +use crate::{file_readers::FileFormatError, QuadrupoleEvent}; use std::fs; use { crate::{ @@ -9,7 +9,6 @@ use { }, ReadableSpectra, }, - precursors::QuadrupoleEvent, Precursor, Spectrum, }, rayon::prelude::*, diff --git a/src/lib.rs b/src/lib.rs index e268b52..463a69b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,24 +21,20 @@ //! * *.ms2spectrum.bin //! * *.ms2spectrum.parquet -mod acquisition; mod calibration; mod converters; +mod data; mod errors; mod file_readers; -mod frames; -mod precursors; -mod spectra; mod vec_utils; pub use crate::{ - acquisition::AcquisitionType, converters::{ ConvertableDomain, Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, }, + data::{ + AcquisitionType, Frame, FrameType, Precursor, QuadrupoleEvent, Spectrum, + }, errors::*, file_readers::FileReader, - frames::{Frame, FrameType}, - precursors::{Precursor, QuadrupoleEvent}, - spectra::Spectrum, }; From 6d648b4c2793efd01a9a0f8bdcd58846efe5d936 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 8 Apr 2024 14:42:01 +0200 Subject: [PATCH 004/109] CHORE: renamed modules --- src/calibration.rs | 5 ++--- src/{converters.rs => domain_converters.rs} | 0 src/{converters => domain_converters}/frame_to_rt.rs | 0 src/{converters => domain_converters}/scan_to_im.rs | 0 src/{converters => domain_converters}/tof_to_mz.rs | 0 src/file_readers.rs | 3 +-- src/file_readers/common/sql_reader/metadata.rs | 2 +- src/file_readers/frame_readers/tdf_reader.rs | 8 ++++---- src/file_readers/spectrum_readers/dda_reader.rs | 7 +++---- .../spectrum_readers/dda_reader/precursors.rs | 3 +-- src/lib.rs | 12 ++++++------ src/{data.rs => ms_data.rs} | 0 src/{data => ms_data}/acquisition.rs | 0 src/{data => ms_data}/frames.rs | 0 src/{data => ms_data}/precursors.rs | 0 src/{data => ms_data}/spectra.rs | 3 +-- 16 files changed, 19 insertions(+), 24 deletions(-) rename src/{converters.rs => domain_converters.rs} (100%) rename src/{converters => domain_converters}/frame_to_rt.rs (100%) rename src/{converters => domain_converters}/scan_to_im.rs (100%) rename src/{converters => domain_converters}/tof_to_mz.rs (100%) rename src/{data.rs => ms_data.rs} (100%) rename src/{data => ms_data}/acquisition.rs (100%) rename src/{data => ms_data}/frames.rs (100%) rename src/{data => ms_data}/precursors.rs (100%) rename src/{data => ms_data}/spectra.rs (97%) diff --git a/src/calibration.rs b/src/calibration.rs index 12f9c91..289f09c 100644 --- a/src/calibration.rs +++ b/src/calibration.rs @@ -1,7 +1,6 @@ use crate::{ - converters::{ConvertableDomain, Tof2MzConverter}, - data::spectra::RawSpectrum, - Precursor, + ms_data::spectra::RawSpectrum, + Precursor, {ConvertableDomain, Tof2MzConverter}, }; pub struct Tof2MzCalibrator; diff --git a/src/converters.rs b/src/domain_converters.rs similarity index 100% rename from src/converters.rs rename to src/domain_converters.rs diff --git a/src/converters/frame_to_rt.rs b/src/domain_converters/frame_to_rt.rs similarity index 100% rename from src/converters/frame_to_rt.rs rename to src/domain_converters/frame_to_rt.rs diff --git a/src/converters/scan_to_im.rs b/src/domain_converters/scan_to_im.rs similarity index 100% rename from src/converters/scan_to_im.rs rename to src/domain_converters/scan_to_im.rs diff --git a/src/converters/tof_to_mz.rs b/src/domain_converters/tof_to_mz.rs similarity index 100% rename from src/converters/tof_to_mz.rs rename to src/domain_converters/tof_to_mz.rs diff --git a/src/file_readers.rs b/src/file_readers.rs index 56eb5cd..8ec606d 100644 --- a/src/file_readers.rs +++ b/src/file_readers.rs @@ -1,6 +1,5 @@ use crate::{ - converters::{Frame2RtConverter, Scan2ImConverter, Tof2MzConverter}, - Error, + Error, {Frame2RtConverter, Scan2ImConverter, Tof2MzConverter}, }; mod common; diff --git a/src/file_readers/common/sql_reader/metadata.rs b/src/file_readers/common/sql_reader/metadata.rs index 5a00201..8163d0c 100644 --- a/src/file_readers/common/sql_reader/metadata.rs +++ b/src/file_readers/common/sql_reader/metadata.rs @@ -1,6 +1,6 @@ use rusqlite::{Connection, Statement}; -use crate::converters::{Scan2ImConverter, Tof2MzConverter}; +use crate::{Scan2ImConverter, Tof2MzConverter}; use super::{get_sql_connection, ReadableFromSql, SqlReader}; diff --git a/src/file_readers/frame_readers/tdf_reader.rs b/src/file_readers/frame_readers/tdf_reader.rs index f8744f6..7dcb836 100644 --- a/src/file_readers/frame_readers/tdf_reader.rs +++ b/src/file_readers/frame_readers/tdf_reader.rs @@ -1,9 +1,5 @@ use { crate::{ - converters::{ - ConvertableDomain, Frame2RtConverter, Scan2ImConverter, - Tof2MzConverter, - }, file_readers::{ common::{ ms_data_blobs::{BinFileReader, ReadableFromBinFile}, @@ -12,6 +8,10 @@ use { ReadableFrames, }, AcquisitionType, Frame, FrameType, + { + ConvertableDomain, Frame2RtConverter, Scan2ImConverter, + Tof2MzConverter, + }, }, rayon::prelude::*, std::path::Path, diff --git a/src/file_readers/spectrum_readers/dda_reader.rs b/src/file_readers/spectrum_readers/dda_reader.rs index 51a874d..85f9451 100644 --- a/src/file_readers/spectrum_readers/dda_reader.rs +++ b/src/file_readers/spectrum_readers/dda_reader.rs @@ -2,15 +2,14 @@ mod precursors; use crate::{ calibration::Tof2MzCalibrator, - converters::Tof2MzConverter, - data::spectra::RawSpectrum, - data::spectra::{self, RawSpectrumProcessor}, file_readers::{ frame_readers::{tdf_reader::TDFReader, ReadableFrames}, ReadableSpectra, }, + ms_data::spectra::RawSpectrum, + ms_data::spectra::{self, RawSpectrumProcessor}, vec_utils::group_and_sum, - Frame, Spectrum, + Frame, Spectrum, Tof2MzConverter, }; use rayon::prelude::*; diff --git a/src/file_readers/spectrum_readers/dda_reader/precursors.rs b/src/file_readers/spectrum_readers/dda_reader/precursors.rs index ccdc489..321dca1 100644 --- a/src/file_readers/spectrum_readers/dda_reader/precursors.rs +++ b/src/file_readers/spectrum_readers/dda_reader/precursors.rs @@ -1,7 +1,6 @@ use rayon::prelude::*; use crate::{ - converters::{ConvertableDomain, Scan2ImConverter}, file_readers::{ common::sql_reader::{ PasefFrameMsMsTable, PrecursorTable, ReadableFromSql, @@ -9,7 +8,7 @@ use crate::{ frame_readers::tdf_reader::TDFReader, }, vec_utils::argsort, - Precursor, + Precursor, {ConvertableDomain, Scan2ImConverter}, }; #[derive(Debug)] diff --git a/src/lib.rs b/src/lib.rs index 463a69b..2c2e18c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,19 +22,19 @@ //! * *.ms2spectrum.parquet mod calibration; -mod converters; -mod data; +mod domain_converters; mod errors; mod file_readers; +mod ms_data; mod vec_utils; pub use crate::{ - converters::{ + domain_converters::{ ConvertableDomain, Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, }, - data::{ - AcquisitionType, Frame, FrameType, Precursor, QuadrupoleEvent, Spectrum, - }, errors::*, file_readers::FileReader, + ms_data::{ + AcquisitionType, Frame, FrameType, Precursor, QuadrupoleEvent, Spectrum, + }, }; diff --git a/src/data.rs b/src/ms_data.rs similarity index 100% rename from src/data.rs rename to src/ms_data.rs diff --git a/src/data/acquisition.rs b/src/ms_data/acquisition.rs similarity index 100% rename from src/data/acquisition.rs rename to src/ms_data/acquisition.rs diff --git a/src/data/frames.rs b/src/ms_data/frames.rs similarity index 100% rename from src/data/frames.rs rename to src/ms_data/frames.rs diff --git a/src/data/precursors.rs b/src/ms_data/precursors.rs similarity index 100% rename from src/data/precursors.rs rename to src/ms_data/precursors.rs diff --git a/src/data/spectra.rs b/src/ms_data/spectra.rs similarity index 97% rename from src/data/spectra.rs rename to src/ms_data/spectra.rs index a32f84b..2d42800 100644 --- a/src/data/spectra.rs +++ b/src/ms_data/spectra.rs @@ -1,7 +1,6 @@ use crate::{ - converters::{ConvertableDomain, Tof2MzConverter}, vec_utils::{filter_with_mask, find_sparse_local_maxima_mask}, - Precursor, QuadrupoleEvent, + ConvertableDomain, Precursor, QuadrupoleEvent, Tof2MzConverter, }; pub struct RawSpectrumProcessor { From 0a99fe57cef625b19333526f9179b1d9562bdbb0 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 8 Apr 2024 14:44:38 +0200 Subject: [PATCH 005/109] CHORE: renamed vec_utils --- src/file_readers/spectrum_readers/dda_reader.rs | 5 ++--- src/file_readers/spectrum_readers/dda_reader/precursors.rs | 4 ++-- src/lib.rs | 2 +- src/ms_data/spectra.rs | 2 +- src/utils.rs | 1 + src/{ => utils}/vec_utils.rs | 0 6 files changed, 7 insertions(+), 7 deletions(-) create mode 100644 src/utils.rs rename src/{ => utils}/vec_utils.rs (100%) diff --git a/src/file_readers/spectrum_readers/dda_reader.rs b/src/file_readers/spectrum_readers/dda_reader.rs index 85f9451..2544c17 100644 --- a/src/file_readers/spectrum_readers/dda_reader.rs +++ b/src/file_readers/spectrum_readers/dda_reader.rs @@ -6,9 +6,8 @@ use crate::{ frame_readers::{tdf_reader::TDFReader, ReadableFrames}, ReadableSpectra, }, - ms_data::spectra::RawSpectrum, - ms_data::spectra::{self, RawSpectrumProcessor}, - vec_utils::group_and_sum, + ms_data::spectra::{self, RawSpectrum, RawSpectrumProcessor}, + utils::vec_utils::group_and_sum, Frame, Spectrum, Tof2MzConverter, }; diff --git a/src/file_readers/spectrum_readers/dda_reader/precursors.rs b/src/file_readers/spectrum_readers/dda_reader/precursors.rs index 321dca1..8adfe23 100644 --- a/src/file_readers/spectrum_readers/dda_reader/precursors.rs +++ b/src/file_readers/spectrum_readers/dda_reader/precursors.rs @@ -7,8 +7,8 @@ use crate::{ }, frame_readers::tdf_reader::TDFReader, }, - vec_utils::argsort, - Precursor, {ConvertableDomain, Scan2ImConverter}, + utils::vec_utils::argsort, + ConvertableDomain, Precursor, Scan2ImConverter, }; #[derive(Debug)] diff --git a/src/lib.rs b/src/lib.rs index 2c2e18c..0a61c98 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,7 +26,7 @@ mod domain_converters; mod errors; mod file_readers; mod ms_data; -mod vec_utils; +mod utils; pub use crate::{ domain_converters::{ diff --git a/src/ms_data/spectra.rs b/src/ms_data/spectra.rs index 2d42800..4b81253 100644 --- a/src/ms_data/spectra.rs +++ b/src/ms_data/spectra.rs @@ -1,5 +1,5 @@ use crate::{ - vec_utils::{filter_with_mask, find_sparse_local_maxima_mask}, + utils::vec_utils::{filter_with_mask, find_sparse_local_maxima_mask}, ConvertableDomain, Precursor, QuadrupoleEvent, Tof2MzConverter, }; diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 0000000..9aebe98 --- /dev/null +++ b/src/utils.rs @@ -0,0 +1 @@ +pub mod vec_utils; diff --git a/src/vec_utils.rs b/src/utils/vec_utils.rs similarity index 100% rename from src/vec_utils.rs rename to src/utils/vec_utils.rs From 7e4ba616b3ad9fbf87f47a91b025d1b144e1d9ae Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 8 Apr 2024 17:07:11 +0200 Subject: [PATCH 006/109] FEAT: crude mgf writer implementation --- src/file_writers.rs | 1 + src/file_writers/mgf_writer.rs | 62 ++++++++++++++++++++++++++++++++++ src/lib.rs | 2 ++ src/main.rs | 34 +++++++++++-------- 4 files changed, 84 insertions(+), 15 deletions(-) create mode 100644 src/file_writers.rs create mode 100644 src/file_writers/mgf_writer.rs diff --git a/src/file_writers.rs b/src/file_writers.rs new file mode 100644 index 0000000..e686fe2 --- /dev/null +++ b/src/file_writers.rs @@ -0,0 +1 @@ +pub mod mgf_writer; diff --git a/src/file_writers/mgf_writer.rs b/src/file_writers/mgf_writer.rs new file mode 100644 index 0000000..a44d4ce --- /dev/null +++ b/src/file_writers/mgf_writer.rs @@ -0,0 +1,62 @@ +use std::fs::File; +use std::io::Write; +use std::path::Path; + +use crate::Spectrum; + +pub struct MGFWriter {} + +impl MGFWriter { + pub fn write_spectra(input_file_path: &str, spectra: &Vec) { + let output_file_path = { + let input_path = Path::new(&input_file_path); + let file_stem = + Path::new(&input_file_path).file_stem().unwrap_or_default(); + let new_file_name = format!("{}.mgf", file_stem.to_string_lossy()); + input_path.with_file_name(new_file_name) + }; + let mut file = + File::create(output_file_path).expect("Failed to create file"); + for spectrum in spectra { + _ = file.write_all("BEGIN IONS\n".as_bytes()); + _ = file.write_all(spectrum.as_mgf_header().as_bytes()); + _ = file.write_all(spectrum.as_mgf_peaks().as_bytes()); + _ = file.write_all("END IONS\n".as_bytes()); + } + file.flush().expect("Failed to flush to file"); + } +} + +pub trait MGFFormat { + fn as_mgf_entry(&self) -> String; + + fn as_mgf_header(&self) -> String; + + fn as_mgf_peaks(&self) -> String; +} + +impl MGFFormat for Spectrum { + fn as_mgf_entry(&self) -> String { + format!("{}{}", self.as_mgf_header(), self.as_mgf_peaks()) + } + + fn as_mgf_header(&self) -> String { + let precursor = self.precursor.unwrap_as_precursor(); + let title = precursor.index; + let ms2_data = format!( + "TITLE=index:{}, im:{:.4}, intensity:{:.4}, frame:{}, ce:{:.4}\nPEPMASS={:.4}\nCHARGE={}\nRT={:.2}\n", + title, precursor.im, precursor.intensity, precursor.frame_index, precursor.collision_energy, precursor.mz, precursor.charge, precursor.rt + ); + ms2_data + } + + fn as_mgf_peaks(&self) -> String { + let mut ms2_data: String = String::new(); + for (mz, intensity) in + self.mz_values.iter().zip(self.intensities.iter()) + { + ms2_data.push_str(&format!("{:.4}\t{:.0}\n", mz, intensity)); + } + ms2_data + } +} diff --git a/src/lib.rs b/src/lib.rs index 0a61c98..1ab0994 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,6 +25,7 @@ mod calibration; mod domain_converters; mod errors; mod file_readers; +mod file_writers; mod ms_data; mod utils; @@ -34,6 +35,7 @@ pub use crate::{ }, errors::*, file_readers::FileReader, + file_writers::mgf_writer::{MGFFormat, MGFWriter}, ms_data::{ AcquisitionType, Frame, FrameType, Precursor, QuadrupoleEvent, Spectrum, }, diff --git a/src/main.rs b/src/main.rs index 4efdbe8..235bd96 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,27 +1,31 @@ use std::env; -use timsrust::{FileReader, Spectrum}; +use timsrust::{FileReader, MGFFormat, MGFWriter, Spectrum}; -fn main() { +fn quick_test() { let args: Vec = env::args().collect(); let d_folder_name: &str = &args[1]; let x = FileReader::new(d_folder_name.to_string()).unwrap(); let dda_spectra: Vec = x.read_all_spectra(); - let precursor_index: usize; + let spectrum_index: usize; if args.len() >= 3 { - precursor_index = args[2].parse().unwrap_or(0); + spectrum_index = args[2].parse().unwrap_or(0); } else { - precursor_index = 1000; + spectrum_index = 10; } - - println!("precursor {:?}", dda_spectra[precursor_index].precursor); - println!( - "precursor {:?}", - dda_spectra[precursor_index].mz_values.len() - ); + println!("precursor {:?}", dda_spectra[spectrum_index].precursor); + // println!( + // "precursor\n{:?}", + // dda_spectra[spectrum_index].as_mgf_header() + // ); + println!("mz values {:?}", dda_spectra[spectrum_index].mz_values); println!( - "precursor {:?}", - dda_spectra[precursor_index].intensities.len() + "intensity values {:?}", + dda_spectra[spectrum_index].intensities ); - println!("precursor {:?}", dda_spectra[precursor_index].mz_values); - println!("precursor {:?}", dda_spectra[precursor_index].intensities); + // println!("{:?}", dda_spectra[spectrum_index].as_mgf_entry()); + MGFWriter::write_spectra(d_folder_name, &dda_spectra); +} + +fn main() { + quick_test(); } From 56a06a66f40dd46fa1cbf3457fd855f8a8122864 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 9 Apr 2024 09:52:49 +0200 Subject: [PATCH 007/109] CHORE: unified converter setup --- src/domain_converters/scan_to_im.rs | 2 +- src/domain_converters/tof_to_mz.rs | 2 +- src/file_readers/spectrum_readers/dda_reader.rs | 10 ++++++---- .../spectrum_readers/dda_reader/precursors.rs | 3 +-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/domain_converters/scan_to_im.rs b/src/domain_converters/scan_to_im.rs index 5968a79..68339f4 100644 --- a/src/domain_converters/scan_to_im.rs +++ b/src/domain_converters/scan_to_im.rs @@ -1,5 +1,5 @@ /// A converter from Scan -> (inversed) ion mobility. -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Clone)] pub struct Scan2ImConverter { scan_intercept: f64, scan_slope: f64, diff --git a/src/domain_converters/tof_to_mz.rs b/src/domain_converters/tof_to_mz.rs index 3613914..e23ac18 100644 --- a/src/domain_converters/tof_to_mz.rs +++ b/src/domain_converters/tof_to_mz.rs @@ -1,7 +1,7 @@ use linreg::linear_regression; /// A converter from TOF -> m/z. -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Clone)] pub struct Tof2MzConverter { tof_intercept: f64, tof_slope: f64, diff --git a/src/file_readers/spectrum_readers/dda_reader.rs b/src/file_readers/spectrum_readers/dda_reader.rs index 2544c17..8b3ede0 100644 --- a/src/file_readers/spectrum_readers/dda_reader.rs +++ b/src/file_readers/spectrum_readers/dda_reader.rs @@ -29,7 +29,7 @@ pub struct DDASpectrumReader { impl DDASpectrumReader { pub fn new(path_name: String) -> Self { let tdf_reader: TDFReader = TDFReader::new(&path_name.to_string()); - let mz_reader: Tof2MzConverter = tdf_reader.mz_converter; + let mz_reader: Tof2MzConverter = tdf_reader.mz_converter.clone(); let ms2_frames: Vec = tdf_reader.read_all_ms2_frames(); let precursor_reader: PrecursorReader = PrecursorReader::new(&tdf_reader); @@ -115,11 +115,13 @@ impl ReadableSpectra for DDASpectrumReader { &self.precursor_reader.precursors, 0.1, ); - let mz_reader: Tof2MzConverter; + let temp_mz_reader: Tof2MzConverter; + let mz_reader: &Tof2MzConverter; if hits.len() >= 2 { - mz_reader = Tof2MzConverter::from_pairs(&hits); + temp_mz_reader = Tof2MzConverter::from_pairs(&hits); + mz_reader = &temp_mz_reader; } else { - mz_reader = self.mz_reader + mz_reader = &self.mz_reader } let spectra: Vec = raw_spectra .into_par_iter() diff --git a/src/file_readers/spectrum_readers/dda_reader/precursors.rs b/src/file_readers/spectrum_readers/dda_reader/precursors.rs index 8adfe23..d442407 100644 --- a/src/file_readers/spectrum_readers/dda_reader/precursors.rs +++ b/src/file_readers/spectrum_readers/dda_reader/precursors.rs @@ -27,7 +27,6 @@ impl PrecursorReader { ); let pasef_frames: PasefFrameMsMsTable = PasefFrameMsMsTable::from_sql(&tdf_reader.tdf_sql_reader); - let im_reader: Scan2ImConverter = tdf_reader.im_converter; let precursor_table: PrecursorTable = PrecursorTable::from_sql(&tdf_reader.tdf_sql_reader); let retention_times: Vec = tdf_reader.frame_table.rt.clone(); @@ -42,7 +41,7 @@ impl PrecursorReader { Precursor { mz: precursor_table.mz[index], rt: retention_times[frame_id], - im: im_reader.convert(scan_id), + im: tdf_reader.im_converter.convert(scan_id), charge: precursor_table.charge[index], intensity: precursor_table.intensity[index], index: index + 1, //TODO? From 6df48f378ffe0111938a64449192ce11224475a8 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 9 Apr 2024 09:53:24 +0200 Subject: [PATCH 008/109] CHORE: version update bigger than patch --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aa7d4d9..1795706 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1227,7 +1227,7 @@ dependencies = [ [[package]] name = "timsrust" -version = "0.2.5" +version = "0.3.0" dependencies = [ "bytemuck", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index a05990a..932a754 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "timsrust" -version = "0.2.5" +version = "0.3.0" edition = "2021" description = "A crate to read Bruker timsTOF data" license = "Apache-2.0" From c421d0e2ff2785865a771318fbd02ffea417cbdb Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 11 Apr 2024 12:38:18 +0200 Subject: [PATCH 009/109] CHORE: cleaned up precursor and frame structs in favor of simplicity --- src/file_readers/frame_readers/tdf_reader.rs | 22 ++++++++++------ .../spectrum_readers/dda_reader/precursors.rs | 2 +- .../spectrum_readers/mini_tdf_reader.rs | 8 +++--- src/file_writers/mgf_writer.rs | 2 +- src/lib.rs | 4 +-- src/main.rs | 1 + src/ms_data.rs | 2 +- src/ms_data/acquisition.rs | 7 +++++ src/ms_data/frames.rs | 3 ++- src/ms_data/precursors.rs | 26 ------------------- src/ms_data/quadrupole.rs | 25 ++++++++++++++++++ src/ms_data/spectra.rs | 6 ++--- tests/frame_readers.rs | 8 ++++-- tests/spectrum_readers.rs | 22 ++++++++-------- 14 files changed, 77 insertions(+), 61 deletions(-) create mode 100644 src/ms_data/quadrupole.rs diff --git a/src/file_readers/frame_readers/tdf_reader.rs b/src/file_readers/frame_readers/tdf_reader.rs index 7dcb836..de1d480 100644 --- a/src/file_readers/frame_readers/tdf_reader.rs +++ b/src/file_readers/frame_readers/tdf_reader.rs @@ -7,11 +7,8 @@ use { }, ReadableFrames, }, - AcquisitionType, Frame, FrameType, - { - ConvertableDomain, Frame2RtConverter, Scan2ImConverter, - Tof2MzConverter, - }, + AcquisitionType, ConvertableDomain, Frame, Frame2RtConverter, + FrameType, Scan2ImConverter, Tof2MzConverter, }, rayon::prelude::*, std::path::Path, @@ -26,6 +23,7 @@ pub struct TDFReader { pub im_converter: Scan2ImConverter, pub mz_converter: Tof2MzConverter, pub frame_table: FrameTable, + pub acquisition: AcquisitionType, frame_types: Vec, } @@ -48,11 +46,17 @@ impl TDFReader { .iter() .map(|msms_type| match msms_type { 0 => FrameType::MS1, - 8 => FrameType::MS2(AcquisitionType::DDAPASEF), - 9 => FrameType::MS2(AcquisitionType::DIAPASEF), + 8 => FrameType::MS2, + 9 => FrameType::MS2, _ => FrameType::Unknown, }) .collect(); + let mut acquisition = AcquisitionType::Unknown; + if frame_table.msms_type.contains(&8) { + acquisition = AcquisitionType::DDAPASEF; + } else if frame_table.msms_type.contains(&9) { + acquisition = AcquisitionType::DIAPASEF; + } Self { path: path.to_string(), tdf_bin_reader: tdf_bin_reader, @@ -62,6 +66,7 @@ impl TDFReader { frame_table: frame_table, tdf_sql_reader: tdf_sql_reader, frame_types: frame_types, + acquisition: acquisition, } } @@ -78,6 +83,7 @@ impl ReadableFrames for TDFReader { frame.rt = self.rt_converter.convert(index as u32); frame.index = self.frame_table.id[index]; frame.frame_type = self.frame_types[index]; + frame.acquisition = self.acquisition; frame } @@ -102,7 +108,7 @@ impl ReadableFrames for TDFReader { (0..self.tdf_bin_reader.size()) .into_par_iter() .map(|index| match self.frame_types[index] { - FrameType::MS2(_) => self.read_single_frame(index), + FrameType::MS2 => self.read_single_frame(index), _ => Frame::default(), }) .collect() diff --git a/src/file_readers/spectrum_readers/dda_reader/precursors.rs b/src/file_readers/spectrum_readers/dda_reader/precursors.rs index d442407..838bc26 100644 --- a/src/file_readers/spectrum_readers/dda_reader/precursors.rs +++ b/src/file_readers/spectrum_readers/dda_reader/precursors.rs @@ -8,7 +8,7 @@ use crate::{ frame_readers::tdf_reader::TDFReader, }, utils::vec_utils::argsort, - ConvertableDomain, Precursor, Scan2ImConverter, + ConvertableDomain, Precursor, }; #[derive(Debug)] diff --git a/src/file_readers/spectrum_readers/mini_tdf_reader.rs b/src/file_readers/spectrum_readers/mini_tdf_reader.rs index 4eb7686..e22f1cf 100644 --- a/src/file_readers/spectrum_readers/mini_tdf_reader.rs +++ b/src/file_readers/spectrum_readers/mini_tdf_reader.rs @@ -1,4 +1,4 @@ -use crate::{file_readers::FileFormatError, QuadrupoleEvent}; +use crate::file_readers::FileFormatError; use std::fs; use { crate::{ @@ -106,7 +106,7 @@ impl ReadableSpectra for MiniTDFReader { fn read_single_spectrum(&self, index: usize) -> Spectrum { let mut spectrum: Spectrum = Spectrum::read_from_file(&self.frame_reader, index); - spectrum.precursor = QuadrupoleEvent::Precursor(self.precursors[index]); + spectrum.precursor = self.precursors[index]; spectrum.index = self.precursors[index].index; spectrum } @@ -118,8 +118,8 @@ impl ReadableSpectra for MiniTDFReader { .map(|index| self.read_single_spectrum(index)) .collect(); spectra.sort_by(|a, b| { - let x = b.precursor.unwrap_as_precursor().index as f64; - let y = a.precursor.unwrap_as_precursor().index as f64; + let x = b.precursor.index as f64; + let y = a.precursor.index as f64; y.total_cmp(&x) }); spectra diff --git a/src/file_writers/mgf_writer.rs b/src/file_writers/mgf_writer.rs index a44d4ce..73501d6 100644 --- a/src/file_writers/mgf_writer.rs +++ b/src/file_writers/mgf_writer.rs @@ -41,7 +41,7 @@ impl MGFFormat for Spectrum { } fn as_mgf_header(&self) -> String { - let precursor = self.precursor.unwrap_as_precursor(); + let precursor = self.precursor; let title = precursor.index; let ms2_data = format!( "TITLE=index:{}, im:{:.4}, intensity:{:.4}, frame:{}, ce:{:.4}\nPEPMASS={:.4}\nCHARGE={}\nRT={:.2}\n", diff --git a/src/lib.rs b/src/lib.rs index 1ab0994..71e74ba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -36,7 +36,5 @@ pub use crate::{ errors::*, file_readers::FileReader, file_writers::mgf_writer::{MGFFormat, MGFWriter}, - ms_data::{ - AcquisitionType, Frame, FrameType, Precursor, QuadrupoleEvent, Spectrum, - }, + ms_data::{AcquisitionType, Frame, FrameType, Precursor, Spectrum}, }; diff --git a/src/main.rs b/src/main.rs index 235bd96..1eeb2d3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,6 +13,7 @@ fn quick_test() { spectrum_index = 10; } println!("precursor {:?}", dda_spectra[spectrum_index].precursor); + _ = dda_spectra[spectrum_index].as_mgf_header(); // println!( // "precursor\n{:?}", // dda_spectra[spectrum_index].as_mgf_header() diff --git a/src/ms_data.rs b/src/ms_data.rs index d0560d0..f91825a 100644 --- a/src/ms_data.rs +++ b/src/ms_data.rs @@ -5,5 +5,5 @@ pub mod spectra; pub use acquisition::AcquisitionType; pub use frames::{Frame, FrameType}; -pub use precursors::{Precursor, QuadrupoleEvent}; +pub use precursors::Precursor; pub use spectra::Spectrum; diff --git a/src/ms_data/acquisition.rs b/src/ms_data/acquisition.rs index 2826e09..61b3e1e 100644 --- a/src/ms_data/acquisition.rs +++ b/src/ms_data/acquisition.rs @@ -3,6 +3,13 @@ pub enum AcquisitionType { DDAPASEF, DIAPASEF, + DiagonalDIAPASEF, PRMPASEF, Unknown, } + +impl Default for AcquisitionType { + fn default() -> Self { + Self::Unknown + } +} diff --git a/src/ms_data/frames.rs b/src/ms_data/frames.rs index 5f4455d..9e407d4 100644 --- a/src/ms_data/frames.rs +++ b/src/ms_data/frames.rs @@ -9,13 +9,14 @@ pub struct Frame { pub index: usize, pub rt: f64, pub frame_type: FrameType, + pub acquisition: AcquisitionType, } /// The kind of frame, determined by acquisition. #[derive(Debug, PartialEq, Clone, Copy)] pub enum FrameType { MS1, - MS2(AcquisitionType), + MS2, Unknown, } diff --git a/src/ms_data/precursors.rs b/src/ms_data/precursors.rs index 195beb3..603b77d 100644 --- a/src/ms_data/precursors.rs +++ b/src/ms_data/precursors.rs @@ -10,29 +10,3 @@ pub struct Precursor { pub frame_index: usize, pub collision_energy: f64, } - -/// A type of quadrupole selection. -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum QuadrupoleEvent { - Precursor(Precursor), - // Window(Window), - // PrecursorList(Vec), - None, -} - -impl Default for QuadrupoleEvent { - fn default() -> Self { - Self::None - } -} - -impl QuadrupoleEvent { - pub fn unwrap_as_precursor(&self) -> Precursor { - match self { - QuadrupoleEvent::Precursor(precursor) => *precursor, - _ => { - panic!("Not a precursor"); - }, - } - } -} diff --git a/src/ms_data/quadrupole.rs b/src/ms_data/quadrupole.rs new file mode 100644 index 0000000..823aa56 --- /dev/null +++ b/src/ms_data/quadrupole.rs @@ -0,0 +1,25 @@ +/// A type of quadrupole selection. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum QuadrupoleEvent { + Precursor(Precursor), + // Window(Window), + // PrecursorList(Vec), + None, +} + +impl Default for QuadrupoleEvent { + fn default() -> Self { + Self::None + } +} + +impl QuadrupoleEvent { + pub fn unwrap_as_precursor(&self) -> Precursor { + match self { + QuadrupoleEvent::Precursor(precursor) => *precursor, + _ => { + panic!("Not a precursor"); + }, + } + } +} diff --git a/src/ms_data/spectra.rs b/src/ms_data/spectra.rs index 4b81253..2ec334e 100644 --- a/src/ms_data/spectra.rs +++ b/src/ms_data/spectra.rs @@ -1,6 +1,6 @@ use crate::{ utils::vec_utils::{filter_with_mask, find_sparse_local_maxima_mask}, - ConvertableDomain, Precursor, QuadrupoleEvent, Tof2MzConverter, + ConvertableDomain, Precursor, Tof2MzConverter, }; pub struct RawSpectrumProcessor { @@ -76,7 +76,7 @@ impl RawSpectrumProcessor { .iter() .map(|x| *x as f64) .collect(), - precursor: QuadrupoleEvent::Precursor(precursor), + precursor: precursor, index: index, }; spectrum @@ -110,6 +110,6 @@ pub struct RawSpectrum { pub struct Spectrum { pub mz_values: Vec, pub intensities: Vec, - pub precursor: QuadrupoleEvent, + pub precursor: Precursor, pub index: usize, } diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index aa7de7e..2c35a60 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -25,6 +25,7 @@ fn tdf_reader_frames() { index: 1, rt: 0.1, frame_type: FrameType::MS1, + acquisition: AcquisitionType::DDAPASEF, }, Frame { scan_offsets: vec![0, 5, 11, 18, 26], @@ -32,7 +33,8 @@ fn tdf_reader_frames() { intensities: (10..36).map(|x| (x + 1) * 2).collect(), index: 2, rt: 0.2, - frame_type: FrameType::MS2(AcquisitionType::DDAPASEF), + frame_type: FrameType::MS2, + acquisition: AcquisitionType::DDAPASEF, }, Frame { scan_offsets: vec![0, 9, 19, 30, 42], @@ -41,6 +43,7 @@ fn tdf_reader_frames() { index: 3, rt: 0.3, frame_type: FrameType::MS1, + acquisition: AcquisitionType::DDAPASEF, }, Frame { scan_offsets: vec![0, 13, 27, 42, 58], @@ -48,7 +51,8 @@ fn tdf_reader_frames() { intensities: (78..136).map(|x| (x + 1) * 2).collect(), index: 4, rt: 0.4, - frame_type: FrameType::MS2(AcquisitionType::DDAPASEF), + frame_type: FrameType::MS2, + acquisition: AcquisitionType::DDAPASEF, }, ]; for i in 0..frames.len() { diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index a19f228..373d954 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -1,5 +1,5 @@ use std::path::Path; -use timsrust::{FileReader, Precursor, QuadrupoleEvent, Spectrum}; +use timsrust::{FileReader, Precursor, Spectrum}; fn get_local_directory() -> &'static Path { Path::new(std::file!()) @@ -21,7 +21,7 @@ fn minitdf_reader() { Spectrum { mz_values: vec![100.0, 200.002, 300.03, 400.4], intensities: vec![1.0, 2.0, 3.0, 4.0], - precursor: QuadrupoleEvent::Precursor(Precursor { + precursor: Precursor { mz: 123.4567, rt: 12.345, im: 1.234, @@ -30,13 +30,13 @@ fn minitdf_reader() { index: 1, frame_index: 1, collision_energy: 0.0, - }), + }, index: 1, }, Spectrum { mz_values: vec![1100.0, 1200.002, 1300.03, 1400.4], intensities: vec![10.0, 20.0, 30.0, 40.0], - precursor: QuadrupoleEvent::Precursor(Precursor { + precursor: Precursor { mz: 987.6543, rt: 9.876, im: 0.9876, @@ -45,7 +45,7 @@ fn minitdf_reader() { index: 2, frame_index: 2, collision_energy: 0.0, - }), + }, index: 2, }, ]; @@ -68,7 +68,7 @@ fn tdf_reader_dda() { Spectrum { mz_values: vec![199.7633445943076], intensities: vec![162.0], - precursor: QuadrupoleEvent::Precursor(Precursor { + precursor: Precursor { mz: 500.0, rt: 0.2, im: 1.4989212513484358, @@ -77,13 +77,13 @@ fn tdf_reader_dda() { index: 1, frame_index: 1, collision_energy: 0.0, - }), + }, index: 0, }, Spectrum { mz_values: vec![169.5419900362706, 695.6972509397959], intensities: vec![120.0, 624.0], - precursor: QuadrupoleEvent::Precursor(Precursor { + precursor: Precursor { mz: 501.0, rt: 0.2, im: 1.4978425026968716, @@ -92,13 +92,13 @@ fn tdf_reader_dda() { index: 2, frame_index: 1, collision_energy: 0.0, - }), + }, index: 1, }, Spectrum { mz_values: vec![827.1915846690921], intensities: vec![714.0], - precursor: QuadrupoleEvent::Precursor(Precursor { + precursor: Precursor { mz: 502.0, rt: 0.4, im: 1.4989212513484358, @@ -107,7 +107,7 @@ fn tdf_reader_dda() { index: 3, frame_index: 3, collision_energy: 0.0, - }), + }, index: 2, }, ]; From 2262abf1a18767ec36f751ba387d16c1c9818ae4 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 12 Apr 2024 11:47:11 +0200 Subject: [PATCH 010/109] FEAT: Updated frames with ms info and quad settings --- src/file_readers/frame_readers/tdf_reader.rs | 35 +++++++++++--------- src/lib.rs | 7 ++-- src/main.rs | 5 ++- src/ms_data.rs | 4 ++- src/ms_data/acquisition.rs | 14 +++----- src/ms_data/frames.rs | 24 ++++++-------- src/ms_data/precursors.rs | 2 +- src/ms_data/quadrupole.rs | 33 +++++------------- tests/frame_readers.rs | 26 +++++++++------ 9 files changed, 73 insertions(+), 77 deletions(-) diff --git a/src/file_readers/frame_readers/tdf_reader.rs b/src/file_readers/frame_readers/tdf_reader.rs index de1d480..9a24d85 100644 --- a/src/file_readers/frame_readers/tdf_reader.rs +++ b/src/file_readers/frame_readers/tdf_reader.rs @@ -7,11 +7,11 @@ use { }, ReadableFrames, }, - AcquisitionType, ConvertableDomain, Frame, Frame2RtConverter, - FrameType, Scan2ImConverter, Tof2MzConverter, + AcquisitionType, ConvertableDomain, Frame, Frame2RtConverter, MSLevel, + QuadrupoleSettings, Scan2ImConverter, Tof2MzConverter, }, rayon::prelude::*, - std::path::Path, + std::{path::Path, sync::Arc}, }; #[derive(Debug)] @@ -24,7 +24,7 @@ pub struct TDFReader { pub mz_converter: Tof2MzConverter, pub frame_table: FrameTable, pub acquisition: AcquisitionType, - frame_types: Vec, + ms_levels: Vec, } impl TDFReader { @@ -41,14 +41,14 @@ impl TDFReader { String::from(&file_name), frame_table.offsets.clone(), ); - let frame_types: Vec = frame_table + let ms_levels: Vec = frame_table .msms_type .iter() .map(|msms_type| match msms_type { - 0 => FrameType::MS1, - 8 => FrameType::MS2, - 9 => FrameType::MS2, - _ => FrameType::Unknown, + 0 => MSLevel::MS1, + 8 => MSLevel::MS2, + 9 => MSLevel::MS2, + _ => MSLevel::Unknown, }) .collect(); let mut acquisition = AcquisitionType::Unknown; @@ -65,7 +65,7 @@ impl TDFReader { mz_converter: Tof2MzConverter::from_sql(&tdf_sql_reader), frame_table: frame_table, tdf_sql_reader: tdf_sql_reader, - frame_types: frame_types, + ms_levels: ms_levels, acquisition: acquisition, } } @@ -82,8 +82,11 @@ impl ReadableFrames for TDFReader { Frame::read_from_file(&self.tdf_bin_reader, index); frame.rt = self.rt_converter.convert(index as u32); frame.index = self.frame_table.id[index]; - frame.frame_type = self.frame_types[index]; - frame.acquisition = self.acquisition; + frame.ms_level = self.ms_levels[index]; + frame.acquisition_type = self.acquisition; + if frame.ms_level == MSLevel::MS2 { + frame.quadrupole_settings = Arc::new(QuadrupoleSettings::default()); + } frame } @@ -97,8 +100,8 @@ impl ReadableFrames for TDFReader { fn read_all_ms1_frames(&self) -> Vec { (0..self.tdf_bin_reader.size()) .into_par_iter() - .map(|index| match self.frame_types[index] { - FrameType::MS1 => self.read_single_frame(index), + .map(|index| match self.ms_levels[index] { + MSLevel::MS1 => self.read_single_frame(index), _ => Frame::default(), }) .collect() @@ -107,8 +110,8 @@ impl ReadableFrames for TDFReader { fn read_all_ms2_frames(&self) -> Vec { (0..self.tdf_bin_reader.size()) .into_par_iter() - .map(|index| match self.frame_types[index] { - FrameType::MS2 => self.read_single_frame(index), + .map(|index| match self.ms_levels[index] { + MSLevel::MS2 => self.read_single_frame(index), _ => Frame::default(), }) .collect() diff --git a/src/lib.rs b/src/lib.rs index 71e74ba..c86f37d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,6 +35,9 @@ pub use crate::{ }, errors::*, file_readers::FileReader, - file_writers::mgf_writer::{MGFFormat, MGFWriter}, - ms_data::{AcquisitionType, Frame, FrameType, Precursor, Spectrum}, + file_writers::mgf_writer::MGFWriter, + ms_data::{ + AcquisitionType, Frame, MSLevel, Precursor, QuadrupoleSettings, + Spectrum, + }, }; diff --git a/src/main.rs b/src/main.rs index 1eeb2d3..3c030cb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,8 @@ +mod file_writers; + +use file_writers::mgf_writer::MGFFormat; use std::env; -use timsrust::{FileReader, MGFFormat, MGFWriter, Spectrum}; +use timsrust::{FileReader, MGFWriter, Spectrum}; fn quick_test() { let args: Vec = env::args().collect(); diff --git a/src/ms_data.rs b/src/ms_data.rs index f91825a..9ed5873 100644 --- a/src/ms_data.rs +++ b/src/ms_data.rs @@ -1,9 +1,11 @@ pub mod acquisition; pub mod frames; pub mod precursors; +pub mod quadrupole; pub mod spectra; pub use acquisition::AcquisitionType; -pub use frames::{Frame, FrameType}; +pub use frames::{Frame, MSLevel}; pub use precursors::Precursor; +pub use quadrupole::QuadrupoleSettings; pub use spectra::Spectrum; diff --git a/src/ms_data/acquisition.rs b/src/ms_data/acquisition.rs index 61b3e1e..790c503 100644 --- a/src/ms_data/acquisition.rs +++ b/src/ms_data/acquisition.rs @@ -1,15 +1,11 @@ /// The kind of acquisition that was used. -#[derive(Debug, PartialEq, Clone, Copy)] +#[derive(Debug, PartialEq, Clone, Copy, Default)] pub enum AcquisitionType { DDAPASEF, DIAPASEF, - DiagonalDIAPASEF, - PRMPASEF, + // DiagonalDIAPASEF, + // PRMPASEF, + /// Default value. + #[default] Unknown, } - -impl Default for AcquisitionType { - fn default() -> Self { - Self::Unknown - } -} diff --git a/src/ms_data/frames.rs b/src/ms_data/frames.rs index 9e407d4..d62e507 100644 --- a/src/ms_data/frames.rs +++ b/src/ms_data/frames.rs @@ -1,27 +1,25 @@ -use crate::AcquisitionType; +use super::{AcquisitionType, QuadrupoleSettings}; +use std::sync::Arc; /// A frame with all unprocessed data as it was acquired. -#[derive(Debug, PartialEq, Default)] +#[derive(Debug, PartialEq, Default, Clone)] pub struct Frame { pub scan_offsets: Vec, pub tof_indices: Vec, pub intensities: Vec, pub index: usize, pub rt: f64, - pub frame_type: FrameType, - pub acquisition: AcquisitionType, + pub acquisition_type: AcquisitionType, + pub ms_level: MSLevel, + pub quadrupole_settings: Arc, } -/// The kind of frame, determined by acquisition. -#[derive(Debug, PartialEq, Clone, Copy)] -pub enum FrameType { +/// The MS level used. +#[derive(Debug, PartialEq, Default, Clone, Copy)] +pub enum MSLevel { MS1, MS2, + /// Default value. + #[default] Unknown, } - -impl Default for FrameType { - fn default() -> Self { - Self::Unknown - } -} diff --git a/src/ms_data/precursors.rs b/src/ms_data/precursors.rs index 603b77d..d4c1ae4 100644 --- a/src/ms_data/precursors.rs +++ b/src/ms_data/precursors.rs @@ -1,4 +1,4 @@ -/// An MS1 precursor that got selected for fragmentation. +/// The MS1 precursor that got selected for fragmentation. #[derive(Debug, Default, Clone, Copy, PartialEq)] pub struct Precursor { pub mz: f64, diff --git a/src/ms_data/quadrupole.rs b/src/ms_data/quadrupole.rs index 823aa56..84e1ced 100644 --- a/src/ms_data/quadrupole.rs +++ b/src/ms_data/quadrupole.rs @@ -1,25 +1,10 @@ -/// A type of quadrupole selection. -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum QuadrupoleEvent { - Precursor(Precursor), - // Window(Window), - // PrecursorList(Vec), - None, -} - -impl Default for QuadrupoleEvent { - fn default() -> Self { - Self::None - } -} - -impl QuadrupoleEvent { - pub fn unwrap_as_precursor(&self) -> Precursor { - match self { - QuadrupoleEvent::Precursor(precursor) => *precursor, - _ => { - panic!("Not a precursor"); - }, - } - } +/// The quadrupole settings used for fragmentation. +#[derive(Debug, Default, Clone, PartialEq)] +pub struct QuadrupoleSettings { + is_used: bool, + scan_starts: Vec, + scan_ends: Vec, + isolation_mz: Vec, + isolation_width: Vec, + collision_energy: Vec, } diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index 2c35a60..909cfeb 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -1,5 +1,7 @@ -use std::path::Path; -use timsrust::{AcquisitionType, FileReader, Frame, FrameType}; +use std::{path::Path, sync::Arc}; +use timsrust::{ + AcquisitionType, FileReader, Frame, MSLevel, QuadrupoleSettings, +}; fn get_local_directory() -> &'static Path { Path::new(std::file!()) @@ -24,8 +26,9 @@ fn tdf_reader_frames() { intensities: (0..10).map(|x| (x + 1) * 2).collect(), index: 1, rt: 0.1, - frame_type: FrameType::MS1, - acquisition: AcquisitionType::DDAPASEF, + ms_level: MSLevel::MS1, + quadrupole_settings: Arc::new(QuadrupoleSettings::default()), + acquisition_type: AcquisitionType::DDAPASEF, }, Frame { scan_offsets: vec![0, 5, 11, 18, 26], @@ -33,8 +36,9 @@ fn tdf_reader_frames() { intensities: (10..36).map(|x| (x + 1) * 2).collect(), index: 2, rt: 0.2, - frame_type: FrameType::MS2, - acquisition: AcquisitionType::DDAPASEF, + ms_level: MSLevel::MS2, + quadrupole_settings: Arc::new(QuadrupoleSettings::default()), + acquisition_type: AcquisitionType::DDAPASEF, }, Frame { scan_offsets: vec![0, 9, 19, 30, 42], @@ -42,8 +46,9 @@ fn tdf_reader_frames() { intensities: (36..78).map(|x| (x + 1) * 2).collect(), index: 3, rt: 0.3, - frame_type: FrameType::MS1, - acquisition: AcquisitionType::DDAPASEF, + ms_level: MSLevel::MS1, + quadrupole_settings: Arc::new(QuadrupoleSettings::default()), + acquisition_type: AcquisitionType::DDAPASEF, }, Frame { scan_offsets: vec![0, 13, 27, 42, 58], @@ -51,8 +56,9 @@ fn tdf_reader_frames() { intensities: (78..136).map(|x| (x + 1) * 2).collect(), index: 4, rt: 0.4, - frame_type: FrameType::MS2, - acquisition: AcquisitionType::DDAPASEF, + ms_level: MSLevel::MS2, + quadrupole_settings: Arc::new(QuadrupoleSettings::default()), + acquisition_type: AcquisitionType::DDAPASEF, }, ]; for i in 0..frames.len() { From a9064a58d66f3b1dd29fe55dbdcc852bda9943dd Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 23 Apr 2024 13:29:53 +0200 Subject: [PATCH 011/109] CHORE: partial move of writers to io --- src/file_writers.rs | 1 - src/io.rs | 2 ++ src/io/readers.rs | 1 + src/io/readers/common.rs | 1 + src/io/writers.rs | 1 + src/{file_writers/mgf_writer.rs => io/writers/mgf.rs} | 6 ------ src/lib.rs | 6 +++--- src/main.rs | 4 +--- 8 files changed, 9 insertions(+), 13 deletions(-) delete mode 100644 src/file_writers.rs create mode 100644 src/io.rs create mode 100644 src/io/readers.rs create mode 100644 src/io/readers/common.rs create mode 100644 src/io/writers.rs rename src/{file_writers/mgf_writer.rs => io/writers/mgf.rs} (92%) diff --git a/src/file_writers.rs b/src/file_writers.rs deleted file mode 100644 index e686fe2..0000000 --- a/src/file_writers.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod mgf_writer; diff --git a/src/io.rs b/src/io.rs new file mode 100644 index 0000000..7fba1ff --- /dev/null +++ b/src/io.rs @@ -0,0 +1,2 @@ +pub mod readers; +pub mod writers; diff --git a/src/io/readers.rs b/src/io/readers.rs new file mode 100644 index 0000000..34994bf --- /dev/null +++ b/src/io/readers.rs @@ -0,0 +1 @@ +pub mod common; diff --git a/src/io/readers/common.rs b/src/io/readers/common.rs new file mode 100644 index 0000000..0386907 --- /dev/null +++ b/src/io/readers/common.rs @@ -0,0 +1 @@ +pub mod tdf_blobs; diff --git a/src/io/writers.rs b/src/io/writers.rs new file mode 100644 index 0000000..b399291 --- /dev/null +++ b/src/io/writers.rs @@ -0,0 +1 @@ +pub mod mgf; diff --git a/src/file_writers/mgf_writer.rs b/src/io/writers/mgf.rs similarity index 92% rename from src/file_writers/mgf_writer.rs rename to src/io/writers/mgf.rs index 73501d6..9ae4a6a 100644 --- a/src/file_writers/mgf_writer.rs +++ b/src/io/writers/mgf.rs @@ -28,18 +28,12 @@ impl MGFWriter { } pub trait MGFFormat { - fn as_mgf_entry(&self) -> String; - fn as_mgf_header(&self) -> String; fn as_mgf_peaks(&self) -> String; } impl MGFFormat for Spectrum { - fn as_mgf_entry(&self) -> String { - format!("{}{}", self.as_mgf_header(), self.as_mgf_peaks()) - } - fn as_mgf_header(&self) -> String { let precursor = self.precursor; let title = precursor.index; diff --git a/src/lib.rs b/src/lib.rs index c86f37d..832498f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,8 +25,8 @@ mod calibration; mod domain_converters; mod errors; mod file_readers; -mod file_writers; -mod ms_data; +pub mod io; +pub mod ms_data; mod utils; pub use crate::{ @@ -35,7 +35,7 @@ pub use crate::{ }, errors::*, file_readers::FileReader, - file_writers::mgf_writer::MGFWriter, + io::writers::mgf::MGFWriter, ms_data::{ AcquisitionType, Frame, MSLevel, Precursor, QuadrupoleSettings, Spectrum, diff --git a/src/main.rs b/src/main.rs index 3c030cb..1eb7242 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,5 @@ -mod file_writers; - -use file_writers::mgf_writer::MGFFormat; use std::env; +use timsrust::io::writers::mgf::MGFFormat; use timsrust::{FileReader, MGFWriter, Spectrum}; fn quick_test() { From 93c87af4bd9ef6fbe1ebb702b21cd7e8b4fb9092 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 23 Apr 2024 13:30:35 +0200 Subject: [PATCH 012/109] CHORE: move of tdf_blobs --- src/file_readers/common/ms_data_blobs.rs | 134 ++++++++++-------- .../common/ms_data_blobs/parsers.rs | 82 ----------- src/file_readers/frame_readers/tdf_reader.rs | 18 +-- .../spectrum_readers/mini_tdf_reader.rs | 27 ++-- src/io/readers/common/tdf_blobs.rs | 87 ++++++++++++ 5 files changed, 189 insertions(+), 159 deletions(-) delete mode 100644 src/file_readers/common/ms_data_blobs/parsers.rs create mode 100644 src/io/readers/common/tdf_blobs.rs diff --git a/src/file_readers/common/ms_data_blobs.rs b/src/file_readers/common/ms_data_blobs.rs index c261b9b..46f4012 100644 --- a/src/file_readers/common/ms_data_blobs.rs +++ b/src/file_readers/common/ms_data_blobs.rs @@ -1,83 +1,32 @@ -mod parsers; - -use std::fs::File; - -use memmap2::Mmap; -use zstd::decode_all; +use crate::io::readers::common::tdf_blobs::{TdfBlob, TdfBlobReader}; use crate::{Frame, Spectrum}; -use self::parsers::parse_frame; - -#[derive(Debug, Default)] -pub struct BinFileReader { - file_offsets: Vec, - mmap: Option, -} - -impl BinFileReader { - pub fn new(file_name: String, file_offsets: Vec) -> Self { - let tdf_bin_file: File = File::open(&file_name) - .expect("File cannot be opened. Is the path correct?"); - let mmap: Option = - Some(unsafe { Mmap::map(&tdf_bin_file).unwrap() }); - Self { file_offsets, mmap } - } - - fn read_blob(&self, index: usize) -> Vec { - let offset: u64 = self.file_offsets[index as usize]; - if let Some(mmap) = self.mmap.as_ref() { - let raw_byte_count: &[u8] = - &mmap[offset as usize..(offset + 4) as usize]; - let byte_count: u32 = - u32::from_le_bytes(raw_byte_count.try_into().unwrap()); - if byte_count > 8 { - let compressed_blob: &[u8] = &mmap[(offset + 8) as usize - ..offset as usize + byte_count as usize]; - let blob: Vec = decode_all(compressed_blob).unwrap(); - return blob; - } - }; - return vec![]; - } - - pub fn size(&self) -> usize { - self.file_offsets.len() - } -} - pub trait ReadableFromBinFile { - fn parse_from_ms_data_blob(buffer: Vec, index: usize) -> Self; + fn parse_from_ms_data_blob(buffer: TdfBlob, index: usize) -> Self; - fn read_from_file(bin_file: &BinFileReader, index: usize) -> Self + fn read_from_file(bin_file: &TdfBlobReader, index: usize) -> Self where Self: Sized, { - let blob: Vec = bin_file.read_blob(index); + let blob = bin_file.get_blob(index); Self::parse_from_ms_data_blob(blob, index) } } impl ReadableFromBinFile for Spectrum { - fn parse_from_ms_data_blob(blob: Vec, index: usize) -> Self { + fn parse_from_ms_data_blob(blob: TdfBlob, index: usize) -> Self { let mut spectrum: Spectrum = Spectrum::default(); spectrum.index = index; if blob.len() == 0 { return spectrum; }; - let size: usize = blob.len() / std::mem::size_of::(); - let first: &[u8] = &blob[0 * size..1 * size]; - let second: &[u8] = &blob[1 * size..2 * size]; - let third: &[u8] = &blob[2 * size..3 * size]; - let fourth: &[u8] = &blob[3 * size..4 * size]; + let size: usize = blob.len(); let mut spectrum_data: Vec = vec![0; size]; for i in 0..size { - spectrum_data[i] = first[i] as u32; - spectrum_data[i] |= (second[i] as u32) << 8; - spectrum_data[i] |= (third[i] as u32) << 16; - spectrum_data[i] |= (fourth[i] as u32) << 24; + spectrum_data[i] = blob.get(i) } - let scan_count: usize = blob.len() / 3 / std::mem::size_of::(); + let scan_count: usize = blob.len() / 3; let tof_indices_bytes: &[u32] = &spectrum_data[..scan_count as usize * 2]; let intensities_bytes: &[u32] = @@ -94,7 +43,7 @@ impl ReadableFromBinFile for Spectrum { } impl ReadableFromBinFile for Frame { - fn parse_from_ms_data_blob(blob: Vec, index: usize) -> Self { + fn parse_from_ms_data_blob(blob: TdfBlob, index: usize) -> Self { let mut frame = Frame::default(); (frame.scan_offsets, frame.tof_indices, frame.intensities) = parse_frame(blob); @@ -102,3 +51,68 @@ impl ReadableFromBinFile for Frame { frame } } + +pub fn parse_frame(blob: TdfBlob) -> (Vec, Vec, Vec) { + let mut tof_indices: Vec = vec![]; + let mut intensities: Vec = vec![]; + let mut scan_offsets: Vec = vec![]; + if blob.len() != 0 { + let scan_count: usize = blob.get(0) as usize; + let peak_count: usize = (blob.len() - scan_count) / 2; + scan_offsets = read_scan_offsets(scan_count, peak_count, &blob); + intensities = read_intensities(scan_count, peak_count, &blob); + tof_indices = + read_tof_indices(scan_count, peak_count, &blob, &scan_offsets); + } + (scan_offsets, tof_indices, intensities) +} + +fn read_scan_offsets( + scan_count: usize, + peak_count: usize, + blob: &TdfBlob, +) -> Vec { + let mut scan_offsets: Vec = Vec::with_capacity(scan_count + 1); + scan_offsets.push(0); + for scan_index in 0..scan_count - 1 { + let index = scan_index + 1; + let scan_size: usize = (blob.get(index) / 2) as usize; + scan_offsets.push(scan_offsets[scan_index] + scan_size); + } + scan_offsets.push(peak_count); + scan_offsets +} + +fn read_intensities( + scan_count: usize, + peak_count: usize, + blob: &TdfBlob, +) -> Vec { + let mut intensities: Vec = Vec::with_capacity(peak_count); + for peak_index in 0..peak_count { + let index: usize = scan_count + 1 + 2 * peak_index; + intensities.push(blob.get(index)); + } + intensities +} + +fn read_tof_indices( + scan_count: usize, + peak_count: usize, + blob: &TdfBlob, + scan_offsets: &Vec, +) -> Vec { + let mut tof_indices: Vec = Vec::with_capacity(peak_count); + for scan_index in 0..scan_count { + let start_offset: usize = scan_offsets[scan_index]; + let end_offset: usize = scan_offsets[scan_index + 1]; + let mut current_sum: u32 = 0; + for peak_index in start_offset..end_offset { + let index = scan_count + 2 * peak_index; + let tof_index: u32 = blob.get(index); + current_sum += tof_index; + tof_indices.push(current_sum - 1); + } + } + tof_indices +} diff --git a/src/file_readers/common/ms_data_blobs/parsers.rs b/src/file_readers/common/ms_data_blobs/parsers.rs deleted file mode 100644 index bdb2918..0000000 --- a/src/file_readers/common/ms_data_blobs/parsers.rs +++ /dev/null @@ -1,82 +0,0 @@ -const U32_SIZE: usize = std::mem::size_of::(); - -#[inline(always)] -fn get_u32_from_blob(blob: &Vec, index: usize) -> u32 { - let size: usize = blob.len() / U32_SIZE; - return concatenate_four_bytes_into_u32( - blob[index], - blob[size + index], - blob[2 * size + index], - blob[3 * size + index], - ); -} - -#[inline(always)] -fn concatenate_four_bytes_into_u32(b1: u8, b2: u8, b3: u8, b4: u8) -> u32 { - (b1 as u32) | ((b2 as u32) << 8) | ((b3 as u32) << 16) | ((b4 as u32) << 24) -} - -pub fn parse_frame(blob: Vec) -> (Vec, Vec, Vec) { - let mut tof_indices: Vec = vec![]; - let mut intensities: Vec = vec![]; - let mut scan_offsets: Vec = vec![]; - if blob.len() != 0 { - let scan_count: usize = get_u32_from_blob(&blob, 0) as usize; - let peak_count: usize = (blob.len() / U32_SIZE - scan_count) / 2; - scan_offsets = read_scan_offsets(scan_count, peak_count, &blob); - intensities = read_intensities(scan_count, peak_count, &blob); - tof_indices = - read_tof_indices(scan_count, peak_count, &blob, &scan_offsets); - } - (scan_offsets, tof_indices, intensities) -} - -fn read_scan_offsets( - scan_count: usize, - peak_count: usize, - blob: &Vec, -) -> Vec { - let mut scan_offsets: Vec = Vec::with_capacity(scan_count + 1); - scan_offsets.push(0); - for scan_index in 0..scan_count - 1 { - let index = scan_index + 1; - let scan_size: usize = (get_u32_from_blob(blob, index) / 2) as usize; - scan_offsets.push(scan_offsets[scan_index] + scan_size); - } - scan_offsets.push(peak_count); - scan_offsets -} - -fn read_intensities( - scan_count: usize, - peak_count: usize, - blob: &Vec, -) -> Vec { - let mut intensities: Vec = Vec::with_capacity(peak_count); - for peak_index in 0..peak_count { - let index: usize = scan_count + 1 + 2 * peak_index; - intensities.push(get_u32_from_blob(blob, index)); - } - intensities -} - -fn read_tof_indices( - scan_count: usize, - peak_count: usize, - blob: &Vec, - scan_offsets: &Vec, -) -> Vec { - let mut tof_indices: Vec = Vec::with_capacity(peak_count); - for scan_index in 0..scan_count { - let start_offset: usize = scan_offsets[scan_index]; - let end_offset: usize = scan_offsets[scan_index + 1]; - let mut current_sum: u32 = 0; - for peak_index in start_offset..end_offset { - let index = scan_count + 2 * peak_index; - let tof_index: u32 = get_u32_from_blob(blob, index); - current_sum += tof_index; - tof_indices.push(current_sum - 1); - } - } - tof_indices -} diff --git a/src/file_readers/frame_readers/tdf_reader.rs b/src/file_readers/frame_readers/tdf_reader.rs index 9a24d85..42653ef 100644 --- a/src/file_readers/frame_readers/tdf_reader.rs +++ b/src/file_readers/frame_readers/tdf_reader.rs @@ -2,11 +2,12 @@ use { crate::{ file_readers::{ common::{ - ms_data_blobs::{BinFileReader, ReadableFromBinFile}, + ms_data_blobs::ReadableFromBinFile, sql_reader::{FrameTable, ReadableFromSql, SqlReader}, }, ReadableFrames, }, + io::readers::common::tdf_blobs::TdfBlobReader, AcquisitionType, ConvertableDomain, Frame, Frame2RtConverter, MSLevel, QuadrupoleSettings, Scan2ImConverter, Tof2MzConverter, }, @@ -18,7 +19,7 @@ use { pub struct TDFReader { pub path: String, pub tdf_sql_reader: SqlReader, - tdf_bin_reader: BinFileReader, + tdf_bin_reader: TdfBlobReader, pub rt_converter: Frame2RtConverter, pub im_converter: Scan2ImConverter, pub mz_converter: Tof2MzConverter, @@ -37,10 +38,11 @@ impl TDFReader { .join("analysis.tdf_bin") .to_string_lossy() .to_string(); - let tdf_bin_reader: BinFileReader = BinFileReader::new( + let tdf_bin_reader: TdfBlobReader = TdfBlobReader::new( String::from(&file_name), - frame_table.offsets.clone(), - ); + frame_table.offsets.iter().map(|x| *x as usize).collect(), + ) + .unwrap(); let ms_levels: Vec = frame_table .msms_type .iter() @@ -91,14 +93,14 @@ impl ReadableFrames for TDFReader { } fn read_all_frames(&self) -> Vec { - (0..self.tdf_bin_reader.size()) + (0..self.tdf_bin_reader.len()) .into_par_iter() .map(|index| self.read_single_frame(index)) .collect() } fn read_all_ms1_frames(&self) -> Vec { - (0..self.tdf_bin_reader.size()) + (0..self.tdf_bin_reader.len()) .into_par_iter() .map(|index| match self.ms_levels[index] { MSLevel::MS1 => self.read_single_frame(index), @@ -108,7 +110,7 @@ impl ReadableFrames for TDFReader { } fn read_all_ms2_frames(&self) -> Vec { - (0..self.tdf_bin_reader.size()) + (0..self.tdf_bin_reader.len()) .into_par_iter() .map(|index| match self.ms_levels[index] { MSLevel::MS2 => self.read_single_frame(index), diff --git a/src/file_readers/spectrum_readers/mini_tdf_reader.rs b/src/file_readers/spectrum_readers/mini_tdf_reader.rs index e22f1cf..212e2a7 100644 --- a/src/file_readers/spectrum_readers/mini_tdf_reader.rs +++ b/src/file_readers/spectrum_readers/mini_tdf_reader.rs @@ -1,10 +1,13 @@ -use crate::file_readers::FileFormatError; +use crate::{ + file_readers::FileFormatError, + io::readers::common::tdf_blobs::TdfBlobReader, +}; use std::fs; use { crate::{ file_readers::{ common::{ - ms_data_blobs::{BinFileReader, ReadableFromBinFile}, + ms_data_blobs::ReadableFromBinFile, parquet_reader::read_parquet_precursors, }, ReadableSpectra, @@ -21,7 +24,7 @@ pub struct MiniTDFReader { parquet_file_name: String, precursors: Vec, offsets: Vec, - frame_reader: BinFileReader, + frame_reader: Option, } fn find_ms2spectrum_file( @@ -64,13 +67,12 @@ impl MiniTDFReader { let parquet_file_name: String = String::default(); let precursors: Vec = Vec::default(); let offsets: Vec = Vec::default(); - let frame_reader: BinFileReader = BinFileReader::default(); let mut reader: MiniTDFReader = MiniTDFReader { path_name, parquet_file_name, precursors, offsets, - frame_reader, + frame_reader: None, }; reader.read_parquet_file_name(); reader.read_precursors(); @@ -97,15 +99,22 @@ impl MiniTDFReader { find_ms2spectrum_file(&self.path_name, "bin".to_owned()).unwrap(); path.push(ms2_bin_file); let file_name: String = path.to_string_lossy().into_owned(); - self.frame_reader = - BinFileReader::new(String::from(&file_name), self.offsets.clone()); + self.frame_reader = Some( + TdfBlobReader::new( + String::from(&file_name), + self.offsets.iter().map(|x| *x as usize).collect(), + ) + .unwrap(), + ); } } impl ReadableSpectra for MiniTDFReader { fn read_single_spectrum(&self, index: usize) -> Spectrum { - let mut spectrum: Spectrum = - Spectrum::read_from_file(&self.frame_reader, index); + let mut spectrum: Spectrum = Spectrum::read_from_file( + &self.frame_reader.as_ref().unwrap(), + index, + ); spectrum.precursor = self.precursors[index]; spectrum.index = self.precursors[index].index; spectrum diff --git a/src/io/readers/common/tdf_blobs.rs b/src/io/readers/common/tdf_blobs.rs new file mode 100644 index 0000000..a188a5f --- /dev/null +++ b/src/io/readers/common/tdf_blobs.rs @@ -0,0 +1,87 @@ +use memmap2::Mmap; +use std::fs::File; +use std::io; +use zstd::decode_all; + +const U32_SIZE: usize = std::mem::size_of::(); + +#[derive(Debug, Default)] +pub struct TdfBlob { + bytes: Vec, +} + +impl TdfBlob { + pub fn get(&self, index: usize) -> u32 { + Self::concatenate_bytes( + self.bytes[index], + self.bytes[index + self.len()], + self.bytes[index + 2 * self.len()], + self.bytes[index + 3 * self.len()], + ) + } + + #[inline(always)] + fn concatenate_bytes(b1: u8, b2: u8, b3: u8, b4: u8) -> u32 { + b1 as u32 + | ((b2 as u32) << 8) + | ((b3 as u32) << 16) + | ((b4 as u32) << 24) + } + + pub fn len(&self) -> usize { + self.bytes.len() / U32_SIZE + } +} + +#[derive(Debug)] +pub struct TdfBlobReader { + file_name: String, + file_offsets: Vec, + mmap: Mmap, +} + +impl TdfBlobReader { + pub fn new( + file_name: String, + file_offsets: Vec, + ) -> Result { + let file: File = File::open(&file_name)?; + let mmap: Mmap = unsafe { Mmap::map(&file)? }; + Ok(Self { + file_name, + file_offsets, + mmap, + }) + } + + pub fn get_blob(&self, index: usize) -> TdfBlob { + if index >= self.len() { + return TdfBlob::default(); + } + let offset: usize = self.file_offsets[index as usize]; + let byte_count: u32 = self.get_byte_count(offset); + if byte_count <= 8 { + return TdfBlob::default(); + } + let compressed_bytes: &[u8] = &self.mmap + [(offset + 8) as usize..offset as usize + byte_count as usize]; + match decode_all(compressed_bytes) { + Ok(bytes) => TdfBlob { bytes }, + Err(_) => TdfBlob::default(), + } + } + + pub fn get_file_name(self) -> String { + self.file_name + } + + fn get_byte_count(&self, offset: usize) -> u32 { + let raw_byte_count: &[u8] = + &self.mmap[offset as usize..(offset + 4) as usize]; + u32::from_le_bytes(raw_byte_count.try_into().unwrap()) + } + + pub fn len(&self) -> usize { + self.file_offsets.len() + } +} From ed8148a41563cc3c1d305749c324b4ec078a62d1 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 23 Apr 2024 14:07:02 +0200 Subject: [PATCH 013/109] DOCS: updated package to work in modules rather than forwrding structs --- src/calibration.rs | 4 ++-- src/domain_converters.rs | 1 + src/file_readers.rs | 7 +++--- src/file_readers/common/ms_data_blobs.rs | 2 +- src/file_readers/common/parquet_reader.rs | 2 +- .../common/sql_reader/metadata.rs | 2 +- src/file_readers/frame_readers.rs | 2 +- src/file_readers/frame_readers/tdf_reader.rs | 7 ++++-- src/file_readers/spectrum_readers.rs | 2 +- .../spectrum_readers/dda_reader.rs | 7 +++--- .../spectrum_readers/dda_reader/precursors.rs | 3 ++- .../spectrum_readers/mini_tdf_reader.rs | 2 +- src/io.rs | 2 ++ src/io/writers/mgf.rs | 2 +- src/lib.rs | 19 ++++------------ src/main.rs | 4 ++-- src/ms_data.rs | 22 ++++++++++--------- src/ms_data/spectra.rs | 10 +++++---- tests/frame_readers.rs | 3 ++- tests/spectrum_readers.rs | 5 ++++- 20 files changed, 57 insertions(+), 51 deletions(-) diff --git a/src/calibration.rs b/src/calibration.rs index 289f09c..2e08ad2 100644 --- a/src/calibration.rs +++ b/src/calibration.rs @@ -1,6 +1,6 @@ use crate::{ - ms_data::spectra::RawSpectrum, - Precursor, {ConvertableDomain, Tof2MzConverter}, + domain_converters::{ConvertableDomain, Tof2MzConverter}, + ms_data::{Precursor, RawSpectrum}, }; pub struct Tof2MzCalibrator; diff --git a/src/domain_converters.rs b/src/domain_converters.rs index dd3652c..cda5649 100644 --- a/src/domain_converters.rs +++ b/src/domain_converters.rs @@ -1,3 +1,4 @@ +//! Allows conversions between domains (e.g. Time of Flight and m/z) mod frame_to_rt; mod scan_to_im; mod tof_to_mz; diff --git a/src/file_readers.rs b/src/file_readers.rs index 8ec606d..883fb06 100644 --- a/src/file_readers.rs +++ b/src/file_readers.rs @@ -1,5 +1,6 @@ use crate::{ - Error, {Frame2RtConverter, Scan2ImConverter, Tof2MzConverter}, + domain_converters::{Frame2RtConverter, Scan2ImConverter, Tof2MzConverter}, + Error, }; mod common; @@ -12,14 +13,14 @@ use { file_formats::FileFormat, frame_readers::ReadableFrames, spectrum_readers::ReadableSpectra, }, - crate::{Frame, Spectrum}, + crate::ms_data::{Frame, Spectrum}, }; pub use file_formats::FileFormatError; use self::frame_readers::tdf_reader::TDFReader; -/// A reader to read [frames](crate::Frame) and [spectra](crate::Spectrum). +/// A reader to read [frames](crate::ms_data::Frame) and [spectra](crate::ms_data::Spectrum). pub struct FileReader { format: FileFormat, } diff --git a/src/file_readers/common/ms_data_blobs.rs b/src/file_readers/common/ms_data_blobs.rs index 46f4012..cb33803 100644 --- a/src/file_readers/common/ms_data_blobs.rs +++ b/src/file_readers/common/ms_data_blobs.rs @@ -1,6 +1,6 @@ use crate::io::readers::common::tdf_blobs::{TdfBlob, TdfBlobReader}; -use crate::{Frame, Spectrum}; +use crate::ms_data::{Frame, Spectrum}; pub trait ReadableFromBinFile { fn parse_from_ms_data_blob(buffer: TdfBlob, index: usize) -> Self; diff --git a/src/file_readers/common/parquet_reader.rs b/src/file_readers/common/parquet_reader.rs index 531ad02..ba78f7a 100644 --- a/src/file_readers/common/parquet_reader.rs +++ b/src/file_readers/common/parquet_reader.rs @@ -1,7 +1,7 @@ use parquet::file::reader::{FileReader, SerializedFileReader}; use std::fs::File; -use crate::Precursor; +use crate::ms_data::Precursor; pub fn read_parquet_precursors( parquet_file_name: &String, diff --git a/src/file_readers/common/sql_reader/metadata.rs b/src/file_readers/common/sql_reader/metadata.rs index 8163d0c..047d3ba 100644 --- a/src/file_readers/common/sql_reader/metadata.rs +++ b/src/file_readers/common/sql_reader/metadata.rs @@ -1,6 +1,6 @@ use rusqlite::{Connection, Statement}; -use crate::{Scan2ImConverter, Tof2MzConverter}; +use crate::domain_converters::{Scan2ImConverter, Tof2MzConverter}; use super::{get_sql_connection, ReadableFromSql, SqlReader}; diff --git a/src/file_readers/frame_readers.rs b/src/file_readers/frame_readers.rs index 939f2e0..94f0db3 100644 --- a/src/file_readers/frame_readers.rs +++ b/src/file_readers/frame_readers.rs @@ -1,4 +1,4 @@ -use crate::Frame; +use crate::ms_data::Frame; use self::tdf_reader::TDFReader; diff --git a/src/file_readers/frame_readers/tdf_reader.rs b/src/file_readers/frame_readers/tdf_reader.rs index 42653ef..ec5bdfb 100644 --- a/src/file_readers/frame_readers/tdf_reader.rs +++ b/src/file_readers/frame_readers/tdf_reader.rs @@ -1,5 +1,9 @@ use { crate::{ + domain_converters::{ + ConvertableDomain, Frame2RtConverter, Scan2ImConverter, + Tof2MzConverter, + }, file_readers::{ common::{ ms_data_blobs::ReadableFromBinFile, @@ -8,8 +12,7 @@ use { ReadableFrames, }, io::readers::common::tdf_blobs::TdfBlobReader, - AcquisitionType, ConvertableDomain, Frame, Frame2RtConverter, MSLevel, - QuadrupoleSettings, Scan2ImConverter, Tof2MzConverter, + ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}, }, rayon::prelude::*, std::{path::Path, sync::Arc}, diff --git a/src/file_readers/spectrum_readers.rs b/src/file_readers/spectrum_readers.rs index 24d6ff5..2de5277 100644 --- a/src/file_readers/spectrum_readers.rs +++ b/src/file_readers/spectrum_readers.rs @@ -1,4 +1,4 @@ -use crate::Spectrum; +use crate::ms_data::Spectrum; use self::{dda_reader::DDASpectrumReader, mini_tdf_reader::MiniTDFReader}; diff --git a/src/file_readers/spectrum_readers/dda_reader.rs b/src/file_readers/spectrum_readers/dda_reader.rs index 8b3ede0..66d933c 100644 --- a/src/file_readers/spectrum_readers/dda_reader.rs +++ b/src/file_readers/spectrum_readers/dda_reader.rs @@ -2,13 +2,14 @@ mod precursors; use crate::{ calibration::Tof2MzCalibrator, + domain_converters::Tof2MzConverter, file_readers::{ frame_readers::{tdf_reader::TDFReader, ReadableFrames}, ReadableSpectra, }, - ms_data::spectra::{self, RawSpectrum, RawSpectrumProcessor}, + ms_data::{Frame, Spectrum}, + ms_data::{RawProcessedSpectrumState, RawSpectrum, RawSpectrumProcessor}, utils::vec_utils::group_and_sum, - Frame, Spectrum, Tof2MzConverter, }; use rayon::prelude::*; @@ -75,7 +76,7 @@ impl DDASpectrumReader { let raw_spectrum = RawSpectrum { tof_indices: raw_tof_indices, intensities: raw_intensities, - processed_state: spectra::RawProcessedSpectrumState::Profile, + processed_state: RawProcessedSpectrumState::Profile, index: index, }; let spectrum_processer = RawSpectrumProcessor { raw_spectrum }; diff --git a/src/file_readers/spectrum_readers/dda_reader/precursors.rs b/src/file_readers/spectrum_readers/dda_reader/precursors.rs index 838bc26..bf013b7 100644 --- a/src/file_readers/spectrum_readers/dda_reader/precursors.rs +++ b/src/file_readers/spectrum_readers/dda_reader/precursors.rs @@ -1,14 +1,15 @@ use rayon::prelude::*; use crate::{ + domain_converters::ConvertableDomain, file_readers::{ common::sql_reader::{ PasefFrameMsMsTable, PrecursorTable, ReadableFromSql, }, frame_readers::tdf_reader::TDFReader, }, + ms_data::Precursor, utils::vec_utils::argsort, - ConvertableDomain, Precursor, }; #[derive(Debug)] diff --git a/src/file_readers/spectrum_readers/mini_tdf_reader.rs b/src/file_readers/spectrum_readers/mini_tdf_reader.rs index 212e2a7..db9cca7 100644 --- a/src/file_readers/spectrum_readers/mini_tdf_reader.rs +++ b/src/file_readers/spectrum_readers/mini_tdf_reader.rs @@ -12,7 +12,7 @@ use { }, ReadableSpectra, }, - Precursor, Spectrum, + ms_data::{Precursor, Spectrum}, }, rayon::prelude::*, std::path::PathBuf, diff --git a/src/io.rs b/src/io.rs index 7fba1ff..74b4093 100644 --- a/src/io.rs +++ b/src/io.rs @@ -1,2 +1,4 @@ +//! Handles all input and output + pub mod readers; pub mod writers; diff --git a/src/io/writers/mgf.rs b/src/io/writers/mgf.rs index 9ae4a6a..ab27b3c 100644 --- a/src/io/writers/mgf.rs +++ b/src/io/writers/mgf.rs @@ -2,7 +2,7 @@ use std::fs::File; use std::io::Write; use std::path::Path; -use crate::Spectrum; +use crate::ms_data::Spectrum; pub struct MGFWriter {} diff --git a/src/lib.rs b/src/lib.rs index 832498f..fafc8a4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,8 +4,8 @@ //! //! Two primary data types are exposed: //! -//! * [Spectra](crate::Spectrum): A traditional representation that expresses intensitites in function of mz values for a given precursor. -//! * [Frames](crate::Frame): All recorded data from a single TIMS elution (i.e. at one specific retention_time). +//! * [Spectra](crate::ms_data::Spectrum): A traditional representation that expresses intensitites in function of mz values for a given precursor. +//! * [Frames](crate::ms_data::Frame): All recorded data from a single TIMS elution (i.e. at one specific retention_time). //! //! ## File formats //! @@ -22,22 +22,11 @@ //! * *.ms2spectrum.parquet mod calibration; -mod domain_converters; +pub mod domain_converters; mod errors; mod file_readers; pub mod io; pub mod ms_data; mod utils; -pub use crate::{ - domain_converters::{ - ConvertableDomain, Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, - }, - errors::*, - file_readers::FileReader, - io::writers::mgf::MGFWriter, - ms_data::{ - AcquisitionType, Frame, MSLevel, Precursor, QuadrupoleSettings, - Spectrum, - }, -}; +pub use crate::{errors::*, file_readers::FileReader}; diff --git a/src/main.rs b/src/main.rs index 1eb7242..3bc348a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,6 @@ use std::env; use timsrust::io::writers::mgf::MGFFormat; -use timsrust::{FileReader, MGFWriter, Spectrum}; +use timsrust::{ms_data::Spectrum, FileReader}; fn quick_test() { let args: Vec = env::args().collect(); @@ -25,7 +25,7 @@ fn quick_test() { dda_spectra[spectrum_index].intensities ); // println!("{:?}", dda_spectra[spectrum_index].as_mgf_entry()); - MGFWriter::write_spectra(d_folder_name, &dda_spectra); + // MGFWriter::write_spectra(d_folder_name, &dda_spectra); } fn main() { diff --git a/src/ms_data.rs b/src/ms_data.rs index 9ed5873..460938d 100644 --- a/src/ms_data.rs +++ b/src/ms_data.rs @@ -1,11 +1,13 @@ -pub mod acquisition; -pub mod frames; -pub mod precursors; -pub mod quadrupole; -pub mod spectra; +//! Data structures that represent MS data -pub use acquisition::AcquisitionType; -pub use frames::{Frame, MSLevel}; -pub use precursors::Precursor; -pub use quadrupole::QuadrupoleSettings; -pub use spectra::Spectrum; +mod acquisition; +mod frames; +mod precursors; +mod quadrupole; +mod spectra; + +pub use acquisition::*; +pub use frames::*; +pub use precursors::*; +pub use quadrupole::*; +pub use spectra::*; diff --git a/src/ms_data/spectra.rs b/src/ms_data/spectra.rs index 2ec334e..7828f01 100644 --- a/src/ms_data/spectra.rs +++ b/src/ms_data/spectra.rs @@ -1,9 +1,11 @@ use crate::{ + domain_converters::{ConvertableDomain, Tof2MzConverter}, utils::vec_utils::{filter_with_mask, find_sparse_local_maxima_mask}, - ConvertableDomain, Precursor, Tof2MzConverter, }; -pub struct RawSpectrumProcessor { +use super::Precursor; + +pub(crate) struct RawSpectrumProcessor { pub raw_spectrum: RawSpectrum, } @@ -84,7 +86,7 @@ impl RawSpectrumProcessor { } #[derive(Debug, PartialEq, Clone)] -pub enum RawProcessedSpectrumState { +pub(crate) enum RawProcessedSpectrumState { Profile, SmoothedProfile, Centroided, @@ -98,7 +100,7 @@ impl Default for RawProcessedSpectrumState { } #[derive(Debug, PartialEq, Default, Clone)] -pub struct RawSpectrum { +pub(crate) struct RawSpectrum { pub tof_indices: Vec, pub intensities: Vec, pub processed_state: RawProcessedSpectrumState, diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index 909cfeb..7c46561 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -1,6 +1,7 @@ use std::{path::Path, sync::Arc}; use timsrust::{ - AcquisitionType, FileReader, Frame, MSLevel, QuadrupoleSettings, + ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}, + FileReader, }; fn get_local_directory() -> &'static Path { diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 373d954..fe149c2 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -1,5 +1,8 @@ use std::path::Path; -use timsrust::{FileReader, Precursor, Spectrum}; +use timsrust::{ + ms_data::{Precursor, Spectrum}, + FileReader, +}; fn get_local_directory() -> &'static Path { Path::new(std::file!()) From 38418d4940457a9e497dc9b37218b0923b9a54a2 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Apr 2024 15:48:30 +0200 Subject: [PATCH 014/109] CHORE: rustfmt now has width 100 --- rustfmt.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rustfmt.toml b/rustfmt.toml index ec51fcd..b92a31d 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -2,5 +2,5 @@ # Set the maximum line width to 100 characters match_block_trailing_comma = true -max_width = 80 +max_width = 100 newline_style = "Unix" From cbbb37aa68fece31a4314df923ac742aa184b66b Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Apr 2024 15:49:36 +0200 Subject: [PATCH 015/109] CHORE: undo rustfmt --- rustfmt.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rustfmt.toml b/rustfmt.toml index b92a31d..ec51fcd 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -2,5 +2,5 @@ # Set the maximum line width to 100 characters match_block_trailing_comma = true -max_width = 100 +max_width = 80 newline_style = "Unix" From 997385bd06c73a7d2a7209de1721e634cc67f9ab Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Apr 2024 15:51:34 +0200 Subject: [PATCH 016/109] FEAT: new sql reader module --- src/io/readers/common/sql_reader.rs | 38 +++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 src/io/readers/common/sql_reader.rs diff --git a/src/io/readers/common/sql_reader.rs b/src/io/readers/common/sql_reader.rs new file mode 100644 index 0000000..0abe977 --- /dev/null +++ b/src/io/readers/common/sql_reader.rs @@ -0,0 +1,38 @@ +use std::path::{Path, PathBuf}; + +use rusqlite::Connection; + +#[derive(Debug)] +pub struct SqlReader { + path: PathBuf, + connection: Connection, +} + +impl SqlReader { + pub fn open(file_name: impl AsRef) -> Result { + let path = file_name.as_ref().to_path_buf(); + let connection = Connection::open(&path)?; + Ok(Self { path, connection }) + } +} + +pub trait SqlReadable { + fn get_sql_query() -> String; + + fn from_sql_row(row: &rusqlite::Row) -> Self; + + fn from_sql_reader(reader: &SqlReader) -> Result, SqlError> + where + Self: Sized, + { + let query = Self::get_sql_query(); + let mut stmt = reader.connection.prepare(&query)?; + let rows = stmt.query_map([], |row| Ok(Self::from_sql_row(row)))?; + let result = rows.collect::, _>>()?; + Ok(result) + } +} + +#[derive(thiserror::Error, Debug)] +#[error("SqlError: {0}")] +pub struct SqlError(#[from] rusqlite::Error); From 48d18f874c933f3b41a4fa8af95913701bc62d2b Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Apr 2024 15:52:15 +0200 Subject: [PATCH 017/109] FIX: simplfy rt reading of precursors --- src/file_readers/spectrum_readers/dda_reader/precursors.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/file_readers/spectrum_readers/dda_reader/precursors.rs b/src/file_readers/spectrum_readers/dda_reader/precursors.rs index bf013b7..c910b8c 100644 --- a/src/file_readers/spectrum_readers/dda_reader/precursors.rs +++ b/src/file_readers/spectrum_readers/dda_reader/precursors.rs @@ -30,7 +30,7 @@ impl PrecursorReader { PasefFrameMsMsTable::from_sql(&tdf_reader.tdf_sql_reader); let precursor_table: PrecursorTable = PrecursorTable::from_sql(&tdf_reader.tdf_sql_reader); - let retention_times: Vec = tdf_reader.frame_table.rt.clone(); + // let retention_times: Vec = tdf_reader.frame_table.rt.clone(); let collision_energies = tdf_reader .tdf_sql_reader .get_data_from_sql(&select_collision_energy_sql); @@ -41,7 +41,7 @@ impl PrecursorReader { let scan_id: f64 = precursor_table.scan_average[index]; Precursor { mz: precursor_table.mz[index], - rt: retention_times[frame_id], + rt: tdf_reader.rt_converter.convert(frame_id as u32), im: tdf_reader.im_converter.convert(scan_id), charge: precursor_table.charge[index], intensity: precursor_table.intensity[index], From 34f5ae6081ae9bbffb44f5b0a85b3cf3160d782d Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Apr 2024 15:52:48 +0200 Subject: [PATCH 018/109] FEAT: use new sql reader format to read frames --- src/io/readers/common/sql_frames.rs | 82 +++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 src/io/readers/common/sql_frames.rs diff --git a/src/io/readers/common/sql_frames.rs b/src/io/readers/common/sql_frames.rs new file mode 100644 index 0000000..509c2b8 --- /dev/null +++ b/src/io/readers/common/sql_frames.rs @@ -0,0 +1,82 @@ +use super::sql_reader::SqlReadable; + +#[derive(Debug, Default, PartialEq)] +pub struct SqlFrame { + pub id: usize, + pub scan_mode: u8, + pub msms_type: u8, + pub peak_count: u64, + pub rt: f64, + pub scan_count: u64, + pub binary_offset: usize, +} + +impl SqlReadable for SqlFrame { + fn get_sql_query() -> String { + "SELECT Id, ScanMode, MsMsType, NumPeaks, Time, NumScans, TimsId FROM Frames".to_string() + } + + fn from_sql_row(row: &rusqlite::Row) -> Self { + SqlFrame { + id: row.get(0).unwrap_or_default(), + scan_mode: row.get(1).unwrap_or_default(), + msms_type: row.get(2).unwrap_or_default(), + peak_count: row.get(3).unwrap_or_default(), + rt: row.get(4).unwrap_or_default(), + scan_count: row.get(5).unwrap_or_default(), + binary_offset: row.get(6).unwrap_or_default(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::io::readers::common::sql_reader::SqlReader; + + #[test] + fn test_get() { + let reader = + SqlReader::open("tests/test.d/analysis.tdf".to_string()).unwrap(); + let sql_frames = SqlFrame::from_sql_reader(&reader).unwrap(); + let target = [ + SqlFrame { + id: 1, + scan_mode: 8, + msms_type: 0, + peak_count: 10, + rt: 0.1, + scan_count: 4, + binary_offset: 0, + }, + SqlFrame { + id: 2, + scan_mode: 8, + msms_type: 8, + peak_count: 26, + rt: 0.2, + scan_count: 4, + binary_offset: 48, + }, + SqlFrame { + id: 3, + scan_mode: 8, + msms_type: 0, + peak_count: 42, + rt: 0.3, + scan_count: 4, + binary_offset: 130, + }, + SqlFrame { + id: 4, + scan_mode: 8, + msms_type: 8, + peak_count: 58, + rt: 0.4, + scan_count: 4, + binary_offset: 235, + }, + ]; + assert_eq!(sql_frames, target); + } +} From 34f99a85a5f157a036364f499cd647a488dc1749 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Apr 2024 15:54:19 +0200 Subject: [PATCH 019/109] FEAT: update TdfBlob with error handling and option to use non indexed reader --- src/io/readers/common/tdf_blobs.rs | 171 ++++++++++++++++++++++++----- 1 file changed, 143 insertions(+), 28 deletions(-) diff --git a/src/io/readers/common/tdf_blobs.rs b/src/io/readers/common/tdf_blobs.rs index a188a5f..6cf17b2 100644 --- a/src/io/readers/common/tdf_blobs.rs +++ b/src/io/readers/common/tdf_blobs.rs @@ -1,9 +1,11 @@ use memmap2::Mmap; use std::fs::File; use std::io; +use std::path::{Path, PathBuf}; use zstd::decode_all; const U32_SIZE: usize = std::mem::size_of::(); +const HEADER_SIZE: usize = 2; #[derive(Debug, Default)] pub struct TdfBlob { @@ -31,57 +33,170 @@ impl TdfBlob { pub fn len(&self) -> usize { self.bytes.len() / U32_SIZE } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } } #[derive(Debug)] pub struct TdfBlobReader { - file_name: String, - file_offsets: Vec, + path: PathBuf, mmap: Mmap, + global_file_offset: usize, } impl TdfBlobReader { - pub fn new( - file_name: String, - file_offsets: Vec, - ) -> Result { - let file: File = File::open(&file_name)?; + pub fn new(file_name: impl AsRef) -> Result { + let path = file_name.as_ref().to_path_buf(); + let file: File = File::open(&path)?; let mmap: Mmap = unsafe { Mmap::map(&file)? }; Ok(Self { - file_name, - file_offsets, + path, mmap, + global_file_offset: 0, }) } - pub fn get_blob(&self, index: usize) -> TdfBlob { - if index >= self.len() { - return TdfBlob::default(); - } - let offset: usize = self.file_offsets[index as usize]; - let byte_count: u32 = self.get_byte_count(offset); - if byte_count <= 8 { - return TdfBlob::default(); - } - let compressed_bytes: &[u8] = &self.mmap - [(offset + 8) as usize..offset as usize + byte_count as usize]; + pub fn get_blob(&self, offset: usize) -> Result { + let offset = self.get_offset(offset)?; + let byte_count: usize = self.get_byte_count(offset)?; + let compressed_bytes: &[u8] = + self.get_compressed_bytes(offset, byte_count); match decode_all(compressed_bytes) { - Ok(bytes) => TdfBlob { bytes }, - Err(_) => TdfBlob::default(), + Ok(bytes) => Ok(TdfBlob { bytes }), + Err(_) => Err(TdfBlobError::Decompression(self.path.clone())), } } - pub fn get_file_name(self) -> String { - self.file_name + fn get_offset(&self, offset: usize) -> Result { + let offset = self.global_file_offset + offset; + self.check_valid_offset(offset) } - fn get_byte_count(&self, offset: usize) -> u32 { + fn check_valid_offset(&self, offset: usize) -> Result { + if (offset + U32_SIZE) >= self.mmap.len() { + return Err(TdfBlobError::Offset(offset, self.path.clone())); + } + Ok(offset) + } + + fn get_byte_count(&self, offset: usize) -> Result { let raw_byte_count: &[u8] = - &self.mmap[offset as usize..(offset + 4) as usize]; - u32::from_le_bytes(raw_byte_count.try_into().unwrap()) + &self.mmap[offset as usize..(offset + U32_SIZE) as usize]; + let byte_count = + u32::from_le_bytes(raw_byte_count.try_into().unwrap()) as usize; + self.check_valid_byte_count(byte_count, offset) + } + + fn check_valid_byte_count( + &self, + byte_count: usize, + offset: usize, + ) -> Result { + if (byte_count <= (HEADER_SIZE * U32_SIZE)) + || ((offset + byte_count) > self.len()) + { + return Err(TdfBlobError::ByteCount( + byte_count, + offset, + self.path.clone(), + )); + } + Ok(byte_count) + } + + fn get_compressed_bytes(&self, offset: usize, byte_count: usize) -> &[u8] { + &self.mmap[(offset + HEADER_SIZE * U32_SIZE)..offset + byte_count] } pub fn len(&self) -> usize { - self.file_offsets.len() + self.mmap.len() } } + +#[derive(Debug)] +pub struct IndexedTdfBlobReader { + blob_reader: TdfBlobReader, + binary_offsets: Vec, +} + +impl IndexedTdfBlobReader { + pub fn new( + file_name: impl AsRef, + binary_offsets: Vec, + ) -> Result { + Ok(Self { + binary_offsets, + blob_reader: TdfBlobReader::new(file_name)?, + }) + } + + pub fn get_blob(&self, index: usize) -> Result { + self.check_valid_index(index)?; + let offset = self.binary_offsets[index]; + self.blob_reader.get_blob(offset) + } + + fn check_valid_index(&self, index: usize) -> Result { + if index >= self.len() { + return Err(TdfBlobError::Index( + index, + self.blob_reader.path.clone(), + )); + } + Ok(index) + } + + pub fn len(&self) -> usize { + self.binary_offsets.len() + } +} + +pub trait TdfBlobParsable { + fn set_tdf_blob_index(&mut self, index: usize); + + fn update_from_tdf_blob(&mut self, blob: TdfBlob); + + fn update_from_tdf_blob_reader( + &mut self, + bin_file: &IndexedTdfBlobReader, + index: usize, + ) { + let blob = bin_file.get_blob(index).unwrap(); + if !blob.is_empty() { + self.update_from_tdf_blob(blob) + } + } + + fn create_from_tdf_blob_reader( + bin_file: &IndexedTdfBlobReader, + index: usize, + ) -> Self + where + Self: Default, + { + let mut object = Self::default(); + object.set_tdf_blob_index(index); + object.update_from_tdf_blob_reader(bin_file, index); + object + } +} + +// #[derive(thiserror::Error, Debug)] +// #[error("TdfBlobError: {0}")] +// pub struct TdfBlobError(#[from] std::io::Error); + +#[derive(Debug, thiserror::Error)] +pub enum TdfBlobError { + #[error("Cannot read or mmap file: {0}")] + IO(#[from] io::Error), + #[error("Index {0} is invalid for file {1}")] + Index(usize, PathBuf), + #[error("Offset {0} is invalid for file {1}")] + Offset(usize, PathBuf), + #[error("Byte count {0} from offset {1} is invalid for file {2}")] + ByteCount(usize, usize, PathBuf), + #[error("Zstd decompression failed for file {0}")] + Decompression(PathBuf), +} From 4ed45576765805e620d508d819c1da20ea70c4e7 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Apr 2024 15:59:25 +0200 Subject: [PATCH 020/109] FEAT: included sql readers in io::common --- src/io/readers/common.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/io/readers/common.rs b/src/io/readers/common.rs index 0386907..8f6178f 100644 --- a/src/io/readers/common.rs +++ b/src/io/readers/common.rs @@ -1 +1,3 @@ +pub mod sql_frames; +pub mod sql_reader; pub mod tdf_blobs; From 544cf6d24ea5d4a4765b539728a0f0af3a6f1841 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Apr 2024 16:02:55 +0200 Subject: [PATCH 021/109] FEAT: removed unused frame table --- .../common/sql_reader/tables/frames.rs | 31 ------------------- 1 file changed, 31 deletions(-) delete mode 100644 src/file_readers/common/sql_reader/tables/frames.rs diff --git a/src/file_readers/common/sql_reader/tables/frames.rs b/src/file_readers/common/sql_reader/tables/frames.rs deleted file mode 100644 index 56508e2..0000000 --- a/src/file_readers/common/sql_reader/tables/frames.rs +++ /dev/null @@ -1,31 +0,0 @@ -use crate::file_readers::common::sql_reader::{ReadableFromSql, SqlReader}; - -#[derive(Debug)] -pub struct FrameTable { - pub id: Vec, - pub scan_mode: Vec, - pub msms_type: Vec, - pub peak_count: Vec, - pub rt: Vec, - pub scan_count: Vec, - pub offsets: Vec, -} - -impl ReadableFromSql for FrameTable { - fn from_sql(sql_reader: &SqlReader) -> Self { - let table_name: &str = "Frames"; - FrameTable { - id: sql_reader.read_column_from_table("Id", table_name), - scan_mode: sql_reader - .read_column_from_table("ScanMode", table_name), - msms_type: sql_reader - .read_column_from_table("MsMsType", table_name), - peak_count: sql_reader - .read_column_from_table("NumPeaks", table_name), - rt: sql_reader.read_column_from_table("Time", table_name), - scan_count: sql_reader - .read_column_from_table("NumScans", table_name), - offsets: sql_reader.read_column_from_table("TimsId", "Frames"), - } - } -} From d15469b5d777f286a2e0d60ce46b624340efe082 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Apr 2024 16:03:45 +0200 Subject: [PATCH 022/109] CHORE fully deprecated ms_data_blobs module in favor of TdfBlobs --- src/file_readers/common.rs | 1 - src/file_readers/common/ms_data_blobs.rs | 118 ----------------------- 2 files changed, 119 deletions(-) delete mode 100644 src/file_readers/common/ms_data_blobs.rs diff --git a/src/file_readers/common.rs b/src/file_readers/common.rs index 4d34103..2cf18f5 100644 --- a/src/file_readers/common.rs +++ b/src/file_readers/common.rs @@ -1,3 +1,2 @@ -pub mod ms_data_blobs; pub mod parquet_reader; pub mod sql_reader; diff --git a/src/file_readers/common/ms_data_blobs.rs b/src/file_readers/common/ms_data_blobs.rs deleted file mode 100644 index cb33803..0000000 --- a/src/file_readers/common/ms_data_blobs.rs +++ /dev/null @@ -1,118 +0,0 @@ -use crate::io::readers::common::tdf_blobs::{TdfBlob, TdfBlobReader}; - -use crate::ms_data::{Frame, Spectrum}; - -pub trait ReadableFromBinFile { - fn parse_from_ms_data_blob(buffer: TdfBlob, index: usize) -> Self; - - fn read_from_file(bin_file: &TdfBlobReader, index: usize) -> Self - where - Self: Sized, - { - let blob = bin_file.get_blob(index); - Self::parse_from_ms_data_blob(blob, index) - } -} - -impl ReadableFromBinFile for Spectrum { - fn parse_from_ms_data_blob(blob: TdfBlob, index: usize) -> Self { - let mut spectrum: Spectrum = Spectrum::default(); - spectrum.index = index; - if blob.len() == 0 { - return spectrum; - }; - let size: usize = blob.len(); - let mut spectrum_data: Vec = vec![0; size]; - for i in 0..size { - spectrum_data[i] = blob.get(i) - } - let scan_count: usize = blob.len() / 3; - let tof_indices_bytes: &[u32] = - &spectrum_data[..scan_count as usize * 2]; - let intensities_bytes: &[u32] = - &spectrum_data[scan_count as usize * 2..]; - let mz_values: &[f64] = - bytemuck::cast_slice::(tof_indices_bytes); - let intensity_values: &[f32] = - bytemuck::cast_slice::(intensities_bytes); - spectrum.intensities = - intensity_values.iter().map(|&x| x as f64).collect(); - spectrum.mz_values = mz_values.to_vec(); - spectrum - } -} - -impl ReadableFromBinFile for Frame { - fn parse_from_ms_data_blob(blob: TdfBlob, index: usize) -> Self { - let mut frame = Frame::default(); - (frame.scan_offsets, frame.tof_indices, frame.intensities) = - parse_frame(blob); - frame.index = index; - frame - } -} - -pub fn parse_frame(blob: TdfBlob) -> (Vec, Vec, Vec) { - let mut tof_indices: Vec = vec![]; - let mut intensities: Vec = vec![]; - let mut scan_offsets: Vec = vec![]; - if blob.len() != 0 { - let scan_count: usize = blob.get(0) as usize; - let peak_count: usize = (blob.len() - scan_count) / 2; - scan_offsets = read_scan_offsets(scan_count, peak_count, &blob); - intensities = read_intensities(scan_count, peak_count, &blob); - tof_indices = - read_tof_indices(scan_count, peak_count, &blob, &scan_offsets); - } - (scan_offsets, tof_indices, intensities) -} - -fn read_scan_offsets( - scan_count: usize, - peak_count: usize, - blob: &TdfBlob, -) -> Vec { - let mut scan_offsets: Vec = Vec::with_capacity(scan_count + 1); - scan_offsets.push(0); - for scan_index in 0..scan_count - 1 { - let index = scan_index + 1; - let scan_size: usize = (blob.get(index) / 2) as usize; - scan_offsets.push(scan_offsets[scan_index] + scan_size); - } - scan_offsets.push(peak_count); - scan_offsets -} - -fn read_intensities( - scan_count: usize, - peak_count: usize, - blob: &TdfBlob, -) -> Vec { - let mut intensities: Vec = Vec::with_capacity(peak_count); - for peak_index in 0..peak_count { - let index: usize = scan_count + 1 + 2 * peak_index; - intensities.push(blob.get(index)); - } - intensities -} - -fn read_tof_indices( - scan_count: usize, - peak_count: usize, - blob: &TdfBlob, - scan_offsets: &Vec, -) -> Vec { - let mut tof_indices: Vec = Vec::with_capacity(peak_count); - for scan_index in 0..scan_count { - let start_offset: usize = scan_offsets[scan_index]; - let end_offset: usize = scan_offsets[scan_index + 1]; - let mut current_sum: u32 = 0; - for peak_index in start_offset..end_offset { - let index = scan_count + 2 * peak_index; - let tof_index: u32 = blob.get(index); - current_sum += tof_index; - tof_indices.push(current_sum - 1); - } - } - tof_indices -} From 36d449e0dc3d7a7c453981f4926b75b51f263c1d Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Apr 2024 16:04:07 +0200 Subject: [PATCH 023/109] FEAT: created new frame_reader module --- src/io/readers/frame_reader.rs | 153 +++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 src/io/readers/frame_reader.rs diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs new file mode 100644 index 0000000..3ded68b --- /dev/null +++ b/src/io/readers/frame_reader.rs @@ -0,0 +1,153 @@ +use std::path::{Path, PathBuf}; + +use rayon::iter::{IntoParallelIterator, ParallelIterator}; + +use crate::ms_data::{AcquisitionType, Frame, MSLevel}; + +use super::common::{ + sql_frames::SqlFrame, + sql_reader::{SqlReadable, SqlReader}, + tdf_blobs::{TdfBlob, TdfBlobReader}, +}; + +#[derive(Debug)] +pub struct FrameReader { + path: PathBuf, + tdf_bin_reader: TdfBlobReader, + pub sql_frames: Vec, + acquisition: AcquisitionType, +} + +impl FrameReader { + pub fn new(path: impl AsRef) -> Self { + let sql_path = path.as_ref().join("analysis.tdf"); + let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); + let sql_frames = SqlFrame::from_sql_reader(&tdf_sql_reader).unwrap(); + let bin_path = path.as_ref().join("analysis.tdf_bin"); + let tdf_bin_reader: TdfBlobReader = + TdfBlobReader::new(bin_path).unwrap(); + let acquisition = if sql_frames.iter().any(|x| x.msms_type == 8) { + AcquisitionType::DDAPASEF + } else if sql_frames.iter().any(|x| x.msms_type == 9) { + AcquisitionType::DDAPASEF + } else { + AcquisitionType::Unknown + }; + Self { + path: path.as_ref().to_path_buf(), + tdf_bin_reader, + sql_frames, + acquisition, + } + } + + pub fn collect bool>(&self, filter: F) -> Vec { + // let selection: Vec = (0..self.len()) + // .filter(|x| filter(&self.sql_frames[*x])) + // .collect(); + // selection.into_iter().map(|x| self.get(x)).collect() + let selection: Vec> = (0..self.len()) + .map(|x| { + if filter(&self.sql_frames[x]) { + Some(x) + } else { + None + } + }) + .collect(); + selection + .into_par_iter() + .map(|x| match x { + Some(y) => self.get(y), + None => Frame::default(), + }) + .collect() + } + + pub fn get(&self, index: usize) -> Frame { + let mut frame: Frame = Frame::default(); + let sql_frame = &self.sql_frames[index]; + let blob = self + .tdf_bin_reader + .get_blob(sql_frame.binary_offset) + .unwrap(); + let scan_count: usize = blob.get(0) as usize; + let peak_count: usize = (blob.len() - scan_count) / 2; + frame.scan_offsets = read_scan_offsets(scan_count, peak_count, &blob); + frame.intensities = read_intensities(scan_count, peak_count, &blob); + frame.tof_indices = read_tof_indices( + scan_count, + peak_count, + &blob, + &frame.scan_offsets, + ); + frame.ms_level = match sql_frame.msms_type { + 0 => MSLevel::MS1, + 8 => MSLevel::MS2, + 9 => MSLevel::MS2, + _ => MSLevel::Unknown, + }; + frame.index = sql_frame.id; + frame.rt = sql_frame.rt; + frame.acquisition_type = self.acquisition; + frame + } + + pub fn get_acquisition(&self) -> AcquisitionType { + self.acquisition + } + + pub fn len(&self) -> usize { + self.sql_frames.len() + } +} + +fn read_scan_offsets( + scan_count: usize, + peak_count: usize, + blob: &TdfBlob, +) -> Vec { + let mut scan_offsets: Vec = Vec::with_capacity(scan_count + 1); + scan_offsets.push(0); + for scan_index in 0..scan_count - 1 { + let index = scan_index + 1; + let scan_size: usize = (blob.get(index) / 2) as usize; + scan_offsets.push(scan_offsets[scan_index] + scan_size); + } + scan_offsets.push(peak_count); + scan_offsets +} + +fn read_intensities( + scan_count: usize, + peak_count: usize, + blob: &TdfBlob, +) -> Vec { + let mut intensities: Vec = Vec::with_capacity(peak_count); + for peak_index in 0..peak_count { + let index: usize = scan_count + 1 + 2 * peak_index; + intensities.push(blob.get(index)); + } + intensities +} + +fn read_tof_indices( + scan_count: usize, + peak_count: usize, + blob: &TdfBlob, + scan_offsets: &Vec, +) -> Vec { + let mut tof_indices: Vec = Vec::with_capacity(peak_count); + for scan_index in 0..scan_count { + let start_offset: usize = scan_offsets[scan_index]; + let end_offset: usize = scan_offsets[scan_index + 1]; + let mut current_sum: u32 = 0; + for peak_index in start_offset..end_offset { + let index = scan_count + 2 * peak_index; + let tof_index: u32 = blob.get(index); + current_sum += tof_index; + tof_indices.push(current_sum - 1); + } + } + tof_indices +} From 5c062e1477bcfb0310633ce0eb48c5778a94ea18 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Apr 2024 16:04:37 +0200 Subject: [PATCH 024/109] CHORE: remove unused sql frame table --- src/file_readers/common/sql_reader/tables.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/file_readers/common/sql_reader/tables.rs b/src/file_readers/common/sql_reader/tables.rs index 0a10b58..7b2d540 100644 --- a/src/file_readers/common/sql_reader/tables.rs +++ b/src/file_readers/common/sql_reader/tables.rs @@ -1,11 +1,9 @@ mod dia_frames_info; mod dia_frames_msms; -mod frames; mod pasef_frame_msms; mod precursors; pub use dia_frames_info::DiaFramesInfoTable; pub use dia_frames_msms::DiaFramesMsMsTable; -pub use frames::FrameTable; pub use pasef_frame_msms::PasefFrameMsMsTable; pub use precursors::PrecursorTable; From c2aada8b34b89aefaff29d23cebbb9c52edac1bd Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Apr 2024 16:05:02 +0200 Subject: [PATCH 025/109] FEAT: exposed frame reader to crate --- src/io/readers.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/io/readers.rs b/src/io/readers.rs index 34994bf..1db12a7 100644 --- a/src/io/readers.rs +++ b/src/io/readers.rs @@ -1 +1,2 @@ -pub mod common; +pub(crate) mod common; +pub mod frame_reader; From 02bc4e5101d33e79f0abfb827ae7f26cfd7b0514 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Apr 2024 16:06:12 +0200 Subject: [PATCH 026/109] FEAT: updated TdfReader to use frameReader instead --- src/file_readers/frame_readers/tdf_reader.rs | 104 +++--------------- .../spectrum_readers/mini_tdf_reader.rs | 38 +++++-- 2 files changed, 46 insertions(+), 96 deletions(-) diff --git a/src/file_readers/frame_readers/tdf_reader.rs b/src/file_readers/frame_readers/tdf_reader.rs index ec5bdfb..acd936d 100644 --- a/src/file_readers/frame_readers/tdf_reader.rs +++ b/src/file_readers/frame_readers/tdf_reader.rs @@ -1,34 +1,20 @@ -use { - crate::{ - domain_converters::{ - ConvertableDomain, Frame2RtConverter, Scan2ImConverter, - Tof2MzConverter, - }, - file_readers::{ - common::{ - ms_data_blobs::ReadableFromBinFile, - sql_reader::{FrameTable, ReadableFromSql, SqlReader}, - }, - ReadableFrames, - }, - io::readers::common::tdf_blobs::TdfBlobReader, - ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}, +use crate::{ + domain_converters::{Frame2RtConverter, Scan2ImConverter, Tof2MzConverter}, + file_readers::{ + common::sql_reader::{ReadableFromSql, SqlReader}, + ReadableFrames, }, - rayon::prelude::*, - std::{path::Path, sync::Arc}, + io::readers::frame_reader::FrameReader, + ms_data::Frame, }; #[derive(Debug)] pub struct TDFReader { - pub path: String, + frame_reader: FrameReader, pub tdf_sql_reader: SqlReader, - tdf_bin_reader: TdfBlobReader, pub rt_converter: Frame2RtConverter, pub im_converter: Scan2ImConverter, pub mz_converter: Tof2MzConverter, - pub frame_table: FrameTable, - pub acquisition: AcquisitionType, - ms_levels: Vec, } impl TDFReader { @@ -36,89 +22,33 @@ impl TDFReader { let tdf_sql_reader: SqlReader = SqlReader { path: String::from(path), }; - let frame_table: FrameTable = FrameTable::from_sql(&tdf_sql_reader); - let file_name: String = Path::new(&path) - .join("analysis.tdf_bin") - .to_string_lossy() - .to_string(); - let tdf_bin_reader: TdfBlobReader = TdfBlobReader::new( - String::from(&file_name), - frame_table.offsets.iter().map(|x| *x as usize).collect(), - ) - .unwrap(); - let ms_levels: Vec = frame_table - .msms_type - .iter() - .map(|msms_type| match msms_type { - 0 => MSLevel::MS1, - 8 => MSLevel::MS2, - 9 => MSLevel::MS2, - _ => MSLevel::Unknown, - }) - .collect(); - let mut acquisition = AcquisitionType::Unknown; - if frame_table.msms_type.contains(&8) { - acquisition = AcquisitionType::DDAPASEF; - } else if frame_table.msms_type.contains(&9) { - acquisition = AcquisitionType::DIAPASEF; - } + let frame_reader: FrameReader = FrameReader::new(&path); Self { - path: path.to_string(), - tdf_bin_reader: tdf_bin_reader, - rt_converter: Self::get_rt_converter(&frame_table), + rt_converter: Frame2RtConverter::from_values( + frame_reader.sql_frames.iter().map(|x| x.rt).collect(), + ), im_converter: Scan2ImConverter::from_sql(&tdf_sql_reader), mz_converter: Tof2MzConverter::from_sql(&tdf_sql_reader), - frame_table: frame_table, tdf_sql_reader: tdf_sql_reader, - ms_levels: ms_levels, - acquisition: acquisition, + frame_reader: frame_reader, } } - - fn get_rt_converter(frame_table: &FrameTable) -> Frame2RtConverter { - let retention_times: Vec = frame_table.rt.clone(); - Frame2RtConverter::from_values(retention_times) - } } impl ReadableFrames for TDFReader { fn read_single_frame(&self, index: usize) -> Frame { - let mut frame: Frame = - Frame::read_from_file(&self.tdf_bin_reader, index); - frame.rt = self.rt_converter.convert(index as u32); - frame.index = self.frame_table.id[index]; - frame.ms_level = self.ms_levels[index]; - frame.acquisition_type = self.acquisition; - if frame.ms_level == MSLevel::MS2 { - frame.quadrupole_settings = Arc::new(QuadrupoleSettings::default()); - } - frame + self.frame_reader.get(index) } fn read_all_frames(&self) -> Vec { - (0..self.tdf_bin_reader.len()) - .into_par_iter() - .map(|index| self.read_single_frame(index)) - .collect() + self.frame_reader.collect(|_| true) } fn read_all_ms1_frames(&self) -> Vec { - (0..self.tdf_bin_reader.len()) - .into_par_iter() - .map(|index| match self.ms_levels[index] { - MSLevel::MS1 => self.read_single_frame(index), - _ => Frame::default(), - }) - .collect() + self.frame_reader.collect(|x| x.msms_type == 0) } fn read_all_ms2_frames(&self) -> Vec { - (0..self.tdf_bin_reader.len()) - .into_par_iter() - .map(|index| match self.ms_levels[index] { - MSLevel::MS2 => self.read_single_frame(index), - _ => Frame::default(), - }) - .collect() + self.frame_reader.collect(|x| x.msms_type != 0) } } diff --git a/src/file_readers/spectrum_readers/mini_tdf_reader.rs b/src/file_readers/spectrum_readers/mini_tdf_reader.rs index db9cca7..efbca4d 100644 --- a/src/file_readers/spectrum_readers/mini_tdf_reader.rs +++ b/src/file_readers/spectrum_readers/mini_tdf_reader.rs @@ -1,16 +1,14 @@ use crate::{ file_readers::FileFormatError, - io::readers::common::tdf_blobs::TdfBlobReader, + io::readers::common::tdf_blobs::{ + IndexedTdfBlobReader, TdfBlob, TdfBlobParsable, + }, }; use std::fs; use { crate::{ file_readers::{ - common::{ - ms_data_blobs::ReadableFromBinFile, - parquet_reader::read_parquet_precursors, - }, - ReadableSpectra, + common::parquet_reader::read_parquet_precursors, ReadableSpectra, }, ms_data::{Precursor, Spectrum}, }, @@ -24,7 +22,7 @@ pub struct MiniTDFReader { parquet_file_name: String, precursors: Vec, offsets: Vec, - frame_reader: Option, + frame_reader: Option, } fn find_ms2spectrum_file( @@ -100,7 +98,7 @@ impl MiniTDFReader { path.push(ms2_bin_file); let file_name: String = path.to_string_lossy().into_owned(); self.frame_reader = Some( - TdfBlobReader::new( + IndexedTdfBlobReader::new( String::from(&file_name), self.offsets.iter().map(|x| *x as usize).collect(), ) @@ -111,7 +109,7 @@ impl MiniTDFReader { impl ReadableSpectra for MiniTDFReader { fn read_single_spectrum(&self, index: usize) -> Spectrum { - let mut spectrum: Spectrum = Spectrum::read_from_file( + let mut spectrum: Spectrum = Spectrum::create_from_tdf_blob_reader( &self.frame_reader.as_ref().unwrap(), index, ); @@ -134,3 +132,25 @@ impl ReadableSpectra for MiniTDFReader { spectra } } + +impl TdfBlobParsable for Spectrum { + fn set_tdf_blob_index(&mut self, index: usize) { + self.index = index; + } + + fn update_from_tdf_blob(&mut self, blob: TdfBlob) { + let size: usize = blob.len(); + let spectrum_data: Vec = (0..size).map(|i| blob.get(i)).collect(); + let scan_count: usize = blob.len() / 3; + let tof_indices_bytes: &[u32] = + &spectrum_data[..scan_count as usize * 2]; + let intensities_bytes: &[u32] = + &spectrum_data[scan_count as usize * 2..]; + let mz_values: &[f64] = + bytemuck::cast_slice::(tof_indices_bytes); + let intensity_values: &[f32] = + bytemuck::cast_slice::(intensities_bytes); + self.intensities = intensity_values.iter().map(|&x| x as f64).collect(); + self.mz_values = mz_values.to_vec(); + } +} From 6120d31bd65a2f4fd90551f2a63f13283fce95bb Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 30 Apr 2024 11:38:02 +0200 Subject: [PATCH 027/109] CHORE: updated benchmarks --- benches/speed_performance.rs | 64 ++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 7 deletions(-) diff --git a/benches/speed_performance.rs b/benches/speed_performance.rs index 40ef114..579b3e1 100644 --- a/benches/speed_performance.rs +++ b/benches/speed_performance.rs @@ -1,6 +1,13 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use timsrust::FileReader; +const DDA_TEST: &str = + "/mnt/c/Users/Sander.Willems/Documents/data/tims05_300SPD/20230505_TIMS05_PaSk_MA_HeLa_6min_ddaP_S1-C10_1_2323.d/"; +const DIA_TEST: &str = + "/mnt/c/Users/Sander.Willems/Documents/data/20230505_TIMS05_PaSk_SA_HeLa_6min_diaP_8scans_S1-D3_1_2329.d/"; +const SYP_TEST: &str = + "/mnt/c/Users/Sander.Willems/Documents/data/20230505_TIMS05_PaSk_SA_HeLa_6min_syP_5scans_30Da_S1-D4_1_2330.d/"; + fn read_all_frames(file_reader: &FileReader) { file_reader.read_all_frames(); } @@ -17,27 +24,70 @@ fn read_all_spectra(file_reader: &FileReader) { file_reader.read_all_spectra(); } -fn criterion_benchmark(c: &mut Criterion) { +fn criterion_benchmark_dda(c: &mut Criterion) { // c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20)))); let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); - let d_folder_name: &str = "/home/sander/data/20230505_TIMS05_PaSk_MA_HeLa_6min_ddaP_S1-C10_1_2323.d/"; + let d_folder_name: &str = DDA_TEST; let file_reader: FileReader = FileReader::new(d_folder_name.to_string()).unwrap(); - group.bench_function("read_all_frames 6m dda", |b| { + group.bench_function("DDA read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&file_reader))) }); - group.bench_function("read_all_ms1_frames 6m dda", |b| { + group.bench_function("DDA read_all_ms1_frames 6m", |b| { b.iter(|| read_all_ms1_frames(black_box(&file_reader))) }); - group.bench_function("read_all_ms2_frames 6m dda", |b| { + group.bench_function("DDA read_all_ms2_frames 6m", |b| { b.iter(|| read_all_ms2_frames(black_box(&file_reader))) }); - group.bench_function("read_all_spectra 6m dda", |b| { + group.bench_function("DDA read_all_spectra 6m", |b| { b.iter(|| read_all_spectra(black_box(&file_reader))) }); group.finish(); } -criterion_group!(benches, criterion_benchmark); +fn criterion_benchmark_dia(c: &mut Criterion) { + // c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20)))); + let mut group = c.benchmark_group("sample-size-example"); + group.significance_level(0.001).sample_size(10); + let d_folder_name: &str = DIA_TEST; + let file_reader: FileReader = + FileReader::new(d_folder_name.to_string()).unwrap(); + group.bench_function("DIA read_all_frames 6m", |b| { + b.iter(|| read_all_frames(black_box(&file_reader))) + }); + group.bench_function("DIA read_all_ms1_frames 6m", |b| { + b.iter(|| read_all_ms1_frames(black_box(&file_reader))) + }); + group.bench_function("DIA read_all_ms2_frames 6m", |b| { + b.iter(|| read_all_ms2_frames(black_box(&file_reader))) + }); + group.finish(); +} + +fn criterion_benchmark_syp(c: &mut Criterion) { + // c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20)))); + let mut group = c.benchmark_group("sample-size-example"); + group.significance_level(0.001).sample_size(10); + let d_folder_name: &str = SYP_TEST; + let file_reader: FileReader = + FileReader::new(d_folder_name.to_string()).unwrap(); + group.bench_function("SYP read_all_frames 6m", |b| { + b.iter(|| read_all_frames(black_box(&file_reader))) + }); + group.bench_function("SYP read_all_ms1_frames 6m", |b| { + b.iter(|| read_all_ms1_frames(black_box(&file_reader))) + }); + group.bench_function("SYP read_all_ms2_frames 6m", |b| { + b.iter(|| read_all_ms2_frames(black_box(&file_reader))) + }); + group.finish(); +} + +criterion_group!( + benches, + criterion_benchmark_dda, + // criterion_benchmark_dia, + // criterion_benchmark_syp +); criterion_main!(benches); From 3032d6d329d5ef9b9dd14b742f302b2d18134009 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 14 Jun 2024 13:22:14 +0200 Subject: [PATCH 028/109] DOCS: typo --- src/domain_converters.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/domain_converters.rs b/src/domain_converters.rs index cda5649..11c1387 100644 --- a/src/domain_converters.rs +++ b/src/domain_converters.rs @@ -7,7 +7,7 @@ pub use frame_to_rt::Frame2RtConverter; pub use scan_to_im::Scan2ImConverter; pub use tof_to_mz::Tof2MzConverter; -/// Convert from one domain (e.g. Time of Flight) to a another (m/z). +/// Convert from one domain (e.g. Time of Flight) to another (m/z). pub trait ConvertableDomain { fn convert + Copy>(&self, value: T) -> f64; } From abf3744e695659898c084a79498372949674795e Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 14 Jun 2024 13:23:23 +0200 Subject: [PATCH 029/109] FIX: reading of acquisition software --- .../common/sql_reader/metadata.rs | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/file_readers/common/sql_reader/metadata.rs b/src/file_readers/common/sql_reader/metadata.rs index 047d3ba..558bd21 100644 --- a/src/file_readers/common/sql_reader/metadata.rs +++ b/src/file_readers/common/sql_reader/metadata.rs @@ -4,6 +4,19 @@ use crate::domain_converters::{Scan2ImConverter, Tof2MzConverter}; use super::{get_sql_connection, ReadableFromSql, SqlReader}; +const OTOF_CONTROL: &str = "Bruker otofControl"; + +fn read_software(connection: &Connection) -> String { + let software: String = connection + .query_row( + "SELECT Value FROM GlobalMetadata WHERE Key = 'AcquisitionSoftware'", + [], + |row| row.get(0), + ) + .unwrap(); + software +} + fn read_tof_max_index(connection: &Connection) -> u32 { let tof_max_index_string: String = connection .query_row( @@ -12,7 +25,7 @@ fn read_tof_max_index(connection: &Connection) -> u32 { |row| row.get(0), ) .unwrap(); - let tof_max_index: u32 = tof_max_index_string.parse().unwrap(); + let mut tof_max_index: u32 = tof_max_index_string.parse().unwrap(); tof_max_index } @@ -24,7 +37,10 @@ fn read_mz_max_value(connection: &Connection) -> f64 { |row| row.get(0), ) .unwrap(); - let mz_max_value: f64 = mz_max_value_string.parse().unwrap(); + let mut mz_max_value: f64 = mz_max_value_string.parse().unwrap(); + if read_software(connection) == OTOF_CONTROL { + mz_max_value += 5.0; + } mz_max_value } @@ -36,7 +52,10 @@ fn read_mz_min_value(connection: &Connection) -> f64 { |row| row.get(0), ) .unwrap(); - let mz_min_value: f64 = mz_min_value_string.parse().unwrap(); + let mut mz_min_value: f64 = mz_min_value_string.parse().unwrap(); + if read_software(connection) == OTOF_CONTROL { + mz_min_value -= 5.0; + } mz_min_value } From e2a145fafcc9ba920a6da52358853ef3dfa19495 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 14 Jun 2024 13:25:35 +0200 Subject: [PATCH 030/109] FEAT: cleaner reading of frames --- .../spectrum_readers/dda_reader.rs | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/file_readers/spectrum_readers/dda_reader.rs b/src/file_readers/spectrum_readers/dda_reader.rs index 66d933c..81796d4 100644 --- a/src/file_readers/spectrum_readers/dda_reader.rs +++ b/src/file_readers/spectrum_readers/dda_reader.rs @@ -49,23 +49,27 @@ impl DDASpectrumReader { let mut tof_indices: Vec = vec![]; let mut intensities: Vec = vec![]; for &index in selection.iter() { - let frame: usize = + let frame_index: usize = self.precursor_reader.pasef_frames.frame[index] - 1; - if self.ms2_frames[frame].intensities.len() == 0 { + // let frame: &Frame = &self.ms2_frames[frame_index]; + let frame: &Frame = &self + .ms2_frames + .iter() + .find(|&x| x.index == frame_index + 1) + .unwrap(); + if frame.intensities.len() == 0 { continue; } let scan_start: usize = self.precursor_reader.pasef_frames.scan_start[index]; let scan_end: usize = self.precursor_reader.pasef_frames.scan_end[index]; - let offset_start: usize = - self.ms2_frames[frame].scan_offsets[scan_start] as usize; - let offset_end: usize = - self.ms2_frames[frame].scan_offsets[scan_end] as usize; + let offset_start: usize = frame.scan_offsets[scan_start] as usize; + let offset_end: usize = frame.scan_offsets[scan_end] as usize; let tof_selection: &[u32] = - &self.ms2_frames[frame].tof_indices[offset_start..offset_end]; + &frame.tof_indices[offset_start..offset_end]; let intensity_selection: &[u32] = - &self.ms2_frames[frame].intensities[offset_start..offset_end]; + &frame.intensities[offset_start..offset_end]; tof_indices.extend(tof_selection); intensities.extend(intensity_selection); } From d4de769d56af2ea23c0d20123b74e868ce794cb2 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 14 Jun 2024 13:26:04 +0200 Subject: [PATCH 031/109] FEAT: using iterator to read frames --- src/file_readers/frame_readers/tdf_reader.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/file_readers/frame_readers/tdf_reader.rs b/src/file_readers/frame_readers/tdf_reader.rs index acd936d..e745e31 100644 --- a/src/file_readers/frame_readers/tdf_reader.rs +++ b/src/file_readers/frame_readers/tdf_reader.rs @@ -7,6 +7,7 @@ use crate::{ io::readers::frame_reader::FrameReader, ms_data::Frame, }; +use rayon::iter::ParallelIterator; #[derive(Debug)] pub struct TDFReader { @@ -41,14 +42,18 @@ impl ReadableFrames for TDFReader { } fn read_all_frames(&self) -> Vec { - self.frame_reader.collect(|_| true) + self.frame_reader.parallel_filter(|_| true).collect() } fn read_all_ms1_frames(&self) -> Vec { - self.frame_reader.collect(|x| x.msms_type == 0) + self.frame_reader + .parallel_filter(|x| x.msms_type == 0) + .collect() } fn read_all_ms2_frames(&self) -> Vec { - self.frame_reader.collect(|x| x.msms_type != 0) + self.frame_reader + .parallel_filter(|x| x.msms_type != 0) + .collect() } } From ded22c1ad79138c9baa79df89d1e44b9f51e2c21 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 14 Jun 2024 13:27:19 +0200 Subject: [PATCH 032/109] FEAT: updated frame reader --- src/io/readers/frame_reader.rs | 58 ++++++++++++++++++++++------------ tests/frame_readers.rs | 47 +++++++++++++++++++-------- tests/test.d/analysis.tdf2 | 0 3 files changed, 72 insertions(+), 33 deletions(-) create mode 100644 tests/test.d/analysis.tdf2 diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index 3ded68b..9d91d5e 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -41,29 +41,47 @@ impl FrameReader { } } - pub fn collect bool>(&self, filter: F) -> Vec { - // let selection: Vec = (0..self.len()) - // .filter(|x| filter(&self.sql_frames[*x])) - // .collect(); - // selection.into_iter().map(|x| self.get(x)).collect() - let selection: Vec> = (0..self.len()) - .map(|x| { - if filter(&self.sql_frames[x]) { - Some(x) - } else { - None - } - }) - .collect(); - selection + pub fn parallel_filter<'a, F: Fn(&SqlFrame) -> bool + Sync + Send + 'a>( + &'a self, + predicate: F, + ) -> impl ParallelIterator + 'a { + (0..self.len()) .into_par_iter() - .map(|x| match x { - Some(y) => self.get(y), - None => Frame::default(), - }) - .collect() + .filter(move |x| predicate(&self.sql_frames[*x])) + .map(move |x| self.get(x)) + // (0..self.len()).into_par_iter().map(move |x| { + // if predicate(&self.sql_frames[x]) { + // self.get(x) + // } else { + // Frame::default() + // } + // }) } + // pub fn parallel_filter2< + // 'a, + // T: Default + Sync + Send, + // Y: Default, + // F: Fn(&Y) -> bool + Sync + Send + 'a, + // >( + // &'a self, + // predicate: F, + // ) -> impl ParallelIterator + 'a { + // (0..self.len()) + // .into_par_iter() + // .filter(move |x| predicate(&Y::default())) + // .map(move |x| T::default()) + // // (0..self.len()).into_par_iter().map(move |x| { + // // if predicate(&Y::default()) { + // // // self.get(x) + // // T::default() + // // } else { + // // // Frame::default() + // // T::default() + // // } + // // }) + // } + pub fn get(&self, index: usize) -> Frame { let mut frame: Frame = Frame::default(); let sql_frame = &self.sql_frames[index]; diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index 7c46561..2d23d65 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -11,7 +11,7 @@ fn get_local_directory() -> &'static Path { } #[test] -fn tdf_reader_frames() { +fn tdf_reader_frames1() { let file_name = "test.d"; let file_path = get_local_directory() .join(file_name) @@ -19,7 +19,7 @@ fn tdf_reader_frames() { .unwrap() .to_string(); let frames: Vec = - FileReader::new(file_path).unwrap().read_all_frames(); + FileReader::new(&file_path).unwrap().read_all_ms1_frames(); let expected: Vec = vec![ Frame { scan_offsets: vec![0, 1, 3, 6, 10], @@ -31,16 +31,7 @@ fn tdf_reader_frames() { quadrupole_settings: Arc::new(QuadrupoleSettings::default()), acquisition_type: AcquisitionType::DDAPASEF, }, - Frame { - scan_offsets: vec![0, 5, 11, 18, 26], - tof_indices: (10..36).collect(), - intensities: (10..36).map(|x| (x + 1) * 2).collect(), - index: 2, - rt: 0.2, - ms_level: MSLevel::MS2, - quadrupole_settings: Arc::new(QuadrupoleSettings::default()), - acquisition_type: AcquisitionType::DDAPASEF, - }, + // Frame::default(), Frame { scan_offsets: vec![0, 9, 19, 30, 42], tof_indices: (36..78).collect(), @@ -51,6 +42,36 @@ fn tdf_reader_frames() { quadrupole_settings: Arc::new(QuadrupoleSettings::default()), acquisition_type: AcquisitionType::DDAPASEF, }, + // Frame::default(), + ]; + for i in 0..expected.len() { + assert_eq!(&frames[i], &expected[i]) + } +} + +#[test] +fn tdf_reader_frames2() { + let file_name = "test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + let frames: Vec = + FileReader::new(&file_path).unwrap().read_all_ms2_frames(); + let expected: Vec = vec![ + // Frame::default(), + Frame { + scan_offsets: vec![0, 5, 11, 18, 26], + tof_indices: (10..36).collect(), + intensities: (10..36).map(|x| (x + 1) * 2).collect(), + index: 2, + rt: 0.2, + ms_level: MSLevel::MS2, + quadrupole_settings: Arc::new(QuadrupoleSettings::default()), + acquisition_type: AcquisitionType::DDAPASEF, + }, + // Frame::default(), Frame { scan_offsets: vec![0, 13, 27, 42, 58], tof_indices: (78..136).collect(), @@ -62,7 +83,7 @@ fn tdf_reader_frames() { acquisition_type: AcquisitionType::DDAPASEF, }, ]; - for i in 0..frames.len() { + for i in 0..expected.len() { assert_eq!(&frames[i], &expected[i]) } } diff --git a/tests/test.d/analysis.tdf2 b/tests/test.d/analysis.tdf2 new file mode 100644 index 0000000..e69de29 From 9e4f050536bb93c539b6c1984d1eaf7a0470009a Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 14 Jun 2024 13:57:02 +0200 Subject: [PATCH 033/109] FEAT: github workflow update --- .github/workflows/rust.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 297672f..9abeb7f 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -12,9 +12,7 @@ env: jobs: build: - runs-on: ubuntu-latest - steps: - uses: actions/checkout@v3 - name: Build From 01555f9916285c05aa16a9f5bbaf273ae2570034 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 14 Jun 2024 15:04:20 +0200 Subject: [PATCH 034/109] FEAT: added intensity factors to frames --- src/io/readers/frame_reader.rs | 38 ++++++---------------------------- src/ms_data/frames.rs | 1 + tests/frame_readers.rs | 4 ++++ 3 files changed, 11 insertions(+), 32 deletions(-) diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index 9d91d5e..cc38df5 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -14,7 +14,7 @@ use super::common::{ pub struct FrameReader { path: PathBuf, tdf_bin_reader: TdfBlobReader, - pub sql_frames: Vec, + sql_frames: Vec, acquisition: AcquisitionType, } @@ -49,39 +49,8 @@ impl FrameReader { .into_par_iter() .filter(move |x| predicate(&self.sql_frames[*x])) .map(move |x| self.get(x)) - // (0..self.len()).into_par_iter().map(move |x| { - // if predicate(&self.sql_frames[x]) { - // self.get(x) - // } else { - // Frame::default() - // } - // }) } - // pub fn parallel_filter2< - // 'a, - // T: Default + Sync + Send, - // Y: Default, - // F: Fn(&Y) -> bool + Sync + Send + 'a, - // >( - // &'a self, - // predicate: F, - // ) -> impl ParallelIterator + 'a { - // (0..self.len()) - // .into_par_iter() - // .filter(move |x| predicate(&Y::default())) - // .map(move |x| T::default()) - // // (0..self.len()).into_par_iter().map(move |x| { - // // if predicate(&Y::default()) { - // // // self.get(x) - // // T::default() - // // } else { - // // // Frame::default() - // // T::default() - // // } - // // }) - // } - pub fn get(&self, index: usize) -> Frame { let mut frame: Frame = Frame::default(); let sql_frame = &self.sql_frames[index]; @@ -108,6 +77,7 @@ impl FrameReader { frame.index = sql_frame.id; frame.rt = sql_frame.rt; frame.acquisition_type = self.acquisition; + frame.intensity_correction_factor = 1.0 / sql_frame.accumulation_time; frame } @@ -118,6 +88,10 @@ impl FrameReader { pub fn len(&self) -> usize { self.sql_frames.len() } + + pub fn get_path(&self) -> PathBuf { + self.path.clone() + } } fn read_scan_offsets( diff --git a/src/ms_data/frames.rs b/src/ms_data/frames.rs index d62e507..212df83 100644 --- a/src/ms_data/frames.rs +++ b/src/ms_data/frames.rs @@ -12,6 +12,7 @@ pub struct Frame { pub acquisition_type: AcquisitionType, pub ms_level: MSLevel, pub quadrupole_settings: Arc, + pub intensity_correction_factor: f64, } /// The MS level used. diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index 2d23d65..28bcff8 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -30,6 +30,7 @@ fn tdf_reader_frames1() { ms_level: MSLevel::MS1, quadrupole_settings: Arc::new(QuadrupoleSettings::default()), acquisition_type: AcquisitionType::DDAPASEF, + intensity_correction_factor: 1.0 / 100.0, }, // Frame::default(), Frame { @@ -41,6 +42,7 @@ fn tdf_reader_frames1() { ms_level: MSLevel::MS1, quadrupole_settings: Arc::new(QuadrupoleSettings::default()), acquisition_type: AcquisitionType::DDAPASEF, + intensity_correction_factor: 1.0 / 100.0, }, // Frame::default(), ]; @@ -70,6 +72,7 @@ fn tdf_reader_frames2() { ms_level: MSLevel::MS2, quadrupole_settings: Arc::new(QuadrupoleSettings::default()), acquisition_type: AcquisitionType::DDAPASEF, + intensity_correction_factor: 1.0 / 100.0, }, // Frame::default(), Frame { @@ -81,6 +84,7 @@ fn tdf_reader_frames2() { ms_level: MSLevel::MS2, quadrupole_settings: Arc::new(QuadrupoleSettings::default()), acquisition_type: AcquisitionType::DDAPASEF, + intensity_correction_factor: 1.0 / 100.0, }, ]; for i in 0..expected.len() { From 1f221c3e50a2957155839394a956d4dcfb181a2b Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 14 Jun 2024 15:04:47 +0200 Subject: [PATCH 035/109] FEAT: added rt converter to tdf reader and metada --- src/file_readers/common/sql_reader/metadata.rs | 14 +++++++++++--- src/file_readers/frame_readers/tdf_reader.rs | 4 +--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/file_readers/common/sql_reader/metadata.rs b/src/file_readers/common/sql_reader/metadata.rs index 558bd21..a398f13 100644 --- a/src/file_readers/common/sql_reader/metadata.rs +++ b/src/file_readers/common/sql_reader/metadata.rs @@ -1,6 +1,8 @@ use rusqlite::{Connection, Statement}; -use crate::domain_converters::{Scan2ImConverter, Tof2MzConverter}; +use crate::domain_converters::{ + Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, +}; use super::{get_sql_connection, ReadableFromSql, SqlReader}; @@ -96,13 +98,19 @@ impl SqlReader { impl ReadableFromSql for Tof2MzConverter { fn from_sql(sql_reader: &SqlReader) -> Self { let (tof_max_index, mz_min, mz_max) = sql_reader.read_mz_information(); - Tof2MzConverter::from_boundaries(mz_min, mz_max, tof_max_index) + Self::from_boundaries(mz_min, mz_max, tof_max_index) } } impl ReadableFromSql for Scan2ImConverter { fn from_sql(sql_reader: &SqlReader) -> Self { let (scan_max_index, im_min, im_max) = sql_reader.read_im_information(); - Scan2ImConverter::from_boundaries(im_min, im_max, scan_max_index) + Self::from_boundaries(im_min, im_max, scan_max_index) + } +} + +impl ReadableFromSql for Frame2RtConverter { + fn from_sql(sql_reader: &SqlReader) -> Self { + Self::from_values(sql_reader.read_column_from_table("Time", "Frames")) } } diff --git a/src/file_readers/frame_readers/tdf_reader.rs b/src/file_readers/frame_readers/tdf_reader.rs index e745e31..5292995 100644 --- a/src/file_readers/frame_readers/tdf_reader.rs +++ b/src/file_readers/frame_readers/tdf_reader.rs @@ -25,9 +25,7 @@ impl TDFReader { }; let frame_reader: FrameReader = FrameReader::new(&path); Self { - rt_converter: Frame2RtConverter::from_values( - frame_reader.sql_frames.iter().map(|x| x.rt).collect(), - ), + rt_converter: Frame2RtConverter::from_sql(&tdf_sql_reader), im_converter: Scan2ImConverter::from_sql(&tdf_sql_reader), mz_converter: Tof2MzConverter::from_sql(&tdf_sql_reader), tdf_sql_reader: tdf_sql_reader, From 22448c075eab142e075502179b913259f3199526 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 14 Jun 2024 15:05:13 +0200 Subject: [PATCH 036/109] CHORE: added type annotations to tdfblobs --- src/io/readers/common/tdf_blobs.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/io/readers/common/tdf_blobs.rs b/src/io/readers/common/tdf_blobs.rs index 6cf17b2..71b1b5c 100644 --- a/src/io/readers/common/tdf_blobs.rs +++ b/src/io/readers/common/tdf_blobs.rs @@ -13,7 +13,9 @@ pub struct TdfBlob { } impl TdfBlob { + #[inline(always)] pub fn get(&self, index: usize) -> u32 { + debug_assert!(index < self.len()); Self::concatenate_bytes( self.bytes[index], self.bytes[index + self.len()], @@ -48,7 +50,7 @@ pub struct TdfBlobReader { impl TdfBlobReader { pub fn new(file_name: impl AsRef) -> Result { - let path = file_name.as_ref().to_path_buf(); + let path: PathBuf = file_name.as_ref().to_path_buf(); let file: File = File::open(&path)?; let mmap: Mmap = unsafe { Mmap::map(&file)? }; Ok(Self { @@ -59,7 +61,7 @@ impl TdfBlobReader { } pub fn get_blob(&self, offset: usize) -> Result { - let offset = self.get_offset(offset)?; + let offset: usize = self.get_offset(offset)?; let byte_count: usize = self.get_byte_count(offset)?; let compressed_bytes: &[u8] = self.get_compressed_bytes(offset, byte_count); @@ -189,7 +191,7 @@ pub trait TdfBlobParsable { #[derive(Debug, thiserror::Error)] pub enum TdfBlobError { - #[error("Cannot read or mmap file: {0}")] + #[error("Cannot read or mmap file {0}")] IO(#[from] io::Error), #[error("Index {0} is invalid for file {1}")] Index(usize, PathBuf), From 80c47fbf5cfc579970b946f6a281af85566c0c1b Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 14 Jun 2024 15:05:30 +0200 Subject: [PATCH 037/109] FEAT: cleaned up code --- src/io/readers/common/sql_frames.rs | 58 ++--------------------------- src/io/readers/common/sql_reader.rs | 4 ++ 2 files changed, 8 insertions(+), 54 deletions(-) diff --git a/src/io/readers/common/sql_frames.rs b/src/io/readers/common/sql_frames.rs index 509c2b8..15124e8 100644 --- a/src/io/readers/common/sql_frames.rs +++ b/src/io/readers/common/sql_frames.rs @@ -1,6 +1,6 @@ use super::sql_reader::SqlReadable; -#[derive(Debug, Default, PartialEq)] +#[derive(Debug, PartialEq)] pub struct SqlFrame { pub id: usize, pub scan_mode: u8, @@ -9,11 +9,12 @@ pub struct SqlFrame { pub rt: f64, pub scan_count: u64, pub binary_offset: usize, + pub accumulation_time: f64, } impl SqlReadable for SqlFrame { fn get_sql_query() -> String { - "SELECT Id, ScanMode, MsMsType, NumPeaks, Time, NumScans, TimsId FROM Frames".to_string() + "SELECT Id, ScanMode, MsMsType, NumPeaks, Time, NumScans, TimsId, AccumulationTime FROM Frames".to_string() } fn from_sql_row(row: &rusqlite::Row) -> Self { @@ -25,58 +26,7 @@ impl SqlReadable for SqlFrame { rt: row.get(4).unwrap_or_default(), scan_count: row.get(5).unwrap_or_default(), binary_offset: row.get(6).unwrap_or_default(), + accumulation_time: row.get(7).unwrap_or_default(), } } } - -#[cfg(test)] -mod tests { - use super::*; - use crate::io::readers::common::sql_reader::SqlReader; - - #[test] - fn test_get() { - let reader = - SqlReader::open("tests/test.d/analysis.tdf".to_string()).unwrap(); - let sql_frames = SqlFrame::from_sql_reader(&reader).unwrap(); - let target = [ - SqlFrame { - id: 1, - scan_mode: 8, - msms_type: 0, - peak_count: 10, - rt: 0.1, - scan_count: 4, - binary_offset: 0, - }, - SqlFrame { - id: 2, - scan_mode: 8, - msms_type: 8, - peak_count: 26, - rt: 0.2, - scan_count: 4, - binary_offset: 48, - }, - SqlFrame { - id: 3, - scan_mode: 8, - msms_type: 0, - peak_count: 42, - rt: 0.3, - scan_count: 4, - binary_offset: 130, - }, - SqlFrame { - id: 4, - scan_mode: 8, - msms_type: 8, - peak_count: 58, - rt: 0.4, - scan_count: 4, - binary_offset: 235, - }, - ]; - assert_eq!(sql_frames, target); - } -} diff --git a/src/io/readers/common/sql_reader.rs b/src/io/readers/common/sql_reader.rs index 0abe977..2540312 100644 --- a/src/io/readers/common/sql_reader.rs +++ b/src/io/readers/common/sql_reader.rs @@ -14,6 +14,10 @@ impl SqlReader { let connection = Connection::open(&path)?; Ok(Self { path, connection }) } + + pub fn get_path(&self) -> PathBuf { + self.path.clone() + } } pub trait SqlReadable { From 0ae839d64e7bd5d1146b0c0b055a8f2cfb360941 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 14 Jun 2024 15:48:17 +0200 Subject: [PATCH 038/109] FEAT: removed old tdf and frame reader --- src/file_readers.rs | 49 +--------------- src/file_readers/file_formats.rs | 38 +++++++++++++ src/file_readers/frame_readers.rs | 50 ---------------- src/file_readers/frame_readers/tdf_reader.rs | 57 ------------------- .../spectrum_readers/dda_reader.rs | 24 +++++--- .../spectrum_readers/dda_reader/precursors.rs | 34 ++++++----- 6 files changed, 76 insertions(+), 176 deletions(-) delete mode 100644 src/file_readers/frame_readers.rs delete mode 100644 src/file_readers/frame_readers/tdf_reader.rs diff --git a/src/file_readers.rs b/src/file_readers.rs index 883fb06..9596557 100644 --- a/src/file_readers.rs +++ b/src/file_readers.rs @@ -1,25 +1,16 @@ -use crate::{ - domain_converters::{Frame2RtConverter, Scan2ImConverter, Tof2MzConverter}, - Error, -}; +use crate::Error; mod common; mod file_formats; -mod frame_readers; mod spectrum_readers; use { - self::{ - file_formats::FileFormat, frame_readers::ReadableFrames, - spectrum_readers::ReadableSpectra, - }, + self::{file_formats::FileFormat, spectrum_readers::ReadableSpectra}, crate::ms_data::{Frame, Spectrum}, }; pub use file_formats::FileFormatError; -use self::frame_readers::tdf_reader::TDFReader; - /// A reader to read [frames](crate::ms_data::Frame) and [spectra](crate::ms_data::Spectrum). pub struct FileReader { format: FileFormat, @@ -65,40 +56,4 @@ impl FileReader { pub fn read_all_spectra(&self) -> Vec { self.format.read_all_spectra() } - - pub fn get_frame_converter(&self) -> Result { - match &self.format { - FileFormat::DFolder(path) => Ok(TDFReader::new( - &path.to_str().unwrap_or_default().to_string(), - ) - .rt_converter), - _ => Err(Error::FileFormatError( - FileFormatError::MetadataFilesAreMissing, - )), - } - } - - pub fn get_scan_converter(&self) -> Result { - match &self.format { - FileFormat::DFolder(path) => Ok(TDFReader::new( - &path.to_str().unwrap_or_default().to_string(), - ) - .im_converter), - _ => Err(Error::FileFormatError( - FileFormatError::MetadataFilesAreMissing, - )), - } - } - - pub fn get_tof_converter(&self) -> Result { - match &self.format { - FileFormat::DFolder(path) => Ok(TDFReader::new( - &path.to_str().unwrap_or_default().to_string(), - ) - .mz_converter), - _ => Err(Error::FileFormatError( - FileFormatError::MetadataFilesAreMissing, - )), - } - } } diff --git a/src/file_readers/file_formats.rs b/src/file_readers/file_formats.rs index ec83680..0ab4472 100644 --- a/src/file_readers/file_formats.rs +++ b/src/file_readers/file_formats.rs @@ -1,5 +1,9 @@ use std::{fs, path::PathBuf}; +use crate::{io::readers::frame_reader::FrameReader, ms_data::Frame}; +use rayon::iter::ParallelIterator; + +use super::common::sql_reader::SqlReader; pub enum FileFormat { DFolder(PathBuf), MS2Folder(PathBuf), @@ -72,6 +76,40 @@ fn folder_contains_extension( false } +impl FileFormat { + fn get_frame_reader(&self) -> FrameReader { + let path = match &self { + Self::DFolder(path) => path, + Self::MS2Folder(path) => panic!( + "Folder {:} is not frame readable", + path.to_str().unwrap_or_default().to_string() + ), + }; + let frame_reader: FrameReader = FrameReader::new(&path); + frame_reader + } + + pub fn read_single_frame(&self, index: usize) -> Frame { + self.get_frame_reader().get(index) + } + + pub fn read_all_frames(&self) -> Vec { + self.get_frame_reader().parallel_filter(|_| true).collect() + } + + pub fn read_all_ms1_frames(&self) -> Vec { + self.get_frame_reader() + .parallel_filter(|x| x.msms_type == 0) + .collect() + } + + pub fn read_all_ms2_frames(&self) -> Vec { + self.get_frame_reader() + .parallel_filter(|x| x.msms_type != 0) + .collect() + } +} + #[derive(thiserror::Error, Debug)] pub enum FileFormatError { #[error("DirectoryDoesNotExist")] diff --git a/src/file_readers/frame_readers.rs b/src/file_readers/frame_readers.rs deleted file mode 100644 index 94f0db3..0000000 --- a/src/file_readers/frame_readers.rs +++ /dev/null @@ -1,50 +0,0 @@ -use crate::ms_data::Frame; - -use self::tdf_reader::TDFReader; - -use super::file_formats::FileFormat; - -pub mod tdf_reader; - -pub trait ReadableFrames { - fn read_single_frame(&self, index: usize) -> Frame; - - fn read_all_frames(&self) -> Vec; - - fn read_all_ms1_frames(&self) -> Vec; - - fn read_all_ms2_frames(&self) -> Vec; -} - -impl FileFormat { - fn unwrap_frame_reader(&self) -> Box { - let result = match &self { - Self::DFolder(path) => Box::new(TDFReader::new( - &path.to_str().unwrap_or_default().to_string(), - )) as Box, - Self::MS2Folder(path) => panic!( - "Folder {:} is not frame readable", - path.to_str().unwrap_or_default().to_string() - ), - }; - result - } -} - -impl ReadableFrames for FileFormat { - fn read_single_frame(&self, index: usize) -> Frame { - self.unwrap_frame_reader().read_single_frame(index) - } - - fn read_all_frames(&self) -> Vec { - self.unwrap_frame_reader().read_all_frames() - } - - fn read_all_ms1_frames(&self) -> Vec { - self.unwrap_frame_reader().read_all_ms1_frames() - } - - fn read_all_ms2_frames(&self) -> Vec { - self.unwrap_frame_reader().read_all_ms2_frames() - } -} diff --git a/src/file_readers/frame_readers/tdf_reader.rs b/src/file_readers/frame_readers/tdf_reader.rs deleted file mode 100644 index 5292995..0000000 --- a/src/file_readers/frame_readers/tdf_reader.rs +++ /dev/null @@ -1,57 +0,0 @@ -use crate::{ - domain_converters::{Frame2RtConverter, Scan2ImConverter, Tof2MzConverter}, - file_readers::{ - common::sql_reader::{ReadableFromSql, SqlReader}, - ReadableFrames, - }, - io::readers::frame_reader::FrameReader, - ms_data::Frame, -}; -use rayon::iter::ParallelIterator; - -#[derive(Debug)] -pub struct TDFReader { - frame_reader: FrameReader, - pub tdf_sql_reader: SqlReader, - pub rt_converter: Frame2RtConverter, - pub im_converter: Scan2ImConverter, - pub mz_converter: Tof2MzConverter, -} - -impl TDFReader { - pub fn new(path: &String) -> Self { - let tdf_sql_reader: SqlReader = SqlReader { - path: String::from(path), - }; - let frame_reader: FrameReader = FrameReader::new(&path); - Self { - rt_converter: Frame2RtConverter::from_sql(&tdf_sql_reader), - im_converter: Scan2ImConverter::from_sql(&tdf_sql_reader), - mz_converter: Tof2MzConverter::from_sql(&tdf_sql_reader), - tdf_sql_reader: tdf_sql_reader, - frame_reader: frame_reader, - } - } -} - -impl ReadableFrames for TDFReader { - fn read_single_frame(&self, index: usize) -> Frame { - self.frame_reader.get(index) - } - - fn read_all_frames(&self) -> Vec { - self.frame_reader.parallel_filter(|_| true).collect() - } - - fn read_all_ms1_frames(&self) -> Vec { - self.frame_reader - .parallel_filter(|x| x.msms_type == 0) - .collect() - } - - fn read_all_ms2_frames(&self) -> Vec { - self.frame_reader - .parallel_filter(|x| x.msms_type != 0) - .collect() - } -} diff --git a/src/file_readers/spectrum_readers/dda_reader.rs b/src/file_readers/spectrum_readers/dda_reader.rs index 81796d4..50eff5d 100644 --- a/src/file_readers/spectrum_readers/dda_reader.rs +++ b/src/file_readers/spectrum_readers/dda_reader.rs @@ -4,11 +4,14 @@ use crate::{ calibration::Tof2MzCalibrator, domain_converters::Tof2MzConverter, file_readers::{ - frame_readers::{tdf_reader::TDFReader, ReadableFrames}, + common::sql_reader::{ReadableFromSql, SqlReader}, ReadableSpectra, }, - ms_data::{Frame, Spectrum}, - ms_data::{RawProcessedSpectrumState, RawSpectrum, RawSpectrumProcessor}, + io::readers::frame_reader::FrameReader, + ms_data::{ + Frame, RawProcessedSpectrumState, RawSpectrum, RawSpectrumProcessor, + Spectrum, + }, utils::vec_utils::group_and_sum, }; @@ -29,11 +32,18 @@ pub struct DDASpectrumReader { impl DDASpectrumReader { pub fn new(path_name: String) -> Self { - let tdf_reader: TDFReader = TDFReader::new(&path_name.to_string()); - let mz_reader: Tof2MzConverter = tdf_reader.mz_converter.clone(); - let ms2_frames: Vec = tdf_reader.read_all_ms2_frames(); + // let tdf_reader: TDFReader = TDFReader::new(&path_name.to_string()); + let tdf_sql_reader: SqlReader = SqlReader { + path: String::from(&path_name), + }; + let frame_reader: FrameReader = FrameReader::new(&path_name); + let mz_reader: Tof2MzConverter = + Tof2MzConverter::from_sql(&tdf_sql_reader); + + let ms2_frames: Vec = + frame_reader.parallel_filter(|x| x.msms_type != 0).collect(); let precursor_reader: PrecursorReader = - PrecursorReader::new(&tdf_reader); + PrecursorReader::new(&path_name); Self { path_name, precursor_reader, diff --git a/src/file_readers/spectrum_readers/dda_reader/precursors.rs b/src/file_readers/spectrum_readers/dda_reader/precursors.rs index c910b8c..dbccdbf 100644 --- a/src/file_readers/spectrum_readers/dda_reader/precursors.rs +++ b/src/file_readers/spectrum_readers/dda_reader/precursors.rs @@ -1,12 +1,11 @@ use rayon::prelude::*; use crate::{ - domain_converters::ConvertableDomain, - file_readers::{ - common::sql_reader::{ - PasefFrameMsMsTable, PrecursorTable, ReadableFromSql, - }, - frame_readers::tdf_reader::TDFReader, + domain_converters::{ + ConvertableDomain, Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, + }, + file_readers::common::sql_reader::{ + PasefFrameMsMsTable, PrecursorTable, ReadableFromSql, SqlReader, }, ms_data::Precursor, utils::vec_utils::argsort, @@ -22,18 +21,23 @@ pub struct PrecursorReader { } impl PrecursorReader { - pub fn new(tdf_reader: &TDFReader) -> Self { + pub fn new(path: &String) -> Self { + let tdf_sql_reader: SqlReader = SqlReader { + path: String::from(path), + }; + let rt_converter: Frame2RtConverter = + Frame2RtConverter::from_sql(&tdf_sql_reader); + let im_converter: Scan2ImConverter = + Scan2ImConverter::from_sql(&tdf_sql_reader); let select_collision_energy_sql = String::from( "SELECT CollisionEnergy FROM PasefFrameMsMsInfo GROUP BY Precursor", ); let pasef_frames: PasefFrameMsMsTable = - PasefFrameMsMsTable::from_sql(&tdf_reader.tdf_sql_reader); + PasefFrameMsMsTable::from_sql(&tdf_sql_reader); let precursor_table: PrecursorTable = - PrecursorTable::from_sql(&tdf_reader.tdf_sql_reader); - // let retention_times: Vec = tdf_reader.frame_table.rt.clone(); - let collision_energies = tdf_reader - .tdf_sql_reader - .get_data_from_sql(&select_collision_energy_sql); + PrecursorTable::from_sql(&tdf_sql_reader); + let collision_energies = + tdf_sql_reader.get_data_from_sql(&select_collision_energy_sql); let precursors: Vec = (0..precursor_table.mz.len()) .into_par_iter() .map(|index| { @@ -41,8 +45,8 @@ impl PrecursorReader { let scan_id: f64 = precursor_table.scan_average[index]; Precursor { mz: precursor_table.mz[index], - rt: tdf_reader.rt_converter.convert(frame_id as u32), - im: tdf_reader.im_converter.convert(scan_id), + rt: rt_converter.convert(frame_id as u32), + im: im_converter.convert(scan_id), charge: precursor_table.charge[index], intensity: precursor_table.intensity[index], index: index + 1, //TODO? From 72217a9d1b72f47b808f764a264d85dd7e093c8f Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 14 Jun 2024 16:00:42 +0200 Subject: [PATCH 039/109] CHORE: renamed io:common to io:file_readers --- src/file_readers/spectrum_readers/mini_tdf_reader.rs | 2 +- src/io/readers.rs | 2 +- src/io/readers/{common.rs => file_readers.rs} | 0 src/io/readers/{common => file_readers}/sql_frames.rs | 0 src/io/readers/{common => file_readers}/sql_reader.rs | 0 src/io/readers/{common => file_readers}/tdf_blobs.rs | 0 src/io/readers/frame_reader.rs | 2 +- 7 files changed, 3 insertions(+), 3 deletions(-) rename src/io/readers/{common.rs => file_readers.rs} (100%) rename src/io/readers/{common => file_readers}/sql_frames.rs (100%) rename src/io/readers/{common => file_readers}/sql_reader.rs (100%) rename src/io/readers/{common => file_readers}/tdf_blobs.rs (100%) diff --git a/src/file_readers/spectrum_readers/mini_tdf_reader.rs b/src/file_readers/spectrum_readers/mini_tdf_reader.rs index efbca4d..abf63ea 100644 --- a/src/file_readers/spectrum_readers/mini_tdf_reader.rs +++ b/src/file_readers/spectrum_readers/mini_tdf_reader.rs @@ -1,6 +1,6 @@ use crate::{ file_readers::FileFormatError, - io::readers::common::tdf_blobs::{ + io::readers::file_readers::tdf_blobs::{ IndexedTdfBlobReader, TdfBlob, TdfBlobParsable, }, }; diff --git a/src/io/readers.rs b/src/io/readers.rs index 1db12a7..5a5f8b0 100644 --- a/src/io/readers.rs +++ b/src/io/readers.rs @@ -1,2 +1,2 @@ -pub(crate) mod common; +pub(crate) mod file_readers; pub mod frame_reader; diff --git a/src/io/readers/common.rs b/src/io/readers/file_readers.rs similarity index 100% rename from src/io/readers/common.rs rename to src/io/readers/file_readers.rs diff --git a/src/io/readers/common/sql_frames.rs b/src/io/readers/file_readers/sql_frames.rs similarity index 100% rename from src/io/readers/common/sql_frames.rs rename to src/io/readers/file_readers/sql_frames.rs diff --git a/src/io/readers/common/sql_reader.rs b/src/io/readers/file_readers/sql_reader.rs similarity index 100% rename from src/io/readers/common/sql_reader.rs rename to src/io/readers/file_readers/sql_reader.rs diff --git a/src/io/readers/common/tdf_blobs.rs b/src/io/readers/file_readers/tdf_blobs.rs similarity index 100% rename from src/io/readers/common/tdf_blobs.rs rename to src/io/readers/file_readers/tdf_blobs.rs diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index cc38df5..513b4d1 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -4,7 +4,7 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; use crate::ms_data::{AcquisitionType, Frame, MSLevel}; -use super::common::{ +use super::file_readers::{ sql_frames::SqlFrame, sql_reader::{SqlReadable, SqlReader}, tdf_blobs::{TdfBlob, TdfBlobReader}, From 87a0b98faa0afe88f1770d6418a19d5f20835ccb Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 14 Jun 2024 16:27:14 +0200 Subject: [PATCH 040/109] FEAT: reformatted sql precursors --- src/errors.rs | 9 ++++- src/file_readers/common.rs | 1 - src/file_readers/common/sql_reader/tables.rs | 6 ---- .../sql_reader/tables/dia_frames_info.rs | 17 ---------- .../sql_reader/tables/dia_frames_msms.rs | 27 --------------- .../common/sql_reader/tables/precursors.rs | 28 --------------- .../spectrum_readers/dda_reader/precursors.rs | 34 +++++++++++++------ .../spectrum_readers/mini_tdf_reader.rs | 9 +++-- src/io/readers/file_readers.rs | 2 +- .../readers/file_readers}/parquet_reader.rs | 0 src/io/readers/file_readers/sql_reader.rs | 3 ++ .../{sql_frames.rs => sql_reader/frames.rs} | 4 +-- .../file_readers/sql_reader/precursors.rs | 28 +++++++++++++++ src/io/readers/frame_reader.rs | 3 +- 14 files changed, 70 insertions(+), 101 deletions(-) delete mode 100644 src/file_readers/common/sql_reader/tables/dia_frames_info.rs delete mode 100644 src/file_readers/common/sql_reader/tables/dia_frames_msms.rs delete mode 100644 src/file_readers/common/sql_reader/tables/precursors.rs rename src/{file_readers/common => io/readers/file_readers}/parquet_reader.rs (100%) rename src/io/readers/file_readers/{sql_frames.rs => sql_reader/frames.rs} (94%) create mode 100644 src/io/readers/file_readers/sql_reader/precursors.rs diff --git a/src/errors.rs b/src/errors.rs index e429674..7af743c 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,4 +1,7 @@ -use crate::file_readers; +use crate::{ + file_readers, + // io::readers::common::{sql_reader::SqlError, tdf_blobs::TdfBlobError}, +}; /// An error that is produced by timsrust (uses [thiserror]). #[derive(thiserror::Error, Debug)] @@ -6,4 +9,8 @@ pub enum Error { /// An error to indicate a path is not a Bruker File Format. #[error("FileFormatError: {0}")] FileFormatError(#[from] file_readers::FileFormatError), + // #[error("SqlError: {0}")] + // SqlError(#[from] SqlError), + // #[error("BinError: {0}")] + // BinError(#[from] TdfBlobError), } diff --git a/src/file_readers/common.rs b/src/file_readers/common.rs index 2cf18f5..55cbaa0 100644 --- a/src/file_readers/common.rs +++ b/src/file_readers/common.rs @@ -1,2 +1 @@ -pub mod parquet_reader; pub mod sql_reader; diff --git a/src/file_readers/common/sql_reader/tables.rs b/src/file_readers/common/sql_reader/tables.rs index 7b2d540..e95e427 100644 --- a/src/file_readers/common/sql_reader/tables.rs +++ b/src/file_readers/common/sql_reader/tables.rs @@ -1,9 +1,3 @@ -mod dia_frames_info; -mod dia_frames_msms; mod pasef_frame_msms; -mod precursors; -pub use dia_frames_info::DiaFramesInfoTable; -pub use dia_frames_msms::DiaFramesMsMsTable; pub use pasef_frame_msms::PasefFrameMsMsTable; -pub use precursors::PrecursorTable; diff --git a/src/file_readers/common/sql_reader/tables/dia_frames_info.rs b/src/file_readers/common/sql_reader/tables/dia_frames_info.rs deleted file mode 100644 index 1511c2d..0000000 --- a/src/file_readers/common/sql_reader/tables/dia_frames_info.rs +++ /dev/null @@ -1,17 +0,0 @@ -use crate::file_readers::common::sql_reader::{ReadableFromSql, SqlReader}; - -#[derive(Debug)] -pub struct DiaFramesInfoTable { - pub frame: Vec, - pub group: Vec, -} - -impl ReadableFromSql for DiaFramesInfoTable { - fn from_sql(sql_reader: &SqlReader) -> Self { - let table_name: &str = "DiaFrameMsMsInfo"; - DiaFramesInfoTable { - frame: sql_reader.read_column_from_table("Frame", table_name), - group: sql_reader.read_column_from_table("WindowGroup", table_name), - } - } -} diff --git a/src/file_readers/common/sql_reader/tables/dia_frames_msms.rs b/src/file_readers/common/sql_reader/tables/dia_frames_msms.rs deleted file mode 100644 index 807eb29..0000000 --- a/src/file_readers/common/sql_reader/tables/dia_frames_msms.rs +++ /dev/null @@ -1,27 +0,0 @@ -use crate::file_readers::common::sql_reader::{ReadableFromSql, SqlReader}; - -#[derive(Debug)] -pub struct DiaFramesMsMsTable { - pub group: Vec, - pub scan_start: Vec, - pub scan_end: Vec, - pub mz_center: Vec, - pub mz_width: Vec, -} - -impl ReadableFromSql for DiaFramesMsMsTable { - fn from_sql(sql_reader: &SqlReader) -> Self { - let table_name: &str = "DiaFrameMsMsWindows"; - DiaFramesMsMsTable { - group: sql_reader.read_column_from_table("WindowGroup", table_name), - scan_start: sql_reader - .read_column_from_table("ScanNumBegin", table_name), - scan_end: sql_reader - .read_column_from_table("ScanNumEnd", table_name), - mz_center: sql_reader - .read_column_from_table("IsolationMz", table_name), - mz_width: sql_reader - .read_column_from_table("IsolationWidth", table_name), - } - } -} diff --git a/src/file_readers/common/sql_reader/tables/precursors.rs b/src/file_readers/common/sql_reader/tables/precursors.rs deleted file mode 100644 index 5187ec1..0000000 --- a/src/file_readers/common/sql_reader/tables/precursors.rs +++ /dev/null @@ -1,28 +0,0 @@ -use crate::file_readers::common::sql_reader::{ReadableFromSql, SqlReader}; - -#[derive(Debug)] -pub struct PrecursorTable { - pub id: Vec, - pub mz: Vec, - pub charge: Vec, - pub scan_average: Vec, - pub intensity: Vec, - pub precursor_frame: Vec, -} - -impl ReadableFromSql for PrecursorTable { - fn from_sql(sql_reader: &SqlReader) -> Self { - let table_name: &str = "Precursors"; - PrecursorTable { - id: sql_reader.read_column_from_table("Id", table_name), - mz: sql_reader.read_column_from_table("MonoisotopicMz", table_name), - charge: sql_reader.read_column_from_table("Charge", table_name), - scan_average: sql_reader - .read_column_from_table("ScanNumber", table_name), - intensity: sql_reader - .read_column_from_table("Intensity", table_name), - precursor_frame: sql_reader - .read_column_from_table("Parent", table_name), - } - } -} diff --git a/src/file_readers/spectrum_readers/dda_reader/precursors.rs b/src/file_readers/spectrum_readers/dda_reader/precursors.rs index dbccdbf..6429dfb 100644 --- a/src/file_readers/spectrum_readers/dda_reader/precursors.rs +++ b/src/file_readers/spectrum_readers/dda_reader/precursors.rs @@ -1,11 +1,17 @@ +use std::path::Path; + use rayon::prelude::*; use crate::{ domain_converters::{ ConvertableDomain, Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, }, - file_readers::common::sql_reader::{ - PasefFrameMsMsTable, PrecursorTable, ReadableFromSql, SqlReader, + file_readers::{ + self, + common::sql_reader::{PasefFrameMsMsTable, ReadableFromSql}, + }, + io::readers::file_readers::sql_reader::{ + precursors::SqlPrecursor, SqlReadable, SqlReader, }, ms_data::Precursor, utils::vec_utils::argsort, @@ -22,7 +28,7 @@ pub struct PrecursorReader { impl PrecursorReader { pub fn new(path: &String) -> Self { - let tdf_sql_reader: SqlReader = SqlReader { + let tdf_sql_reader = file_readers::common::sql_reader::SqlReader { path: String::from(path), }; let rt_converter: Frame2RtConverter = @@ -34,21 +40,27 @@ impl PrecursorReader { ); let pasef_frames: PasefFrameMsMsTable = PasefFrameMsMsTable::from_sql(&tdf_sql_reader); - let precursor_table: PrecursorTable = - PrecursorTable::from_sql(&tdf_sql_reader); + // let precursor_table: PrecursorTable = + // PrecursorTable::from_sql(&tdf_sql_reader); + + let tdf_sql_reader2 = + SqlReader::open(Path::new(path).join("analysis.tdf")).unwrap(); + + let precursors = + SqlPrecursor::from_sql_reader(&tdf_sql_reader2).unwrap(); let collision_energies = tdf_sql_reader.get_data_from_sql(&select_collision_energy_sql); - let precursors: Vec = (0..precursor_table.mz.len()) + let precursors: Vec = (0..precursors.len()) .into_par_iter() .map(|index| { - let frame_id: usize = precursor_table.precursor_frame[index]; - let scan_id: f64 = precursor_table.scan_average[index]; + let frame_id: usize = precursors[index].precursor_frame; + let scan_id: f64 = precursors[index].scan_average; Precursor { - mz: precursor_table.mz[index], + mz: precursors[index].mz, rt: rt_converter.convert(frame_id as u32), im: im_converter.convert(scan_id), - charge: precursor_table.charge[index], - intensity: precursor_table.intensity[index], + charge: precursors[index].charge, + intensity: precursors[index].intensity, index: index + 1, //TODO? frame_index: frame_id, collision_energy: collision_energies[index], diff --git a/src/file_readers/spectrum_readers/mini_tdf_reader.rs b/src/file_readers/spectrum_readers/mini_tdf_reader.rs index abf63ea..8f584bb 100644 --- a/src/file_readers/spectrum_readers/mini_tdf_reader.rs +++ b/src/file_readers/spectrum_readers/mini_tdf_reader.rs @@ -1,15 +1,14 @@ use crate::{ file_readers::FileFormatError, - io::readers::file_readers::tdf_blobs::{ - IndexedTdfBlobReader, TdfBlob, TdfBlobParsable, + io::readers::file_readers::{ + parquet_reader::read_parquet_precursors, + tdf_blobs::{IndexedTdfBlobReader, TdfBlob, TdfBlobParsable}, }, }; use std::fs; use { crate::{ - file_readers::{ - common::parquet_reader::read_parquet_precursors, ReadableSpectra, - }, + file_readers::ReadableSpectra, ms_data::{Precursor, Spectrum}, }, rayon::prelude::*, diff --git a/src/io/readers/file_readers.rs b/src/io/readers/file_readers.rs index 8f6178f..b6068b2 100644 --- a/src/io/readers/file_readers.rs +++ b/src/io/readers/file_readers.rs @@ -1,3 +1,3 @@ -pub mod sql_frames; +pub mod parquet_reader; pub mod sql_reader; pub mod tdf_blobs; diff --git a/src/file_readers/common/parquet_reader.rs b/src/io/readers/file_readers/parquet_reader.rs similarity index 100% rename from src/file_readers/common/parquet_reader.rs rename to src/io/readers/file_readers/parquet_reader.rs diff --git a/src/io/readers/file_readers/sql_reader.rs b/src/io/readers/file_readers/sql_reader.rs index 2540312..e9d4bce 100644 --- a/src/io/readers/file_readers/sql_reader.rs +++ b/src/io/readers/file_readers/sql_reader.rs @@ -1,3 +1,6 @@ +pub mod frames; +pub mod precursors; + use std::path::{Path, PathBuf}; use rusqlite::Connection; diff --git a/src/io/readers/file_readers/sql_frames.rs b/src/io/readers/file_readers/sql_reader/frames.rs similarity index 94% rename from src/io/readers/file_readers/sql_frames.rs rename to src/io/readers/file_readers/sql_reader/frames.rs index 15124e8..fafff13 100644 --- a/src/io/readers/file_readers/sql_frames.rs +++ b/src/io/readers/file_readers/sql_reader/frames.rs @@ -1,4 +1,4 @@ -use super::sql_reader::SqlReadable; +use super::SqlReadable; #[derive(Debug, PartialEq)] pub struct SqlFrame { @@ -18,7 +18,7 @@ impl SqlReadable for SqlFrame { } fn from_sql_row(row: &rusqlite::Row) -> Self { - SqlFrame { + Self { id: row.get(0).unwrap_or_default(), scan_mode: row.get(1).unwrap_or_default(), msms_type: row.get(2).unwrap_or_default(), diff --git a/src/io/readers/file_readers/sql_reader/precursors.rs b/src/io/readers/file_readers/sql_reader/precursors.rs new file mode 100644 index 0000000..55c2451 --- /dev/null +++ b/src/io/readers/file_readers/sql_reader/precursors.rs @@ -0,0 +1,28 @@ +use super::SqlReadable; + +#[derive(Debug, PartialEq)] +pub struct SqlPrecursor { + pub id: usize, + pub mz: f64, + pub charge: usize, + pub scan_average: f64, + pub intensity: f64, + pub precursor_frame: usize, +} + +impl SqlReadable for SqlPrecursor { + fn get_sql_query() -> String { + "SELECT Id, MonoisotopicMz, Charge, ScanNumber, Intensity, Parent FROM Precursors".to_string() + } + + fn from_sql_row(row: &rusqlite::Row) -> Self { + Self { + id: row.get(0).unwrap_or_default(), + mz: row.get(1).unwrap_or_default(), + charge: row.get(2).unwrap_or_default(), + scan_average: row.get(3).unwrap_or_default(), + intensity: row.get(4).unwrap_or_default(), + precursor_frame: row.get(5).unwrap_or_default(), + } + } +} diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index 513b4d1..8ae121f 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -5,8 +5,7 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; use crate::ms_data::{AcquisitionType, Frame, MSLevel}; use super::file_readers::{ - sql_frames::SqlFrame, - sql_reader::{SqlReadable, SqlReader}, + sql_reader::{frames::SqlFrame, SqlReadable, SqlReader}, tdf_blobs::{TdfBlob, TdfBlobReader}, }; From 1b817de915229bc79933756613ef9be4be7ee1ff Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 14 Jun 2024 16:37:49 +0200 Subject: [PATCH 041/109] CHORE: refactored pasefmsmsframes --- src/file_readers/common/sql_reader.rs | 4 +-- src/file_readers/common/sql_reader/tables.rs | 3 -- .../sql_reader/tables/pasef_frame_msms.rs | 33 ------------------- .../spectrum_readers/dda_reader.rs | 6 ++-- .../spectrum_readers/dda_reader/precursors.rs | 30 +++++++---------- src/io/readers/file_readers/sql_reader.rs | 1 + .../sql_reader/pasef_frame_msms.rs | 30 +++++++++++++++++ 7 files changed, 48 insertions(+), 59 deletions(-) delete mode 100644 src/file_readers/common/sql_reader/tables.rs delete mode 100644 src/file_readers/common/sql_reader/tables/pasef_frame_msms.rs create mode 100644 src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs diff --git a/src/file_readers/common/sql_reader.rs b/src/file_readers/common/sql_reader.rs index a5e10f0..f2fb8e1 100644 --- a/src/file_readers/common/sql_reader.rs +++ b/src/file_readers/common/sql_reader.rs @@ -1,7 +1,7 @@ mod metadata; -mod tables; +// mod tables; -pub use tables::*; +// pub use tables::*; use rusqlite::{Connection, Result, Statement}; use std::path::Path; diff --git a/src/file_readers/common/sql_reader/tables.rs b/src/file_readers/common/sql_reader/tables.rs deleted file mode 100644 index e95e427..0000000 --- a/src/file_readers/common/sql_reader/tables.rs +++ /dev/null @@ -1,3 +0,0 @@ -mod pasef_frame_msms; - -pub use pasef_frame_msms::PasefFrameMsMsTable; diff --git a/src/file_readers/common/sql_reader/tables/pasef_frame_msms.rs b/src/file_readers/common/sql_reader/tables/pasef_frame_msms.rs deleted file mode 100644 index 49332e8..0000000 --- a/src/file_readers/common/sql_reader/tables/pasef_frame_msms.rs +++ /dev/null @@ -1,33 +0,0 @@ -use crate::file_readers::common::sql_reader::{ReadableFromSql, SqlReader}; - -#[derive(Debug)] -pub struct PasefFrameMsMsTable { - pub frame: Vec, - pub scan_start: Vec, - pub scan_end: Vec, - pub mz_center: Vec, - pub mz_width: Vec, - pub collision_energy: Vec, - pub precursor: Vec, -} - -impl ReadableFromSql for PasefFrameMsMsTable { - fn from_sql(sql_reader: &SqlReader) -> Self { - let table_name: &str = "PasefFrameMsMsInfo"; - PasefFrameMsMsTable { - frame: sql_reader.read_column_from_table("Frame", table_name), - scan_start: sql_reader - .read_column_from_table("ScanNumBegin", table_name), - scan_end: sql_reader - .read_column_from_table("ScanNumEnd", table_name), - mz_center: sql_reader - .read_column_from_table("IsolationMz", table_name), - mz_width: sql_reader - .read_column_from_table("IsolationWidth", table_name), - collision_energy: sql_reader - .read_column_from_table("CollisionEnergy", table_name), - precursor: sql_reader - .read_column_from_table("Precursor", table_name), - } - } -} diff --git a/src/file_readers/spectrum_readers/dda_reader.rs b/src/file_readers/spectrum_readers/dda_reader.rs index 50eff5d..b758136 100644 --- a/src/file_readers/spectrum_readers/dda_reader.rs +++ b/src/file_readers/spectrum_readers/dda_reader.rs @@ -60,7 +60,7 @@ impl DDASpectrumReader { let mut intensities: Vec = vec![]; for &index in selection.iter() { let frame_index: usize = - self.precursor_reader.pasef_frames.frame[index] - 1; + self.precursor_reader.pasef_frames[index].frame - 1; // let frame: &Frame = &self.ms2_frames[frame_index]; let frame: &Frame = &self .ms2_frames @@ -71,9 +71,9 @@ impl DDASpectrumReader { continue; } let scan_start: usize = - self.precursor_reader.pasef_frames.scan_start[index]; + self.precursor_reader.pasef_frames[index].scan_start; let scan_end: usize = - self.precursor_reader.pasef_frames.scan_end[index]; + self.precursor_reader.pasef_frames[index].scan_end; let offset_start: usize = frame.scan_offsets[scan_start] as usize; let offset_end: usize = frame.scan_offsets[scan_end] as usize; let tof_selection: &[u32] = diff --git a/src/file_readers/spectrum_readers/dda_reader/precursors.rs b/src/file_readers/spectrum_readers/dda_reader/precursors.rs index 6429dfb..8e82a91 100644 --- a/src/file_readers/spectrum_readers/dda_reader/precursors.rs +++ b/src/file_readers/spectrum_readers/dda_reader/precursors.rs @@ -4,14 +4,12 @@ use rayon::prelude::*; use crate::{ domain_converters::{ - ConvertableDomain, Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, - }, - file_readers::{ - self, - common::sql_reader::{PasefFrameMsMsTable, ReadableFromSql}, + ConvertableDomain, Frame2RtConverter, Scan2ImConverter, }, + file_readers::{self, common::sql_reader::ReadableFromSql}, io::readers::file_readers::sql_reader::{ - precursors::SqlPrecursor, SqlReadable, SqlReader, + pasef_frame_msms::SqlPasefFrameMsMs, precursors::SqlPrecursor, + SqlReadable, SqlReader, }, ms_data::Precursor, utils::vec_utils::argsort, @@ -20,7 +18,7 @@ use crate::{ #[derive(Debug)] pub struct PrecursorReader { pub precursors: Vec, - pub pasef_frames: PasefFrameMsMsTable, + pub pasef_frames: Vec, pub order: Vec, pub offsets: Vec, pub count: usize, @@ -38,14 +36,10 @@ impl PrecursorReader { let select_collision_energy_sql = String::from( "SELECT CollisionEnergy FROM PasefFrameMsMsInfo GROUP BY Precursor", ); - let pasef_frames: PasefFrameMsMsTable = - PasefFrameMsMsTable::from_sql(&tdf_sql_reader); - // let precursor_table: PrecursorTable = - // PrecursorTable::from_sql(&tdf_sql_reader); - let tdf_sql_reader2 = SqlReader::open(Path::new(path).join("analysis.tdf")).unwrap(); - + let pasef_frames = + SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader2).unwrap(); let precursors = SqlPrecursor::from_sql_reader(&tdf_sql_reader2).unwrap(); let collision_energies = @@ -67,15 +61,15 @@ impl PrecursorReader { } }) .collect(); - let order: Vec = argsort(&pasef_frames.precursor); - let count: usize = *pasef_frames.precursor.iter().max().unwrap(); + let pasef_precursors = + &pasef_frames.iter().map(|x| x.precursor).collect(); + let order: Vec = argsort(&pasef_precursors); + let count: usize = *pasef_precursors.iter().max().unwrap(); let mut offsets: Vec = Vec::with_capacity(count + 1); offsets.push(0); for (offset, &index) in order.iter().enumerate().take(order.len() - 1) { let second_index: usize = order[offset + 1]; - if pasef_frames.precursor[index] - != pasef_frames.precursor[second_index] - { + if pasef_precursors[index] != pasef_precursors[second_index] { offsets.push(offset + 1) } } diff --git a/src/io/readers/file_readers/sql_reader.rs b/src/io/readers/file_readers/sql_reader.rs index e9d4bce..6c255a8 100644 --- a/src/io/readers/file_readers/sql_reader.rs +++ b/src/io/readers/file_readers/sql_reader.rs @@ -1,4 +1,5 @@ pub mod frames; +pub mod pasef_frame_msms; pub mod precursors; use std::path::{Path, PathBuf}; diff --git a/src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs b/src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs new file mode 100644 index 0000000..dbbc5f2 --- /dev/null +++ b/src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs @@ -0,0 +1,30 @@ +use super::SqlReadable; + +#[derive(Debug, PartialEq)] +pub struct SqlPasefFrameMsMs { + pub frame: usize, + pub scan_start: usize, + pub scan_end: usize, + pub mz_center: f64, + pub mz_width: f64, + pub collision_energy: f64, + pub precursor: usize, +} + +impl SqlReadable for SqlPasefFrameMsMs { + fn get_sql_query() -> String { + "SELECT Frame, ScanNumBegin, ScanNumEnd, IsolationMz, IsolationWidth, CollisionEnergy, Precursor FROM PasefFrameMsMsInfo".to_string() + } + + fn from_sql_row(row: &rusqlite::Row) -> Self { + Self { + frame: row.get(0).unwrap_or_default(), + scan_start: row.get(1).unwrap_or_default(), + scan_end: row.get(2).unwrap_or_default(), + mz_center: row.get(3).unwrap_or_default(), + mz_width: row.get(4).unwrap_or_default(), + collision_energy: row.get(5).unwrap_or_default(), + precursor: row.get(6).unwrap_or_default(), + } + } +} From 17d2cf80f52539697f0deb0ad1d64ad308ea5ca3 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 14 Jun 2024 16:39:55 +0200 Subject: [PATCH 042/109] CHORE: cargo fmt --- src/file_readers/common/sql_reader/metadata.rs | 2 +- src/file_readers/file_formats.rs | 1 - src/io/readers.rs | 3 ++- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/file_readers/common/sql_reader/metadata.rs b/src/file_readers/common/sql_reader/metadata.rs index a398f13..a3b4839 100644 --- a/src/file_readers/common/sql_reader/metadata.rs +++ b/src/file_readers/common/sql_reader/metadata.rs @@ -27,7 +27,7 @@ fn read_tof_max_index(connection: &Connection) -> u32 { |row| row.get(0), ) .unwrap(); - let mut tof_max_index: u32 = tof_max_index_string.parse().unwrap(); + let tof_max_index: u32 = tof_max_index_string.parse().unwrap(); tof_max_index } diff --git a/src/file_readers/file_formats.rs b/src/file_readers/file_formats.rs index 0ab4472..a1f4440 100644 --- a/src/file_readers/file_formats.rs +++ b/src/file_readers/file_formats.rs @@ -3,7 +3,6 @@ use std::{fs, path::PathBuf}; use crate::{io::readers::frame_reader::FrameReader, ms_data::Frame}; use rayon::iter::ParallelIterator; -use super::common::sql_reader::SqlReader; pub enum FileFormat { DFolder(PathBuf), MS2Folder(PathBuf), diff --git a/src/io/readers.rs b/src/io/readers.rs index 5a5f8b0..34126cf 100644 --- a/src/io/readers.rs +++ b/src/io/readers.rs @@ -1,2 +1,3 @@ -pub(crate) mod file_readers; +// pub(crate) mod file_readers; +pub mod file_readers; pub mod frame_reader; From 3b8a4ed9a76908c47d39b3b2730f95839938c387 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 14 Jun 2024 16:53:00 +0200 Subject: [PATCH 043/109] CHORE: simplificaiton of precursor reader --- src/file_readers/spectrum_readers/dda_reader.rs | 2 +- .../spectrum_readers/dda_reader/precursors.rs | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/file_readers/spectrum_readers/dda_reader.rs b/src/file_readers/spectrum_readers/dda_reader.rs index b758136..ed693c8 100644 --- a/src/file_readers/spectrum_readers/dda_reader.rs +++ b/src/file_readers/spectrum_readers/dda_reader.rs @@ -61,7 +61,7 @@ impl DDASpectrumReader { for &index in selection.iter() { let frame_index: usize = self.precursor_reader.pasef_frames[index].frame - 1; - // let frame: &Frame = &self.ms2_frames[frame_index]; + // TODO OPTIMIZE!!!!! let frame: &Frame = &self .ms2_frames .iter() diff --git a/src/file_readers/spectrum_readers/dda_reader/precursors.rs b/src/file_readers/spectrum_readers/dda_reader/precursors.rs index 8e82a91..c48e33c 100644 --- a/src/file_readers/spectrum_readers/dda_reader/precursors.rs +++ b/src/file_readers/spectrum_readers/dda_reader/precursors.rs @@ -33,17 +33,12 @@ impl PrecursorReader { Frame2RtConverter::from_sql(&tdf_sql_reader); let im_converter: Scan2ImConverter = Scan2ImConverter::from_sql(&tdf_sql_reader); - let select_collision_energy_sql = String::from( - "SELECT CollisionEnergy FROM PasefFrameMsMsInfo GROUP BY Precursor", - ); let tdf_sql_reader2 = SqlReader::open(Path::new(path).join("analysis.tdf")).unwrap(); let pasef_frames = SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader2).unwrap(); let precursors = SqlPrecursor::from_sql_reader(&tdf_sql_reader2).unwrap(); - let collision_energies = - tdf_sql_reader.get_data_from_sql(&select_collision_energy_sql); let precursors: Vec = (0..precursors.len()) .into_par_iter() .map(|index| { @@ -57,7 +52,12 @@ impl PrecursorReader { intensity: precursors[index].intensity, index: index + 1, //TODO? frame_index: frame_id, - collision_energy: collision_energies[index], + // TODO OPTIMIZE!!!!! + collision_energy: pasef_frames + .iter() + .find(|&x| x.precursor == index + 1) + .unwrap() + .collision_energy, } }) .collect(); From 4705a580c826334bf34cc94bcc16583efeeb418c Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 14 Jun 2024 16:55:27 +0200 Subject: [PATCH 044/109] FIX: diaPASEF type typo --- src/io/readers/frame_reader.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index 8ae121f..600deca 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -28,7 +28,7 @@ impl FrameReader { let acquisition = if sql_frames.iter().any(|x| x.msms_type == 8) { AcquisitionType::DDAPASEF } else if sql_frames.iter().any(|x| x.msms_type == 9) { - AcquisitionType::DDAPASEF + AcquisitionType::DIAPASEF } else { AcquisitionType::Unknown }; From 81cd6eeb01b01f2127edc8abf14d569bec047b91 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 24 Jun 2024 12:00:15 +0200 Subject: [PATCH 045/109] FIX: unified MsLevel parsing --- src/io/readers/frame_reader.rs | 7 +------ src/ms_data/frames.rs | 11 +++++++++++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index 600deca..cc9f4d8 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -67,12 +67,7 @@ impl FrameReader { &blob, &frame.scan_offsets, ); - frame.ms_level = match sql_frame.msms_type { - 0 => MSLevel::MS1, - 8 => MSLevel::MS2, - 9 => MSLevel::MS2, - _ => MSLevel::Unknown, - }; + frame.ms_level = MSLevel::read_from_msms_type(sql_frame.msms_type); frame.index = sql_frame.id; frame.rt = sql_frame.rt; frame.acquisition_type = self.acquisition; diff --git a/src/ms_data/frames.rs b/src/ms_data/frames.rs index 212df83..be2f2e2 100644 --- a/src/ms_data/frames.rs +++ b/src/ms_data/frames.rs @@ -24,3 +24,14 @@ pub enum MSLevel { #[default] Unknown, } + +impl MSLevel { + pub fn read_from_msms_type(msms_type: u8) -> MSLevel { + match msms_type { + 0 => MSLevel::MS1, + 8 => MSLevel::MS2, + 9 => MSLevel::MS2, + _ => MSLevel::Unknown, + } + } +} From ac6a23d8b9837bf9310571228813a3d181367fa5 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 24 Jun 2024 13:40:35 +0200 Subject: [PATCH 046/109] FEAT: added metadata --- src/ms_data.rs | 2 ++ src/ms_data/metadata.rs | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 src/ms_data/metadata.rs diff --git a/src/ms_data.rs b/src/ms_data.rs index 460938d..5f9173c 100644 --- a/src/ms_data.rs +++ b/src/ms_data.rs @@ -2,12 +2,14 @@ mod acquisition; mod frames; +mod metadata; mod precursors; mod quadrupole; mod spectra; pub use acquisition::*; pub use frames::*; +pub use metadata::*; pub use precursors::*; pub use quadrupole::*; pub use spectra::*; diff --git a/src/ms_data/metadata.rs b/src/ms_data/metadata.rs new file mode 100644 index 0000000..469181b --- /dev/null +++ b/src/ms_data/metadata.rs @@ -0,0 +1,17 @@ +use std::path::PathBuf; + +use crate::domain_converters::{ + Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, +}; + +use super::AcquisitionType; + +/// Metadata from a single run +#[derive(Debug, Clone)] +pub struct Metadata { + pub path: PathBuf, + pub acquisition_type: AcquisitionType, + pub rt_converter: Frame2RtConverter, + pub im_converter: Scan2ImConverter, + pub mz_converter: Tof2MzConverter, +} From 0763df4ca7abb725f69d5b19119b2756431cb700 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 24 Jun 2024 13:41:48 +0200 Subject: [PATCH 047/109] CHORE: updated sqlreadable trait name to ReadableSqlTable --- src/file_readers/spectrum_readers/dda_reader/precursors.rs | 2 +- src/io/readers/file_readers/sql_reader.rs | 7 ++++++- src/io/readers/file_readers/sql_reader/frames.rs | 4 ++-- src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs | 4 ++-- src/io/readers/file_readers/sql_reader/precursors.rs | 4 ++-- src/io/readers/frame_reader.rs | 2 +- 6 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/file_readers/spectrum_readers/dda_reader/precursors.rs b/src/file_readers/spectrum_readers/dda_reader/precursors.rs index c48e33c..9026b29 100644 --- a/src/file_readers/spectrum_readers/dda_reader/precursors.rs +++ b/src/file_readers/spectrum_readers/dda_reader/precursors.rs @@ -9,7 +9,7 @@ use crate::{ file_readers::{self, common::sql_reader::ReadableFromSql}, io::readers::file_readers::sql_reader::{ pasef_frame_msms::SqlPasefFrameMsMs, precursors::SqlPrecursor, - SqlReadable, SqlReader, + ReadableSqlTable, SqlReader, }, ms_data::Precursor, utils::vec_utils::argsort, diff --git a/src/io/readers/file_readers/sql_reader.rs b/src/io/readers/file_readers/sql_reader.rs index 6c255a8..aa90e9d 100644 --- a/src/io/readers/file_readers/sql_reader.rs +++ b/src/io/readers/file_readers/sql_reader.rs @@ -1,4 +1,5 @@ pub mod frames; +pub mod metadata; pub mod pasef_frame_msms; pub mod precursors; @@ -22,9 +23,13 @@ impl SqlReader { pub fn get_path(&self) -> PathBuf { self.path.clone() } + + pub fn get_connection(&self) -> &Connection { + &self.connection + } } -pub trait SqlReadable { +pub trait ReadableSqlTable { fn get_sql_query() -> String; fn from_sql_row(row: &rusqlite::Row) -> Self; diff --git a/src/io/readers/file_readers/sql_reader/frames.rs b/src/io/readers/file_readers/sql_reader/frames.rs index fafff13..e1d7337 100644 --- a/src/io/readers/file_readers/sql_reader/frames.rs +++ b/src/io/readers/file_readers/sql_reader/frames.rs @@ -1,4 +1,4 @@ -use super::SqlReadable; +use super::ReadableSqlTable; #[derive(Debug, PartialEq)] pub struct SqlFrame { @@ -12,7 +12,7 @@ pub struct SqlFrame { pub accumulation_time: f64, } -impl SqlReadable for SqlFrame { +impl ReadableSqlTable for SqlFrame { fn get_sql_query() -> String { "SELECT Id, ScanMode, MsMsType, NumPeaks, Time, NumScans, TimsId, AccumulationTime FROM Frames".to_string() } diff --git a/src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs b/src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs index dbbc5f2..33fe4b6 100644 --- a/src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs +++ b/src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs @@ -1,4 +1,4 @@ -use super::SqlReadable; +use super::ReadableSqlTable; #[derive(Debug, PartialEq)] pub struct SqlPasefFrameMsMs { @@ -11,7 +11,7 @@ pub struct SqlPasefFrameMsMs { pub precursor: usize, } -impl SqlReadable for SqlPasefFrameMsMs { +impl ReadableSqlTable for SqlPasefFrameMsMs { fn get_sql_query() -> String { "SELECT Frame, ScanNumBegin, ScanNumEnd, IsolationMz, IsolationWidth, CollisionEnergy, Precursor FROM PasefFrameMsMsInfo".to_string() } diff --git a/src/io/readers/file_readers/sql_reader/precursors.rs b/src/io/readers/file_readers/sql_reader/precursors.rs index 55c2451..c2b00aa 100644 --- a/src/io/readers/file_readers/sql_reader/precursors.rs +++ b/src/io/readers/file_readers/sql_reader/precursors.rs @@ -1,4 +1,4 @@ -use super::SqlReadable; +use super::ReadableSqlTable; #[derive(Debug, PartialEq)] pub struct SqlPrecursor { @@ -10,7 +10,7 @@ pub struct SqlPrecursor { pub precursor_frame: usize, } -impl SqlReadable for SqlPrecursor { +impl ReadableSqlTable for SqlPrecursor { fn get_sql_query() -> String { "SELECT Id, MonoisotopicMz, Charge, ScanNumber, Intensity, Parent FROM Precursors".to_string() } diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index cc9f4d8..b40fcd6 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -5,7 +5,7 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; use crate::ms_data::{AcquisitionType, Frame, MSLevel}; use super::file_readers::{ - sql_reader::{frames::SqlFrame, SqlReadable, SqlReader}, + sql_reader::{frames::SqlFrame, ReadableSqlTable, SqlReader}, tdf_blobs::{TdfBlob, TdfBlobReader}, }; From bfe71ccfc781c041e6b1a7fbd12813bf8bbf5f6c Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 24 Jun 2024 14:00:21 +0200 Subject: [PATCH 048/109] FEAT: added hashmap reader for sql --- src/io/readers/file_readers/sql_reader.rs | 25 ++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/io/readers/file_readers/sql_reader.rs b/src/io/readers/file_readers/sql_reader.rs index aa90e9d..dabbbca 100644 --- a/src/io/readers/file_readers/sql_reader.rs +++ b/src/io/readers/file_readers/sql_reader.rs @@ -3,7 +3,10 @@ pub mod metadata; pub mod pasef_frame_msms; pub mod precursors; -use std::path::{Path, PathBuf}; +use std::{ + collections::HashMap, + path::{Path, PathBuf}, +}; use rusqlite::Connection; @@ -46,6 +49,26 @@ pub trait ReadableSqlTable { } } +pub trait ReadableSqlHashMap { + fn get_sql_query() -> String; + + fn from_sql_reader( + reader: &SqlReader, + ) -> Result, SqlError> + where + Self: Sized, + { + let query = Self::get_sql_query(); + let mut stmt = reader.connection.prepare(&query)?; + let kv_map: HashMap = stmt + .query_map([], |row| Ok((row.get(0)?, row.get(1)?)))? + .map(Result::unwrap) + .collect(); + + Ok(kv_map) + } +} + #[derive(thiserror::Error, Debug)] #[error("SqlError: {0}")] pub struct SqlError(#[from] rusqlite::Error); From 53d474831478bef961def63769751d424c160514 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 24 Jun 2024 14:01:21 +0200 Subject: [PATCH 049/109] FEAT: reamoved option to get connection from sqlreader --- src/io/readers/file_readers/sql_reader.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/io/readers/file_readers/sql_reader.rs b/src/io/readers/file_readers/sql_reader.rs index dabbbca..6d5d9b8 100644 --- a/src/io/readers/file_readers/sql_reader.rs +++ b/src/io/readers/file_readers/sql_reader.rs @@ -26,10 +26,6 @@ impl SqlReader { pub fn get_path(&self) -> PathBuf { self.path.clone() } - - pub fn get_connection(&self) -> &Connection { - &self.connection - } } pub trait ReadableSqlTable { From 6f4f2a80d6f342c10851aa3f09c752b5f2600629 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 24 Jun 2024 14:09:40 +0200 Subject: [PATCH 050/109] FEAT: make sqlhashmap propagate error --- src/io/readers/file_readers/sql_reader.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/io/readers/file_readers/sql_reader.rs b/src/io/readers/file_readers/sql_reader.rs index 6d5d9b8..f39ca4e 100644 --- a/src/io/readers/file_readers/sql_reader.rs +++ b/src/io/readers/file_readers/sql_reader.rs @@ -56,12 +56,14 @@ pub trait ReadableSqlHashMap { { let query = Self::get_sql_query(); let mut stmt = reader.connection.prepare(&query)?; - let kv_map: HashMap = stmt - .query_map([], |row| Ok((row.get(0)?, row.get(1)?)))? - .map(Result::unwrap) - .collect(); - - Ok(kv_map) + let mut result = HashMap::new(); + let _ = stmt.query_map([], |row| { + let key: String = row.get(0)?; + let value: String = row.get(1)?; + result.insert(key, value); + Ok(()) + })?; + Ok(result) } } From 4e0e38732487d4def6b07c16a338347e041bbfa6 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 24 Jun 2024 15:01:39 +0200 Subject: [PATCH 051/109] FEAT: implemented metadata reader --- src/io/readers.rs | 1 + src/io/readers/file_readers/sql_reader.rs | 15 ++++ .../file_readers/sql_reader/metadata.rs | 9 +++ src/io/readers/metadata_reader.rs | 77 +++++++++++++++++++ src/ms_data/metadata.rs | 3 - 5 files changed, 102 insertions(+), 3 deletions(-) create mode 100644 src/io/readers/file_readers/sql_reader/metadata.rs create mode 100644 src/io/readers/metadata_reader.rs diff --git a/src/io/readers.rs b/src/io/readers.rs index 34126cf..3d0a4f0 100644 --- a/src/io/readers.rs +++ b/src/io/readers.rs @@ -1,3 +1,4 @@ // pub(crate) mod file_readers; pub mod file_readers; pub mod frame_reader; +pub mod metadata_reader; diff --git a/src/io/readers/file_readers/sql_reader.rs b/src/io/readers/file_readers/sql_reader.rs index f39ca4e..7db5c62 100644 --- a/src/io/readers/file_readers/sql_reader.rs +++ b/src/io/readers/file_readers/sql_reader.rs @@ -26,6 +26,21 @@ impl SqlReader { pub fn get_path(&self) -> PathBuf { self.path.clone() } + + pub fn read_column_from_table( + &self, + column_name: &str, + table_name: &str, + ) -> Result, SqlError> { + let query = format!("SELECT {} FROM {}", column_name, table_name); + let mut stmt = self.connection.prepare(&query)?; + let rows = stmt.query_map([], |row| match row.get::(0) { + Ok(value) => Ok(value), + _ => Ok(T::default()), + })?; + let result = rows.collect::, _>>()?; + Ok(result) + } } pub trait ReadableSqlTable { diff --git a/src/io/readers/file_readers/sql_reader/metadata.rs b/src/io/readers/file_readers/sql_reader/metadata.rs new file mode 100644 index 0000000..7a3b536 --- /dev/null +++ b/src/io/readers/file_readers/sql_reader/metadata.rs @@ -0,0 +1,9 @@ +use super::ReadableSqlHashMap; + +pub struct SqlMetadata {} + +impl ReadableSqlHashMap for SqlMetadata { + fn get_sql_query() -> String { + "SELECT key, value FROM GlobalMetadata".to_string() + } +} diff --git a/src/io/readers/metadata_reader.rs b/src/io/readers/metadata_reader.rs new file mode 100644 index 0000000..01bcf73 --- /dev/null +++ b/src/io/readers/metadata_reader.rs @@ -0,0 +1,77 @@ +use std::{collections::HashMap, path::Path}; + +use crate::{ + domain_converters::{Frame2RtConverter, Scan2ImConverter, Tof2MzConverter}, + ms_data::Metadata, +}; + +use super::file_readers::sql_reader::{ + metadata::SqlMetadata, ReadableSqlHashMap, SqlReader, +}; + +const OTOF_CONTROL: &str = "Bruker otofControl"; + +pub struct MetadataReader; + +impl MetadataReader { + pub fn new(path: impl AsRef) -> Metadata { + let sql_path = path.as_ref().join("analysis.tdf"); + let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); + let sql_metadata: HashMap = + SqlMetadata::from_sql_reader(&tdf_sql_reader).unwrap(); + Metadata { + path: path.as_ref().to_path_buf(), + rt_converter: get_rt_converter(&tdf_sql_reader), + im_converter: get_im_converter(&sql_metadata), + mz_converter: get_mz_converter(&sql_metadata), + } + } +} + +fn get_rt_converter(tdf_sql_reader: &SqlReader) -> Frame2RtConverter { + let rt_values: Vec = tdf_sql_reader + .read_column_from_table("Time", "Frames") + .unwrap(); + Frame2RtConverter::from_values(rt_values) +} + +fn get_mz_converter(sql_metadata: &HashMap) -> Tof2MzConverter { + let software = sql_metadata.get("AcquisitionSoftware").unwrap(); + let tof_max_index: u32 = sql_metadata + .get("DigitizerNumSamples") + .unwrap() + .parse() + .unwrap(); + let mut mz_min: f64 = sql_metadata + .get("MzAcqRangeLower") + .unwrap() + .parse() + .unwrap(); + let mut mz_max: f64 = sql_metadata + .get("MzAcqRangeUpper") + .unwrap() + .parse() + .unwrap(); + if software == OTOF_CONTROL { + mz_min -= 5.0; + mz_max += 5.0; + } + Tof2MzConverter::from_boundaries(mz_min, mz_max, tof_max_index) +} + +fn get_im_converter( + sql_metadata: &HashMap, +) -> Scan2ImConverter { + let scan_max_index: u32 = 927; //TODO + let im_min: f64 = sql_metadata + .get("OneOverK0AcqRangeLower") + .unwrap() + .parse() + .unwrap(); + let im_max: f64 = sql_metadata + .get("OneOverK0AcqRangeUpper") + .unwrap() + .parse() + .unwrap(); + Scan2ImConverter::from_boundaries(im_min, im_max, scan_max_index) +} diff --git a/src/ms_data/metadata.rs b/src/ms_data/metadata.rs index 469181b..2ade40c 100644 --- a/src/ms_data/metadata.rs +++ b/src/ms_data/metadata.rs @@ -4,13 +4,10 @@ use crate::domain_converters::{ Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, }; -use super::AcquisitionType; - /// Metadata from a single run #[derive(Debug, Clone)] pub struct Metadata { pub path: PathBuf, - pub acquisition_type: AcquisitionType, pub rt_converter: Frame2RtConverter, pub im_converter: Scan2ImConverter, pub mz_converter: Tof2MzConverter, From 04de7ff3cee1c0068e67d79a70c3f78fd45144b5 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 24 Jun 2024 15:12:49 +0200 Subject: [PATCH 052/109] FEAT: fully refactored sql reader to use metadata struct --- src/file_readers/common.rs | 1 - src/file_readers/common/sql_reader.rs | 67 ---------- .../common/sql_reader/metadata.rs | 116 ------------------ .../spectrum_readers/dda_reader.rs | 15 +-- .../spectrum_readers/dda_reader/precursors.rs | 26 ++-- src/io/readers/file_readers/sql_reader.rs | 3 +- .../file_readers/sql_reader/metadata.rs | 2 +- 7 files changed, 19 insertions(+), 211 deletions(-) delete mode 100644 src/file_readers/common.rs delete mode 100644 src/file_readers/common/sql_reader.rs delete mode 100644 src/file_readers/common/sql_reader/metadata.rs diff --git a/src/file_readers/common.rs b/src/file_readers/common.rs deleted file mode 100644 index 55cbaa0..0000000 --- a/src/file_readers/common.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod sql_reader; diff --git a/src/file_readers/common/sql_reader.rs b/src/file_readers/common/sql_reader.rs deleted file mode 100644 index f2fb8e1..0000000 --- a/src/file_readers/common/sql_reader.rs +++ /dev/null @@ -1,67 +0,0 @@ -mod metadata; -// mod tables; - -// pub use tables::*; - -use rusqlite::{Connection, Result, Statement}; -use std::path::Path; - -#[derive(Debug)] -pub struct SqlReader { - pub path: String, -} - -impl SqlReader { - fn read_column_from_table( - &self, - column_name: &str, - table_name: &str, - ) -> Vec { - let column_names: Vec = - self.get_table_columns(table_name).unwrap(); - let order_by: String = column_names.join(", "); - let query: String = format!( - "SELECT {} FROM {} ORDER BY {}", - column_name, table_name, order_by - ); - - self.get_data_from_sql(&query) - } - - pub fn get_data_from_sql( - &self, - query: &String, - ) -> Vec { - let connection: Connection = get_sql_connection(&self.path); - let mut stmt: Statement = connection.prepare(&query).unwrap(); - let rows = stmt - .query_map( - [], - // |row| row.get::(0) - |row| match row.get::(0) { - Ok(value) => Ok(value), - _ => Ok(T::default()), - }, - ) - .unwrap(); - rows.collect::>>().unwrap() - } - - fn get_table_columns(&self, table_name: &str) -> Result> { - let connection: Connection = get_sql_connection(&self.path); - let query = format!("PRAGMA table_info({})", table_name); - let mut stmt: Statement = connection.prepare(&query)?; - let rows = stmt.query_map([], |row| row.get::(1))?; - rows.collect() - } -} - -fn get_sql_connection(path: &String) -> Connection { - let db_file_path: std::path::PathBuf = Path::new(path).join("analysis.tdf"); - let connection: Connection = Connection::open(&db_file_path).unwrap(); - connection -} - -pub trait ReadableFromSql { - fn from_sql(sql_reader: &SqlReader) -> Self; -} diff --git a/src/file_readers/common/sql_reader/metadata.rs b/src/file_readers/common/sql_reader/metadata.rs deleted file mode 100644 index a3b4839..0000000 --- a/src/file_readers/common/sql_reader/metadata.rs +++ /dev/null @@ -1,116 +0,0 @@ -use rusqlite::{Connection, Statement}; - -use crate::domain_converters::{ - Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, -}; - -use super::{get_sql_connection, ReadableFromSql, SqlReader}; - -const OTOF_CONTROL: &str = "Bruker otofControl"; - -fn read_software(connection: &Connection) -> String { - let software: String = connection - .query_row( - "SELECT Value FROM GlobalMetadata WHERE Key = 'AcquisitionSoftware'", - [], - |row| row.get(0), - ) - .unwrap(); - software -} - -fn read_tof_max_index(connection: &Connection) -> u32 { - let tof_max_index_string: String = connection - .query_row( - "SELECT Value FROM GlobalMetadata WHERE Key = 'DigitizerNumSamples'", - [], - |row| row.get(0), - ) - .unwrap(); - let tof_max_index: u32 = tof_max_index_string.parse().unwrap(); - tof_max_index -} - -fn read_mz_max_value(connection: &Connection) -> f64 { - let mz_max_value_string: String = connection - .query_row( - "SELECT Value FROM GlobalMetadata WHERE Key = 'MzAcqRangeUpper'", - [], - |row| row.get(0), - ) - .unwrap(); - let mut mz_max_value: f64 = mz_max_value_string.parse().unwrap(); - if read_software(connection) == OTOF_CONTROL { - mz_max_value += 5.0; - } - mz_max_value -} - -fn read_mz_min_value(connection: &Connection) -> f64 { - let mz_min_value_string: String = connection - .query_row( - "SELECT Value FROM GlobalMetadata WHERE Key = 'MzAcqRangeLower'", - [], - |row| row.get(0), - ) - .unwrap(); - let mut mz_min_value: f64 = mz_min_value_string.parse().unwrap(); - if read_software(connection) == OTOF_CONTROL { - mz_min_value -= 5.0; - } - mz_min_value -} - -impl SqlReader { - fn read_metadata(&self, value_name: &str) -> String { - let connection: Connection = get_sql_connection(&self.path); - let query: String = format!( - "SELECT Value FROM GlobalMetadata WHERE Key = '{}'", - value_name - ); - let mut stmt: Statement = connection.prepare(&query).unwrap(); - let value_str: String = stmt.query_row([], |row| row.get(0)).unwrap(); - value_str - } - - pub fn read_im_information(&self) -> (u32, f64, f64) { - let lower_im_value: f64 = self - .read_metadata("OneOverK0AcqRangeLower") - .parse() - .unwrap(); - let upper_im_value: f64 = self - .read_metadata("OneOverK0AcqRangeUpper") - .parse() - .unwrap(); - let scan_max_index: u32 = 927; - (scan_max_index, lower_im_value, upper_im_value) - } - - pub fn read_mz_information(&self) -> (u32, f64, f64) { - let connection: Connection = get_sql_connection(&self.path); - let tof_max_index: u32 = read_tof_max_index(&connection); - let lower_mz_value: f64 = read_mz_min_value(&connection); - let upper_mz_value: f64 = read_mz_max_value(&connection); - (tof_max_index, lower_mz_value, upper_mz_value) - } -} - -impl ReadableFromSql for Tof2MzConverter { - fn from_sql(sql_reader: &SqlReader) -> Self { - let (tof_max_index, mz_min, mz_max) = sql_reader.read_mz_information(); - Self::from_boundaries(mz_min, mz_max, tof_max_index) - } -} - -impl ReadableFromSql for Scan2ImConverter { - fn from_sql(sql_reader: &SqlReader) -> Self { - let (scan_max_index, im_min, im_max) = sql_reader.read_im_information(); - Self::from_boundaries(im_min, im_max, scan_max_index) - } -} - -impl ReadableFromSql for Frame2RtConverter { - fn from_sql(sql_reader: &SqlReader) -> Self { - Self::from_values(sql_reader.read_column_from_table("Time", "Frames")) - } -} diff --git a/src/file_readers/spectrum_readers/dda_reader.rs b/src/file_readers/spectrum_readers/dda_reader.rs index ed693c8..99c1ce9 100644 --- a/src/file_readers/spectrum_readers/dda_reader.rs +++ b/src/file_readers/spectrum_readers/dda_reader.rs @@ -3,11 +3,8 @@ mod precursors; use crate::{ calibration::Tof2MzCalibrator, domain_converters::Tof2MzConverter, - file_readers::{ - common::sql_reader::{ReadableFromSql, SqlReader}, - ReadableSpectra, - }, - io::readers::frame_reader::FrameReader, + file_readers::ReadableSpectra, + io::readers::{frame_reader::FrameReader, metadata_reader::MetadataReader}, ms_data::{ Frame, RawProcessedSpectrumState, RawSpectrum, RawSpectrumProcessor, Spectrum, @@ -32,13 +29,9 @@ pub struct DDASpectrumReader { impl DDASpectrumReader { pub fn new(path_name: String) -> Self { - // let tdf_reader: TDFReader = TDFReader::new(&path_name.to_string()); - let tdf_sql_reader: SqlReader = SqlReader { - path: String::from(&path_name), - }; let frame_reader: FrameReader = FrameReader::new(&path_name); - let mz_reader: Tof2MzConverter = - Tof2MzConverter::from_sql(&tdf_sql_reader); + let metadata = MetadataReader::new(&path_name); + let mz_reader: Tof2MzConverter = metadata.mz_converter; let ms2_frames: Vec = frame_reader.parallel_filter(|x| x.msms_type != 0).collect(); diff --git a/src/file_readers/spectrum_readers/dda_reader/precursors.rs b/src/file_readers/spectrum_readers/dda_reader/precursors.rs index 9026b29..2fcb309 100644 --- a/src/file_readers/spectrum_readers/dda_reader/precursors.rs +++ b/src/file_readers/spectrum_readers/dda_reader/precursors.rs @@ -6,10 +6,12 @@ use crate::{ domain_converters::{ ConvertableDomain, Frame2RtConverter, Scan2ImConverter, }, - file_readers::{self, common::sql_reader::ReadableFromSql}, - io::readers::file_readers::sql_reader::{ - pasef_frame_msms::SqlPasefFrameMsMs, precursors::SqlPrecursor, - ReadableSqlTable, SqlReader, + io::readers::{ + file_readers::sql_reader::{ + pasef_frame_msms::SqlPasefFrameMsMs, precursors::SqlPrecursor, + ReadableSqlTable, SqlReader, + }, + metadata_reader::MetadataReader, }, ms_data::Precursor, utils::vec_utils::argsort, @@ -26,19 +28,15 @@ pub struct PrecursorReader { impl PrecursorReader { pub fn new(path: &String) -> Self { - let tdf_sql_reader = file_readers::common::sql_reader::SqlReader { - path: String::from(path), - }; - let rt_converter: Frame2RtConverter = - Frame2RtConverter::from_sql(&tdf_sql_reader); - let im_converter: Scan2ImConverter = - Scan2ImConverter::from_sql(&tdf_sql_reader); - let tdf_sql_reader2 = + let metadata = MetadataReader::new(&path); + let rt_converter: Frame2RtConverter = metadata.rt_converter; + let im_converter: Scan2ImConverter = metadata.im_converter; + let tdf_sql_reader = SqlReader::open(Path::new(path).join("analysis.tdf")).unwrap(); let pasef_frames = - SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader2).unwrap(); + SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader).unwrap(); let precursors = - SqlPrecursor::from_sql_reader(&tdf_sql_reader2).unwrap(); + SqlPrecursor::from_sql_reader(&tdf_sql_reader).unwrap(); let precursors: Vec = (0..precursors.len()) .into_par_iter() .map(|index| { diff --git a/src/io/readers/file_readers/sql_reader.rs b/src/io/readers/file_readers/sql_reader.rs index 7db5c62..5356b35 100644 --- a/src/io/readers/file_readers/sql_reader.rs +++ b/src/io/readers/file_readers/sql_reader.rs @@ -72,12 +72,13 @@ pub trait ReadableSqlHashMap { let query = Self::get_sql_query(); let mut stmt = reader.connection.prepare(&query)?; let mut result = HashMap::new(); - let _ = stmt.query_map([], |row| { + let rows = stmt.query_map([], |row| { let key: String = row.get(0)?; let value: String = row.get(1)?; result.insert(key, value); Ok(()) })?; + rows.collect::, _>>()?; Ok(result) } } diff --git a/src/io/readers/file_readers/sql_reader/metadata.rs b/src/io/readers/file_readers/sql_reader/metadata.rs index 7a3b536..920791f 100644 --- a/src/io/readers/file_readers/sql_reader/metadata.rs +++ b/src/io/readers/file_readers/sql_reader/metadata.rs @@ -4,6 +4,6 @@ pub struct SqlMetadata {} impl ReadableSqlHashMap for SqlMetadata { fn get_sql_query() -> String { - "SELECT key, value FROM GlobalMetadata".to_string() + "SELECT Key, Value FROM GlobalMetadata".to_string() } } From 1eb57f9fcf6832d9e620d2733c0418c3637dcec3 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 24 Jun 2024 15:13:38 +0200 Subject: [PATCH 053/109] FIX: remove lingering module that was refactored --- src/file_readers.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/file_readers.rs b/src/file_readers.rs index 9596557..70c022e 100644 --- a/src/file_readers.rs +++ b/src/file_readers.rs @@ -1,6 +1,5 @@ use crate::Error; -mod common; mod file_formats; mod spectrum_readers; From cee2b42f02e6ea46b0cf0ee97e7490e8e6bbc1d2 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 24 Jun 2024 15:54:11 +0200 Subject: [PATCH 054/109] FEAT: renamed tdf blobs to refelect reader --- src/file_readers/spectrum_readers/mini_tdf_reader.rs | 2 +- src/io/readers/file_readers.rs | 2 +- .../readers/file_readers/{tdf_blobs.rs => tdf_blob_reader.rs} | 0 src/io/readers/frame_reader.rs | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename src/io/readers/file_readers/{tdf_blobs.rs => tdf_blob_reader.rs} (100%) diff --git a/src/file_readers/spectrum_readers/mini_tdf_reader.rs b/src/file_readers/spectrum_readers/mini_tdf_reader.rs index 8f584bb..ce60fb4 100644 --- a/src/file_readers/spectrum_readers/mini_tdf_reader.rs +++ b/src/file_readers/spectrum_readers/mini_tdf_reader.rs @@ -2,7 +2,7 @@ use crate::{ file_readers::FileFormatError, io::readers::file_readers::{ parquet_reader::read_parquet_precursors, - tdf_blobs::{IndexedTdfBlobReader, TdfBlob, TdfBlobParsable}, + tdf_blob_reader::{IndexedTdfBlobReader, TdfBlob, TdfBlobParsable}, }, }; use std::fs; diff --git a/src/io/readers/file_readers.rs b/src/io/readers/file_readers.rs index b6068b2..38aa955 100644 --- a/src/io/readers/file_readers.rs +++ b/src/io/readers/file_readers.rs @@ -1,3 +1,3 @@ pub mod parquet_reader; pub mod sql_reader; -pub mod tdf_blobs; +pub mod tdf_blob_reader; diff --git a/src/io/readers/file_readers/tdf_blobs.rs b/src/io/readers/file_readers/tdf_blob_reader.rs similarity index 100% rename from src/io/readers/file_readers/tdf_blobs.rs rename to src/io/readers/file_readers/tdf_blob_reader.rs diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index b40fcd6..119a485 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -6,7 +6,7 @@ use crate::ms_data::{AcquisitionType, Frame, MSLevel}; use super::file_readers::{ sql_reader::{frames::SqlFrame, ReadableSqlTable, SqlReader}, - tdf_blobs::{TdfBlob, TdfBlobReader}, + tdf_blob_reader::{TdfBlob, TdfBlobReader}, }; #[derive(Debug)] From 1cc18423777709ea9978eb5dc5339732e35d1c10 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 24 Jun 2024 16:23:50 +0200 Subject: [PATCH 055/109] FEAT: refactored precursor reader in the new io structure --- .../spectrum_readers/dda_reader.rs | 68 ++++++++++----- .../spectrum_readers/dda_reader/precursors.rs | 83 ------------------- src/io/readers.rs | 1 + src/io/readers/precursor_reader.rs | 70 ++++++++++++++++ 4 files changed, 120 insertions(+), 102 deletions(-) delete mode 100644 src/file_readers/spectrum_readers/dda_reader/precursors.rs create mode 100644 src/io/readers/precursor_reader.rs diff --git a/src/file_readers/spectrum_readers/dda_reader.rs b/src/file_readers/spectrum_readers/dda_reader.rs index 99c1ce9..1525982 100644 --- a/src/file_readers/spectrum_readers/dda_reader.rs +++ b/src/file_readers/spectrum_readers/dda_reader.rs @@ -1,20 +1,27 @@ -mod precursors; +use std::path::Path; use crate::{ calibration::Tof2MzCalibrator, domain_converters::Tof2MzConverter, file_readers::ReadableSpectra, - io::readers::{frame_reader::FrameReader, metadata_reader::MetadataReader}, + io::readers::{ + file_readers::sql_reader::{ + pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlReader, + }, + frame_reader::FrameReader, + metadata_reader::MetadataReader, + precursor_reader::PrecursorReader, + }, ms_data::{ - Frame, RawProcessedSpectrumState, RawSpectrum, RawSpectrumProcessor, - Spectrum, + Frame, Precursor, RawProcessedSpectrumState, RawSpectrum, + RawSpectrumProcessor, Spectrum, }, - utils::vec_utils::group_and_sum, + utils::vec_utils::{argsort, group_and_sum}, }; use rayon::prelude::*; -use self::precursors::PrecursorReader; +// use self::precursors::PrecursorReader; const SMOOTHING_WINDOW: u32 = 1; const CENTROIDING_WINDOW: u32 = 1; @@ -25,6 +32,9 @@ pub struct DDASpectrumReader { precursor_reader: PrecursorReader, mz_reader: Tof2MzConverter, ms2_frames: Vec, + pub pasef_frames: Vec, + pub order: Vec, + pub offsets: Vec, } impl DDASpectrumReader { @@ -32,28 +42,47 @@ impl DDASpectrumReader { let frame_reader: FrameReader = FrameReader::new(&path_name); let metadata = MetadataReader::new(&path_name); let mz_reader: Tof2MzConverter = metadata.mz_converter; - + let tdf_sql_reader = + SqlReader::open(Path::new(&path_name).join("analysis.tdf")) + .unwrap(); + let pasef_frames = + SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader).unwrap(); let ms2_frames: Vec = frame_reader.parallel_filter(|x| x.msms_type != 0).collect(); let precursor_reader: PrecursorReader = PrecursorReader::new(&path_name); + let pasef_precursors = + &pasef_frames.iter().map(|x| x.precursor).collect(); + let order: Vec = argsort(&pasef_precursors); + let mut offsets: Vec = + Vec::with_capacity(precursor_reader.len() + 1); + offsets.push(0); + for (offset, &index) in order.iter().enumerate().take(order.len() - 1) { + let second_index: usize = order[offset + 1]; + if pasef_precursors[index] != pasef_precursors[second_index] { + offsets.push(offset + 1) + } + } + offsets.push(order.len()); Self { path_name, precursor_reader, mz_reader, ms2_frames, + pasef_frames, + order, + offsets, } } pub fn read_single_raw_spectrum(&self, index: usize) -> RawSpectrum { - let start: usize = self.precursor_reader.offsets[index]; - let end: usize = self.precursor_reader.offsets[index + 1]; - let selection: &[usize] = &self.precursor_reader.order[start..end]; + let start: usize = self.offsets[index]; + let end: usize = self.offsets[index + 1]; + let selection: &[usize] = &self.order[start..end]; let mut tof_indices: Vec = vec![]; let mut intensities: Vec = vec![]; for &index in selection.iter() { - let frame_index: usize = - self.precursor_reader.pasef_frames[index].frame - 1; + let frame_index: usize = self.pasef_frames[index].frame - 1; // TODO OPTIMIZE!!!!! let frame: &Frame = &self .ms2_frames @@ -63,10 +92,8 @@ impl DDASpectrumReader { if frame.intensities.len() == 0 { continue; } - let scan_start: usize = - self.precursor_reader.pasef_frames[index].scan_start; - let scan_end: usize = - self.precursor_reader.pasef_frames[index].scan_end; + let scan_start: usize = self.pasef_frames[index].scan_start; + let scan_end: usize = self.pasef_frames[index].scan_end; let offset_start: usize = frame.scan_offsets[scan_start] as usize; let offset_end: usize = frame.scan_offsets[scan_end] as usize; let tof_selection: &[u32] = @@ -101,7 +128,7 @@ impl DDASpectrumReader { let index: usize = raw_spectrum.index as usize; let spectrum_processer = RawSpectrumProcessor { raw_spectrum }; let spectrum = spectrum_processer - .finalize(self.precursor_reader.precursors[index], mz_reader); + .finalize(self.precursor_reader.get(index), mz_reader); spectrum } } @@ -113,14 +140,17 @@ impl ReadableSpectra for DDASpectrumReader { } fn read_all_spectra(&self) -> Vec { - let raw_spectra: Vec = (0..self.precursor_reader.count) + let raw_spectra: Vec = (0..self.precursor_reader.len()) .into_par_iter() .map(|index| self.read_single_raw_spectrum(index)) .collect(); + let precursors: Vec = (0..self.precursor_reader.len()) + .map(|index| self.precursor_reader.get(index)) + .collect(); let hits = Tof2MzCalibrator::find_unfragmented_precursors( &raw_spectra, &self.mz_reader, - &self.precursor_reader.precursors, + &precursors, 0.1, ); let temp_mz_reader: Tof2MzConverter; diff --git a/src/file_readers/spectrum_readers/dda_reader/precursors.rs b/src/file_readers/spectrum_readers/dda_reader/precursors.rs deleted file mode 100644 index 2fcb309..0000000 --- a/src/file_readers/spectrum_readers/dda_reader/precursors.rs +++ /dev/null @@ -1,83 +0,0 @@ -use std::path::Path; - -use rayon::prelude::*; - -use crate::{ - domain_converters::{ - ConvertableDomain, Frame2RtConverter, Scan2ImConverter, - }, - io::readers::{ - file_readers::sql_reader::{ - pasef_frame_msms::SqlPasefFrameMsMs, precursors::SqlPrecursor, - ReadableSqlTable, SqlReader, - }, - metadata_reader::MetadataReader, - }, - ms_data::Precursor, - utils::vec_utils::argsort, -}; - -#[derive(Debug)] -pub struct PrecursorReader { - pub precursors: Vec, - pub pasef_frames: Vec, - pub order: Vec, - pub offsets: Vec, - pub count: usize, -} - -impl PrecursorReader { - pub fn new(path: &String) -> Self { - let metadata = MetadataReader::new(&path); - let rt_converter: Frame2RtConverter = metadata.rt_converter; - let im_converter: Scan2ImConverter = metadata.im_converter; - let tdf_sql_reader = - SqlReader::open(Path::new(path).join("analysis.tdf")).unwrap(); - let pasef_frames = - SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader).unwrap(); - let precursors = - SqlPrecursor::from_sql_reader(&tdf_sql_reader).unwrap(); - let precursors: Vec = (0..precursors.len()) - .into_par_iter() - .map(|index| { - let frame_id: usize = precursors[index].precursor_frame; - let scan_id: f64 = precursors[index].scan_average; - Precursor { - mz: precursors[index].mz, - rt: rt_converter.convert(frame_id as u32), - im: im_converter.convert(scan_id), - charge: precursors[index].charge, - intensity: precursors[index].intensity, - index: index + 1, //TODO? - frame_index: frame_id, - // TODO OPTIMIZE!!!!! - collision_energy: pasef_frames - .iter() - .find(|&x| x.precursor == index + 1) - .unwrap() - .collision_energy, - } - }) - .collect(); - let pasef_precursors = - &pasef_frames.iter().map(|x| x.precursor).collect(); - let order: Vec = argsort(&pasef_precursors); - let count: usize = *pasef_precursors.iter().max().unwrap(); - let mut offsets: Vec = Vec::with_capacity(count + 1); - offsets.push(0); - for (offset, &index) in order.iter().enumerate().take(order.len() - 1) { - let second_index: usize = order[offset + 1]; - if pasef_precursors[index] != pasef_precursors[second_index] { - offsets.push(offset + 1) - } - } - offsets.push(order.len()); - Self { - precursors, - pasef_frames, - order, - offsets, - count, - } - } -} diff --git a/src/io/readers.rs b/src/io/readers.rs index 3d0a4f0..8614127 100644 --- a/src/io/readers.rs +++ b/src/io/readers.rs @@ -2,3 +2,4 @@ pub mod file_readers; pub mod frame_reader; pub mod metadata_reader; +pub mod precursor_reader; diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs new file mode 100644 index 0000000..e1c27b4 --- /dev/null +++ b/src/io/readers/precursor_reader.rs @@ -0,0 +1,70 @@ +use std::path::{Path, PathBuf}; + +use crate::{ + domain_converters::{ + ConvertableDomain, Frame2RtConverter, Scan2ImConverter, + }, + ms_data::Precursor, +}; + +use super::{ + file_readers::sql_reader::{ + precursors::SqlPrecursor, ReadableSqlTable, SqlReader, + }, + metadata_reader::MetadataReader, +}; + +#[derive(Debug)] +pub struct PrecursorReader { + path: PathBuf, + sql_precursors: Vec, + rt_converter: Frame2RtConverter, + im_converter: Scan2ImConverter, +} + +impl PrecursorReader { + pub fn new(path: impl AsRef) -> Self { + let sql_path = path.as_ref().join("analysis.tdf"); + let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); + let metadata = MetadataReader::new(&path); + let rt_converter: Frame2RtConverter = metadata.rt_converter; + let im_converter: Scan2ImConverter = metadata.im_converter; + let sql_precursors = + SqlPrecursor::from_sql_reader(&tdf_sql_reader).unwrap(); + Self { + path: path.as_ref().to_path_buf(), + sql_precursors, + rt_converter, + im_converter, + } + } + + pub fn get(&self, index: usize) -> Precursor { + let mut precursor: Precursor = Precursor::default(); + let sql_precursor = &self.sql_precursors[index]; + let frame_id: usize = sql_precursor.precursor_frame; + let scan_id: f64 = sql_precursor.scan_average; + precursor.mz = sql_precursor.mz; + precursor.rt = self.rt_converter.convert(frame_id as u32); + precursor.im = self.im_converter.convert(scan_id); + precursor.charge = sql_precursor.charge; + precursor.intensity = sql_precursor.intensity; + precursor.index = index + 1; //TODO; + precursor.frame_index = frame_id; + // TODO OPTIMIZE!!!!! + // precursor.collision_energy = pasef_frames + // .iter() + // .find(|&x| x.precursor == index + 1) + // .unwrap() + // .collision_energy; + precursor + } + + pub fn get_path(&self) -> PathBuf { + self.path.clone() + } + + pub fn len(&self) -> usize { + self.sql_precursors.len() + } +} From d81d93fc612fa744b0e089f171a49b0cae98c40d Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 24 Jun 2024 16:37:03 +0200 Subject: [PATCH 056/109] CHORE: cleaned tdf blob reader --- .../spectrum_readers/mini_tdf_reader.rs | 28 ++++++- .../readers/file_readers/tdf_blob_reader.rs | 73 +------------------ .../file_readers/tdf_blob_reader/tdf_blobs.rs | 39 ++++++++++ 3 files changed, 69 insertions(+), 71 deletions(-) create mode 100644 src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs diff --git a/src/file_readers/spectrum_readers/mini_tdf_reader.rs b/src/file_readers/spectrum_readers/mini_tdf_reader.rs index ce60fb4..80e3d74 100644 --- a/src/file_readers/spectrum_readers/mini_tdf_reader.rs +++ b/src/file_readers/spectrum_readers/mini_tdf_reader.rs @@ -2,7 +2,7 @@ use crate::{ file_readers::FileFormatError, io::readers::file_readers::{ parquet_reader::read_parquet_precursors, - tdf_blob_reader::{IndexedTdfBlobReader, TdfBlob, TdfBlobParsable}, + tdf_blob_reader::{IndexedTdfBlobReader, TdfBlob}, }, }; use std::fs; @@ -132,7 +132,7 @@ impl ReadableSpectra for MiniTDFReader { } } -impl TdfBlobParsable for Spectrum { +impl Spectrum { fn set_tdf_blob_index(&mut self, index: usize) { self.index = index; } @@ -152,4 +152,28 @@ impl TdfBlobParsable for Spectrum { self.intensities = intensity_values.iter().map(|&x| x as f64).collect(); self.mz_values = mz_values.to_vec(); } + + fn update_from_tdf_blob_reader( + &mut self, + bin_file: &IndexedTdfBlobReader, + index: usize, + ) { + let blob = bin_file.get_blob(index).unwrap(); + if !blob.is_empty() { + self.update_from_tdf_blob(blob) + } + } + + fn create_from_tdf_blob_reader( + bin_file: &IndexedTdfBlobReader, + index: usize, + ) -> Self + where + Self: Default, + { + let mut object = Self::default(); + object.set_tdf_blob_index(index); + object.update_from_tdf_blob_reader(bin_file, index); + object + } } diff --git a/src/io/readers/file_readers/tdf_blob_reader.rs b/src/io/readers/file_readers/tdf_blob_reader.rs index 71b1b5c..b37415b 100644 --- a/src/io/readers/file_readers/tdf_blob_reader.rs +++ b/src/io/readers/file_readers/tdf_blob_reader.rs @@ -1,46 +1,15 @@ +mod tdf_blobs; + use memmap2::Mmap; use std::fs::File; use std::io; use std::path::{Path, PathBuf}; +pub use tdf_blobs::*; use zstd::decode_all; const U32_SIZE: usize = std::mem::size_of::(); const HEADER_SIZE: usize = 2; -#[derive(Debug, Default)] -pub struct TdfBlob { - bytes: Vec, -} - -impl TdfBlob { - #[inline(always)] - pub fn get(&self, index: usize) -> u32 { - debug_assert!(index < self.len()); - Self::concatenate_bytes( - self.bytes[index], - self.bytes[index + self.len()], - self.bytes[index + 2 * self.len()], - self.bytes[index + 3 * self.len()], - ) - } - - #[inline(always)] - fn concatenate_bytes(b1: u8, b2: u8, b3: u8, b4: u8) -> u32 { - b1 as u32 - | ((b2 as u32) << 8) - | ((b3 as u32) << 16) - | ((b4 as u32) << 24) - } - - pub fn len(&self) -> usize { - self.bytes.len() / U32_SIZE - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } -} - #[derive(Debug)] pub struct TdfBlobReader { path: PathBuf, @@ -66,7 +35,7 @@ impl TdfBlobReader { let compressed_bytes: &[u8] = self.get_compressed_bytes(offset, byte_count); match decode_all(compressed_bytes) { - Ok(bytes) => Ok(TdfBlob { bytes }), + Ok(bytes) => Ok(TdfBlob::new(bytes)), Err(_) => Err(TdfBlobError::Decompression(self.path.clone())), } } @@ -155,40 +124,6 @@ impl IndexedTdfBlobReader { } } -pub trait TdfBlobParsable { - fn set_tdf_blob_index(&mut self, index: usize); - - fn update_from_tdf_blob(&mut self, blob: TdfBlob); - - fn update_from_tdf_blob_reader( - &mut self, - bin_file: &IndexedTdfBlobReader, - index: usize, - ) { - let blob = bin_file.get_blob(index).unwrap(); - if !blob.is_empty() { - self.update_from_tdf_blob(blob) - } - } - - fn create_from_tdf_blob_reader( - bin_file: &IndexedTdfBlobReader, - index: usize, - ) -> Self - where - Self: Default, - { - let mut object = Self::default(); - object.set_tdf_blob_index(index); - object.update_from_tdf_blob_reader(bin_file, index); - object - } -} - -// #[derive(thiserror::Error, Debug)] -// #[error("TdfBlobError: {0}")] -// pub struct TdfBlobError(#[from] std::io::Error); - #[derive(Debug, thiserror::Error)] pub enum TdfBlobError { #[error("Cannot read or mmap file {0}")] diff --git a/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs b/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs new file mode 100644 index 0000000..e661cd0 --- /dev/null +++ b/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs @@ -0,0 +1,39 @@ +const U32_SIZE: usize = std::mem::size_of::(); + +#[derive(Debug, Default)] +pub struct TdfBlob { + bytes: Vec, +} + +impl TdfBlob { + pub fn new(bytes: Vec) -> Self { + Self { bytes } + } + + #[inline(always)] + pub fn get(&self, index: usize) -> u32 { + debug_assert!(index < self.len()); + Self::concatenate_bytes( + self.bytes[index], + self.bytes[index + self.len()], + self.bytes[index + 2 * self.len()], + self.bytes[index + 3 * self.len()], + ) + } + + #[inline(always)] + fn concatenate_bytes(b1: u8, b2: u8, b3: u8, b4: u8) -> u32 { + b1 as u32 + | ((b2 as u32) << 8) + | ((b3 as u32) << 16) + | ((b4 as u32) << 24) + } + + pub fn len(&self) -> usize { + self.bytes.len() / U32_SIZE + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} From 4d0d7ca0823b0a2551305e487393a41e764d797a Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 24 Jun 2024 16:49:44 +0200 Subject: [PATCH 057/109] FEAT: simplifying spectrum readers --- src/file_readers.rs | 2 +- src/file_readers/file_formats.rs | 37 ++++++++++++++++++- src/file_readers/spectrum_readers.rs | 36 ------------------ .../spectrum_readers/dda_reader.rs | 8 ++-- .../spectrum_readers/mini_tdf_reader.rs | 8 ++-- 5 files changed, 45 insertions(+), 46 deletions(-) diff --git a/src/file_readers.rs b/src/file_readers.rs index 70c022e..30ec99b 100644 --- a/src/file_readers.rs +++ b/src/file_readers.rs @@ -4,7 +4,7 @@ mod file_formats; mod spectrum_readers; use { - self::{file_formats::FileFormat, spectrum_readers::ReadableSpectra}, + self::file_formats::FileFormat, crate::ms_data::{Frame, Spectrum}, }; diff --git a/src/file_readers/file_formats.rs b/src/file_readers/file_formats.rs index a1f4440..7bfac9a 100644 --- a/src/file_readers/file_formats.rs +++ b/src/file_readers/file_formats.rs @@ -1,8 +1,15 @@ use std::{fs, path::PathBuf}; -use crate::{io::readers::frame_reader::FrameReader, ms_data::Frame}; +use crate::{ + io::readers::frame_reader::FrameReader, + ms_data::{Frame, Spectrum}, +}; use rayon::iter::ParallelIterator; +use super::spectrum_readers::{ + dda_reader::DDASpectrumReader, mini_tdf_reader::MiniTDFReader, +}; + pub enum FileFormat { DFolder(PathBuf), MS2Folder(PathBuf), @@ -120,3 +127,31 @@ pub enum FileFormatError { #[error("MetadataFilesAreMissing")] MetadataFilesAreMissing, } + +impl FileFormat { + pub fn read_single_spectrum(&self, index: usize) -> Spectrum { + match &self { + Self::DFolder(path) => DDASpectrumReader::new( + path.to_str().unwrap_or_default().to_string(), + ) + .read_single_spectrum(index), + Self::MS2Folder(path) => MiniTDFReader::new( + path.to_str().unwrap_or_default().to_string(), + ) + .read_single_spectrum(index), + } + } + + pub fn read_all_spectra(&self) -> Vec { + match &self { + Self::DFolder(path) => DDASpectrumReader::new( + path.to_str().unwrap_or_default().to_string(), + ) + .read_all_spectra(), + Self::MS2Folder(path) => MiniTDFReader::new( + path.to_str().unwrap_or_default().to_string(), + ) + .read_all_spectra(), + } + } +} diff --git a/src/file_readers/spectrum_readers.rs b/src/file_readers/spectrum_readers.rs index 2de5277..2bd6411 100644 --- a/src/file_readers/spectrum_readers.rs +++ b/src/file_readers/spectrum_readers.rs @@ -1,38 +1,2 @@ -use crate::ms_data::Spectrum; - -use self::{dda_reader::DDASpectrumReader, mini_tdf_reader::MiniTDFReader}; - -use super::file_formats::FileFormat; - pub mod dda_reader; pub mod mini_tdf_reader; - -pub trait ReadableSpectra { - fn read_single_spectrum(&self, index: usize) -> Spectrum; - - fn read_all_spectra(&self) -> Vec; -} - -impl FileFormat { - fn unwrap_spectrum_reader(&self) -> Box { - let result = match &self { - Self::DFolder(path) => Box::new(DDASpectrumReader::new( - path.to_str().unwrap_or_default().to_string(), - )) as Box, - Self::MS2Folder(path) => Box::new(MiniTDFReader::new( - path.to_str().unwrap_or_default().to_string(), - )) as Box, - }; - result - } -} - -impl ReadableSpectra for FileFormat { - fn read_single_spectrum(&self, index: usize) -> Spectrum { - self.unwrap_spectrum_reader().read_single_spectrum(index) - } - - fn read_all_spectra(&self) -> Vec { - self.unwrap_spectrum_reader().read_all_spectra() - } -} diff --git a/src/file_readers/spectrum_readers/dda_reader.rs b/src/file_readers/spectrum_readers/dda_reader.rs index 1525982..de28a43 100644 --- a/src/file_readers/spectrum_readers/dda_reader.rs +++ b/src/file_readers/spectrum_readers/dda_reader.rs @@ -3,7 +3,7 @@ use std::path::Path; use crate::{ calibration::Tof2MzCalibrator, domain_converters::Tof2MzConverter, - file_readers::ReadableSpectra, + // file_readers::ReadableSpectra, io::readers::{ file_readers::sql_reader::{ pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlReader, @@ -133,13 +133,13 @@ impl DDASpectrumReader { } } -impl ReadableSpectra for DDASpectrumReader { - fn read_single_spectrum(&self, index: usize) -> Spectrum { +impl DDASpectrumReader { + pub fn read_single_spectrum(&self, index: usize) -> Spectrum { let raw_spectrum = self.read_single_raw_spectrum(index); self.process_single_raw_spectrum(raw_spectrum, &self.mz_reader) } - fn read_all_spectra(&self) -> Vec { + pub fn read_all_spectra(&self) -> Vec { let raw_spectra: Vec = (0..self.precursor_reader.len()) .into_par_iter() .map(|index| self.read_single_raw_spectrum(index)) diff --git a/src/file_readers/spectrum_readers/mini_tdf_reader.rs b/src/file_readers/spectrum_readers/mini_tdf_reader.rs index 80e3d74..f62aa5e 100644 --- a/src/file_readers/spectrum_readers/mini_tdf_reader.rs +++ b/src/file_readers/spectrum_readers/mini_tdf_reader.rs @@ -8,7 +8,7 @@ use crate::{ use std::fs; use { crate::{ - file_readers::ReadableSpectra, + // file_readers::ReadableSpectra, ms_data::{Precursor, Spectrum}, }, rayon::prelude::*, @@ -106,8 +106,8 @@ impl MiniTDFReader { } } -impl ReadableSpectra for MiniTDFReader { - fn read_single_spectrum(&self, index: usize) -> Spectrum { +impl MiniTDFReader { + pub fn read_single_spectrum(&self, index: usize) -> Spectrum { let mut spectrum: Spectrum = Spectrum::create_from_tdf_blob_reader( &self.frame_reader.as_ref().unwrap(), index, @@ -117,7 +117,7 @@ impl ReadableSpectra for MiniTDFReader { spectrum } - fn read_all_spectra(&self) -> Vec { + pub fn read_all_spectra(&self) -> Vec { let size: usize = self.offsets.len(); let mut spectra: Vec = (0..size) .into_par_iter() From ad6d2d513742ee2b43f1f2eafbf41d0dd1ab2eef Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 24 Jun 2024 17:00:07 +0200 Subject: [PATCH 058/109] FEAT: simplied old file reader for easier refactoring --- src/file_readers.rs | 157 +++++++++++++++++- .../{spectrum_readers => }/dda_reader.rs | 0 src/file_readers/file_formats.rs | 157 ------------------ .../{spectrum_readers => }/mini_tdf_reader.rs | 0 src/file_readers/spectrum_readers.rs | 2 - 5 files changed, 151 insertions(+), 165 deletions(-) rename src/file_readers/{spectrum_readers => }/dda_reader.rs (100%) delete mode 100644 src/file_readers/file_formats.rs rename src/file_readers/{spectrum_readers => }/mini_tdf_reader.rs (100%) delete mode 100644 src/file_readers/spectrum_readers.rs diff --git a/src/file_readers.rs b/src/file_readers.rs index 30ec99b..2b31a12 100644 --- a/src/file_readers.rs +++ b/src/file_readers.rs @@ -1,15 +1,18 @@ -use crate::Error; +mod dda_reader; +mod mini_tdf_reader; -mod file_formats; -mod spectrum_readers; +use std::{fs, path::PathBuf}; +use crate::{io::readers::frame_reader::FrameReader, Error}; + +use dda_reader::DDASpectrumReader; +use mini_tdf_reader::MiniTDFReader; +use rayon::iter::ParallelIterator; use { - self::file_formats::FileFormat, + // self::file_formats::FileFormat, crate::ms_data::{Frame, Spectrum}, }; -pub use file_formats::FileFormatError; - /// A reader to read [frames](crate::ms_data::Frame) and [spectra](crate::ms_data::Spectrum). pub struct FileReader { format: FileFormat, @@ -56,3 +59,145 @@ impl FileReader { self.format.read_all_spectra() } } + +pub enum FileFormat { + DFolder(PathBuf), + MS2Folder(PathBuf), +} + +impl FileFormat { + pub fn parse( + input: impl AsRef, + ) -> Result { + let path: PathBuf = input.as_ref().to_path_buf(); + if !path.exists() { + return Err(FileFormatError::DirectoryDoesNotExist); + } + let extension: &str = path + .extension() + .unwrap_or_default() + .to_str() + .unwrap_or_default(); + let format = match extension { + "d" => Self::DFolder(path), + _ => Self::MS2Folder(path), + }; + format.is_valid()?; + Ok(format) + } + + /// FileFormat is guaranteed to be `valid` if it is constructed + fn is_valid(&self) -> Result<(), FileFormatError> { + match &self { + Self::DFolder(path) => { + if !folder_contains_extension(path, "tdf_bin") { + return Err(FileFormatError::BinaryFilesAreMissing); + } + if !folder_contains_extension(path, "tdf") { + return Err(FileFormatError::MetadataFilesAreMissing); + } + }, + Self::MS2Folder(path) => { + if !folder_contains_extension(path, "bin") { + return Err(FileFormatError::BinaryFilesAreMissing); + } + if !folder_contains_extension(path, "parquet") { + return Err(FileFormatError::MetadataFilesAreMissing); + } + }, + } + Ok(()) + } + + fn get_frame_reader(&self) -> FrameReader { + let path = match &self { + Self::DFolder(path) => path, + Self::MS2Folder(path) => panic!( + "Folder {:} is not frame readable", + path.to_str().unwrap_or_default().to_string() + ), + }; + let frame_reader: FrameReader = FrameReader::new(&path); + frame_reader + } + + pub fn read_single_frame(&self, index: usize) -> Frame { + self.get_frame_reader().get(index) + } + + pub fn read_all_frames(&self) -> Vec { + self.get_frame_reader().parallel_filter(|_| true).collect() + } + + pub fn read_all_ms1_frames(&self) -> Vec { + self.get_frame_reader() + .parallel_filter(|x| x.msms_type == 0) + .collect() + } + + pub fn read_all_ms2_frames(&self) -> Vec { + self.get_frame_reader() + .parallel_filter(|x| x.msms_type != 0) + .collect() + } + + pub fn read_single_spectrum(&self, index: usize) -> Spectrum { + match &self { + Self::DFolder(path) => DDASpectrumReader::new( + path.to_str().unwrap_or_default().to_string(), + ) + .read_single_spectrum(index), + Self::MS2Folder(path) => MiniTDFReader::new( + path.to_str().unwrap_or_default().to_string(), + ) + .read_single_spectrum(index), + } + } + + pub fn read_all_spectra(&self) -> Vec { + match &self { + Self::DFolder(path) => DDASpectrumReader::new( + path.to_str().unwrap_or_default().to_string(), + ) + .read_all_spectra(), + Self::MS2Folder(path) => MiniTDFReader::new( + path.to_str().unwrap_or_default().to_string(), + ) + .read_all_spectra(), + } + } +} + +fn folder_contains_extension( + input: impl AsRef, + extension: &str, +) -> bool { + let folder_path: PathBuf = input.as_ref().to_path_buf(); + if !folder_path.is_dir() { + return false; + } + if let Ok(entries) = fs::read_dir(folder_path) { + for entry in entries { + if let Ok(entry) = entry { + if let Some(ext) = entry.path().extension() { + if ext == extension { + return true; + } + } + } + } + } + false +} + +#[derive(thiserror::Error, Debug)] +pub enum FileFormatError { + #[error("DirectoryDoesNotExist")] + DirectoryDoesNotExist, + #[error("NoParentWithBrukerExtension")] + NoParentWithBrukerExtension, + #[error("BinaryFilesAreMissing")] + BinaryFilesAreMissing, + #[error("MetadataFilesAreMissing")] + MetadataFilesAreMissing, +} diff --git a/src/file_readers/spectrum_readers/dda_reader.rs b/src/file_readers/dda_reader.rs similarity index 100% rename from src/file_readers/spectrum_readers/dda_reader.rs rename to src/file_readers/dda_reader.rs diff --git a/src/file_readers/file_formats.rs b/src/file_readers/file_formats.rs deleted file mode 100644 index 7bfac9a..0000000 --- a/src/file_readers/file_formats.rs +++ /dev/null @@ -1,157 +0,0 @@ -use std::{fs, path::PathBuf}; - -use crate::{ - io::readers::frame_reader::FrameReader, - ms_data::{Frame, Spectrum}, -}; -use rayon::iter::ParallelIterator; - -use super::spectrum_readers::{ - dda_reader::DDASpectrumReader, mini_tdf_reader::MiniTDFReader, -}; - -pub enum FileFormat { - DFolder(PathBuf), - MS2Folder(PathBuf), -} - -impl FileFormat { - pub fn parse( - input: impl AsRef, - ) -> Result { - let path: PathBuf = input.as_ref().to_path_buf(); - if !path.exists() { - return Err(FileFormatError::DirectoryDoesNotExist); - } - let extension: &str = path - .extension() - .unwrap_or_default() - .to_str() - .unwrap_or_default(); - let format = match extension { - "d" => Self::DFolder(path), - _ => Self::MS2Folder(path), - }; - format.is_valid()?; - Ok(format) - } - - /// FileFormat is guaranteed to be `valid` if it is constructed - fn is_valid(&self) -> Result<(), FileFormatError> { - match &self { - Self::DFolder(path) => { - if !folder_contains_extension(path, "tdf_bin") { - return Err(FileFormatError::BinaryFilesAreMissing); - } - if !folder_contains_extension(path, "tdf") { - return Err(FileFormatError::MetadataFilesAreMissing); - } - }, - Self::MS2Folder(path) => { - if !folder_contains_extension(path, "bin") { - return Err(FileFormatError::BinaryFilesAreMissing); - } - if !folder_contains_extension(path, "parquet") { - return Err(FileFormatError::MetadataFilesAreMissing); - } - }, - } - Ok(()) - } -} - -fn folder_contains_extension( - input: impl AsRef, - extension: &str, -) -> bool { - let folder_path: PathBuf = input.as_ref().to_path_buf(); - if !folder_path.is_dir() { - return false; - } - if let Ok(entries) = fs::read_dir(folder_path) { - for entry in entries { - if let Ok(entry) = entry { - if let Some(ext) = entry.path().extension() { - if ext == extension { - return true; - } - } - } - } - } - false -} - -impl FileFormat { - fn get_frame_reader(&self) -> FrameReader { - let path = match &self { - Self::DFolder(path) => path, - Self::MS2Folder(path) => panic!( - "Folder {:} is not frame readable", - path.to_str().unwrap_or_default().to_string() - ), - }; - let frame_reader: FrameReader = FrameReader::new(&path); - frame_reader - } - - pub fn read_single_frame(&self, index: usize) -> Frame { - self.get_frame_reader().get(index) - } - - pub fn read_all_frames(&self) -> Vec { - self.get_frame_reader().parallel_filter(|_| true).collect() - } - - pub fn read_all_ms1_frames(&self) -> Vec { - self.get_frame_reader() - .parallel_filter(|x| x.msms_type == 0) - .collect() - } - - pub fn read_all_ms2_frames(&self) -> Vec { - self.get_frame_reader() - .parallel_filter(|x| x.msms_type != 0) - .collect() - } -} - -#[derive(thiserror::Error, Debug)] -pub enum FileFormatError { - #[error("DirectoryDoesNotExist")] - DirectoryDoesNotExist, - #[error("NoParentWithBrukerExtension")] - NoParentWithBrukerExtension, - #[error("BinaryFilesAreMissing")] - BinaryFilesAreMissing, - #[error("MetadataFilesAreMissing")] - MetadataFilesAreMissing, -} - -impl FileFormat { - pub fn read_single_spectrum(&self, index: usize) -> Spectrum { - match &self { - Self::DFolder(path) => DDASpectrumReader::new( - path.to_str().unwrap_or_default().to_string(), - ) - .read_single_spectrum(index), - Self::MS2Folder(path) => MiniTDFReader::new( - path.to_str().unwrap_or_default().to_string(), - ) - .read_single_spectrum(index), - } - } - - pub fn read_all_spectra(&self) -> Vec { - match &self { - Self::DFolder(path) => DDASpectrumReader::new( - path.to_str().unwrap_or_default().to_string(), - ) - .read_all_spectra(), - Self::MS2Folder(path) => MiniTDFReader::new( - path.to_str().unwrap_or_default().to_string(), - ) - .read_all_spectra(), - } - } -} diff --git a/src/file_readers/spectrum_readers/mini_tdf_reader.rs b/src/file_readers/mini_tdf_reader.rs similarity index 100% rename from src/file_readers/spectrum_readers/mini_tdf_reader.rs rename to src/file_readers/mini_tdf_reader.rs diff --git a/src/file_readers/spectrum_readers.rs b/src/file_readers/spectrum_readers.rs deleted file mode 100644 index 2bd6411..0000000 --- a/src/file_readers/spectrum_readers.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod dda_reader; -pub mod mini_tdf_reader; From c56ae7df39fd7dea3325427a5139520054cf5ae7 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 25 Jun 2024 10:33:04 +0200 Subject: [PATCH 059/109] DOCS: cleaned up io --- src/file_readers.rs | 2 +- src/file_readers/dda_reader.rs | 6 +----- src/file_readers/mini_tdf_reader.rs | 2 -- src/io/readers.rs | 15 ++++++++++----- src/io/writers.rs | 4 +++- src/main.rs | 2 +- 6 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/file_readers.rs b/src/file_readers.rs index 2b31a12..5878531 100644 --- a/src/file_readers.rs +++ b/src/file_readers.rs @@ -3,7 +3,7 @@ mod mini_tdf_reader; use std::{fs, path::PathBuf}; -use crate::{io::readers::frame_reader::FrameReader, Error}; +use crate::{io::readers::FrameReader, Error}; use dda_reader::DDASpectrumReader; use mini_tdf_reader::MiniTDFReader; diff --git a/src/file_readers/dda_reader.rs b/src/file_readers/dda_reader.rs index de28a43..5539823 100644 --- a/src/file_readers/dda_reader.rs +++ b/src/file_readers/dda_reader.rs @@ -8,9 +8,7 @@ use crate::{ file_readers::sql_reader::{ pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlReader, }, - frame_reader::FrameReader, - metadata_reader::MetadataReader, - precursor_reader::PrecursorReader, + FrameReader, MetadataReader, PrecursorReader, }, ms_data::{ Frame, Precursor, RawProcessedSpectrumState, RawSpectrum, @@ -131,9 +129,7 @@ impl DDASpectrumReader { .finalize(self.precursor_reader.get(index), mz_reader); spectrum } -} -impl DDASpectrumReader { pub fn read_single_spectrum(&self, index: usize) -> Spectrum { let raw_spectrum = self.read_single_raw_spectrum(index); self.process_single_raw_spectrum(raw_spectrum, &self.mz_reader) diff --git a/src/file_readers/mini_tdf_reader.rs b/src/file_readers/mini_tdf_reader.rs index f62aa5e..1923aee 100644 --- a/src/file_readers/mini_tdf_reader.rs +++ b/src/file_readers/mini_tdf_reader.rs @@ -104,9 +104,7 @@ impl MiniTDFReader { .unwrap(), ); } -} -impl MiniTDFReader { pub fn read_single_spectrum(&self, index: usize) -> Spectrum { let mut spectrum: Spectrum = Spectrum::create_from_tdf_blob_reader( &self.frame_reader.as_ref().unwrap(), diff --git a/src/io/readers.rs b/src/io/readers.rs index 8614127..b0d1ec0 100644 --- a/src/io/readers.rs +++ b/src/io/readers.rs @@ -1,5 +1,10 @@ -// pub(crate) mod file_readers; -pub mod file_readers; -pub mod frame_reader; -pub mod metadata_reader; -pub mod precursor_reader; +pub(crate) mod file_readers; +mod frame_reader; +mod metadata_reader; +mod precursor_reader; +mod spectrum_reader; + +pub use frame_reader::*; +pub use metadata_reader::*; +pub use precursor_reader::*; +pub use spectrum_reader::*; diff --git a/src/io/writers.rs b/src/io/writers.rs index b399291..d626864 100644 --- a/src/io/writers.rs +++ b/src/io/writers.rs @@ -1 +1,3 @@ -pub mod mgf; +mod mgf; + +pub use mgf::*; diff --git a/src/main.rs b/src/main.rs index 3bc348a..887c334 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,5 @@ use std::env; -use timsrust::io::writers::mgf::MGFFormat; +use timsrust::io::writers::MGFFormat; use timsrust::{ms_data::Spectrum, FileReader}; fn quick_test() { From e43da824cfe5f0e15ef4891424504a68b5938688 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 25 Jun 2024 10:39:23 +0200 Subject: [PATCH 060/109] CHORE: implemented MGFEntry as struct rather than impl for spectrum --- src/io/writers/mgf.rs | 22 +++++++++------------- src/main.rs | 4 ++-- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/src/io/writers/mgf.rs b/src/io/writers/mgf.rs index ab27b3c..e2f2432 100644 --- a/src/io/writers/mgf.rs +++ b/src/io/writers/mgf.rs @@ -4,7 +4,7 @@ use std::path::Path; use crate::ms_data::Spectrum; -pub struct MGFWriter {} +pub struct MGFWriter; impl MGFWriter { pub fn write_spectra(input_file_path: &str, spectra: &Vec) { @@ -19,23 +19,19 @@ impl MGFWriter { File::create(output_file_path).expect("Failed to create file"); for spectrum in spectra { _ = file.write_all("BEGIN IONS\n".as_bytes()); - _ = file.write_all(spectrum.as_mgf_header().as_bytes()); - _ = file.write_all(spectrum.as_mgf_peaks().as_bytes()); + _ = file.write_all(MGFEntry::write_header(spectrum).as_bytes()); + _ = file.write_all(MGFEntry::write_peaks(spectrum).as_bytes()); _ = file.write_all("END IONS\n".as_bytes()); } file.flush().expect("Failed to flush to file"); } } -pub trait MGFFormat { - fn as_mgf_header(&self) -> String; +pub struct MGFEntry; - fn as_mgf_peaks(&self) -> String; -} - -impl MGFFormat for Spectrum { - fn as_mgf_header(&self) -> String { - let precursor = self.precursor; +impl MGFEntry { + pub fn write_header(spectrum: &Spectrum) -> String { + let precursor = spectrum.precursor; let title = precursor.index; let ms2_data = format!( "TITLE=index:{}, im:{:.4}, intensity:{:.4}, frame:{}, ce:{:.4}\nPEPMASS={:.4}\nCHARGE={}\nRT={:.2}\n", @@ -44,10 +40,10 @@ impl MGFFormat for Spectrum { ms2_data } - fn as_mgf_peaks(&self) -> String { + pub fn write_peaks(spectrum: &Spectrum) -> String { let mut ms2_data: String = String::new(); for (mz, intensity) in - self.mz_values.iter().zip(self.intensities.iter()) + spectrum.mz_values.iter().zip(spectrum.intensities.iter()) { ms2_data.push_str(&format!("{:.4}\t{:.0}\n", mz, intensity)); } diff --git a/src/main.rs b/src/main.rs index 887c334..59f86b6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,5 @@ use std::env; -use timsrust::io::writers::MGFFormat; +use timsrust::io::writers::MGFEntry; use timsrust::{ms_data::Spectrum, FileReader}; fn quick_test() { @@ -14,7 +14,7 @@ fn quick_test() { spectrum_index = 10; } println!("precursor {:?}", dda_spectra[spectrum_index].precursor); - _ = dda_spectra[spectrum_index].as_mgf_header(); + _ = MGFEntry::write_header(&dda_spectra[spectrum_index]); // println!( // "precursor\n{:?}", // dda_spectra[spectrum_index].as_mgf_header() From 7ec524b9b5ad0f69f05d1c6d0657346f48c58e1f Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 25 Jun 2024 10:40:16 +0200 Subject: [PATCH 061/109] FIX: typos --- src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index 59f86b6..597efaf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -17,7 +17,7 @@ fn quick_test() { _ = MGFEntry::write_header(&dda_spectra[spectrum_index]); // println!( // "precursor\n{:?}", - // dda_spectra[spectrum_index].as_mgf_header() + // MGFEntry::write_header(&dda_spectra[spectrum_index]) // ); println!("mz values {:?}", dda_spectra[spectrum_index].mz_values); println!( From 3faadc8816f66969e0b45a9359b176cd16e657e7 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 25 Jun 2024 10:45:23 +0200 Subject: [PATCH 062/109] FEAT: cleaner mgf writing --- src/io/writers/mgf.rs | 11 +++++++++-- src/main.rs | 17 ++++++++--------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/io/writers/mgf.rs b/src/io/writers/mgf.rs index e2f2432..f16b1bc 100644 --- a/src/io/writers/mgf.rs +++ b/src/io/writers/mgf.rs @@ -19,8 +19,7 @@ impl MGFWriter { File::create(output_file_path).expect("Failed to create file"); for spectrum in spectra { _ = file.write_all("BEGIN IONS\n".as_bytes()); - _ = file.write_all(MGFEntry::write_header(spectrum).as_bytes()); - _ = file.write_all(MGFEntry::write_peaks(spectrum).as_bytes()); + _ = file.write_all(MGFEntry::write(spectrum).as_bytes()); _ = file.write_all("END IONS\n".as_bytes()); } file.flush().expect("Failed to flush to file"); @@ -49,4 +48,12 @@ impl MGFEntry { } ms2_data } + + pub fn write(spectrum: &Spectrum) -> String { + format!( + "{}{}", + Self::write_header(spectrum), + Self::write_peaks(spectrum) + ) + } } diff --git a/src/main.rs b/src/main.rs index 597efaf..129041f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,17 +13,16 @@ fn quick_test() { } else { spectrum_index = 10; } - println!("precursor {:?}", dda_spectra[spectrum_index].precursor); - _ = MGFEntry::write_header(&dda_spectra[spectrum_index]); + // println!("precursor {:?}", dda_spectra[spectrum_index].precursor); + // _ = MGFEntry::write_header(&dda_spectra[spectrum_index]); + println!("{}", MGFEntry::write(&dda_spectra[spectrum_index])); + // println!("{}", MGFEntry::write_header(&dda_spectra[spectrum_index])); + // println!("{}", MGFEntry::write_peaks(&dda_spectra[spectrum_index])); + // println!("mz values {:?}", dda_spectra[spectrum_index].mz_values); // println!( - // "precursor\n{:?}", - // MGFEntry::write_header(&dda_spectra[spectrum_index]) + // "intensity values {:?}", + // dda_spectra[spectrum_index].intensities // ); - println!("mz values {:?}", dda_spectra[spectrum_index].mz_values); - println!( - "intensity values {:?}", - dda_spectra[spectrum_index].intensities - ); // println!("{:?}", dda_spectra[spectrum_index].as_mgf_entry()); // MGFWriter::write_spectra(d_folder_name, &dda_spectra); } From 74650406cbf9672c5d3d27c9a597e906ec45e5aa Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 25 Jun 2024 10:47:28 +0200 Subject: [PATCH 063/109] FIX: typo --- src/ms_data/metadata.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ms_data/metadata.rs b/src/ms_data/metadata.rs index 2ade40c..8da4852 100644 --- a/src/ms_data/metadata.rs +++ b/src/ms_data/metadata.rs @@ -4,7 +4,7 @@ use crate::domain_converters::{ Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, }; -/// Metadata from a single run +/// Metadata from a single run. #[derive(Debug, Clone)] pub struct Metadata { pub path: PathBuf, From 73dfbdcc4adf3ff0a6c8f3ef854fbb110f306046 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 25 Jun 2024 11:42:07 +0200 Subject: [PATCH 064/109] FEAT: updated parquet reader --- src/file_readers/mini_tdf_reader.rs | 24 +++++- src/io/readers/file_readers/parquet_reader.rs | 79 +++++++++---------- .../file_readers/parquet_reader/precursors.rs | 50 ++++++++++++ src/io/readers/file_readers/sql_reader.rs | 14 +--- 4 files changed, 111 insertions(+), 56 deletions(-) create mode 100644 src/io/readers/file_readers/parquet_reader/precursors.rs diff --git a/src/file_readers/mini_tdf_reader.rs b/src/file_readers/mini_tdf_reader.rs index 1923aee..810fd07 100644 --- a/src/file_readers/mini_tdf_reader.rs +++ b/src/file_readers/mini_tdf_reader.rs @@ -1,7 +1,7 @@ use crate::{ file_readers::FileFormatError, io::readers::file_readers::{ - parquet_reader::read_parquet_precursors, + parquet_reader::{precursors::ParquetPrecursor, ReadableParquetTable}, tdf_blob_reader::{IndexedTdfBlobReader, TdfBlob}, }, }; @@ -87,9 +87,27 @@ impl MiniTDFReader { } fn read_precursors(&mut self) { - (self.precursors, self.offsets) = - read_parquet_precursors(&self.parquet_file_name); + // (self.precursors, self.offsets) = + // read_parquet_precursors(&self.parquet_file_name); + let parquet_precursors = + ParquetPrecursor::from_parquet_file(&&self.parquet_file_name) + .unwrap(); + self.offsets = parquet_precursors.iter().map(|x| x.offset).collect(); + self.precursors = parquet_precursors + .iter() + .map(|x| Precursor { + mz: x.mz, + rt: x.rt, + im: x.im, + charge: x.charge, + intensity: x.intensity, + index: x.index, + frame_index: x.frame_index, + collision_energy: x.collision_energy, + }) + .collect(); } + fn set_spectrum_reader(&mut self) { let mut path: PathBuf = PathBuf::from(&self.path_name); let ms2_bin_file = diff --git a/src/io/readers/file_readers/parquet_reader.rs b/src/io/readers/file_readers/parquet_reader.rs index ba78f7a..8140c33 100644 --- a/src/io/readers/file_readers/parquet_reader.rs +++ b/src/io/readers/file_readers/parquet_reader.rs @@ -1,46 +1,41 @@ -use parquet::file::reader::{FileReader, SerializedFileReader}; -use std::fs::File; +pub mod precursors; -use crate::ms_data::Precursor; +use parquet::{ + file::reader::{FileReader, SerializedFileReader}, + record::Field, +}; +use std::{fs::File, io, path::Path}; -pub fn read_parquet_precursors( - parquet_file_name: &String, -) -> (Vec, Vec) { - let file: File = File::open(parquet_file_name).unwrap(); - let reader: SerializedFileReader = - SerializedFileReader::new(file).unwrap(); - let mut precursors: Vec = vec![]; - let mut offsets: Vec = vec![]; - for record in reader.get_row_iter(None).unwrap() { - let mut precursor: Precursor = Precursor::default(); - for (name, field) in record.get_column_iter() { - match name.to_string().as_str() { - "Id" => precursor.index = field.to_string().parse().unwrap(), - "RetentionTime" => { - precursor.rt = field.to_string().parse().unwrap() - }, - "MonoisotopicMz" => { - precursor.mz = field.to_string().parse().unwrap_or(0.0) - }, - "Charge" => { - precursor.charge = - field.to_string().parse().unwrap_or(0.0) as usize - }, - "Intensity" => { - precursor.intensity = field.to_string().parse().unwrap() - }, - "ooK0" => precursor.im = field.to_string().parse().unwrap(), - "MS1ParentFrameId" => { - precursor.frame_index = - field.to_string().parse::().unwrap() as usize - }, - "BinaryOffset" => { - offsets.push(field.to_string().parse().unwrap()) - }, - _ => {}, - } - } - precursors.push(precursor); +pub trait ReadableParquetTable { + fn update_from_parquet_file(&mut self, name: &String, field: &Field); + + fn from_parquet_file( + file_name: impl AsRef, + ) -> Result, ParquetError> + where + Self: Sized + Default, + { + let file: File = File::open(file_name)?; + let reader: SerializedFileReader = + SerializedFileReader::new(file)?; + let results: Vec = reader + .get_row_iter(None)? + .map(|record| { + let mut result = Self::default(); + for (name, field) in record.get_column_iter() { + result.update_from_parquet_file(name, field); + } + result + }) + .collect(); + Ok(results) } - (precursors, offsets) +} + +#[derive(Debug, thiserror::Error)] +pub enum ParquetError { + #[error("Cannot read file {0}")] + IO(#[from] io::Error), + #[error("Cannot iterate over row {0}")] + ParquetIO(#[from] parquet::errors::ParquetError), } diff --git a/src/io/readers/file_readers/parquet_reader/precursors.rs b/src/io/readers/file_readers/parquet_reader/precursors.rs new file mode 100644 index 0000000..1a4799b --- /dev/null +++ b/src/io/readers/file_readers/parquet_reader/precursors.rs @@ -0,0 +1,50 @@ +use parquet::record::Field; + +use super::ReadableParquetTable; + +#[derive(Default, Debug, PartialEq)] +pub struct ParquetPrecursor { + pub mz: f64, + pub rt: f64, + pub im: f64, + pub charge: usize, + pub intensity: f64, + pub index: usize, + pub frame_index: usize, + pub offset: u64, + pub collision_energy: f64, +} + +impl ReadableParquetTable for ParquetPrecursor { + fn update_from_parquet_file(&mut self, name: &String, field: &Field) { + match name.to_string().as_str() { + "Id" => self.index = field.to_string().parse().unwrap_or_default(), + "RetentionTime" => { + self.rt = field.to_string().parse().unwrap_or_default() + }, + "MonoisotopicMz" => { + self.mz = field.to_string().parse().unwrap_or_default() + }, + "Charge" => { + self.charge = field.to_string().parse().unwrap_or_default() + }, + "Intensity" => { + self.intensity = field.to_string().parse().unwrap_or_default() + }, + "ooK0" => self.im = field.to_string().parse().unwrap_or_default(), + "MS1ParentFrameId" => { + self.frame_index = + field.to_string().parse::().unwrap_or_default() + as usize + }, + "BinaryOffset" => { + self.offset = field.to_string().parse().unwrap_or_default() + }, + "CollisionEnergy" => { + self.collision_energy = + field.to_string().parse().unwrap_or_default() + }, + _ => {}, + } + } +} diff --git a/src/io/readers/file_readers/sql_reader.rs b/src/io/readers/file_readers/sql_reader.rs index 5356b35..76bdb5b 100644 --- a/src/io/readers/file_readers/sql_reader.rs +++ b/src/io/readers/file_readers/sql_reader.rs @@ -3,28 +3,20 @@ pub mod metadata; pub mod pasef_frame_msms; pub mod precursors; -use std::{ - collections::HashMap, - path::{Path, PathBuf}, -}; +use std::{collections::HashMap, path::Path}; use rusqlite::Connection; #[derive(Debug)] pub struct SqlReader { - path: PathBuf, connection: Connection, } impl SqlReader { - pub fn open(file_name: impl AsRef) -> Result { + pub fn open(file_name: impl AsRef) -> Result { let path = file_name.as_ref().to_path_buf(); let connection = Connection::open(&path)?; - Ok(Self { path, connection }) - } - - pub fn get_path(&self) -> PathBuf { - self.path.clone() + Ok(Self { connection }) } pub fn read_column_from_table( From 4a25da92f642efd309615dd64f74a7ae4c8ba3c1 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 25 Jun 2024 16:44:56 +0200 Subject: [PATCH 065/109] FEAT: updated metadata path and precursor reader --- src/file_readers/dda_reader.rs | 10 ++-- src/io/readers/metadata_reader.rs | 4 +- src/io/readers/precursor_reader.rs | 80 ++++++++++---------------- src/io/readers/precursor_reader/tdf.rs | 73 +++++++++++++++++++++++ 4 files changed, 109 insertions(+), 58 deletions(-) create mode 100644 src/io/readers/precursor_reader/tdf.rs diff --git a/src/file_readers/dda_reader.rs b/src/file_readers/dda_reader.rs index 5539823..e29c01c 100644 --- a/src/file_readers/dda_reader.rs +++ b/src/file_readers/dda_reader.rs @@ -38,17 +38,15 @@ pub struct DDASpectrumReader { impl DDASpectrumReader { pub fn new(path_name: String) -> Self { let frame_reader: FrameReader = FrameReader::new(&path_name); - let metadata = MetadataReader::new(&path_name); + let sql_path = Path::new(&path_name).join("analysis.tdf"); + let metadata = MetadataReader::new(&sql_path); let mz_reader: Tof2MzConverter = metadata.mz_converter; - let tdf_sql_reader = - SqlReader::open(Path::new(&path_name).join("analysis.tdf")) - .unwrap(); + let tdf_sql_reader = SqlReader::open(&sql_path).unwrap(); let pasef_frames = SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader).unwrap(); let ms2_frames: Vec = frame_reader.parallel_filter(|x| x.msms_type != 0).collect(); - let precursor_reader: PrecursorReader = - PrecursorReader::new(&path_name); + let precursor_reader: PrecursorReader = PrecursorReader::new(&sql_path); let pasef_precursors = &pasef_frames.iter().map(|x| x.precursor).collect(); let order: Vec = argsort(&pasef_precursors); diff --git a/src/io/readers/metadata_reader.rs b/src/io/readers/metadata_reader.rs index 01bcf73..fdc0d6a 100644 --- a/src/io/readers/metadata_reader.rs +++ b/src/io/readers/metadata_reader.rs @@ -15,8 +15,8 @@ pub struct MetadataReader; impl MetadataReader { pub fn new(path: impl AsRef) -> Metadata { - let sql_path = path.as_ref().join("analysis.tdf"); - let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); + let sql_path = path.as_ref(); + let tdf_sql_reader = SqlReader::open(&sql_path).unwrap(); let sql_metadata: HashMap = SqlMetadata::from_sql_reader(&tdf_sql_reader).unwrap(); Metadata { diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs index e1c27b4..9706158 100644 --- a/src/io/readers/precursor_reader.rs +++ b/src/io/readers/precursor_reader.rs @@ -1,70 +1,50 @@ +pub mod minitdf; +pub mod tdf; + +use core::fmt; use std::path::{Path, PathBuf}; -use crate::{ - domain_converters::{ - ConvertableDomain, Frame2RtConverter, Scan2ImConverter, - }, - ms_data::Precursor, -}; +use minitdf::MiniTDFPrecursorReader; +use tdf::TDFPrecursorReader; -use super::{ - file_readers::sql_reader::{ - precursors::SqlPrecursor, ReadableSqlTable, SqlReader, - }, - metadata_reader::MetadataReader, -}; +use crate::ms_data::Precursor; -#[derive(Debug)] pub struct PrecursorReader { - path: PathBuf, - sql_precursors: Vec, - rt_converter: Frame2RtConverter, - im_converter: Scan2ImConverter, + precursor_reader: Box, +} + +impl fmt::Debug for PrecursorReader { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "PrecursorReader {{ /* fields omitted */ }}") + } } impl PrecursorReader { pub fn new(path: impl AsRef) -> Self { - let sql_path = path.as_ref().join("analysis.tdf"); - let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); - let metadata = MetadataReader::new(&path); - let rt_converter: Frame2RtConverter = metadata.rt_converter; - let im_converter: Scan2ImConverter = metadata.im_converter; - let sql_precursors = - SqlPrecursor::from_sql_reader(&tdf_sql_reader).unwrap(); - Self { - path: path.as_ref().to_path_buf(), - sql_precursors, - rt_converter, - im_converter, - } + let precursor_reader: Box = + match path.as_ref().extension().and_then(|e| e.to_str()) { + Some("parquet") => Box::new(MiniTDFPrecursorReader::new(path)), + Some("tdf") => Box::new(TDFPrecursorReader::new(path)), + _ => panic!(), + }; + Self { precursor_reader } } pub fn get(&self, index: usize) -> Precursor { - let mut precursor: Precursor = Precursor::default(); - let sql_precursor = &self.sql_precursors[index]; - let frame_id: usize = sql_precursor.precursor_frame; - let scan_id: f64 = sql_precursor.scan_average; - precursor.mz = sql_precursor.mz; - precursor.rt = self.rt_converter.convert(frame_id as u32); - precursor.im = self.im_converter.convert(scan_id); - precursor.charge = sql_precursor.charge; - precursor.intensity = sql_precursor.intensity; - precursor.index = index + 1; //TODO; - precursor.frame_index = frame_id; - // TODO OPTIMIZE!!!!! - // precursor.collision_energy = pasef_frames - // .iter() - // .find(|&x| x.precursor == index + 1) - // .unwrap() - // .collision_energy; - precursor + self.precursor_reader.get(index) } pub fn get_path(&self) -> PathBuf { - self.path.clone() + self.precursor_reader.get_path() } pub fn len(&self) -> usize { - self.sql_precursors.len() + self.precursor_reader.len() } } + +trait PrecursorReaderTrait: Sync { + fn get(&self, index: usize) -> Precursor; + fn get_path(&self) -> PathBuf; + fn len(&self) -> usize; +} diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs new file mode 100644 index 0000000..95b2417 --- /dev/null +++ b/src/io/readers/precursor_reader/tdf.rs @@ -0,0 +1,73 @@ +use std::path::{Path, PathBuf}; + +use crate::{ + domain_converters::{ + ConvertableDomain, Frame2RtConverter, Scan2ImConverter, + }, + io::readers::{ + file_readers::sql_reader::{ + precursors::SqlPrecursor, ReadableSqlTable, SqlReader, + }, + MetadataReader, + }, + ms_data::Precursor, +}; + +use super::PrecursorReaderTrait; + +#[derive(Debug)] +pub struct TDFPrecursorReader { + path: PathBuf, + sql_precursors: Vec, + rt_converter: Frame2RtConverter, + im_converter: Scan2ImConverter, +} + +impl TDFPrecursorReader { + pub fn new(path: impl AsRef) -> Self { + let sql_path = path.as_ref(); + let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); + let metadata = MetadataReader::new(&path); + let rt_converter: Frame2RtConverter = metadata.rt_converter; + let im_converter: Scan2ImConverter = metadata.im_converter; + let sql_precursors = + SqlPrecursor::from_sql_reader(&tdf_sql_reader).unwrap(); + Self { + path: path.as_ref().to_path_buf(), + sql_precursors, + rt_converter, + im_converter, + } + } +} + +impl PrecursorReaderTrait for TDFPrecursorReader { + fn get(&self, index: usize) -> Precursor { + let mut precursor: Precursor = Precursor::default(); + let sql_precursor = &self.sql_precursors[index]; + let frame_id: usize = sql_precursor.precursor_frame; + let scan_id: f64 = sql_precursor.scan_average; + precursor.mz = sql_precursor.mz; + precursor.rt = self.rt_converter.convert(frame_id as u32); + precursor.im = self.im_converter.convert(scan_id); + precursor.charge = sql_precursor.charge; + precursor.intensity = sql_precursor.intensity; + precursor.index = index + 1; //TODO; + precursor.frame_index = frame_id; + // TODO OPTIMIZE!!!!! + // precursor.collision_energy = pasef_frames + // .iter() + // .find(|&x| x.precursor == index + 1) + // .unwrap() + // .collision_energy; + precursor + } + + fn len(&self) -> usize { + self.sql_precursors.len() + } + + fn get_path(&self) -> PathBuf { + self.path.clone() + } +} From 0389075db03758d7c8cc44864ff07dfdba124b98 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 25 Jun 2024 16:45:29 +0200 Subject: [PATCH 066/109] FEAT: updated minitdf reader to muse precursor reader properly --- src/file_readers/mini_tdf_reader.rs | 101 +++++++++------------ src/io/readers/precursor_reader/minitdf.rs | 51 +++++++++++ 2 files changed, 92 insertions(+), 60 deletions(-) create mode 100644 src/io/readers/precursor_reader/minitdf.rs diff --git a/src/file_readers/mini_tdf_reader.rs b/src/file_readers/mini_tdf_reader.rs index 810fd07..6c6e130 100644 --- a/src/file_readers/mini_tdf_reader.rs +++ b/src/file_readers/mini_tdf_reader.rs @@ -1,8 +1,13 @@ use crate::{ file_readers::FileFormatError, - io::readers::file_readers::{ - parquet_reader::{precursors::ParquetPrecursor, ReadableParquetTable}, - tdf_blob_reader::{IndexedTdfBlobReader, TdfBlob}, + io::readers::{ + file_readers::{ + parquet_reader::{ + precursors::ParquetPrecursor, ReadableParquetTable, + }, + tdf_blob_reader::{IndexedTdfBlobReader, TdfBlob, TdfBlobError}, + }, + PrecursorReader, }, }; use std::fs; @@ -18,10 +23,8 @@ use { #[derive(Debug)] pub struct MiniTDFReader { pub path_name: String, - parquet_file_name: String, - precursors: Vec, - offsets: Vec, - frame_reader: Option, + precursor_reader: PrecursorReader, + blob_reader: IndexedTdfBlobReader, } fn find_ms2spectrum_file( @@ -61,80 +64,58 @@ fn find_ms2spectrum_file( impl MiniTDFReader { pub fn new(path_name: String) -> Self { - let parquet_file_name: String = String::default(); - let precursors: Vec = Vec::default(); - let offsets: Vec = Vec::default(); - let mut reader: MiniTDFReader = MiniTDFReader { + let parquet_file_name = Self::read_parquet_file_name(&path_name); + let precursor_reader = PrecursorReader::new(&parquet_file_name); + let offsets = Self::get_offsets(&parquet_file_name); + let blob_reader = + Self::get_spectrum_reader(&path_name, offsets).unwrap(); + Self { path_name, - parquet_file_name, - precursors, - offsets, - frame_reader: None, - }; - reader.read_parquet_file_name(); - reader.read_precursors(); - reader.set_spectrum_reader(); - reader + precursor_reader, + blob_reader, + } } - fn read_parquet_file_name(&mut self) { - let mut path: PathBuf = PathBuf::from(&self.path_name); + fn read_parquet_file_name(path_name: &String) -> String { + let mut path: PathBuf = PathBuf::from(&path_name); let ms2_parquet_file = - find_ms2spectrum_file(&self.path_name, "parquet".to_owned()) - .unwrap(); + find_ms2spectrum_file(&path_name, "parquet".to_owned()).unwrap(); path.push(ms2_parquet_file); - self.parquet_file_name = path.to_string_lossy().into_owned(); + path.to_string_lossy().into_owned() } - fn read_precursors(&mut self) { - // (self.precursors, self.offsets) = - // read_parquet_precursors(&self.parquet_file_name); + fn get_offsets(parquet_file_name: &String) -> Vec { let parquet_precursors = - ParquetPrecursor::from_parquet_file(&&self.parquet_file_name) - .unwrap(); - self.offsets = parquet_precursors.iter().map(|x| x.offset).collect(); - self.precursors = parquet_precursors + ParquetPrecursor::from_parquet_file(&parquet_file_name).unwrap(); + parquet_precursors .iter() - .map(|x| Precursor { - mz: x.mz, - rt: x.rt, - im: x.im, - charge: x.charge, - intensity: x.intensity, - index: x.index, - frame_index: x.frame_index, - collision_energy: x.collision_energy, - }) - .collect(); + .map(|x| x.offset as usize) + .collect() } - fn set_spectrum_reader(&mut self) { - let mut path: PathBuf = PathBuf::from(&self.path_name); + fn get_spectrum_reader( + path_name: &String, + offsets: Vec, + ) -> Result { + let mut path: PathBuf = PathBuf::from(&path_name); let ms2_bin_file = - find_ms2spectrum_file(&self.path_name, "bin".to_owned()).unwrap(); + find_ms2spectrum_file(&path_name, "bin".to_owned()).unwrap(); path.push(ms2_bin_file); let file_name: String = path.to_string_lossy().into_owned(); - self.frame_reader = Some( - IndexedTdfBlobReader::new( - String::from(&file_name), - self.offsets.iter().map(|x| *x as usize).collect(), - ) - .unwrap(), - ); + IndexedTdfBlobReader::new(String::from(&file_name), offsets) } pub fn read_single_spectrum(&self, index: usize) -> Spectrum { - let mut spectrum: Spectrum = Spectrum::create_from_tdf_blob_reader( - &self.frame_reader.as_ref().unwrap(), - index, - ); - spectrum.precursor = self.precursors[index]; - spectrum.index = self.precursors[index].index; + let mut spectrum: Spectrum = + Spectrum::create_from_tdf_blob_reader(&self.blob_reader, index); + let precursor = self.precursor_reader.get(index); + spectrum.precursor = precursor; + spectrum.index = precursor.index; spectrum } pub fn read_all_spectra(&self) -> Vec { - let size: usize = self.offsets.len(); + let size: usize = self.precursor_reader.len(); let mut spectra: Vec = (0..size) .into_par_iter() .map(|index| self.read_single_spectrum(index)) diff --git a/src/io/readers/precursor_reader/minitdf.rs b/src/io/readers/precursor_reader/minitdf.rs new file mode 100644 index 0000000..c5e1728 --- /dev/null +++ b/src/io/readers/precursor_reader/minitdf.rs @@ -0,0 +1,51 @@ +use std::path::{Path, PathBuf}; + +use crate::{ + io::readers::file_readers::parquet_reader::{ + precursors::ParquetPrecursor, ReadableParquetTable, + }, + ms_data::Precursor, +}; + +use super::PrecursorReaderTrait; + +#[derive(Debug)] +pub struct MiniTDFPrecursorReader { + path: PathBuf, + parquet_precursors: Vec, +} + +impl MiniTDFPrecursorReader { + pub fn new(path: impl AsRef) -> Self { + let parquet_precursors = + ParquetPrecursor::from_parquet_file(&path).unwrap(); + Self { + path: path.as_ref().to_path_buf(), + parquet_precursors, + } + } +} + +impl PrecursorReaderTrait for MiniTDFPrecursorReader { + fn get(&self, index: usize) -> Precursor { + let x = &self.parquet_precursors[index]; + Precursor { + mz: x.mz, + rt: x.rt, + im: x.im, + charge: x.charge, + intensity: x.intensity, + index: x.index, + frame_index: x.frame_index, + collision_energy: x.collision_energy, + } + } + + fn len(&self) -> usize { + self.parquet_precursors.len() + } + + fn get_path(&self) -> PathBuf { + self.path.clone() + } +} From b98b5ab0ca8a0899c4991813d267cabac648529f Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 25 Jun 2024 16:52:54 +0200 Subject: [PATCH 067/109] CHORE: cleaned up minitdf spectrum reader --- src/file_readers/mini_tdf_reader.rs | 50 +++++++---------------------- 1 file changed, 12 insertions(+), 38 deletions(-) diff --git a/src/file_readers/mini_tdf_reader.rs b/src/file_readers/mini_tdf_reader.rs index 6c6e130..0b3071d 100644 --- a/src/file_readers/mini_tdf_reader.rs +++ b/src/file_readers/mini_tdf_reader.rs @@ -11,14 +11,7 @@ use crate::{ }, }; use std::fs; -use { - crate::{ - // file_readers::ReadableSpectra, - ms_data::{Precursor, Spectrum}, - }, - rayon::prelude::*, - std::path::PathBuf, -}; +use {crate::ms_data::Spectrum, rayon::prelude::*, std::path::PathBuf}; #[derive(Debug)] pub struct MiniTDFReader { @@ -107,7 +100,7 @@ impl MiniTDFReader { pub fn read_single_spectrum(&self, index: usize) -> Spectrum { let mut spectrum: Spectrum = - Spectrum::create_from_tdf_blob_reader(&self.blob_reader, index); + Self::create_from_tdf_blob_reader(&self, index); let precursor = self.precursor_reader.get(index); spectrum.precursor = precursor; spectrum.index = precursor.index; @@ -127,14 +120,8 @@ impl MiniTDFReader { }); spectra } -} - -impl Spectrum { - fn set_tdf_blob_index(&mut self, index: usize) { - self.index = index; - } - fn update_from_tdf_blob(&mut self, blob: TdfBlob) { + fn update_from_tdf_blob(spectrum: &mut Spectrum, blob: TdfBlob) { let size: usize = blob.len(); let spectrum_data: Vec = (0..size).map(|i| blob.get(i)).collect(); let scan_count: usize = blob.len() / 3; @@ -146,31 +133,18 @@ impl Spectrum { bytemuck::cast_slice::(tof_indices_bytes); let intensity_values: &[f32] = bytemuck::cast_slice::(intensities_bytes); - self.intensities = intensity_values.iter().map(|&x| x as f64).collect(); - self.mz_values = mz_values.to_vec(); + spectrum.intensities = + intensity_values.iter().map(|&x| x as f64).collect(); + spectrum.mz_values = mz_values.to_vec(); } - fn update_from_tdf_blob_reader( - &mut self, - bin_file: &IndexedTdfBlobReader, - index: usize, - ) { - let blob = bin_file.get_blob(index).unwrap(); + fn create_from_tdf_blob_reader(&self, index: usize) -> Spectrum { + let mut spectrum = Spectrum::default(); + spectrum.index = index; + let blob = self.blob_reader.get_blob(index).unwrap(); if !blob.is_empty() { - self.update_from_tdf_blob(blob) + Self::update_from_tdf_blob(&mut spectrum, blob) } - } - - fn create_from_tdf_blob_reader( - bin_file: &IndexedTdfBlobReader, - index: usize, - ) -> Self - where - Self: Default, - { - let mut object = Self::default(); - object.set_tdf_blob_index(index); - object.update_from_tdf_blob_reader(bin_file, index); - object + spectrum } } From 45e37d19f17b5697839dd85edbc47c141016abf5 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 25 Jun 2024 16:56:41 +0200 Subject: [PATCH 068/109] CHORE: formatting of code --- src/file_readers/mini_tdf_reader.rs | 63 +++++++++---------- .../file_readers/sql_reader/metadata.rs | 2 +- 2 files changed, 29 insertions(+), 36 deletions(-) diff --git a/src/file_readers/mini_tdf_reader.rs b/src/file_readers/mini_tdf_reader.rs index 0b3071d..e0f3098 100644 --- a/src/file_readers/mini_tdf_reader.rs +++ b/src/file_readers/mini_tdf_reader.rs @@ -20,41 +20,6 @@ pub struct MiniTDFReader { blob_reader: IndexedTdfBlobReader, } -fn find_ms2spectrum_file( - ms2_dir_path: &str, - extension: String, -) -> Result { - let files = fs::read_dir(ms2_dir_path).unwrap(); - for file in files { - let filename = file - .unwrap() - .path() - .file_name() - .unwrap() - .to_str() - .unwrap() - .to_owned(); - if filename - .ends_with(std::format!("ms2spectrum.{}", extension).as_str()) - { - return Ok(filename); - } - } - let err = match extension.as_str() { - "parquet" => FileFormatError::MetadataFilesAreMissing, - "bin" => FileFormatError::BinaryFilesAreMissing, - _ => FileFormatError::BinaryFilesAreMissing, - }; - println!( - "{}", - format!( - "No '*.ms2spectrum.{}' file found in '{}'", - extension, ms2_dir_path - ) - ); - return Err(err); -} - impl MiniTDFReader { pub fn new(path_name: String) -> Self { let parquet_file_name = Self::read_parquet_file_name(&path_name); @@ -148,3 +113,31 @@ impl MiniTDFReader { spectrum } } + +fn find_ms2spectrum_file( + ms2_dir_path: &str, + extension: String, +) -> Result { + let files = fs::read_dir(ms2_dir_path).unwrap(); + for file in files { + let filename = file + .unwrap() + .path() + .file_name() + .unwrap() + .to_str() + .unwrap() + .to_owned(); + if filename + .ends_with(std::format!("ms2spectrum.{}", extension).as_str()) + { + return Ok(filename); + } + } + let err = match extension.as_str() { + "parquet" => FileFormatError::MetadataFilesAreMissing, + "bin" => FileFormatError::BinaryFilesAreMissing, + _ => FileFormatError::BinaryFilesAreMissing, + }; + return Err(err); +} diff --git a/src/io/readers/file_readers/sql_reader/metadata.rs b/src/io/readers/file_readers/sql_reader/metadata.rs index 920791f..fb045ba 100644 --- a/src/io/readers/file_readers/sql_reader/metadata.rs +++ b/src/io/readers/file_readers/sql_reader/metadata.rs @@ -1,6 +1,6 @@ use super::ReadableSqlHashMap; -pub struct SqlMetadata {} +pub struct SqlMetadata; impl ReadableSqlHashMap for SqlMetadata { fn get_sql_query() -> String { From 5717c6db9cfcb58c992bf05dbbd91e8e2a1d1aaf Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 25 Jun 2024 17:10:09 +0200 Subject: [PATCH 069/109] FEAT: added dummy code to start refactoring spectrum readers --- src/io/readers/spectrum_reader.rs | 50 +++++++++++++++++++++++ src/io/readers/spectrum_reader/minitdf.rs | 32 +++++++++++++++ src/io/readers/spectrum_reader/tdf.rs | 32 +++++++++++++++ 3 files changed, 114 insertions(+) create mode 100644 src/io/readers/spectrum_reader.rs create mode 100644 src/io/readers/spectrum_reader/minitdf.rs create mode 100644 src/io/readers/spectrum_reader/tdf.rs diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs new file mode 100644 index 0000000..bf813bc --- /dev/null +++ b/src/io/readers/spectrum_reader.rs @@ -0,0 +1,50 @@ +pub mod minitdf; +pub mod tdf; + +use core::fmt; +use std::path::{Path, PathBuf}; + +use minitdf::MiniTDFSpectrumReader; +use tdf::TDFSpectrumReader; + +use crate::ms_data::Spectrum; + +pub struct SpectrumReader { + spectrum_reader: Box, +} + +impl fmt::Debug for SpectrumReader { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "SpectrumReader {{ /* fields omitted */ }}") + } +} + +impl SpectrumReader { + pub fn new(path: impl AsRef) -> Self { + let spectrum_reader: Box = + match path.as_ref().extension().and_then(|e| e.to_str()) { + Some("parquet") => Box::new(MiniTDFSpectrumReader::new(path)), + Some("tdf") => Box::new(TDFSpectrumReader::new(path)), + _ => panic!(), + }; + Self { spectrum_reader } + } + + pub fn get(&self, index: usize) -> Spectrum { + self.spectrum_reader.get(index) + } + + pub fn get_path(&self) -> PathBuf { + self.spectrum_reader.get_path() + } + + pub fn len(&self) -> usize { + self.spectrum_reader.len() + } +} + +trait SpectrumReaderTrait: Sync { + fn get(&self, index: usize) -> Spectrum; + fn get_path(&self) -> PathBuf; + fn len(&self) -> usize; +} diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs new file mode 100644 index 0000000..8b32992 --- /dev/null +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -0,0 +1,32 @@ +use std::path::{Path, PathBuf}; + +use crate::ms_data::Spectrum; + +use super::SpectrumReaderTrait; + +#[derive(Debug)] +pub struct MiniTDFSpectrumReader { + path: PathBuf, +} + +impl MiniTDFSpectrumReader { + pub fn new(path: impl AsRef) -> Self { + Self { + path: path.as_ref().to_path_buf(), + } + } +} + +impl SpectrumReaderTrait for MiniTDFSpectrumReader { + fn get(&self, index: usize) -> Spectrum { + Spectrum::default() + } + + fn len(&self) -> usize { + 0 //TODO + } + + fn get_path(&self) -> PathBuf { + self.path.clone() + } +} diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs new file mode 100644 index 0000000..d3bb41f --- /dev/null +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -0,0 +1,32 @@ +use std::path::{Path, PathBuf}; + +use crate::ms_data::Spectrum; + +use super::SpectrumReaderTrait; + +#[derive(Debug)] +pub struct TDFSpectrumReader { + path: PathBuf, +} + +impl TDFSpectrumReader { + pub fn new(path: impl AsRef) -> Self { + Self { + path: path.as_ref().to_path_buf(), + } + } +} + +impl SpectrumReaderTrait for TDFSpectrumReader { + fn get(&self, index: usize) -> Spectrum { + Spectrum::default() + } + + fn len(&self) -> usize { + 0 //TODO + } + + fn get_path(&self) -> PathBuf { + self.path.clone() + } +} From 5db16b3d9429ea829225e569c16f883d133fd492 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 25 Jun 2024 17:11:22 +0200 Subject: [PATCH 070/109] CHORE: changed visibility of sub readers --- src/io/readers/precursor_reader.rs | 4 ++-- src/io/readers/spectrum_reader.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs index 9706158..23a804b 100644 --- a/src/io/readers/precursor_reader.rs +++ b/src/io/readers/precursor_reader.rs @@ -1,5 +1,5 @@ -pub mod minitdf; -pub mod tdf; +mod minitdf; +mod tdf; use core::fmt; use std::path::{Path, PathBuf}; diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index bf813bc..58fef84 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -1,5 +1,5 @@ -pub mod minitdf; -pub mod tdf; +mod minitdf; +mod tdf; use core::fmt; use std::path::{Path, PathBuf}; From dd768d3951ea23e8b1aabbb40ff119bdd6984bfe Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 25 Jun 2024 17:23:57 +0200 Subject: [PATCH 071/109] CHORE: formatting --- src/file_readers/dda_reader.rs | 6 ++-- src/file_readers/mini_tdf_reader.rs | 18 ++++------ src/io/readers/file_readers/parquet_reader.rs | 12 +++---- .../file_readers/parquet_reader/precursors.rs | 36 ++++++------------- 4 files changed, 26 insertions(+), 46 deletions(-) diff --git a/src/file_readers/dda_reader.rs b/src/file_readers/dda_reader.rs index e29c01c..0d0db1b 100644 --- a/src/file_readers/dda_reader.rs +++ b/src/file_readers/dda_reader.rs @@ -30,9 +30,9 @@ pub struct DDASpectrumReader { precursor_reader: PrecursorReader, mz_reader: Tof2MzConverter, ms2_frames: Vec, - pub pasef_frames: Vec, - pub order: Vec, - pub offsets: Vec, + pasef_frames: Vec, + order: Vec, + offsets: Vec, } impl DDASpectrumReader { diff --git a/src/file_readers/mini_tdf_reader.rs b/src/file_readers/mini_tdf_reader.rs index e0f3098..5978c9f 100644 --- a/src/file_readers/mini_tdf_reader.rs +++ b/src/file_readers/mini_tdf_reader.rs @@ -64,8 +64,12 @@ impl MiniTDFReader { } pub fn read_single_spectrum(&self, index: usize) -> Spectrum { - let mut spectrum: Spectrum = - Self::create_from_tdf_blob_reader(&self, index); + let mut spectrum = Spectrum::default(); + spectrum.index = index; + let blob = self.blob_reader.get_blob(index).unwrap(); + if !blob.is_empty() { + Self::update_from_tdf_blob(&mut spectrum, blob) + } let precursor = self.precursor_reader.get(index); spectrum.precursor = precursor; spectrum.index = precursor.index; @@ -102,16 +106,6 @@ impl MiniTDFReader { intensity_values.iter().map(|&x| x as f64).collect(); spectrum.mz_values = mz_values.to_vec(); } - - fn create_from_tdf_blob_reader(&self, index: usize) -> Spectrum { - let mut spectrum = Spectrum::default(); - spectrum.index = index; - let blob = self.blob_reader.get_blob(index).unwrap(); - if !blob.is_empty() { - Self::update_from_tdf_blob(&mut spectrum, blob) - } - spectrum - } } fn find_ms2spectrum_file( diff --git a/src/io/readers/file_readers/parquet_reader.rs b/src/io/readers/file_readers/parquet_reader.rs index 8140c33..7409826 100644 --- a/src/io/readers/file_readers/parquet_reader.rs +++ b/src/io/readers/file_readers/parquet_reader.rs @@ -1,13 +1,10 @@ pub mod precursors; -use parquet::{ - file::reader::{FileReader, SerializedFileReader}, - record::Field, -}; +use parquet::file::reader::{FileReader, SerializedFileReader}; use std::{fs::File, io, path::Path}; pub trait ReadableParquetTable { - fn update_from_parquet_file(&mut self, name: &String, field: &Field); + fn update_from_parquet_file(&mut self, name: &str, field: String); fn from_parquet_file( file_name: impl AsRef, @@ -23,7 +20,10 @@ pub trait ReadableParquetTable { .map(|record| { let mut result = Self::default(); for (name, field) in record.get_column_iter() { - result.update_from_parquet_file(name, field); + result.update_from_parquet_file( + name.to_string().as_str(), + field.to_string(), + ); } result }) diff --git a/src/io/readers/file_readers/parquet_reader/precursors.rs b/src/io/readers/file_readers/parquet_reader/precursors.rs index 1a4799b..19ba985 100644 --- a/src/io/readers/file_readers/parquet_reader/precursors.rs +++ b/src/io/readers/file_readers/parquet_reader/precursors.rs @@ -1,5 +1,3 @@ -use parquet::record::Field; - use super::ReadableParquetTable; #[derive(Default, Debug, PartialEq)] @@ -16,33 +14,21 @@ pub struct ParquetPrecursor { } impl ReadableParquetTable for ParquetPrecursor { - fn update_from_parquet_file(&mut self, name: &String, field: &Field) { - match name.to_string().as_str() { - "Id" => self.index = field.to_string().parse().unwrap_or_default(), - "RetentionTime" => { - self.rt = field.to_string().parse().unwrap_or_default() - }, - "MonoisotopicMz" => { - self.mz = field.to_string().parse().unwrap_or_default() - }, - "Charge" => { - self.charge = field.to_string().parse().unwrap_or_default() - }, - "Intensity" => { - self.intensity = field.to_string().parse().unwrap_or_default() - }, - "ooK0" => self.im = field.to_string().parse().unwrap_or_default(), + fn update_from_parquet_file(&mut self, name: &str, field: String) { + match name { + "Id" => self.index = field.parse().unwrap_or_default(), + "RetentionTime" => self.rt = field.parse().unwrap_or_default(), + "MonoisotopicMz" => self.mz = field.parse().unwrap_or_default(), + "Charge" => self.charge = field.parse().unwrap_or_default(), + "Intensity" => self.intensity = field.parse().unwrap_or_default(), + "ooK0" => self.im = field.parse().unwrap_or_default(), "MS1ParentFrameId" => { self.frame_index = - field.to_string().parse::().unwrap_or_default() - as usize - }, - "BinaryOffset" => { - self.offset = field.to_string().parse().unwrap_or_default() + field.parse::().unwrap_or_default() as usize }, + "BinaryOffset" => self.offset = field.parse().unwrap_or_default(), "CollisionEnergy" => { - self.collision_energy = - field.to_string().parse().unwrap_or_default() + self.collision_energy = field.parse().unwrap_or_default() }, _ => {}, } From f87273d7271bf4f224e1277519e4637029176c31 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 25 Jun 2024 17:33:44 +0200 Subject: [PATCH 072/109] CHORE: simplify parquet table reading --- src/io/readers/file_readers/parquet_reader.rs | 8 +++++-- .../file_readers/parquet_reader/precursors.rs | 23 +++++++++---------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/io/readers/file_readers/parquet_reader.rs b/src/io/readers/file_readers/parquet_reader.rs index 7409826..c881761 100644 --- a/src/io/readers/file_readers/parquet_reader.rs +++ b/src/io/readers/file_readers/parquet_reader.rs @@ -1,10 +1,14 @@ pub mod precursors; use parquet::file::reader::{FileReader, SerializedFileReader}; -use std::{fs::File, io, path::Path}; +use std::{fs::File, io, path::Path, str::FromStr}; pub trait ReadableParquetTable { - fn update_from_parquet_file(&mut self, name: &str, field: String); + fn update_from_parquet_file(&mut self, key: &str, value: String); + + fn parse_default_field(field: String) -> T { + field.parse().unwrap_or_default() + } fn from_parquet_file( file_name: impl AsRef, diff --git a/src/io/readers/file_readers/parquet_reader/precursors.rs b/src/io/readers/file_readers/parquet_reader/precursors.rs index 19ba985..3ce5302 100644 --- a/src/io/readers/file_readers/parquet_reader/precursors.rs +++ b/src/io/readers/file_readers/parquet_reader/precursors.rs @@ -14,21 +14,20 @@ pub struct ParquetPrecursor { } impl ReadableParquetTable for ParquetPrecursor { - fn update_from_parquet_file(&mut self, name: &str, field: String) { - match name { - "Id" => self.index = field.parse().unwrap_or_default(), - "RetentionTime" => self.rt = field.parse().unwrap_or_default(), - "MonoisotopicMz" => self.mz = field.parse().unwrap_or_default(), - "Charge" => self.charge = field.parse().unwrap_or_default(), - "Intensity" => self.intensity = field.parse().unwrap_or_default(), - "ooK0" => self.im = field.parse().unwrap_or_default(), + fn update_from_parquet_file(&mut self, key: &str, value: String) { + match key { + "Id" => self.index = Self::parse_default_field(value), + "RetentionTime" => self.rt = Self::parse_default_field(value), + "MonoisotopicMz" => self.mz = Self::parse_default_field(value), + "Charge" => self.charge = Self::parse_default_field(value), + "Intensity" => self.intensity = Self::parse_default_field(value), + "ooK0" => self.im = Self::parse_default_field(value), "MS1ParentFrameId" => { - self.frame_index = - field.parse::().unwrap_or_default() as usize + self.frame_index = Self::parse_default_field(value) }, - "BinaryOffset" => self.offset = field.parse().unwrap_or_default(), + "BinaryOffset" => self.offset = Self::parse_default_field(value), "CollisionEnergy" => { - self.collision_energy = field.parse().unwrap_or_default() + self.collision_energy = Self::parse_default_field(value) }, _ => {}, } From 76f6f9b639c9445953de8c8f2ebc3e3d43b3d34f Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 26 Jun 2024 11:42:27 +0200 Subject: [PATCH 073/109] FEATL simplifying frame reader --- src/file_readers.rs | 70 +++++++++++++++++---------------------------- 1 file changed, 27 insertions(+), 43 deletions(-) diff --git a/src/file_readers.rs b/src/file_readers.rs index 5878531..1caae44 100644 --- a/src/file_readers.rs +++ b/src/file_readers.rs @@ -5,17 +5,16 @@ use std::{fs, path::PathBuf}; use crate::{io::readers::FrameReader, Error}; +use crate::ms_data::{Frame, Spectrum}; use dda_reader::DDASpectrumReader; use mini_tdf_reader::MiniTDFReader; use rayon::iter::ParallelIterator; -use { - // self::file_formats::FileFormat, - crate::ms_data::{Frame, Spectrum}, -}; /// A reader to read [frames](crate::ms_data::Frame) and [spectra](crate::ms_data::Spectrum). pub struct FileReader { format: FileFormat, + frame_reader: Option, + // spectrum_reader: Option, } ///NOTE: The functions to read a single frame or spectrum are not optimized. @@ -24,27 +23,44 @@ pub struct FileReader { impl FileReader { pub fn new>(path_name: T) -> Result { let format: FileFormat = FileFormat::parse(path_name)?; - Ok(Self { format }) + let frame_reader = match &format { + FileFormat::DFolder(path) => Some(FrameReader::new(&path)), + FileFormat::MS2Folder(_) => None, + }; + Ok(Self { + format, + frame_reader, + }) } pub fn read_single_frame(&self, index: usize) -> Frame { - self.format.read_single_frame(index) + self.frame_reader.as_ref().unwrap().get(index) } pub fn read_all_frames(&self) -> Vec { - self.format.read_all_frames() + self.frame_reader + .as_ref() + .unwrap() + .parallel_filter(|_| true) + .collect() } - /// NOTE: The returned vec contains all frames to not disrupt indexing. /// MS2 frames are set to unknown and not read. pub fn read_all_ms1_frames(&self) -> Vec { - self.format.read_all_ms1_frames() + self.frame_reader + .as_ref() + .unwrap() + .parallel_filter(|x| x.msms_type == 0) + .collect() } - /// NOTE: The returned vec contains all frames to not disrupt indexing. /// MS1 frames are set to unknown and not read. pub fn read_all_ms2_frames(&self) -> Vec { - self.format.read_all_ms2_frames() + self.frame_reader + .as_ref() + .unwrap() + .parallel_filter(|x| x.msms_type != 0) + .collect() } pub fn read_single_spectrum(&self, index: usize) -> Spectrum { @@ -109,38 +125,6 @@ impl FileFormat { Ok(()) } - fn get_frame_reader(&self) -> FrameReader { - let path = match &self { - Self::DFolder(path) => path, - Self::MS2Folder(path) => panic!( - "Folder {:} is not frame readable", - path.to_str().unwrap_or_default().to_string() - ), - }; - let frame_reader: FrameReader = FrameReader::new(&path); - frame_reader - } - - pub fn read_single_frame(&self, index: usize) -> Frame { - self.get_frame_reader().get(index) - } - - pub fn read_all_frames(&self) -> Vec { - self.get_frame_reader().parallel_filter(|_| true).collect() - } - - pub fn read_all_ms1_frames(&self) -> Vec { - self.get_frame_reader() - .parallel_filter(|x| x.msms_type == 0) - .collect() - } - - pub fn read_all_ms2_frames(&self) -> Vec { - self.get_frame_reader() - .parallel_filter(|x| x.msms_type != 0) - .collect() - } - pub fn read_single_spectrum(&self, index: usize) -> Spectrum { match &self { Self::DFolder(path) => DDASpectrumReader::new( From e806627be023b24a4ecd0712591751a44d8fa05c Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 26 Jun 2024 15:27:06 +0200 Subject: [PATCH 074/109] FEAT: fully refactored minitdf spectrum reader --- src/file_readers.rs | 86 ++++++-------- src/file_readers/mini_tdf_reader.rs | 137 ---------------------- src/io/readers/spectrum_reader.rs | 16 ++- src/io/readers/spectrum_reader/minitdf.rs | 81 ++++++++++++- 4 files changed, 129 insertions(+), 191 deletions(-) delete mode 100644 src/file_readers/mini_tdf_reader.rs diff --git a/src/file_readers.rs b/src/file_readers.rs index 1caae44..21341c6 100644 --- a/src/file_readers.rs +++ b/src/file_readers.rs @@ -1,24 +1,24 @@ mod dda_reader; -mod mini_tdf_reader; use std::{fs, path::PathBuf}; +use crate::io::readers::file_readers::sql_reader::frames::SqlFrame; +use crate::io::readers::SpectrumReader; use crate::{io::readers::FrameReader, Error}; use crate::ms_data::{Frame, Spectrum}; use dda_reader::DDASpectrumReader; -use mini_tdf_reader::MiniTDFReader; use rayon::iter::ParallelIterator; /// A reader to read [frames](crate::ms_data::Frame) and [spectra](crate::ms_data::Spectrum). pub struct FileReader { - format: FileFormat, frame_reader: Option, - // spectrum_reader: Option, + tdf_spectrum_reader: Option, + minitdf_spectrum_reader: Option, } -///NOTE: The functions to read a single frame or spectrum are not optimized. -/// In case many frames or spectra are required, it is best to use +///NOTE: The function to read a single spectrum is not optimized. +/// In case many spectra are required, it is best to use /// any of the functions that directly return a `Vec`. impl FileReader { pub fn new>(path_name: T) -> Result { @@ -27,9 +27,20 @@ impl FileReader { FileFormat::DFolder(path) => Some(FrameReader::new(&path)), FileFormat::MS2Folder(_) => None, }; + let tdf_spectrum_reader = match &format { + FileFormat::DFolder(path) => Some(DDASpectrumReader::new( + path.to_str().unwrap_or_default().to_string(), + )), + FileFormat::MS2Folder(_) => None, + }; + let minitdf_spectrum_reader = match &format { + FileFormat::DFolder(_) => None, + FileFormat::MS2Folder(path) => Some(SpectrumReader::new(path)), + }; Ok(Self { - format, frame_reader, + tdf_spectrum_reader, + minitdf_spectrum_reader, }) } @@ -37,34 +48,34 @@ impl FileReader { self.frame_reader.as_ref().unwrap().get(index) } - pub fn read_all_frames(&self) -> Vec { + fn read_multiple_frames<'a, F: Fn(&SqlFrame) -> bool + Sync + Send + 'a>( + &self, + predicate: F, + ) -> Vec { self.frame_reader .as_ref() .unwrap() - .parallel_filter(|_| true) + .parallel_filter(|x| predicate(x)) .collect() } - /// MS2 frames are set to unknown and not read. + pub fn read_all_frames(&self) -> Vec { + self.read_multiple_frames(|_| true) + } + pub fn read_all_ms1_frames(&self) -> Vec { - self.frame_reader - .as_ref() - .unwrap() - .parallel_filter(|x| x.msms_type == 0) - .collect() + self.read_multiple_frames(|x| x.msms_type == 0) } - /// MS1 frames are set to unknown and not read. pub fn read_all_ms2_frames(&self) -> Vec { - self.frame_reader - .as_ref() - .unwrap() - .parallel_filter(|x| x.msms_type != 0) - .collect() + self.read_multiple_frames(|x| x.msms_type != 0) } pub fn read_single_spectrum(&self, index: usize) -> Spectrum { - self.format.read_single_spectrum(index) + match &self.tdf_spectrum_reader { + Some(reader) => reader.read_single_spectrum(index), + None => self.minitdf_spectrum_reader.as_ref().unwrap().get(index), + } } ///NOTE: ddaPASEF MS2 spectra are automatically calibrated with @@ -72,7 +83,10 @@ impl FileReader { /// Hence, reading spectra individually through `read_single_spectrum` /// might yield slightly different mz values. pub fn read_all_spectra(&self) -> Vec { - self.format.read_all_spectra() + match &self.tdf_spectrum_reader { + Some(reader) => reader.read_all_spectra(), + None => self.minitdf_spectrum_reader.as_ref().unwrap().get_all(), + } } } @@ -124,32 +138,6 @@ impl FileFormat { } Ok(()) } - - pub fn read_single_spectrum(&self, index: usize) -> Spectrum { - match &self { - Self::DFolder(path) => DDASpectrumReader::new( - path.to_str().unwrap_or_default().to_string(), - ) - .read_single_spectrum(index), - Self::MS2Folder(path) => MiniTDFReader::new( - path.to_str().unwrap_or_default().to_string(), - ) - .read_single_spectrum(index), - } - } - - pub fn read_all_spectra(&self) -> Vec { - match &self { - Self::DFolder(path) => DDASpectrumReader::new( - path.to_str().unwrap_or_default().to_string(), - ) - .read_all_spectra(), - Self::MS2Folder(path) => MiniTDFReader::new( - path.to_str().unwrap_or_default().to_string(), - ) - .read_all_spectra(), - } - } } fn folder_contains_extension( diff --git a/src/file_readers/mini_tdf_reader.rs b/src/file_readers/mini_tdf_reader.rs deleted file mode 100644 index 5978c9f..0000000 --- a/src/file_readers/mini_tdf_reader.rs +++ /dev/null @@ -1,137 +0,0 @@ -use crate::{ - file_readers::FileFormatError, - io::readers::{ - file_readers::{ - parquet_reader::{ - precursors::ParquetPrecursor, ReadableParquetTable, - }, - tdf_blob_reader::{IndexedTdfBlobReader, TdfBlob, TdfBlobError}, - }, - PrecursorReader, - }, -}; -use std::fs; -use {crate::ms_data::Spectrum, rayon::prelude::*, std::path::PathBuf}; - -#[derive(Debug)] -pub struct MiniTDFReader { - pub path_name: String, - precursor_reader: PrecursorReader, - blob_reader: IndexedTdfBlobReader, -} - -impl MiniTDFReader { - pub fn new(path_name: String) -> Self { - let parquet_file_name = Self::read_parquet_file_name(&path_name); - let precursor_reader = PrecursorReader::new(&parquet_file_name); - let offsets = Self::get_offsets(&parquet_file_name); - let blob_reader = - Self::get_spectrum_reader(&path_name, offsets).unwrap(); - Self { - path_name, - precursor_reader, - blob_reader, - } - } - - fn read_parquet_file_name(path_name: &String) -> String { - let mut path: PathBuf = PathBuf::from(&path_name); - let ms2_parquet_file = - find_ms2spectrum_file(&path_name, "parquet".to_owned()).unwrap(); - path.push(ms2_parquet_file); - path.to_string_lossy().into_owned() - } - - fn get_offsets(parquet_file_name: &String) -> Vec { - let parquet_precursors = - ParquetPrecursor::from_parquet_file(&parquet_file_name).unwrap(); - parquet_precursors - .iter() - .map(|x| x.offset as usize) - .collect() - } - - fn get_spectrum_reader( - path_name: &String, - offsets: Vec, - ) -> Result { - let mut path: PathBuf = PathBuf::from(&path_name); - let ms2_bin_file = - find_ms2spectrum_file(&path_name, "bin".to_owned()).unwrap(); - path.push(ms2_bin_file); - let file_name: String = path.to_string_lossy().into_owned(); - IndexedTdfBlobReader::new(String::from(&file_name), offsets) - } - - pub fn read_single_spectrum(&self, index: usize) -> Spectrum { - let mut spectrum = Spectrum::default(); - spectrum.index = index; - let blob = self.blob_reader.get_blob(index).unwrap(); - if !blob.is_empty() { - Self::update_from_tdf_blob(&mut spectrum, blob) - } - let precursor = self.precursor_reader.get(index); - spectrum.precursor = precursor; - spectrum.index = precursor.index; - spectrum - } - - pub fn read_all_spectra(&self) -> Vec { - let size: usize = self.precursor_reader.len(); - let mut spectra: Vec = (0..size) - .into_par_iter() - .map(|index| self.read_single_spectrum(index)) - .collect(); - spectra.sort_by(|a, b| { - let x = b.precursor.index as f64; - let y = a.precursor.index as f64; - y.total_cmp(&x) - }); - spectra - } - - fn update_from_tdf_blob(spectrum: &mut Spectrum, blob: TdfBlob) { - let size: usize = blob.len(); - let spectrum_data: Vec = (0..size).map(|i| blob.get(i)).collect(); - let scan_count: usize = blob.len() / 3; - let tof_indices_bytes: &[u32] = - &spectrum_data[..scan_count as usize * 2]; - let intensities_bytes: &[u32] = - &spectrum_data[scan_count as usize * 2..]; - let mz_values: &[f64] = - bytemuck::cast_slice::(tof_indices_bytes); - let intensity_values: &[f32] = - bytemuck::cast_slice::(intensities_bytes); - spectrum.intensities = - intensity_values.iter().map(|&x| x as f64).collect(); - spectrum.mz_values = mz_values.to_vec(); - } -} - -fn find_ms2spectrum_file( - ms2_dir_path: &str, - extension: String, -) -> Result { - let files = fs::read_dir(ms2_dir_path).unwrap(); - for file in files { - let filename = file - .unwrap() - .path() - .file_name() - .unwrap() - .to_str() - .unwrap() - .to_owned(); - if filename - .ends_with(std::format!("ms2spectrum.{}", extension).as_str()) - { - return Ok(filename); - } - } - let err = match extension.as_str() { - "parquet" => FileFormatError::MetadataFilesAreMissing, - "bin" => FileFormatError::BinaryFilesAreMissing, - _ => FileFormatError::BinaryFilesAreMissing, - }; - return Err(err); -} diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index 58fef84..fe084ad 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -5,6 +5,7 @@ use core::fmt; use std::path::{Path, PathBuf}; use minitdf::MiniTDFSpectrumReader; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; use tdf::TDFSpectrumReader; use crate::ms_data::Spectrum; @@ -23,7 +24,7 @@ impl SpectrumReader { pub fn new(path: impl AsRef) -> Self { let spectrum_reader: Box = match path.as_ref().extension().and_then(|e| e.to_str()) { - Some("parquet") => Box::new(MiniTDFSpectrumReader::new(path)), + Some("ms2") => Box::new(MiniTDFSpectrumReader::new(path)), Some("tdf") => Box::new(TDFSpectrumReader::new(path)), _ => panic!(), }; @@ -34,6 +35,19 @@ impl SpectrumReader { self.spectrum_reader.get(index) } + pub fn get_all(&self) -> Vec { + let mut spectra: Vec = (0..self.len()) + .into_par_iter() + .map(|index| self.get(index)) + .collect(); + spectra.sort_by(|a, b| { + let x = b.precursor.index as f64; + let y = a.precursor.index as f64; + y.total_cmp(&x) + }); + spectra + } + pub fn get_path(&self) -> PathBuf { self.spectrum_reader.get_path() } diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 8b32992..2daa548 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -1,32 +1,105 @@ -use std::path::{Path, PathBuf}; +use std::{ + fs, + path::{Path, PathBuf}, +}; -use crate::ms_data::Spectrum; +use crate::{ + io::readers::{ + file_readers::{ + parquet_reader::{ + precursors::ParquetPrecursor, ReadableParquetTable, + }, + tdf_blob_reader::IndexedTdfBlobReader, + }, + PrecursorReader, + }, + ms_data::Spectrum, +}; use super::SpectrumReaderTrait; #[derive(Debug)] pub struct MiniTDFSpectrumReader { path: PathBuf, + precursor_reader: PrecursorReader, + blob_reader: IndexedTdfBlobReader, } impl MiniTDFSpectrumReader { pub fn new(path: impl AsRef) -> Self { + let parquet_file_name = + find_extension(&path, "ms2spectrum.parquet").unwrap(); + let precursor_reader = PrecursorReader::new(&parquet_file_name); + let offsets = Self::get_offsets(&parquet_file_name); + let bin_file_name = find_extension(&path, "bin").unwrap(); + let blob_reader = + IndexedTdfBlobReader::new(&bin_file_name, offsets).unwrap(); Self { path: path.as_ref().to_path_buf(), + precursor_reader, + blob_reader, } } + + fn get_offsets(parquet_file_name: impl AsRef) -> Vec { + ParquetPrecursor::from_parquet_file(&parquet_file_name) + .unwrap() + .iter() + .map(|x| x.offset as usize) + .collect() + } } impl SpectrumReaderTrait for MiniTDFSpectrumReader { fn get(&self, index: usize) -> Spectrum { - Spectrum::default() + let mut spectrum = Spectrum::default(); + spectrum.index = index; + let blob = self.blob_reader.get_blob(index).unwrap(); + if !blob.is_empty() { + let size: usize = blob.len(); + let spectrum_data: Vec = + (0..size).map(|i| blob.get(i)).collect(); + let scan_count: usize = blob.len() / 3; + let tof_indices_bytes: &[u32] = + &spectrum_data[..scan_count as usize * 2]; + let intensities_bytes: &[u32] = + &spectrum_data[scan_count as usize * 2..]; + let mz_values: &[f64] = + bytemuck::cast_slice::(tof_indices_bytes); + let intensity_values: &[f32] = + bytemuck::cast_slice::(intensities_bytes); + spectrum.intensities = + intensity_values.iter().map(|&x| x as f64).collect(); + spectrum.mz_values = mz_values.to_vec(); + } + let precursor = self.precursor_reader.get(index); + spectrum.precursor = precursor; + spectrum.index = precursor.index; + spectrum } fn len(&self) -> usize { - 0 //TODO + self.precursor_reader.len() } fn get_path(&self) -> PathBuf { self.path.clone() } } + +fn find_extension(path: impl AsRef, extension: &str) -> Option { + let extension_lower = extension.to_lowercase(); + for entry in fs::read_dir(&path).ok()? { + if let Ok(entry) = entry { + let file_path = entry.path(); + if let Some(file_name) = + file_path.file_name().and_then(|name| name.to_str()) + { + if file_name.to_lowercase().ends_with(&extension_lower) { + return Some(file_path); + } + } + } + } + None +} From 308b89149daaf97592be010e87d6317b5da26d5b Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 26 Jun 2024 15:32:03 +0200 Subject: [PATCH 075/109] FEAT: made extension finder more generic --- src/io/readers/spectrum_reader/minitdf.rs | 23 ++------------------- src/utils.rs | 25 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 2daa548..2643d73 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -1,7 +1,4 @@ -use std::{ - fs, - path::{Path, PathBuf}, -}; +use std::path::{Path, PathBuf}; use crate::{ io::readers::{ @@ -14,6 +11,7 @@ use crate::{ PrecursorReader, }, ms_data::Spectrum, + utils::find_extension, }; use super::SpectrumReaderTrait; @@ -86,20 +84,3 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { self.path.clone() } } - -fn find_extension(path: impl AsRef, extension: &str) -> Option { - let extension_lower = extension.to_lowercase(); - for entry in fs::read_dir(&path).ok()? { - if let Ok(entry) = entry { - let file_path = entry.path(); - if let Some(file_name) = - file_path.file_name().and_then(|name| name.to_str()) - { - if file_name.to_lowercase().ends_with(&extension_lower) { - return Some(file_path); - } - } - } - } - None -} diff --git a/src/utils.rs b/src/utils.rs index 9aebe98..7021ffd 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1 +1,26 @@ +use std::{ + fs, + path::{Path, PathBuf}, +}; + pub mod vec_utils; + +pub fn find_extension( + path: impl AsRef, + extension: &str, +) -> Option { + let extension_lower = extension.to_lowercase(); + for entry in fs::read_dir(&path).ok()? { + if let Ok(entry) = entry { + let file_path = entry.path(); + if let Some(file_name) = + file_path.file_name().and_then(|name| name.to_str()) + { + if file_name.to_lowercase().ends_with(&extension_lower) { + return Some(file_path); + } + } + } + } + None +} From 1558d0c3abf3cecd5a912fad9fd28bde3e9d5d7c Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 26 Jun 2024 17:03:10 +0200 Subject: [PATCH 076/109] FEAT: started on refactoring of TDFSpectrumReader --- src/file_readers.rs | 34 ++--- src/file_readers/dda_reader.rs | 166 ---------------------- src/io/readers/spectrum_reader.rs | 22 +-- src/io/readers/spectrum_reader/minitdf.rs | 16 +-- src/io/readers/spectrum_reader/tdf.rs | 143 ++++++++++++++++++- 5 files changed, 169 insertions(+), 212 deletions(-) delete mode 100644 src/file_readers/dda_reader.rs diff --git a/src/file_readers.rs b/src/file_readers.rs index 21341c6..78bb379 100644 --- a/src/file_readers.rs +++ b/src/file_readers.rs @@ -1,5 +1,3 @@ -mod dda_reader; - use std::{fs, path::PathBuf}; use crate::io::readers::file_readers::sql_reader::frames::SqlFrame; @@ -7,14 +5,12 @@ use crate::io::readers::SpectrumReader; use crate::{io::readers::FrameReader, Error}; use crate::ms_data::{Frame, Spectrum}; -use dda_reader::DDASpectrumReader; use rayon::iter::ParallelIterator; /// A reader to read [frames](crate::ms_data::Frame) and [spectra](crate::ms_data::Spectrum). pub struct FileReader { frame_reader: Option, - tdf_spectrum_reader: Option, - minitdf_spectrum_reader: Option, + spectrum_reader: Option, } ///NOTE: The function to read a single spectrum is not optimized. @@ -27,20 +23,18 @@ impl FileReader { FileFormat::DFolder(path) => Some(FrameReader::new(&path)), FileFormat::MS2Folder(_) => None, }; - let tdf_spectrum_reader = match &format { - FileFormat::DFolder(path) => Some(DDASpectrumReader::new( - path.to_str().unwrap_or_default().to_string(), - )), - FileFormat::MS2Folder(_) => None, - }; - let minitdf_spectrum_reader = match &format { - FileFormat::DFolder(_) => None, + let spectrum_reader = match &format { + FileFormat::DFolder(path) => { + let mut reader = SpectrumReader::new(path); + reader.calibrate(); + Some(reader) + }, FileFormat::MS2Folder(path) => Some(SpectrumReader::new(path)), }; + Ok(Self { frame_reader, - tdf_spectrum_reader, - minitdf_spectrum_reader, + spectrum_reader, }) } @@ -72,10 +66,7 @@ impl FileReader { } pub fn read_single_spectrum(&self, index: usize) -> Spectrum { - match &self.tdf_spectrum_reader { - Some(reader) => reader.read_single_spectrum(index), - None => self.minitdf_spectrum_reader.as_ref().unwrap().get(index), - } + self.spectrum_reader.as_ref().unwrap().get(index) } ///NOTE: ddaPASEF MS2 spectra are automatically calibrated with @@ -83,10 +74,7 @@ impl FileReader { /// Hence, reading spectra individually through `read_single_spectrum` /// might yield slightly different mz values. pub fn read_all_spectra(&self) -> Vec { - match &self.tdf_spectrum_reader { - Some(reader) => reader.read_all_spectra(), - None => self.minitdf_spectrum_reader.as_ref().unwrap().get_all(), - } + self.spectrum_reader.as_ref().unwrap().get_all() } } diff --git a/src/file_readers/dda_reader.rs b/src/file_readers/dda_reader.rs deleted file mode 100644 index 0d0db1b..0000000 --- a/src/file_readers/dda_reader.rs +++ /dev/null @@ -1,166 +0,0 @@ -use std::path::Path; - -use crate::{ - calibration::Tof2MzCalibrator, - domain_converters::Tof2MzConverter, - // file_readers::ReadableSpectra, - io::readers::{ - file_readers::sql_reader::{ - pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlReader, - }, - FrameReader, MetadataReader, PrecursorReader, - }, - ms_data::{ - Frame, Precursor, RawProcessedSpectrumState, RawSpectrum, - RawSpectrumProcessor, Spectrum, - }, - utils::vec_utils::{argsort, group_and_sum}, -}; - -use rayon::prelude::*; - -// use self::precursors::PrecursorReader; - -const SMOOTHING_WINDOW: u32 = 1; -const CENTROIDING_WINDOW: u32 = 1; - -#[derive(Debug)] -pub struct DDASpectrumReader { - pub path_name: String, - precursor_reader: PrecursorReader, - mz_reader: Tof2MzConverter, - ms2_frames: Vec, - pasef_frames: Vec, - order: Vec, - offsets: Vec, -} - -impl DDASpectrumReader { - pub fn new(path_name: String) -> Self { - let frame_reader: FrameReader = FrameReader::new(&path_name); - let sql_path = Path::new(&path_name).join("analysis.tdf"); - let metadata = MetadataReader::new(&sql_path); - let mz_reader: Tof2MzConverter = metadata.mz_converter; - let tdf_sql_reader = SqlReader::open(&sql_path).unwrap(); - let pasef_frames = - SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader).unwrap(); - let ms2_frames: Vec = - frame_reader.parallel_filter(|x| x.msms_type != 0).collect(); - let precursor_reader: PrecursorReader = PrecursorReader::new(&sql_path); - let pasef_precursors = - &pasef_frames.iter().map(|x| x.precursor).collect(); - let order: Vec = argsort(&pasef_precursors); - let mut offsets: Vec = - Vec::with_capacity(precursor_reader.len() + 1); - offsets.push(0); - for (offset, &index) in order.iter().enumerate().take(order.len() - 1) { - let second_index: usize = order[offset + 1]; - if pasef_precursors[index] != pasef_precursors[second_index] { - offsets.push(offset + 1) - } - } - offsets.push(order.len()); - Self { - path_name, - precursor_reader, - mz_reader, - ms2_frames, - pasef_frames, - order, - offsets, - } - } - - pub fn read_single_raw_spectrum(&self, index: usize) -> RawSpectrum { - let start: usize = self.offsets[index]; - let end: usize = self.offsets[index + 1]; - let selection: &[usize] = &self.order[start..end]; - let mut tof_indices: Vec = vec![]; - let mut intensities: Vec = vec![]; - for &index in selection.iter() { - let frame_index: usize = self.pasef_frames[index].frame - 1; - // TODO OPTIMIZE!!!!! - let frame: &Frame = &self - .ms2_frames - .iter() - .find(|&x| x.index == frame_index + 1) - .unwrap(); - if frame.intensities.len() == 0 { - continue; - } - let scan_start: usize = self.pasef_frames[index].scan_start; - let scan_end: usize = self.pasef_frames[index].scan_end; - let offset_start: usize = frame.scan_offsets[scan_start] as usize; - let offset_end: usize = frame.scan_offsets[scan_end] as usize; - let tof_selection: &[u32] = - &frame.tof_indices[offset_start..offset_end]; - let intensity_selection: &[u32] = - &frame.intensities[offset_start..offset_end]; - tof_indices.extend(tof_selection); - intensities.extend(intensity_selection); - } - let (raw_tof_indices, raw_intensities) = group_and_sum( - tof_indices, - intensities.iter().map(|x| *x as u64).collect(), - ); - let raw_spectrum = RawSpectrum { - tof_indices: raw_tof_indices, - intensities: raw_intensities, - processed_state: RawProcessedSpectrumState::Profile, - index: index, - }; - let spectrum_processer = RawSpectrumProcessor { raw_spectrum }; - spectrum_processer - .smooth(SMOOTHING_WINDOW) - .centroid(CENTROIDING_WINDOW) - .raw_spectrum - } - - pub fn process_single_raw_spectrum( - &self, - raw_spectrum: RawSpectrum, - mz_reader: &Tof2MzConverter, - ) -> Spectrum { - let index: usize = raw_spectrum.index as usize; - let spectrum_processer = RawSpectrumProcessor { raw_spectrum }; - let spectrum = spectrum_processer - .finalize(self.precursor_reader.get(index), mz_reader); - spectrum - } - - pub fn read_single_spectrum(&self, index: usize) -> Spectrum { - let raw_spectrum = self.read_single_raw_spectrum(index); - self.process_single_raw_spectrum(raw_spectrum, &self.mz_reader) - } - - pub fn read_all_spectra(&self) -> Vec { - let raw_spectra: Vec = (0..self.precursor_reader.len()) - .into_par_iter() - .map(|index| self.read_single_raw_spectrum(index)) - .collect(); - let precursors: Vec = (0..self.precursor_reader.len()) - .map(|index| self.precursor_reader.get(index)) - .collect(); - let hits = Tof2MzCalibrator::find_unfragmented_precursors( - &raw_spectra, - &self.mz_reader, - &precursors, - 0.1, - ); - let temp_mz_reader: Tof2MzConverter; - let mz_reader: &Tof2MzConverter; - if hits.len() >= 2 { - temp_mz_reader = Tof2MzConverter::from_pairs(&hits); - mz_reader = &temp_mz_reader; - } else { - mz_reader = &self.mz_reader - } - let spectra: Vec = raw_spectra - .into_par_iter() - .map(|spectrum| { - self.process_single_raw_spectrum(spectrum, &mz_reader) - }) - .collect(); - spectra - } -} diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index fe084ad..0ae9717 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -2,10 +2,9 @@ mod minitdf; mod tdf; use core::fmt; -use std::path::{Path, PathBuf}; - use minitdf::MiniTDFSpectrumReader; use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use std::path::{Path, PathBuf}; use tdf::TDFSpectrumReader; use crate::ms_data::Spectrum; @@ -25,7 +24,7 @@ impl SpectrumReader { let spectrum_reader: Box = match path.as_ref().extension().and_then(|e| e.to_str()) { Some("ms2") => Box::new(MiniTDFSpectrumReader::new(path)), - Some("tdf") => Box::new(TDFSpectrumReader::new(path)), + Some("d") => Box::new(TDFSpectrumReader::new(path)), _ => panic!(), }; Self { spectrum_reader } @@ -35,6 +34,14 @@ impl SpectrumReader { self.spectrum_reader.get(index) } + pub fn get_path(&self) -> PathBuf { + self.spectrum_reader.get_path() + } + + pub fn len(&self) -> usize { + self.spectrum_reader.len() + } + pub fn get_all(&self) -> Vec { let mut spectra: Vec = (0..self.len()) .into_par_iter() @@ -48,12 +55,8 @@ impl SpectrumReader { spectra } - pub fn get_path(&self) -> PathBuf { - self.spectrum_reader.get_path() - } - - pub fn len(&self) -> usize { - self.spectrum_reader.len() + pub fn calibrate(&mut self) { + self.spectrum_reader.calibrate(); } } @@ -61,4 +64,5 @@ trait SpectrumReaderTrait: Sync { fn get(&self, index: usize) -> Spectrum; fn get_path(&self) -> PathBuf; fn len(&self) -> usize; + fn calibrate(&mut self); } diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 2643d73..f237972 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -28,7 +28,11 @@ impl MiniTDFSpectrumReader { let parquet_file_name = find_extension(&path, "ms2spectrum.parquet").unwrap(); let precursor_reader = PrecursorReader::new(&parquet_file_name); - let offsets = Self::get_offsets(&parquet_file_name); + let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name) + .unwrap() + .iter() + .map(|x| x.offset as usize) + .collect(); let bin_file_name = find_extension(&path, "bin").unwrap(); let blob_reader = IndexedTdfBlobReader::new(&bin_file_name, offsets).unwrap(); @@ -38,14 +42,6 @@ impl MiniTDFSpectrumReader { blob_reader, } } - - fn get_offsets(parquet_file_name: impl AsRef) -> Vec { - ParquetPrecursor::from_parquet_file(&parquet_file_name) - .unwrap() - .iter() - .map(|x| x.offset as usize) - .collect() - } } impl SpectrumReaderTrait for MiniTDFSpectrumReader { @@ -83,4 +79,6 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { fn get_path(&self) -> PathBuf { self.path.clone() } + + fn calibrate(&mut self) {} } diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index d3bb41f..0bf42e7 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -1,32 +1,165 @@ +use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::path::{Path, PathBuf}; -use crate::ms_data::Spectrum; +use crate::{ + calibration::Tof2MzCalibrator, + domain_converters::Tof2MzConverter, + io::readers::{ + file_readers::sql_reader::{ + pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlReader, + }, + FrameReader, MetadataReader, PrecursorReader, + }, + ms_data::{ + Frame, Precursor, RawProcessedSpectrumState, RawSpectrum, + RawSpectrumProcessor, Spectrum, + }, + utils::{ + find_extension, + vec_utils::{argsort, group_and_sum}, + }, +}; use super::SpectrumReaderTrait; +const SMOOTHING_WINDOW: u32 = 1; +const CENTROIDING_WINDOW: u32 = 1; + #[derive(Debug)] pub struct TDFSpectrumReader { path: PathBuf, + precursor_reader: PrecursorReader, + mz_reader: Tof2MzConverter, + ms2_frames: Vec, + pasef_frames: Vec, + order: Vec, + offsets: Vec, } impl TDFSpectrumReader { - pub fn new(path: impl AsRef) -> Self { + pub fn new(path_name: impl AsRef) -> Self { + let frame_reader: FrameReader = FrameReader::new(&path_name); + let sql_path = find_extension(&path_name, "analysis.tdf").unwrap(); + let metadata = MetadataReader::new(&sql_path); + let mz_reader: Tof2MzConverter = metadata.mz_converter; + let tdf_sql_reader = SqlReader::open(&sql_path).unwrap(); + let pasef_frames = + SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader).unwrap(); + let ms2_frames: Vec = + frame_reader.parallel_filter(|x| x.msms_type != 0).collect(); + let precursor_reader: PrecursorReader = PrecursorReader::new(&sql_path); + let pasef_precursors = + &pasef_frames.iter().map(|x| x.precursor).collect(); + let order: Vec = argsort(&pasef_precursors); + let mut offsets: Vec = + Vec::with_capacity(precursor_reader.len() + 1); + offsets.push(0); + for (offset, &index) in order.iter().enumerate().take(order.len() - 1) { + let second_index: usize = order[offset + 1]; + if pasef_precursors[index] != pasef_precursors[second_index] { + offsets.push(offset + 1) + } + } + offsets.push(order.len()); Self { - path: path.as_ref().to_path_buf(), + path: path_name.as_ref().to_path_buf(), + precursor_reader, + mz_reader, + ms2_frames, + pasef_frames, + order, + offsets, + } + } + + pub fn read_single_raw_spectrum(&self, index: usize) -> RawSpectrum { + let start: usize = self.offsets[index]; + let end: usize = self.offsets[index + 1]; + let selection: &[usize] = &self.order[start..end]; + let mut tof_indices: Vec = vec![]; + let mut intensities: Vec = vec![]; + for &index in selection.iter() { + let frame_index: usize = self.pasef_frames[index].frame - 1; + // TODO OPTIMIZE!!!!! + let frame: &Frame = &self + .ms2_frames + .iter() + .find(|&x| x.index == frame_index + 1) + .unwrap(); + if frame.intensities.len() == 0 { + continue; + } + let scan_start: usize = self.pasef_frames[index].scan_start; + let scan_end: usize = self.pasef_frames[index].scan_end; + let offset_start: usize = frame.scan_offsets[scan_start] as usize; + let offset_end: usize = frame.scan_offsets[scan_end] as usize; + let tof_selection: &[u32] = + &frame.tof_indices[offset_start..offset_end]; + let intensity_selection: &[u32] = + &frame.intensities[offset_start..offset_end]; + tof_indices.extend(tof_selection); + intensities.extend(intensity_selection); } + let (raw_tof_indices, raw_intensities) = group_and_sum( + tof_indices, + intensities.iter().map(|x| *x as u64).collect(), + ); + let raw_spectrum = RawSpectrum { + tof_indices: raw_tof_indices, + intensities: raw_intensities, + processed_state: RawProcessedSpectrumState::Profile, + index: index, + }; + let spectrum_processer = RawSpectrumProcessor { raw_spectrum }; + spectrum_processer + .smooth(SMOOTHING_WINDOW) + .centroid(CENTROIDING_WINDOW) + .raw_spectrum + } + + pub fn process_single_raw_spectrum( + &self, + raw_spectrum: RawSpectrum, + mz_reader: &Tof2MzConverter, + ) -> Spectrum { + let index: usize = raw_spectrum.index as usize; + let spectrum_processer = RawSpectrumProcessor { raw_spectrum }; + let spectrum = spectrum_processer + .finalize(self.precursor_reader.get(index), mz_reader); + spectrum } } impl SpectrumReaderTrait for TDFSpectrumReader { fn get(&self, index: usize) -> Spectrum { - Spectrum::default() + let raw_spectrum = self.read_single_raw_spectrum(index); + self.process_single_raw_spectrum(raw_spectrum, &self.mz_reader) } fn len(&self) -> usize { - 0 //TODO + self.precursor_reader.len() } fn get_path(&self) -> PathBuf { self.path.clone() } + + fn calibrate(&mut self) { + let raw_spectra: Vec = (0..self.precursor_reader.len()) + .into_par_iter() + .map(|index| self.read_single_raw_spectrum(index)) + .collect(); + let precursors: Vec = (0..self.precursor_reader.len()) + .map(|index| self.precursor_reader.get(index)) + .collect(); + let hits = Tof2MzCalibrator::find_unfragmented_precursors( + &raw_spectra, + &self.mz_reader, + &precursors, + 0.1, + ); + if hits.len() >= 2 { + self.mz_reader = Tof2MzConverter::from_pairs(&hits); + } + } } From 6266264bce7a2e9fcd9044cceabb37e9eed84a1e Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 26 Jun 2024 17:08:17 +0200 Subject: [PATCH 077/109] CHORE: unify path reading functions --- src/io/readers/frame_reader.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index 119a485..1ebcea4 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -2,7 +2,10 @@ use std::path::{Path, PathBuf}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; -use crate::ms_data::{AcquisitionType, Frame, MSLevel}; +use crate::{ + ms_data::{AcquisitionType, Frame, MSLevel}, + utils::find_extension, +}; use super::file_readers::{ sql_reader::{frames::SqlFrame, ReadableSqlTable, SqlReader}, @@ -19,10 +22,10 @@ pub struct FrameReader { impl FrameReader { pub fn new(path: impl AsRef) -> Self { - let sql_path = path.as_ref().join("analysis.tdf"); + let sql_path = find_extension(&path, "analysis.tdf").unwrap(); let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); let sql_frames = SqlFrame::from_sql_reader(&tdf_sql_reader).unwrap(); - let bin_path = path.as_ref().join("analysis.tdf_bin"); + let bin_path = find_extension(&path, "analysis.tdf_bin").unwrap(); let tdf_bin_reader: TdfBlobReader = TdfBlobReader::new(bin_path).unwrap(); let acquisition = if sql_frames.iter().any(|x| x.msms_type == 8) { From d8114ad8da325aa120bfa2e2fb04b5489bbd4b49 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 1 Jul 2024 11:11:11 +0200 Subject: [PATCH 078/109] FIX: properly process empty frames --- src/io/readers/frame_reader.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index 1ebcea4..3516d95 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -56,10 +56,11 @@ impl FrameReader { pub fn get(&self, index: usize) -> Frame { let mut frame: Frame = Frame::default(); let sql_frame = &self.sql_frames[index]; - let blob = self - .tdf_bin_reader - .get_blob(sql_frame.binary_offset) - .unwrap(); + frame.index = sql_frame.id; + let blob = match self.tdf_bin_reader.get_blob(sql_frame.binary_offset) { + Ok(blob) => blob, + Err(_) => return frame, + }; let scan_count: usize = blob.get(0) as usize; let peak_count: usize = (blob.len() - scan_count) / 2; frame.scan_offsets = read_scan_offsets(scan_count, peak_count, &blob); @@ -71,7 +72,6 @@ impl FrameReader { &frame.scan_offsets, ); frame.ms_level = MSLevel::read_from_msms_type(sql_frame.msms_type); - frame.index = sql_frame.id; frame.rt = sql_frame.rt; frame.acquisition_type = self.acquisition; frame.intensity_correction_factor = 1.0 / sql_frame.accumulation_time; From da15c25edc6adfc393b8aa01c1e37b17525fffb6 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 11:29:06 +0200 Subject: [PATCH 079/109] CHORE: simplification of vec utils filter with mask --- src/utils/vec_utils.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/utils/vec_utils.rs b/src/utils/vec_utils.rs index e6e894e..724fc3c 100644 --- a/src/utils/vec_utils.rs +++ b/src/utils/vec_utils.rs @@ -61,9 +61,8 @@ pub fn find_sparse_local_maxima_mask( } pub fn filter_with_mask(vec: &Vec, mask: &Vec) -> Vec { - vec.iter() - .zip(mask.iter()) - .filter(|(_, y_elem)| **y_elem) - .map(|(&x_elem, _)| x_elem) + (0..vec.len()) + .filter(|&x| mask[x]) + .map(|x| vec[x]) .collect() } From 332e9aa5e4fbc22fbfc9596247fac99ad25c4c52 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 11:29:48 +0200 Subject: [PATCH 080/109] FEAT: added reading of frame groups and quad settings to sql reader --- src/io/readers/file_readers/sql_reader.rs | 2 ++ .../file_readers/sql_reader/frame_groups.rs | 20 +++++++++++++ .../file_readers/sql_reader/quad_settings.rs | 28 +++++++++++++++++++ 3 files changed, 50 insertions(+) create mode 100644 src/io/readers/file_readers/sql_reader/frame_groups.rs create mode 100644 src/io/readers/file_readers/sql_reader/quad_settings.rs diff --git a/src/io/readers/file_readers/sql_reader.rs b/src/io/readers/file_readers/sql_reader.rs index 76bdb5b..61cffc6 100644 --- a/src/io/readers/file_readers/sql_reader.rs +++ b/src/io/readers/file_readers/sql_reader.rs @@ -1,7 +1,9 @@ +pub mod frame_groups; pub mod frames; pub mod metadata; pub mod pasef_frame_msms; pub mod precursors; +pub mod quad_settings; use std::{collections::HashMap, path::Path}; diff --git a/src/io/readers/file_readers/sql_reader/frame_groups.rs b/src/io/readers/file_readers/sql_reader/frame_groups.rs new file mode 100644 index 0000000..a46e72e --- /dev/null +++ b/src/io/readers/file_readers/sql_reader/frame_groups.rs @@ -0,0 +1,20 @@ +use super::ReadableSqlTable; + +#[derive(Debug, PartialEq)] +pub struct SqlWindowGroup { + pub frame: usize, + pub window_group: u8, +} + +impl ReadableSqlTable for SqlWindowGroup { + fn get_sql_query() -> String { + "SELECT Frame, WindowGroup FROM DiaFrameMsMsInfo".to_string() + } + + fn from_sql_row(row: &rusqlite::Row) -> Self { + Self { + frame: row.get(0).unwrap_or_default(), + window_group: row.get(1).unwrap_or_default(), + } + } +} diff --git a/src/io/readers/file_readers/sql_reader/quad_settings.rs b/src/io/readers/file_readers/sql_reader/quad_settings.rs new file mode 100644 index 0000000..d7d69b4 --- /dev/null +++ b/src/io/readers/file_readers/sql_reader/quad_settings.rs @@ -0,0 +1,28 @@ +use super::ReadableSqlTable; + +#[derive(Debug, PartialEq)] +pub struct SqlQuadSettings { + pub window_group: usize, + pub scan_start: usize, + pub scan_end: usize, + pub mz_center: f64, + pub mz_width: f64, + pub collision_energy: f64, +} + +impl ReadableSqlTable for SqlQuadSettings { + fn get_sql_query() -> String { + "SELECT WindowGroup, ScanNumBegin, ScanNumEnd, IsolationMz, IsolationWidth, CollisionEnergy FROM DiaFrameMsMsWindows".to_string() + } + + fn from_sql_row(row: &rusqlite::Row) -> Self { + Self { + window_group: row.get(0).unwrap_or_default(), + scan_start: row.get(1).unwrap_or_default(), + scan_end: row.get(2).unwrap_or_default(), + mz_center: row.get(3).unwrap_or_default(), + mz_width: row.get(4).unwrap_or_default(), + collision_energy: row.get(5).unwrap_or_default(), + } + } +} From cd47e8a5c86e384c1407bbe8342076fa829a27de Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 11:30:16 +0200 Subject: [PATCH 081/109] CHORE: udated quad settings types --- src/ms_data/quadrupole.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ms_data/quadrupole.rs b/src/ms_data/quadrupole.rs index 84e1ced..19c8d94 100644 --- a/src/ms_data/quadrupole.rs +++ b/src/ms_data/quadrupole.rs @@ -1,10 +1,10 @@ /// The quadrupole settings used for fragmentation. #[derive(Debug, Default, Clone, PartialEq)] pub struct QuadrupoleSettings { - is_used: bool, - scan_starts: Vec, - scan_ends: Vec, - isolation_mz: Vec, - isolation_width: Vec, - collision_energy: Vec, + pub index: usize, + pub scan_starts: Vec, + pub scan_ends: Vec, + pub isolation_mz: Vec, + pub isolation_width: Vec, + pub collision_energy: Vec, } From f300bb4085fedcc4ce59f2ce5b6bf973d2449290 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 11:31:04 +0200 Subject: [PATCH 082/109] FEAT: added frame_group to frames --- src/ms_data/frames.rs | 1 + tests/frame_readers.rs | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/src/ms_data/frames.rs b/src/ms_data/frames.rs index be2f2e2..86454f1 100644 --- a/src/ms_data/frames.rs +++ b/src/ms_data/frames.rs @@ -13,6 +13,7 @@ pub struct Frame { pub ms_level: MSLevel, pub quadrupole_settings: Arc, pub intensity_correction_factor: f64, + pub window_group: u8, } /// The MS level used. diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index 28bcff8..8804a32 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -31,6 +31,7 @@ fn tdf_reader_frames1() { quadrupole_settings: Arc::new(QuadrupoleSettings::default()), acquisition_type: AcquisitionType::DDAPASEF, intensity_correction_factor: 1.0 / 100.0, + window_group: 0, }, // Frame::default(), Frame { @@ -43,6 +44,7 @@ fn tdf_reader_frames1() { quadrupole_settings: Arc::new(QuadrupoleSettings::default()), acquisition_type: AcquisitionType::DDAPASEF, intensity_correction_factor: 1.0 / 100.0, + window_group: 0, }, // Frame::default(), ]; @@ -73,6 +75,7 @@ fn tdf_reader_frames2() { quadrupole_settings: Arc::new(QuadrupoleSettings::default()), acquisition_type: AcquisitionType::DDAPASEF, intensity_correction_factor: 1.0 / 100.0, + window_group: 0, }, // Frame::default(), Frame { @@ -85,9 +88,12 @@ fn tdf_reader_frames2() { quadrupole_settings: Arc::new(QuadrupoleSettings::default()), acquisition_type: AcquisitionType::DDAPASEF, intensity_correction_factor: 1.0 / 100.0, + window_group: 0, }, ]; for i in 0..expected.len() { assert_eq!(&frames[i], &expected[i]) } } + +// TODO test for DIA From 4bb2ce60ffe23357d45c242a0bc20ebea5eaa522 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 11:31:33 +0200 Subject: [PATCH 083/109] PERF: removed inline from tdf blobs to let compiler decide --- src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs b/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs index e661cd0..b75d494 100644 --- a/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs +++ b/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs @@ -10,7 +10,6 @@ impl TdfBlob { Self { bytes } } - #[inline(always)] pub fn get(&self, index: usize) -> u32 { debug_assert!(index < self.len()); Self::concatenate_bytes( @@ -21,7 +20,6 @@ impl TdfBlob { ) } - #[inline(always)] fn concatenate_bytes(b1: u8, b2: u8, b3: u8, b4: u8) -> u32 { b1 as u32 | ((b2 as u32) << 8) From 7d7e40d6a03584f4422c1e26c45eb8d1c3b9db5d Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 11:32:46 +0200 Subject: [PATCH 084/109] DOCS: Updated file readerd docs and removed always calibrate option --- src/file_readers.rs | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/file_readers.rs b/src/file_readers.rs index 78bb379..420fe6e 100644 --- a/src/file_readers.rs +++ b/src/file_readers.rs @@ -13,9 +13,6 @@ pub struct FileReader { spectrum_reader: Option, } -///NOTE: The function to read a single spectrum is not optimized. -/// In case many spectra are required, it is best to use -/// any of the functions that directly return a `Vec`. impl FileReader { pub fn new>(path_name: T) -> Result { let format: FileFormat = FileFormat::parse(path_name)?; @@ -25,13 +22,12 @@ impl FileReader { }; let spectrum_reader = match &format { FileFormat::DFolder(path) => { - let mut reader = SpectrumReader::new(path); - reader.calibrate(); + let reader = SpectrumReader::new(path); + // reader.calibrate(); Some(reader) }, FileFormat::MS2Folder(path) => Some(SpectrumReader::new(path)), }; - Ok(Self { frame_reader, spectrum_reader, @@ -69,10 +65,6 @@ impl FileReader { self.spectrum_reader.as_ref().unwrap().get(index) } - ///NOTE: ddaPASEF MS2 spectra are automatically calibrated with - /// all unfragmented precursor signals. - /// Hence, reading spectra individually through `read_single_spectrum` - /// might yield slightly different mz values. pub fn read_all_spectra(&self) -> Vec { self.spectrum_reader.as_ref().unwrap().get_all() } From a0e6427406931f68c2bc2b27b8d2b7a97bd07a09 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 11:33:42 +0200 Subject: [PATCH 085/109] FEAT: Added quad setting (dia) data to frame reader --- src/io/readers/frame_reader.rs | 93 ++++++++++++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 4 deletions(-) diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index 3516d95..5da8917 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -1,14 +1,21 @@ -use std::path::{Path, PathBuf}; +use std::{ + path::{Path, PathBuf}, + sync::Arc, + vec, +}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use crate::{ - ms_data::{AcquisitionType, Frame, MSLevel}, - utils::find_extension, + ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}, + utils::{find_extension, vec_utils::argsort}, }; use super::file_readers::{ - sql_reader::{frames::SqlFrame, ReadableSqlTable, SqlReader}, + sql_reader::{ + frame_groups::SqlWindowGroup, frames::SqlFrame, + quad_settings::SqlQuadSettings, ReadableSqlTable, SqlReader, + }, tdf_blob_reader::{TdfBlob, TdfBlobReader}, }; @@ -18,6 +25,8 @@ pub struct FrameReader { tdf_bin_reader: TdfBlobReader, sql_frames: Vec, acquisition: AcquisitionType, + window_groups: Vec, + quadrupole_settings: Vec>, } impl FrameReader { @@ -35,11 +44,78 @@ impl FrameReader { } else { AcquisitionType::Unknown }; + let mut window_groups = vec![0; sql_frames.len()]; + let mut quadrupole_settings: Vec; + if acquisition == AcquisitionType::DIAPASEF { + for window_group in + SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap() + { + window_groups[window_group.frame - 1] = + window_group.window_group; + } + let sql_quadrupole_settings = + SqlQuadSettings::from_sql_reader(&tdf_sql_reader).unwrap(); + let window_group_count = + *window_groups.iter().max().unwrap() as usize; + quadrupole_settings = (0..window_group_count) + .map(|window_group| { + let mut quad = QuadrupoleSettings::default(); + quad.index = window_group + 1; + quad + }) + .collect(); + for window_group in sql_quadrupole_settings { + let group = window_group.window_group - 1; + quadrupole_settings[group] + .scan_starts + .push(window_group.scan_start); + quadrupole_settings[group] + .scan_ends + .push(window_group.scan_end); + quadrupole_settings[group] + .collision_energy + .push(window_group.collision_energy); + quadrupole_settings[group] + .isolation_mz + .push(window_group.mz_center); + quadrupole_settings[group] + .isolation_width + .push(window_group.mz_width); + } + quadrupole_settings = quadrupole_settings + .into_iter() + .map(|mut window| { + let order = argsort(&window.scan_starts); + window.isolation_mz = + order.iter().map(|&i| window.isolation_mz[i]).collect(); + window.isolation_width = order + .iter() + .map(|&i| window.isolation_width[i]) + .collect(); + window.collision_energy = order + .iter() + .map(|&i| window.collision_energy[i]) + .collect(); + window.scan_starts = + order.iter().map(|&i| window.scan_starts[i]).collect(); + window.scan_ends = + order.iter().map(|&i| window.scan_ends[i]).collect(); + window + }) + .collect(); + } else { + quadrupole_settings = vec![]; + } Self { path: path.as_ref().to_path_buf(), tdf_bin_reader, sql_frames, acquisition, + window_groups, + quadrupole_settings: quadrupole_settings + .into_iter() + .map(|x| Arc::new(x)) + .collect(), } } @@ -75,6 +151,15 @@ impl FrameReader { frame.rt = sql_frame.rt; frame.acquisition_type = self.acquisition; frame.intensity_correction_factor = 1.0 / sql_frame.accumulation_time; + // TODO: implement intensity reader + if (self.acquisition == AcquisitionType::DIAPASEF) + & (frame.ms_level == MSLevel::MS2) + { + let window_group = self.window_groups[index]; + frame.window_group = window_group; + frame.quadrupole_settings = + self.quadrupole_settings[window_group as usize - 1].clone(); + } frame } From 9371c8efe118d31266ce34ef5a057421dac3592c Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 11:34:47 +0200 Subject: [PATCH 086/109] FEAT: reformatted spectrum reader (still not done) --- src/calibration.rs | 28 ---- src/io/readers/spectrum_reader/tdf.rs | 222 +++++++++++++++++--------- src/lib.rs | 1 - src/ms_data/spectra.rs | 107 ------------- 4 files changed, 148 insertions(+), 210 deletions(-) delete mode 100644 src/calibration.rs diff --git a/src/calibration.rs b/src/calibration.rs deleted file mode 100644 index 2e08ad2..0000000 --- a/src/calibration.rs +++ /dev/null @@ -1,28 +0,0 @@ -use crate::{ - domain_converters::{ConvertableDomain, Tof2MzConverter}, - ms_data::{Precursor, RawSpectrum}, -}; - -pub struct Tof2MzCalibrator; - -impl Tof2MzCalibrator { - pub fn find_unfragmented_precursors( - spectra: &Vec, - mz_reader: &Tof2MzConverter, - precursors: &Vec, - tolerance: f64, - ) -> Vec<(f64, u32)> { - let mut hits: Vec<(f64, u32)> = vec![]; - for (index, spectrum) in spectra.iter().enumerate() { - let precursor_mz: f64 = precursors[index].mz; - for &tof_index in spectrum.tof_indices.iter() { - let mz = mz_reader.convert(tof_index); - if (mz - precursor_mz).abs() < tolerance { - let hit = (precursor_mz, tof_index); - hits.push(hit); - } - } - } - hits - } -} diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 0bf42e7..a8c8934 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -2,21 +2,20 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::path::{Path, PathBuf}; use crate::{ - calibration::Tof2MzCalibrator, - domain_converters::Tof2MzConverter, + domain_converters::{ConvertableDomain, Tof2MzConverter}, io::readers::{ file_readers::sql_reader::{ pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlReader, }, FrameReader, MetadataReader, PrecursorReader, }, - ms_data::{ - Frame, Precursor, RawProcessedSpectrumState, RawSpectrum, - RawSpectrumProcessor, Spectrum, - }, + ms_data::{Precursor, Spectrum}, utils::{ find_extension, - vec_utils::{argsort, group_and_sum}, + vec_utils::{ + argsort, filter_with_mask, find_sparse_local_maxima_mask, + group_and_sum, + }, }, }; @@ -24,16 +23,15 @@ use super::SpectrumReaderTrait; const SMOOTHING_WINDOW: u32 = 1; const CENTROIDING_WINDOW: u32 = 1; +const CALIBRATION_TOLERANCE: f64 = 0.1; #[derive(Debug)] pub struct TDFSpectrumReader { path: PathBuf, precursor_reader: PrecursorReader, mz_reader: Tof2MzConverter, - ms2_frames: Vec, - pasef_frames: Vec, - order: Vec, - offsets: Vec, + frame_reader: FrameReader, + spectrum_frame_index_reader: SpectrumFrameIndexReader, } impl TDFSpectrumReader { @@ -43,54 +41,32 @@ impl TDFSpectrumReader { let metadata = MetadataReader::new(&sql_path); let mz_reader: Tof2MzConverter = metadata.mz_converter; let tdf_sql_reader = SqlReader::open(&sql_path).unwrap(); - let pasef_frames = - SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader).unwrap(); - let ms2_frames: Vec = - frame_reader.parallel_filter(|x| x.msms_type != 0).collect(); let precursor_reader: PrecursorReader = PrecursorReader::new(&sql_path); - let pasef_precursors = - &pasef_frames.iter().map(|x| x.precursor).collect(); - let order: Vec = argsort(&pasef_precursors); - let mut offsets: Vec = - Vec::with_capacity(precursor_reader.len() + 1); - offsets.push(0); - for (offset, &index) in order.iter().enumerate().take(order.len() - 1) { - let second_index: usize = order[offset + 1]; - if pasef_precursors[index] != pasef_precursors[second_index] { - offsets.push(offset + 1) - } - } - offsets.push(order.len()); + let spectrum_frame_index_reader = + SpectrumFrameIndexReader::new(&tdf_sql_reader); Self { path: path_name.as_ref().to_path_buf(), precursor_reader, mz_reader, - ms2_frames, - pasef_frames, - order, - offsets, + frame_reader, + spectrum_frame_index_reader, } } pub fn read_single_raw_spectrum(&self, index: usize) -> RawSpectrum { - let start: usize = self.offsets[index]; - let end: usize = self.offsets[index + 1]; - let selection: &[usize] = &self.order[start..end]; let mut tof_indices: Vec = vec![]; let mut intensities: Vec = vec![]; - for &index in selection.iter() { - let frame_index: usize = self.pasef_frames[index].frame - 1; - // TODO OPTIMIZE!!!!! - let frame: &Frame = &self - .ms2_frames - .iter() - .find(|&x| x.index == frame_index + 1) - .unwrap(); + for pasef_frame in self + .spectrum_frame_index_reader + .iterate_over_pasef_frames(index) + { + let frame_index: usize = pasef_frame.frame - 1; + let frame = self.frame_reader.get(frame_index); if frame.intensities.len() == 0 { continue; } - let scan_start: usize = self.pasef_frames[index].scan_start; - let scan_end: usize = self.pasef_frames[index].scan_end; + let scan_start: usize = pasef_frame.scan_start; + let scan_end: usize = pasef_frame.scan_end; let offset_start: usize = frame.scan_offsets[scan_start] as usize; let offset_end: usize = frame.scan_offsets[scan_end] as usize; let tof_selection: &[u32] = @@ -107,33 +83,20 @@ impl TDFSpectrumReader { let raw_spectrum = RawSpectrum { tof_indices: raw_tof_indices, intensities: raw_intensities, - processed_state: RawProcessedSpectrumState::Profile, index: index, }; - let spectrum_processer = RawSpectrumProcessor { raw_spectrum }; - spectrum_processer + raw_spectrum .smooth(SMOOTHING_WINDOW) .centroid(CENTROIDING_WINDOW) - .raw_spectrum - } - - pub fn process_single_raw_spectrum( - &self, - raw_spectrum: RawSpectrum, - mz_reader: &Tof2MzConverter, - ) -> Spectrum { - let index: usize = raw_spectrum.index as usize; - let spectrum_processer = RawSpectrumProcessor { raw_spectrum }; - let spectrum = spectrum_processer - .finalize(self.precursor_reader.get(index), mz_reader); - spectrum } } impl SpectrumReaderTrait for TDFSpectrumReader { fn get(&self, index: usize) -> Spectrum { let raw_spectrum = self.read_single_raw_spectrum(index); - self.process_single_raw_spectrum(raw_spectrum, &self.mz_reader) + let spectrum = raw_spectrum + .finalize(self.precursor_reader.get(index), &self.mz_reader); + spectrum } fn len(&self) -> usize { @@ -145,21 +108,132 @@ impl SpectrumReaderTrait for TDFSpectrumReader { } fn calibrate(&mut self) { - let raw_spectra: Vec = (0..self.precursor_reader.len()) + let hits: Vec<(f64, u32)> = (0..self.precursor_reader.len()) .into_par_iter() - .map(|index| self.read_single_raw_spectrum(index)) - .collect(); - let precursors: Vec = (0..self.precursor_reader.len()) - .map(|index| self.precursor_reader.get(index)) - .collect(); - let hits = Tof2MzCalibrator::find_unfragmented_precursors( - &raw_spectra, - &self.mz_reader, - &precursors, - 0.1, - ); + .map(|index| { + let spectrum = self.read_single_raw_spectrum(index); + let precursor = self.precursor_reader.get(index); + let precursor_mz: f64 = precursor.mz; + let mut result: Vec<(f64, u32)> = vec![]; + for &tof_index in spectrum.tof_indices.iter() { + let mz = self.mz_reader.convert(tof_index); + if (mz - precursor_mz).abs() < CALIBRATION_TOLERANCE { + let hit = (precursor_mz, tof_index); + result.push(hit); + } + } + result + }) + .reduce(Vec::new, |mut acc, mut vec| { + acc.append(&mut vec); // Concatenate vectors + acc + }); if hits.len() >= 2 { self.mz_reader = Tof2MzConverter::from_pairs(&hits); } } } + +#[derive(Debug)] +struct SpectrumFrameIndexReader { + order: Vec, + offsets: Vec, + pasef_frames: Vec, +} + +impl SpectrumFrameIndexReader { + fn new(tdf_sql_reader: &SqlReader) -> Self { + let pasef_frames = + SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader).unwrap(); + let pasef_precursors = + &pasef_frames.iter().map(|x| x.precursor).collect(); + let order: Vec = argsort(&pasef_precursors); + let max_precursor = pasef_precursors.iter().max().unwrap(); + let mut offsets: Vec = Vec::with_capacity(max_precursor + 1); + offsets.push(0); + for (offset, &index) in order.iter().enumerate().take(order.len() - 1) { + let second_index: usize = order[offset + 1]; + if pasef_precursors[index] != pasef_precursors[second_index] { + offsets.push(offset + 1) + } + } + offsets.push(order.len()); + Self { + order, + offsets, + pasef_frames, + } + } + + fn iterate_over_pasef_frames( + &self, + index: usize, + ) -> impl Iterator { + let start: usize = self.offsets[index]; + let end: usize = self.offsets[index + 1]; + self.order[start..end] + .iter() + .map(|&x| &self.pasef_frames[x]) + } +} + +#[derive(Debug, PartialEq, Default, Clone)] +pub(crate) struct RawSpectrum { + pub tof_indices: Vec, + pub intensities: Vec, + pub index: usize, +} + +impl RawSpectrum { + pub fn smooth(mut self, window: u32) -> Self { + let mut smooth_intensities: Vec = self.intensities.clone(); + for (current_index, current_tof) in self.tof_indices.iter().enumerate() + { + let current_intensity: u64 = self.intensities[current_index]; + for (_next_index, next_tof) in + self.tof_indices[current_index + 1..].iter().enumerate() + { + let next_index: usize = _next_index + current_index + 1; + let next_intensity: u64 = self.intensities[next_index]; + if (next_tof - current_tof) <= window { + smooth_intensities[current_index] += next_intensity; + smooth_intensities[next_index] += current_intensity; + } else { + break; + } + } + } + self.intensities = smooth_intensities; + self + } + + pub fn centroid(mut self, window: u32) -> Self { + let local_maxima: Vec = find_sparse_local_maxima_mask( + &self.tof_indices, + &self.intensities, + window, + ); + self.tof_indices = filter_with_mask(&self.tof_indices, &local_maxima); + self.intensities = filter_with_mask(&self.intensities, &local_maxima); + self + } + + pub fn finalize( + &self, + precursor: Precursor, + mz_reader: &Tof2MzConverter, + ) -> Spectrum { + let index = self.index; + let spectrum: Spectrum = Spectrum { + mz_values: self + .tof_indices + .iter() + .map(|&x| mz_reader.convert(x)) + .collect(), + intensities: self.intensities.iter().map(|x| *x as f64).collect(), + precursor: precursor, + index: index, + }; + spectrum + } +} diff --git a/src/lib.rs b/src/lib.rs index fafc8a4..085bee8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,7 +21,6 @@ //! * *.ms2spectrum.bin //! * *.ms2spectrum.parquet -mod calibration; pub mod domain_converters; mod errors; mod file_readers; diff --git a/src/ms_data/spectra.rs b/src/ms_data/spectra.rs index 7828f01..90d6e0a 100644 --- a/src/ms_data/spectra.rs +++ b/src/ms_data/spectra.rs @@ -1,112 +1,5 @@ -use crate::{ - domain_converters::{ConvertableDomain, Tof2MzConverter}, - utils::vec_utils::{filter_with_mask, find_sparse_local_maxima_mask}, -}; - use super::Precursor; -pub(crate) struct RawSpectrumProcessor { - pub raw_spectrum: RawSpectrum, -} - -impl RawSpectrumProcessor { - pub fn smooth(mut self, window: u32) -> Self { - let mut smooth_intensities: Vec = - self.raw_spectrum.intensities.clone(); - for (current_index, current_tof) in - self.raw_spectrum.tof_indices.iter().enumerate() - { - let current_intensity: u64 = - self.raw_spectrum.intensities[current_index]; - for (_next_index, next_tof) in self.raw_spectrum.tof_indices - [current_index + 1..] - .iter() - .enumerate() - { - let next_index: usize = _next_index + current_index + 1; - let next_intensity: u64 = - self.raw_spectrum.intensities[next_index]; - if (next_tof - current_tof) <= window { - smooth_intensities[current_index] += next_intensity; - smooth_intensities[next_index] += current_intensity; - } else { - break; - } - } - } - self.raw_spectrum.intensities = smooth_intensities; - self.raw_spectrum.processed_state = - RawProcessedSpectrumState::SmoothedProfile; - self - } - - pub fn centroid(mut self, window: u32) -> Self { - let local_maxima: Vec = self.find_local_maxima(window); - self.raw_spectrum.tof_indices = - filter_with_mask(&self.raw_spectrum.tof_indices, &local_maxima); - self.raw_spectrum.intensities = - filter_with_mask(&self.raw_spectrum.intensities, &local_maxima); - self.raw_spectrum.processed_state = - RawProcessedSpectrumState::Centroided; - self - } - - fn find_local_maxima(&self, window: u32) -> Vec { - find_sparse_local_maxima_mask( - &self.raw_spectrum.tof_indices, - &self.raw_spectrum.intensities, - window, - ) - } - - pub fn finalize( - &self, - precursor: Precursor, - mz_reader: &Tof2MzConverter, - ) -> Spectrum { - let index = self.raw_spectrum.index; - let spectrum: Spectrum = Spectrum { - mz_values: self - .raw_spectrum - .tof_indices - .iter() - .map(|&x| mz_reader.convert(x)) - .collect(), - intensities: self - .raw_spectrum - .intensities - .iter() - .map(|x| *x as f64) - .collect(), - precursor: precursor, - index: index, - }; - spectrum - } -} - -#[derive(Debug, PartialEq, Clone)] -pub(crate) enum RawProcessedSpectrumState { - Profile, - SmoothedProfile, - Centroided, - Unprocessed, -} - -impl Default for RawProcessedSpectrumState { - fn default() -> Self { - Self::Unprocessed - } -} - -#[derive(Debug, PartialEq, Default, Clone)] -pub(crate) struct RawSpectrum { - pub tof_indices: Vec, - pub intensities: Vec, - pub processed_state: RawProcessedSpectrumState, - pub index: usize, -} - /// An MS2 spectrum with centroided mz values and summed intensities. #[derive(Debug, PartialEq, Default)] pub struct Spectrum { From 668d74ebc4ee3b465f43e48af7524dec328dc18f Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 11:46:35 +0200 Subject: [PATCH 087/109] FIX: read im values from file rather than hardcoded --- src/io/readers/metadata_reader.rs | 11 +++++++++-- tests/spectrum_readers.rs | 6 +++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/io/readers/metadata_reader.rs b/src/io/readers/metadata_reader.rs index fdc0d6a..0717646 100644 --- a/src/io/readers/metadata_reader.rs +++ b/src/io/readers/metadata_reader.rs @@ -22,7 +22,7 @@ impl MetadataReader { Metadata { path: path.as_ref().to_path_buf(), rt_converter: get_rt_converter(&tdf_sql_reader), - im_converter: get_im_converter(&sql_metadata), + im_converter: get_im_converter(&sql_metadata, &tdf_sql_reader), mz_converter: get_mz_converter(&sql_metadata), } } @@ -61,8 +61,15 @@ fn get_mz_converter(sql_metadata: &HashMap) -> Tof2MzConverter { fn get_im_converter( sql_metadata: &HashMap, + tdf_sql_reader: &SqlReader, ) -> Scan2ImConverter { - let scan_max_index: u32 = 927; //TODO + let scan_counts: Vec = tdf_sql_reader + .read_column_from_table("NumScans", "Frames") + .unwrap(); + let scan_max_index = *scan_counts.iter().max().unwrap(); + println!("{:?}", scan_counts); + println!("{}", scan_max_index); + // let scan_max_index = 927; let im_min: f64 = sql_metadata .get("OneOverK0AcqRangeLower") .unwrap() diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index fe149c2..2a9501f 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -74,7 +74,7 @@ fn tdf_reader_dda() { precursor: Precursor { mz: 500.0, rt: 0.2, - im: 1.4989212513484358, + im: 1.25, charge: 2, intensity: 10.0, index: 1, @@ -89,7 +89,7 @@ fn tdf_reader_dda() { precursor: Precursor { mz: 501.0, rt: 0.2, - im: 1.4978425026968716, + im: 1.0, charge: 3, intensity: 10.0, index: 2, @@ -104,7 +104,7 @@ fn tdf_reader_dda() { precursor: Precursor { mz: 502.0, rt: 0.4, - im: 1.4989212513484358, + im: 1.25, charge: 2, intensity: 10.0, index: 3, From 0ddd915b4f78cac6c8703d8ce799ff168e88dc64 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 13:09:02 +0200 Subject: [PATCH 088/109] FIX: remove print from metadata --- src/io/readers/metadata_reader.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/io/readers/metadata_reader.rs b/src/io/readers/metadata_reader.rs index 0717646..81c8e11 100644 --- a/src/io/readers/metadata_reader.rs +++ b/src/io/readers/metadata_reader.rs @@ -67,8 +67,6 @@ fn get_im_converter( .read_column_from_table("NumScans", "Frames") .unwrap(); let scan_max_index = *scan_counts.iter().max().unwrap(); - println!("{:?}", scan_counts); - println!("{}", scan_max_index); // let scan_max_index = 927; let im_min: f64 = sql_metadata .get("OneOverK0AcqRangeLower") From bdb7eedd44eba1a9f65e2ea4a028c8896b29c132 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 13:26:19 +0200 Subject: [PATCH 089/109] FEAT: added CE to spectrum rather than precursor --- src/io/readers/precursor_reader/minitdf.rs | 1 - src/io/readers/precursor_reader/tdf.rs | 8 +------- src/io/readers/spectrum_reader/minitdf.rs | 9 +++++++++ src/io/readers/spectrum_reader/tdf.rs | 5 +++++ src/io/writers/mgf.rs | 2 +- src/ms_data/precursors.rs | 1 - src/ms_data/spectra.rs | 1 + tests/spectrum_readers.rs | 10 +++++----- 8 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/io/readers/precursor_reader/minitdf.rs b/src/io/readers/precursor_reader/minitdf.rs index c5e1728..f9ef67f 100644 --- a/src/io/readers/precursor_reader/minitdf.rs +++ b/src/io/readers/precursor_reader/minitdf.rs @@ -37,7 +37,6 @@ impl PrecursorReaderTrait for MiniTDFPrecursorReader { intensity: x.intensity, index: x.index, frame_index: x.frame_index, - collision_energy: x.collision_energy, } } diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs index 95b2417..964d466 100644 --- a/src/io/readers/precursor_reader/tdf.rs +++ b/src/io/readers/precursor_reader/tdf.rs @@ -52,14 +52,8 @@ impl PrecursorReaderTrait for TDFPrecursorReader { precursor.im = self.im_converter.convert(scan_id); precursor.charge = sql_precursor.charge; precursor.intensity = sql_precursor.intensity; - precursor.index = index + 1; //TODO; + precursor.index = index + 1; precursor.frame_index = frame_id; - // TODO OPTIMIZE!!!!! - // precursor.collision_energy = pasef_frames - // .iter() - // .find(|&x| x.precursor == index + 1) - // .unwrap() - // .collision_energy; precursor } diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index f237972..dee7691 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -21,6 +21,7 @@ pub struct MiniTDFSpectrumReader { path: PathBuf, precursor_reader: PrecursorReader, blob_reader: IndexedTdfBlobReader, + collision_energies: Vec, } impl MiniTDFSpectrumReader { @@ -33,6 +34,12 @@ impl MiniTDFSpectrumReader { .iter() .map(|x| x.offset as usize) .collect(); + let collision_energies = + ParquetPrecursor::from_parquet_file(&parquet_file_name) + .unwrap() + .iter() + .map(|x| x.collision_energy) + .collect(); let bin_file_name = find_extension(&path, "bin").unwrap(); let blob_reader = IndexedTdfBlobReader::new(&bin_file_name, offsets).unwrap(); @@ -40,6 +47,7 @@ impl MiniTDFSpectrumReader { path: path.as_ref().to_path_buf(), precursor_reader, blob_reader, + collision_energies, } } } @@ -69,6 +77,7 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { let precursor = self.precursor_reader.get(index); spectrum.precursor = precursor; spectrum.index = precursor.index; + spectrum.collision_energy = self.collision_energies[index]; spectrum } diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index a8c8934..a6f6b02 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -56,10 +56,12 @@ impl TDFSpectrumReader { pub fn read_single_raw_spectrum(&self, index: usize) -> RawSpectrum { let mut tof_indices: Vec = vec![]; let mut intensities: Vec = vec![]; + let mut collision_energy = 0.0; for pasef_frame in self .spectrum_frame_index_reader .iterate_over_pasef_frames(index) { + collision_energy = pasef_frame.collision_energy; let frame_index: usize = pasef_frame.frame - 1; let frame = self.frame_reader.get(frame_index); if frame.intensities.len() == 0 { @@ -84,6 +86,7 @@ impl TDFSpectrumReader { tof_indices: raw_tof_indices, intensities: raw_intensities, index: index, + collision_energy, }; raw_spectrum .smooth(SMOOTHING_WINDOW) @@ -182,6 +185,7 @@ pub(crate) struct RawSpectrum { pub tof_indices: Vec, pub intensities: Vec, pub index: usize, + pub collision_energy: f64, } impl RawSpectrum { @@ -233,6 +237,7 @@ impl RawSpectrum { intensities: self.intensities.iter().map(|x| *x as f64).collect(), precursor: precursor, index: index, + collision_energy: self.collision_energy, }; spectrum } diff --git a/src/io/writers/mgf.rs b/src/io/writers/mgf.rs index f16b1bc..12470d6 100644 --- a/src/io/writers/mgf.rs +++ b/src/io/writers/mgf.rs @@ -34,7 +34,7 @@ impl MGFEntry { let title = precursor.index; let ms2_data = format!( "TITLE=index:{}, im:{:.4}, intensity:{:.4}, frame:{}, ce:{:.4}\nPEPMASS={:.4}\nCHARGE={}\nRT={:.2}\n", - title, precursor.im, precursor.intensity, precursor.frame_index, precursor.collision_energy, precursor.mz, precursor.charge, precursor.rt + title, precursor.im, precursor.intensity, precursor.frame_index, spectrum.collision_energy, precursor.mz, precursor.charge, precursor.rt ); ms2_data } diff --git a/src/ms_data/precursors.rs b/src/ms_data/precursors.rs index d4c1ae4..726088a 100644 --- a/src/ms_data/precursors.rs +++ b/src/ms_data/precursors.rs @@ -8,5 +8,4 @@ pub struct Precursor { pub intensity: f64, pub index: usize, pub frame_index: usize, - pub collision_energy: f64, } diff --git a/src/ms_data/spectra.rs b/src/ms_data/spectra.rs index 90d6e0a..84fc1b0 100644 --- a/src/ms_data/spectra.rs +++ b/src/ms_data/spectra.rs @@ -7,4 +7,5 @@ pub struct Spectrum { pub intensities: Vec, pub precursor: Precursor, pub index: usize, + pub collision_energy: f64, } diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 2a9501f..467d48a 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -32,9 +32,9 @@ fn minitdf_reader() { intensity: 0.0, index: 1, frame_index: 1, - collision_energy: 0.0, }, index: 1, + collision_energy: 0.0, }, Spectrum { mz_values: vec![1100.0, 1200.002, 1300.03, 1400.4], @@ -47,9 +47,9 @@ fn minitdf_reader() { intensity: 0.0, index: 2, frame_index: 2, - collision_energy: 0.0, }, index: 2, + collision_energy: 0.0, }, ]; for i in 0..spectra.len() { @@ -79,9 +79,9 @@ fn tdf_reader_dda() { intensity: 10.0, index: 1, frame_index: 1, - collision_energy: 0.0, }, index: 0, + collision_energy: 0.0, }, Spectrum { mz_values: vec![169.5419900362706, 695.6972509397959], @@ -94,9 +94,9 @@ fn tdf_reader_dda() { intensity: 10.0, index: 2, frame_index: 1, - collision_energy: 0.0, }, index: 1, + collision_energy: 0.0, }, Spectrum { mz_values: vec![827.1915846690921], @@ -109,9 +109,9 @@ fn tdf_reader_dda() { intensity: 10.0, index: 3, frame_index: 3, - collision_energy: 0.0, }, index: 2, + collision_energy: 0.0, }, ]; for i in 0..spectra.len() { From bff0a73077fb2b1690b55d760ac5d26d08e3230b Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 13:32:14 +0200 Subject: [PATCH 090/109] FEAT: explicit intensity reader in frame for ICC data --- src/io/readers/frame_reader.rs | 1 - src/ms_data/frames.rs | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index 5da8917..bbc3b9e 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -151,7 +151,6 @@ impl FrameReader { frame.rt = sql_frame.rt; frame.acquisition_type = self.acquisition; frame.intensity_correction_factor = 1.0 / sql_frame.accumulation_time; - // TODO: implement intensity reader if (self.acquisition == AcquisitionType::DIAPASEF) & (frame.ms_level == MSLevel::MS2) { diff --git a/src/ms_data/frames.rs b/src/ms_data/frames.rs index 86454f1..c4858e0 100644 --- a/src/ms_data/frames.rs +++ b/src/ms_data/frames.rs @@ -16,6 +16,12 @@ pub struct Frame { pub window_group: u8, } +impl Frame { + pub fn get_corrected_intensity(&self, index: usize) -> f64 { + self.intensity_correction_factor * self.intensities[index] as f64 + } +} + /// The MS level used. #[derive(Debug, PartialEq, Default, Clone, Copy)] pub enum MSLevel { From 6d8ab0e09d6d1bf4e0d04e6e2a4adf26acbd0c52 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 13:35:57 +0200 Subject: [PATCH 091/109] CHORE: technical fix of tdf blob reader without effect --- src/io/readers/file_readers/tdf_blob_reader.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/io/readers/file_readers/tdf_blob_reader.rs b/src/io/readers/file_readers/tdf_blob_reader.rs index b37415b..490efc1 100644 --- a/src/io/readers/file_readers/tdf_blob_reader.rs +++ b/src/io/readers/file_readers/tdf_blob_reader.rs @@ -65,7 +65,7 @@ impl TdfBlobReader { byte_count: usize, offset: usize, ) -> Result { - if (byte_count <= (HEADER_SIZE * U32_SIZE)) + if (byte_count < (HEADER_SIZE * U32_SIZE)) || ((offset + byte_count) > self.len()) { return Err(TdfBlobError::ByteCount( From 22074591fdb0232073143df47467baecbb037f21 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 15:47:16 +0200 Subject: [PATCH 092/109] CHORE: simplified spectral ordering --- src/io/readers/spectrum_reader.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index 0ae9717..0082ca3 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -47,11 +47,7 @@ impl SpectrumReader { .into_par_iter() .map(|index| self.get(index)) .collect(); - spectra.sort_by(|a, b| { - let x = b.precursor.index as f64; - let y = a.precursor.index as f64; - y.total_cmp(&x) - }); + spectra.sort_by_key(|x| x.precursor.index); spectra } From 149cfab396ef86cd185b0018d325371d346f95cd Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 15:48:01 +0200 Subject: [PATCH 093/109] FEAT: added timscompressiontype to metadata --- src/io/readers/metadata_reader.rs | 6 ++++++ src/ms_data/acquisition.rs | 2 +- src/ms_data/metadata.rs | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/io/readers/metadata_reader.rs b/src/io/readers/metadata_reader.rs index 81c8e11..a439239 100644 --- a/src/io/readers/metadata_reader.rs +++ b/src/io/readers/metadata_reader.rs @@ -19,11 +19,17 @@ impl MetadataReader { let tdf_sql_reader = SqlReader::open(&sql_path).unwrap(); let sql_metadata: HashMap = SqlMetadata::from_sql_reader(&tdf_sql_reader).unwrap(); + let compression_type = sql_metadata + .get("TimsCompressionType") + .unwrap() + .parse() + .unwrap(); Metadata { path: path.as_ref().to_path_buf(), rt_converter: get_rt_converter(&tdf_sql_reader), im_converter: get_im_converter(&sql_metadata, &tdf_sql_reader), mz_converter: get_mz_converter(&sql_metadata), + compression_type, } } } diff --git a/src/ms_data/acquisition.rs b/src/ms_data/acquisition.rs index 790c503..b3e7be0 100644 --- a/src/ms_data/acquisition.rs +++ b/src/ms_data/acquisition.rs @@ -3,7 +3,7 @@ pub enum AcquisitionType { DDAPASEF, DIAPASEF, - // DiagonalDIAPASEF, + DiagonalDIAPASEF, // PRMPASEF, /// Default value. #[default] diff --git a/src/ms_data/metadata.rs b/src/ms_data/metadata.rs index 8da4852..55766a5 100644 --- a/src/ms_data/metadata.rs +++ b/src/ms_data/metadata.rs @@ -11,4 +11,5 @@ pub struct Metadata { pub rt_converter: Frame2RtConverter, pub im_converter: Scan2ImConverter, pub mz_converter: Tof2MzConverter, + pub compression_type: u8, } From 40a39f47652206fe8fd1bdc46a98478bb916f7ef Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 17:10:36 +0200 Subject: [PATCH 094/109] CHORE: made todos for refactoring --- src/file_readers.rs | 2 ++ src/io/readers/file_readers/tdf_blob_reader.rs | 1 + 2 files changed, 3 insertions(+) diff --git a/src/file_readers.rs b/src/file_readers.rs index 420fe6e..9f1c9f5 100644 --- a/src/file_readers.rs +++ b/src/file_readers.rs @@ -14,6 +14,7 @@ pub struct FileReader { } impl FileReader { + // TODO refactor out pub fn new>(path_name: T) -> Result { let format: FileFormat = FileFormat::parse(path_name)?; let frame_reader = match &format { @@ -76,6 +77,7 @@ pub enum FileFormat { } impl FileFormat { + // TODO make into proper struct pub fn parse( input: impl AsRef, ) -> Result { diff --git a/src/io/readers/file_readers/tdf_blob_reader.rs b/src/io/readers/file_readers/tdf_blob_reader.rs index 490efc1..fad363c 100644 --- a/src/io/readers/file_readers/tdf_blob_reader.rs +++ b/src/io/readers/file_readers/tdf_blob_reader.rs @@ -18,6 +18,7 @@ pub struct TdfBlobReader { } impl TdfBlobReader { + // TODO parse compression1 pub fn new(file_name: impl AsRef) -> Result { let path: PathBuf = file_name.as_ref().to_path_buf(); let file: File = File::open(&path)?; From 5405b6764e7581ef9bd57c964a88f3af611e28da Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 17:10:58 +0200 Subject: [PATCH 095/109] CHORE: made todo for refactoring --- src/io/readers/frame_reader.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index bbc3b9e..2cc52da 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -35,12 +35,12 @@ impl FrameReader { let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); let sql_frames = SqlFrame::from_sql_reader(&tdf_sql_reader).unwrap(); let bin_path = find_extension(&path, "analysis.tdf_bin").unwrap(); - let tdf_bin_reader: TdfBlobReader = - TdfBlobReader::new(bin_path).unwrap(); + let tdf_bin_reader = TdfBlobReader::new(bin_path).unwrap(); let acquisition = if sql_frames.iter().any(|x| x.msms_type == 8) { AcquisitionType::DDAPASEF } else if sql_frames.iter().any(|x| x.msms_type == 9) { AcquisitionType::DIAPASEF + // TODO: can also be diagonalpasef } else { AcquisitionType::Unknown }; From a27e4553ccfbec62296c5a27b28caf9870844ae2 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 17:11:27 +0200 Subject: [PATCH 096/109] CHORE: refactored tdf spectrum reader --- src/io/readers/spectrum_reader/tdf.rs | 178 +++--------------- src/io/readers/spectrum_reader/tdf/dda.rs | 91 +++++++++ .../spectrum_reader/tdf/raw_spectra.rs | 68 +++++++ 3 files changed, 180 insertions(+), 157 deletions(-) create mode 100644 src/io/readers/spectrum_reader/tdf/dda.rs create mode 100644 src/io/readers/spectrum_reader/tdf/raw_spectra.rs diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index a6f6b02..8c3cecc 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -1,22 +1,19 @@ +mod dda; +mod raw_spectra; + +use dda::SpectrumFrameIndexReader; +use raw_spectra::RawSpectrum; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::path::{Path, PathBuf}; use crate::{ domain_converters::{ConvertableDomain, Tof2MzConverter}, io::readers::{ - file_readers::sql_reader::{ - pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlReader, - }, - FrameReader, MetadataReader, PrecursorReader, - }, - ms_data::{Precursor, Spectrum}, - utils::{ - find_extension, - vec_utils::{ - argsort, filter_with_mask, find_sparse_local_maxima_mask, - group_and_sum, - }, + file_readers::sql_reader::SqlReader, FrameReader, MetadataReader, + PrecursorReader, }, + ms_data::{AcquisitionType, Spectrum}, + utils::find_extension, }; use super::SpectrumReaderTrait; @@ -30,7 +27,6 @@ pub struct TDFSpectrumReader { path: PathBuf, precursor_reader: PrecursorReader, mz_reader: Tof2MzConverter, - frame_reader: FrameReader, spectrum_frame_index_reader: SpectrumFrameIndexReader, } @@ -41,53 +37,27 @@ impl TDFSpectrumReader { let metadata = MetadataReader::new(&sql_path); let mz_reader: Tof2MzConverter = metadata.mz_converter; let tdf_sql_reader = SqlReader::open(&sql_path).unwrap(); - let precursor_reader: PrecursorReader = PrecursorReader::new(&sql_path); - let spectrum_frame_index_reader = - SpectrumFrameIndexReader::new(&tdf_sql_reader); + let precursor_reader; + let spectrum_frame_index_reader; + if frame_reader.get_acquisition() == AcquisitionType::DDAPASEF { + precursor_reader = PrecursorReader::new(&sql_path); + spectrum_frame_index_reader = + SpectrumFrameIndexReader::new(&tdf_sql_reader, frame_reader); + } else { + // TODO parse diaPASEF + panic!("Not DDA") + } Self { path: path_name.as_ref().to_path_buf(), precursor_reader, mz_reader, - frame_reader, spectrum_frame_index_reader, } } pub fn read_single_raw_spectrum(&self, index: usize) -> RawSpectrum { - let mut tof_indices: Vec = vec![]; - let mut intensities: Vec = vec![]; - let mut collision_energy = 0.0; - for pasef_frame in self - .spectrum_frame_index_reader - .iterate_over_pasef_frames(index) - { - collision_energy = pasef_frame.collision_energy; - let frame_index: usize = pasef_frame.frame - 1; - let frame = self.frame_reader.get(frame_index); - if frame.intensities.len() == 0 { - continue; - } - let scan_start: usize = pasef_frame.scan_start; - let scan_end: usize = pasef_frame.scan_end; - let offset_start: usize = frame.scan_offsets[scan_start] as usize; - let offset_end: usize = frame.scan_offsets[scan_end] as usize; - let tof_selection: &[u32] = - &frame.tof_indices[offset_start..offset_end]; - let intensity_selection: &[u32] = - &frame.intensities[offset_start..offset_end]; - tof_indices.extend(tof_selection); - intensities.extend(intensity_selection); - } - let (raw_tof_indices, raw_intensities) = group_and_sum( - tof_indices, - intensities.iter().map(|x| *x as u64).collect(), - ); - let raw_spectrum = RawSpectrum { - tof_indices: raw_tof_indices, - intensities: raw_intensities, - index: index, - collision_energy, - }; + let raw_spectrum = + self.spectrum_frame_index_reader.get_raw_spectrum(index); raw_spectrum .smooth(SMOOTHING_WINDOW) .centroid(CENTROIDING_WINDOW) @@ -136,109 +106,3 @@ impl SpectrumReaderTrait for TDFSpectrumReader { } } } - -#[derive(Debug)] -struct SpectrumFrameIndexReader { - order: Vec, - offsets: Vec, - pasef_frames: Vec, -} - -impl SpectrumFrameIndexReader { - fn new(tdf_sql_reader: &SqlReader) -> Self { - let pasef_frames = - SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader).unwrap(); - let pasef_precursors = - &pasef_frames.iter().map(|x| x.precursor).collect(); - let order: Vec = argsort(&pasef_precursors); - let max_precursor = pasef_precursors.iter().max().unwrap(); - let mut offsets: Vec = Vec::with_capacity(max_precursor + 1); - offsets.push(0); - for (offset, &index) in order.iter().enumerate().take(order.len() - 1) { - let second_index: usize = order[offset + 1]; - if pasef_precursors[index] != pasef_precursors[second_index] { - offsets.push(offset + 1) - } - } - offsets.push(order.len()); - Self { - order, - offsets, - pasef_frames, - } - } - - fn iterate_over_pasef_frames( - &self, - index: usize, - ) -> impl Iterator { - let start: usize = self.offsets[index]; - let end: usize = self.offsets[index + 1]; - self.order[start..end] - .iter() - .map(|&x| &self.pasef_frames[x]) - } -} - -#[derive(Debug, PartialEq, Default, Clone)] -pub(crate) struct RawSpectrum { - pub tof_indices: Vec, - pub intensities: Vec, - pub index: usize, - pub collision_energy: f64, -} - -impl RawSpectrum { - pub fn smooth(mut self, window: u32) -> Self { - let mut smooth_intensities: Vec = self.intensities.clone(); - for (current_index, current_tof) in self.tof_indices.iter().enumerate() - { - let current_intensity: u64 = self.intensities[current_index]; - for (_next_index, next_tof) in - self.tof_indices[current_index + 1..].iter().enumerate() - { - let next_index: usize = _next_index + current_index + 1; - let next_intensity: u64 = self.intensities[next_index]; - if (next_tof - current_tof) <= window { - smooth_intensities[current_index] += next_intensity; - smooth_intensities[next_index] += current_intensity; - } else { - break; - } - } - } - self.intensities = smooth_intensities; - self - } - - pub fn centroid(mut self, window: u32) -> Self { - let local_maxima: Vec = find_sparse_local_maxima_mask( - &self.tof_indices, - &self.intensities, - window, - ); - self.tof_indices = filter_with_mask(&self.tof_indices, &local_maxima); - self.intensities = filter_with_mask(&self.intensities, &local_maxima); - self - } - - pub fn finalize( - &self, - precursor: Precursor, - mz_reader: &Tof2MzConverter, - ) -> Spectrum { - let index = self.index; - let spectrum: Spectrum = Spectrum { - mz_values: self - .tof_indices - .iter() - .map(|&x| mz_reader.convert(x)) - .collect(), - intensities: self.intensities.iter().map(|x| *x as f64).collect(), - precursor: precursor, - index: index, - collision_energy: self.collision_energy, - }; - spectrum - } -} diff --git a/src/io/readers/spectrum_reader/tdf/dda.rs b/src/io/readers/spectrum_reader/tdf/dda.rs new file mode 100644 index 0000000..76c6c33 --- /dev/null +++ b/src/io/readers/spectrum_reader/tdf/dda.rs @@ -0,0 +1,91 @@ +use crate::{ + io::readers::{ + file_readers::sql_reader::{ + pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlReader, + }, + FrameReader, + }, + utils::vec_utils::{argsort, group_and_sum}, +}; + +use super::raw_spectra::RawSpectrum; + +#[derive(Debug)] +pub struct SpectrumFrameIndexReader { + order: Vec, + offsets: Vec, + pasef_frames: Vec, + frame_reader: FrameReader, +} + +impl SpectrumFrameIndexReader { + pub fn new(tdf_sql_reader: &SqlReader, frame_reader: FrameReader) -> Self { + let pasef_frames = + SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader).unwrap(); + let pasef_precursors = + &pasef_frames.iter().map(|x| x.precursor).collect(); + let order: Vec = argsort(&pasef_precursors); + let max_precursor = pasef_precursors.iter().max().unwrap(); + let mut offsets: Vec = Vec::with_capacity(max_precursor + 1); + offsets.push(0); + for (offset, &index) in order.iter().enumerate().take(order.len() - 1) { + let second_index: usize = order[offset + 1]; + if pasef_precursors[index] != pasef_precursors[second_index] { + offsets.push(offset + 1) + } + } + offsets.push(order.len()); + Self { + order, + offsets, + pasef_frames, + frame_reader, + } + } + + pub fn iterate_over_pasef_frames( + &self, + index: usize, + ) -> impl Iterator { + let start: usize = self.offsets[index]; + let end: usize = self.offsets[index + 1]; + self.order[start..end] + .iter() + .map(|&x| &self.pasef_frames[x]) + } + + pub fn get_raw_spectrum(&self, index: usize) -> RawSpectrum { + let mut collision_energy = 0.0; + let mut tof_indices: Vec = vec![]; + let mut intensities: Vec = vec![]; + for pasef_frame in self.iterate_over_pasef_frames(index) { + collision_energy = pasef_frame.collision_energy; + let frame_index: usize = pasef_frame.frame - 1; + let frame = self.frame_reader.get(frame_index); + if frame.intensities.len() == 0 { + continue; + } + let scan_start: usize = pasef_frame.scan_start; + let scan_end: usize = pasef_frame.scan_end; + let offset_start: usize = frame.scan_offsets[scan_start] as usize; + let offset_end: usize = frame.scan_offsets[scan_end] as usize; + let tof_selection: &[u32] = + &frame.tof_indices[offset_start..offset_end]; + let intensity_selection: &[u32] = + &frame.intensities[offset_start..offset_end]; + tof_indices.extend(tof_selection); + intensities.extend(intensity_selection); + } + let (raw_tof_indices, raw_intensities) = group_and_sum( + tof_indices, + intensities.iter().map(|x| *x as u64).collect(), + ); + let raw_spectrum = RawSpectrum { + tof_indices: raw_tof_indices, + intensities: raw_intensities, + index: index, + collision_energy, + }; + raw_spectrum + } +} diff --git a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs new file mode 100644 index 0000000..c667e19 --- /dev/null +++ b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs @@ -0,0 +1,68 @@ +use crate::{ + domain_converters::{ConvertableDomain, Tof2MzConverter}, + ms_data::{Precursor, Spectrum}, + utils::vec_utils::{filter_with_mask, find_sparse_local_maxima_mask}, +}; + +#[derive(Debug, PartialEq, Default, Clone)] +pub(crate) struct RawSpectrum { + pub tof_indices: Vec, + pub intensities: Vec, + pub index: usize, + pub collision_energy: f64, +} + +impl RawSpectrum { + pub fn smooth(mut self, window: u32) -> Self { + let mut smooth_intensities: Vec = self.intensities.clone(); + for (current_index, current_tof) in self.tof_indices.iter().enumerate() + { + let current_intensity: u64 = self.intensities[current_index]; + for (_next_index, next_tof) in + self.tof_indices[current_index + 1..].iter().enumerate() + { + let next_index: usize = _next_index + current_index + 1; + let next_intensity: u64 = self.intensities[next_index]; + if (next_tof - current_tof) <= window { + smooth_intensities[current_index] += next_intensity; + smooth_intensities[next_index] += current_intensity; + } else { + break; + } + } + } + self.intensities = smooth_intensities; + self + } + + pub fn centroid(mut self, window: u32) -> Self { + let local_maxima: Vec = find_sparse_local_maxima_mask( + &self.tof_indices, + &self.intensities, + window, + ); + self.tof_indices = filter_with_mask(&self.tof_indices, &local_maxima); + self.intensities = filter_with_mask(&self.intensities, &local_maxima); + self + } + + pub fn finalize( + &self, + precursor: Precursor, + mz_reader: &Tof2MzConverter, + ) -> Spectrum { + let index = self.index; + let spectrum: Spectrum = Spectrum { + mz_values: self + .tof_indices + .iter() + .map(|&x| mz_reader.convert(x)) + .collect(), + intensities: self.intensities.iter().map(|x| *x as f64).collect(), + precursor: precursor, + index: index, + collision_energy: self.collision_energy, + }; + spectrum + } +} From ccd8e3a7ba9ea5223dc4b81a3021fbacf3a6f001 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 17:21:35 +0200 Subject: [PATCH 097/109] FEAT: added isolation width and mz to spectra --- .../file_readers/sql_reader/pasef_frame_msms.rs | 8 ++++---- src/io/readers/spectrum_reader/minitdf.rs | 2 ++ src/io/readers/spectrum_reader/tdf/dda.rs | 6 ++++++ src/io/readers/spectrum_reader/tdf/raw_spectra.rs | 4 ++++ src/ms_data/spectra.rs | 2 ++ tests/spectrum_readers.rs | 10 ++++++++++ 6 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs b/src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs index 33fe4b6..51e09cd 100644 --- a/src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs +++ b/src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs @@ -5,8 +5,8 @@ pub struct SqlPasefFrameMsMs { pub frame: usize, pub scan_start: usize, pub scan_end: usize, - pub mz_center: f64, - pub mz_width: f64, + pub isolation_mz: f64, + pub isolation_width: f64, pub collision_energy: f64, pub precursor: usize, } @@ -21,8 +21,8 @@ impl ReadableSqlTable for SqlPasefFrameMsMs { frame: row.get(0).unwrap_or_default(), scan_start: row.get(1).unwrap_or_default(), scan_end: row.get(2).unwrap_or_default(), - mz_center: row.get(3).unwrap_or_default(), - mz_width: row.get(4).unwrap_or_default(), + isolation_mz: row.get(3).unwrap_or_default(), + isolation_width: row.get(4).unwrap_or_default(), collision_energy: row.get(5).unwrap_or_default(), precursor: row.get(6).unwrap_or_default(), } diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index dee7691..937f7b5 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -78,6 +78,8 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { spectrum.precursor = precursor; spectrum.index = precursor.index; spectrum.collision_energy = self.collision_energies[index]; + spectrum.isolation_mz = 0.0; //TODO + spectrum.isolation_width = 0.0; //TODO spectrum } diff --git a/src/io/readers/spectrum_reader/tdf/dda.rs b/src/io/readers/spectrum_reader/tdf/dda.rs index 76c6c33..a013f70 100644 --- a/src/io/readers/spectrum_reader/tdf/dda.rs +++ b/src/io/readers/spectrum_reader/tdf/dda.rs @@ -56,10 +56,14 @@ impl SpectrumFrameIndexReader { pub fn get_raw_spectrum(&self, index: usize) -> RawSpectrum { let mut collision_energy = 0.0; + let mut isolation_mz = 0.0; + let mut isolation_width = 0.0; let mut tof_indices: Vec = vec![]; let mut intensities: Vec = vec![]; for pasef_frame in self.iterate_over_pasef_frames(index) { collision_energy = pasef_frame.collision_energy; + isolation_mz = pasef_frame.isolation_mz; + isolation_width = pasef_frame.isolation_width; let frame_index: usize = pasef_frame.frame - 1; let frame = self.frame_reader.get(frame_index); if frame.intensities.len() == 0 { @@ -85,6 +89,8 @@ impl SpectrumFrameIndexReader { intensities: raw_intensities, index: index, collision_energy, + isolation_mz, + isolation_width, }; raw_spectrum } diff --git a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs index c667e19..43534cd 100644 --- a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs +++ b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs @@ -10,6 +10,8 @@ pub(crate) struct RawSpectrum { pub intensities: Vec, pub index: usize, pub collision_energy: f64, + pub isolation_mz: f64, + pub isolation_width: f64, } impl RawSpectrum { @@ -62,6 +64,8 @@ impl RawSpectrum { precursor: precursor, index: index, collision_energy: self.collision_energy, + isolation_mz: self.isolation_mz, + isolation_width: self.isolation_width, }; spectrum } diff --git a/src/ms_data/spectra.rs b/src/ms_data/spectra.rs index 84fc1b0..f02ddf6 100644 --- a/src/ms_data/spectra.rs +++ b/src/ms_data/spectra.rs @@ -8,4 +8,6 @@ pub struct Spectrum { pub precursor: Precursor, pub index: usize, pub collision_energy: f64, + pub isolation_mz: f64, + pub isolation_width: f64, } diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 467d48a..4696444 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -35,6 +35,8 @@ fn minitdf_reader() { }, index: 1, collision_energy: 0.0, + isolation_mz: 0.0, + isolation_width: 0.0, }, Spectrum { mz_values: vec![1100.0, 1200.002, 1300.03, 1400.4], @@ -50,6 +52,8 @@ fn minitdf_reader() { }, index: 2, collision_energy: 0.0, + isolation_mz: 0.0, + isolation_width: 0.0, }, ]; for i in 0..spectra.len() { @@ -82,6 +86,8 @@ fn tdf_reader_dda() { }, index: 0, collision_energy: 0.0, + isolation_mz: 500.5, + isolation_width: 2.0, }, Spectrum { mz_values: vec![169.5419900362706, 695.6972509397959], @@ -97,6 +103,8 @@ fn tdf_reader_dda() { }, index: 1, collision_energy: 0.0, + isolation_mz: 501.5, + isolation_width: 2.0, }, Spectrum { mz_values: vec![827.1915846690921], @@ -112,6 +120,8 @@ fn tdf_reader_dda() { }, index: 2, collision_energy: 0.0, + isolation_mz: 502.5, + isolation_width: 2.0, }, ]; for i in 0..spectra.len() { From c22682aa2ed3d65d1f2ee753cb67fa734402a017 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 17:25:37 +0200 Subject: [PATCH 098/109] FEAT: renamed raw spextrum reader --- src/io/readers/spectrum_reader/tdf.rs | 6 +++--- src/io/readers/spectrum_reader/tdf/dda.rs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 8c3cecc..1f9a6c1 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -1,7 +1,7 @@ mod dda; mod raw_spectra; -use dda::SpectrumFrameIndexReader; +use dda::RawSpectrumReader; use raw_spectra::RawSpectrum; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::path::{Path, PathBuf}; @@ -27,7 +27,7 @@ pub struct TDFSpectrumReader { path: PathBuf, precursor_reader: PrecursorReader, mz_reader: Tof2MzConverter, - spectrum_frame_index_reader: SpectrumFrameIndexReader, + spectrum_frame_index_reader: RawSpectrumReader, } impl TDFSpectrumReader { @@ -42,7 +42,7 @@ impl TDFSpectrumReader { if frame_reader.get_acquisition() == AcquisitionType::DDAPASEF { precursor_reader = PrecursorReader::new(&sql_path); spectrum_frame_index_reader = - SpectrumFrameIndexReader::new(&tdf_sql_reader, frame_reader); + RawSpectrumReader::new(&tdf_sql_reader, frame_reader); } else { // TODO parse diaPASEF panic!("Not DDA") diff --git a/src/io/readers/spectrum_reader/tdf/dda.rs b/src/io/readers/spectrum_reader/tdf/dda.rs index a013f70..153f752 100644 --- a/src/io/readers/spectrum_reader/tdf/dda.rs +++ b/src/io/readers/spectrum_reader/tdf/dda.rs @@ -11,14 +11,14 @@ use crate::{ use super::raw_spectra::RawSpectrum; #[derive(Debug)] -pub struct SpectrumFrameIndexReader { +pub struct RawSpectrumReader { order: Vec, offsets: Vec, pasef_frames: Vec, frame_reader: FrameReader, } -impl SpectrumFrameIndexReader { +impl RawSpectrumReader { pub fn new(tdf_sql_reader: &SqlReader, frame_reader: FrameReader) -> Self { let pasef_frames = SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader).unwrap(); From 14783d927a3b90e87c19fa40c21bf9077438af73 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 17:26:39 +0200 Subject: [PATCH 099/109] CHORE set reminders to update error handling ands docs --- src/file_readers.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/file_readers.rs b/src/file_readers.rs index 9f1c9f5..b14d0fd 100644 --- a/src/file_readers.rs +++ b/src/file_readers.rs @@ -15,6 +15,8 @@ pub struct FileReader { impl FileReader { // TODO refactor out + // TODO proper error handling + // TODO update docs pub fn new>(path_name: T) -> Result { let format: FileFormat = FileFormat::parse(path_name)?; let frame_reader = match &format { From d24e6b551977ce767b4cd3190dbac7cbbfc87342 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 3 Jul 2024 17:28:08 +0200 Subject: [PATCH 100/109] DEBUG: updated main to move out of timsrust --- src/main.rs | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/src/main.rs b/src/main.rs index 129041f..9f150ee 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,30 +1,42 @@ +// use rayon::iter::ParallelIterator; use std::env; +use timsrust::io::readers::FrameReader; use timsrust::io::writers::MGFEntry; +use timsrust::ms_data::Frame; use timsrust::{ms_data::Spectrum, FileReader}; fn quick_test() { + // TODO move quick test out to separate program let args: Vec = env::args().collect(); let d_folder_name: &str = &args[1]; let x = FileReader::new(d_folder_name.to_string()).unwrap(); - let dda_spectra: Vec = x.read_all_spectra(); let spectrum_index: usize; if args.len() >= 3 { spectrum_index = args[2].parse().unwrap_or(0); } else { spectrum_index = 10; } - // println!("precursor {:?}", dda_spectra[spectrum_index].precursor); - // _ = MGFEntry::write_header(&dda_spectra[spectrum_index]); - println!("{}", MGFEntry::write(&dda_spectra[spectrum_index])); - // println!("{}", MGFEntry::write_header(&dda_spectra[spectrum_index])); - // println!("{}", MGFEntry::write_peaks(&dda_spectra[spectrum_index])); - // println!("mz values {:?}", dda_spectra[spectrum_index].mz_values); - // println!( - // "intensity values {:?}", - // dda_spectra[spectrum_index].intensities - // ); - // println!("{:?}", dda_spectra[spectrum_index].as_mgf_entry()); - // MGFWriter::write_spectra(d_folder_name, &dda_spectra); + let dda_spectra: Vec = x.read_all_spectra(); + let spectrum = &dda_spectra[spectrum_index]; + // let spectrum: &Spectrum = &x.read_single_spectrum(spectrum_index); + // // // println!("precursor {:?}", spectrum.precursor); + // // // _ = MGFEntry::write_header(spectrum); + println!("{}", MGFEntry::write(spectrum)); + // // // println!("{}", MGFEntry::write_header(spectrum)); + // // // println!("{}", MGFEntry::write_peaks(spectrum)); + // // // println!("mz values {:?}", spectrum.mz_values); + // // // println!( + // // // "intensity values {:?}", + // // // spectrum.intensities + // // // ); + // // // println!("{:?}", spectrum.as_mgf_entry()); + // // // MGFWriter::write_spectra(d_folder_name, &dda_spectra); + // let frame = x.read_single_frame(2); + let x = FrameReader::new(d_folder_name); + // let frames: Vec = x.parallel_filter(|x| x.msms_type != 0).collect(); + let frame: Frame = x.get(200); + // let frame = &frames[200 - 2]; + println!("{:?}", frame); } fn main() { From f1b31b707e13b86a0ee6da69ff945fe3f4815f22 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 4 Jul 2024 10:46:45 +0200 Subject: [PATCH 101/109] CHORE: added todo to refactor frame reader --- src/io/readers/frame_reader.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index 2cc52da..6ba2fd6 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -30,6 +30,7 @@ pub struct FrameReader { } impl FrameReader { + // TODO refactor/simplify pub fn new(path: impl AsRef) -> Self { let sql_path = find_extension(&path, "analysis.tdf").unwrap(); let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); From d49ba7749857a82da9278351e51411fccf6ba3a2 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 5 Jul 2024 10:43:35 +0200 Subject: [PATCH 102/109] FEAT: implemented a dia spectrum reader --- src/io/readers/precursor_reader/tdf.rs | 73 +++++----- src/io/readers/precursor_reader/tdf/dda.rs | 67 +++++++++ src/io/readers/precursor_reader/tdf/dia.rs | 136 ++++++++++++++++++ src/io/readers/spectrum_reader/tdf.rs | 30 ++-- src/io/readers/spectrum_reader/tdf/dda.rs | 10 +- src/io/readers/spectrum_reader/tdf/dia.rs | 125 ++++++++++++++++ .../spectrum_reader/tdf/raw_spectra.rs | 47 +++++- 7 files changed, 426 insertions(+), 62 deletions(-) create mode 100644 src/io/readers/precursor_reader/tdf/dda.rs create mode 100644 src/io/readers/precursor_reader/tdf/dia.rs create mode 100644 src/io/readers/spectrum_reader/tdf/dia.rs diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs index 964d466..8619a1e 100644 --- a/src/io/readers/precursor_reader/tdf.rs +++ b/src/io/readers/precursor_reader/tdf.rs @@ -1,67 +1,60 @@ +mod dda; +mod dia; + use std::path::{Path, PathBuf}; +use dda::DDATDFPrecursorReader; +use dia::DIATDFPrecursorReader; + use crate::{ - domain_converters::{ - ConvertableDomain, Frame2RtConverter, Scan2ImConverter, - }, - io::readers::{ - file_readers::sql_reader::{ - precursors::SqlPrecursor, ReadableSqlTable, SqlReader, - }, - MetadataReader, - }, - ms_data::Precursor, + io::readers::file_readers::sql_reader::SqlReader, + ms_data::{AcquisitionType, Precursor}, }; use super::PrecursorReaderTrait; -#[derive(Debug)] pub struct TDFPrecursorReader { - path: PathBuf, - sql_precursors: Vec, - rt_converter: Frame2RtConverter, - im_converter: Scan2ImConverter, + precursor_reader: Box, } impl TDFPrecursorReader { pub fn new(path: impl AsRef) -> Self { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); - let metadata = MetadataReader::new(&path); - let rt_converter: Frame2RtConverter = metadata.rt_converter; - let im_converter: Scan2ImConverter = metadata.im_converter; - let sql_precursors = - SqlPrecursor::from_sql_reader(&tdf_sql_reader).unwrap(); - Self { - path: path.as_ref().to_path_buf(), - sql_precursors, - rt_converter, - im_converter, - } + let sql_frames: Vec = tdf_sql_reader + .read_column_from_table("ScanMode", "Frames") + .unwrap(); + let acquisition_type = if sql_frames.iter().any(|&x| x == 8) { + AcquisitionType::DDAPASEF + } else if sql_frames.iter().any(|&x| x == 9) { + AcquisitionType::DIAPASEF + } else { + AcquisitionType::Unknown + }; + let precursor_reader: Box = + match acquisition_type { + AcquisitionType::DDAPASEF => { + Box::new(DDATDFPrecursorReader::new(path)) + }, + AcquisitionType::DIAPASEF => { + Box::new(DIATDFPrecursorReader::new(path)) + }, + _ => panic!(), + }; + Self { precursor_reader } } } impl PrecursorReaderTrait for TDFPrecursorReader { fn get(&self, index: usize) -> Precursor { - let mut precursor: Precursor = Precursor::default(); - let sql_precursor = &self.sql_precursors[index]; - let frame_id: usize = sql_precursor.precursor_frame; - let scan_id: f64 = sql_precursor.scan_average; - precursor.mz = sql_precursor.mz; - precursor.rt = self.rt_converter.convert(frame_id as u32); - precursor.im = self.im_converter.convert(scan_id); - precursor.charge = sql_precursor.charge; - precursor.intensity = sql_precursor.intensity; - precursor.index = index + 1; - precursor.frame_index = frame_id; - precursor + self.precursor_reader.get(index) } fn len(&self) -> usize { - self.sql_precursors.len() + self.precursor_reader.len() } fn get_path(&self) -> PathBuf { - self.path.clone() + self.precursor_reader.get_path() } } diff --git a/src/io/readers/precursor_reader/tdf/dda.rs b/src/io/readers/precursor_reader/tdf/dda.rs new file mode 100644 index 0000000..c299793 --- /dev/null +++ b/src/io/readers/precursor_reader/tdf/dda.rs @@ -0,0 +1,67 @@ +use std::path::{Path, PathBuf}; + +use crate::{ + domain_converters::{ + ConvertableDomain, Frame2RtConverter, Scan2ImConverter, + }, + io::readers::{ + file_readers::sql_reader::{ + precursors::SqlPrecursor, ReadableSqlTable, SqlReader, + }, + MetadataReader, + }, + ms_data::Precursor, +}; + +use super::PrecursorReaderTrait; + +#[derive(Debug)] +pub struct DDATDFPrecursorReader { + path: PathBuf, + sql_precursors: Vec, + rt_converter: Frame2RtConverter, + im_converter: Scan2ImConverter, +} + +impl DDATDFPrecursorReader { + pub fn new(path: impl AsRef) -> Self { + let sql_path = path.as_ref(); + let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); + let metadata = MetadataReader::new(&path); + let rt_converter: Frame2RtConverter = metadata.rt_converter; + let im_converter: Scan2ImConverter = metadata.im_converter; + let sql_precursors = + SqlPrecursor::from_sql_reader(&tdf_sql_reader).unwrap(); + Self { + path: path.as_ref().to_path_buf(), + sql_precursors, + rt_converter, + im_converter, + } + } +} + +impl PrecursorReaderTrait for DDATDFPrecursorReader { + fn get(&self, index: usize) -> Precursor { + let sql_precursor = &self.sql_precursors[index]; + let frame_id: usize = sql_precursor.precursor_frame; + let scan_id: f64 = sql_precursor.scan_average; + Precursor { + mz: sql_precursor.mz, + rt: self.rt_converter.convert(frame_id as u32), + im: self.im_converter.convert(scan_id), + charge: sql_precursor.charge, + intensity: sql_precursor.intensity, + index: index + 1, + frame_index: frame_id, + } + } + + fn len(&self) -> usize { + self.sql_precursors.len() + } + + fn get_path(&self) -> PathBuf { + self.path.clone() + } +} diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs new file mode 100644 index 0000000..91d82bd --- /dev/null +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -0,0 +1,136 @@ +use std::path::{Path, PathBuf}; + +use crate::{ + domain_converters::{ + ConvertableDomain, Frame2RtConverter, Scan2ImConverter, + }, + io::readers::{ + file_readers::sql_reader::{ + frame_groups::SqlWindowGroup, quad_settings::SqlQuadSettings, + ReadableSqlTable, SqlReader, + }, + MetadataReader, + }, + ms_data::{Precursor, QuadrupoleSettings}, + utils::vec_utils::argsort, +}; + +use super::PrecursorReaderTrait; + +#[derive(Debug)] +pub struct DIATDFPrecursorReader { + path: PathBuf, + expanded_quadrupole_settings: Vec, + rt_converter: Frame2RtConverter, + im_converter: Scan2ImConverter, +} + +impl DIATDFPrecursorReader { + pub fn new(path: impl AsRef) -> Self { + // TODO: refactor or even better: recycle + let sql_path = path.as_ref(); + let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); + let metadata = MetadataReader::new(&path); + let rt_converter: Frame2RtConverter = metadata.rt_converter; + let im_converter: Scan2ImConverter = metadata.im_converter; + let window_groups = + SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap(); + let mut quadrupole_settings: Vec; + let sql_quadrupole_settings = + SqlQuadSettings::from_sql_reader(&tdf_sql_reader).unwrap(); + let window_group_count = + window_groups.iter().map(|x| x.window_group).max().unwrap() + as usize; + quadrupole_settings = (0..window_group_count) + .map(|window_group| { + let mut quad = QuadrupoleSettings::default(); + quad.index = window_group + 1; + quad + }) + .collect(); + for window_group in sql_quadrupole_settings { + let group = window_group.window_group - 1; + quadrupole_settings[group] + .scan_starts + .push(window_group.scan_start); + quadrupole_settings[group] + .scan_ends + .push(window_group.scan_end); + quadrupole_settings[group] + .collision_energy + .push(window_group.collision_energy); + quadrupole_settings[group] + .isolation_mz + .push(window_group.mz_center); + quadrupole_settings[group] + .isolation_width + .push(window_group.mz_width); + } + quadrupole_settings = quadrupole_settings + .into_iter() + .map(|mut window| { + let order = argsort(&window.scan_starts); + window.isolation_mz = + order.iter().map(|&i| window.isolation_mz[i]).collect(); + window.isolation_width = + order.iter().map(|&i| window.isolation_width[i]).collect(); + window.collision_energy = + order.iter().map(|&i| window.collision_energy[i]).collect(); + window.scan_starts = + order.iter().map(|&i| window.scan_starts[i]).collect(); + window.scan_ends = + order.iter().map(|&i| window.scan_ends[i]).collect(); + window + }) + .collect(); + let mut expanded_quadrupole_settings: Vec = vec![]; + for window_group in window_groups { + let window = window_group.window_group; + let frame = window_group.frame; + let group = &quadrupole_settings[window as usize - 1]; + for sub_window in 0..group.isolation_mz.len() { + let sub_quad_settings = QuadrupoleSettings { + index: frame, + scan_starts: vec![group.scan_starts[sub_window]], + scan_ends: vec![group.scan_ends[sub_window]], + isolation_mz: vec![group.isolation_mz[sub_window]], + isolation_width: vec![group.isolation_width[sub_window]], + collision_energy: vec![group.collision_energy[sub_window]], + }; + expanded_quadrupole_settings.push(sub_quad_settings) + } + } + Self { + path: path.as_ref().to_path_buf(), + expanded_quadrupole_settings, + rt_converter, + im_converter, + } + } +} + +impl PrecursorReaderTrait for DIATDFPrecursorReader { + fn get(&self, index: usize) -> Precursor { + let quad_settings = &self.expanded_quadrupole_settings[index]; + let scan_id = (quad_settings.scan_starts[0] + + quad_settings.scan_ends[0]) as f32 + / 2.0; + Precursor { + mz: quad_settings.isolation_mz[0], + rt: self.rt_converter.convert(quad_settings.index as u32 - 1), + im: self.im_converter.convert(scan_id), + charge: 0, //TODO + intensity: 0.0, //TODO + index: index, + frame_index: quad_settings.index, + } + } + + fn len(&self) -> usize { + self.expanded_quadrupole_settings.len() + } + + fn get_path(&self) -> PathBuf { + self.path.clone() + } +} diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 1f9a6c1..fe1cbcc 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -1,8 +1,8 @@ mod dda; +mod dia; mod raw_spectra; -use dda::RawSpectrumReader; -use raw_spectra::RawSpectrum; +use raw_spectra::{RawSpectrum, RawSpectrumReader}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::path::{Path, PathBuf}; @@ -12,7 +12,7 @@ use crate::{ file_readers::sql_reader::SqlReader, FrameReader, MetadataReader, PrecursorReader, }, - ms_data::{AcquisitionType, Spectrum}, + ms_data::Spectrum, utils::find_extension, }; @@ -27,7 +27,7 @@ pub struct TDFSpectrumReader { path: PathBuf, precursor_reader: PrecursorReader, mz_reader: Tof2MzConverter, - spectrum_frame_index_reader: RawSpectrumReader, + raw_spectrum_reader: RawSpectrumReader, } impl TDFSpectrumReader { @@ -37,27 +37,23 @@ impl TDFSpectrumReader { let metadata = MetadataReader::new(&sql_path); let mz_reader: Tof2MzConverter = metadata.mz_converter; let tdf_sql_reader = SqlReader::open(&sql_path).unwrap(); - let precursor_reader; - let spectrum_frame_index_reader; - if frame_reader.get_acquisition() == AcquisitionType::DDAPASEF { - precursor_reader = PrecursorReader::new(&sql_path); - spectrum_frame_index_reader = - RawSpectrumReader::new(&tdf_sql_reader, frame_reader); - } else { - // TODO parse diaPASEF - panic!("Not DDA") - } + let precursor_reader = PrecursorReader::new(&sql_path); + let acquisition_type = frame_reader.get_acquisition(); + let raw_spectrum_reader = RawSpectrumReader::new( + &tdf_sql_reader, + frame_reader, + acquisition_type, + ); Self { path: path_name.as_ref().to_path_buf(), precursor_reader, mz_reader, - spectrum_frame_index_reader, + raw_spectrum_reader, } } pub fn read_single_raw_spectrum(&self, index: usize) -> RawSpectrum { - let raw_spectrum = - self.spectrum_frame_index_reader.get_raw_spectrum(index); + let raw_spectrum = self.raw_spectrum_reader.get(index); raw_spectrum .smooth(SMOOTHING_WINDOW) .centroid(CENTROIDING_WINDOW) diff --git a/src/io/readers/spectrum_reader/tdf/dda.rs b/src/io/readers/spectrum_reader/tdf/dda.rs index 153f752..c5d9eb8 100644 --- a/src/io/readers/spectrum_reader/tdf/dda.rs +++ b/src/io/readers/spectrum_reader/tdf/dda.rs @@ -8,17 +8,17 @@ use crate::{ utils::vec_utils::{argsort, group_and_sum}, }; -use super::raw_spectra::RawSpectrum; +use super::raw_spectra::{RawSpectrum, RawSpectrumReaderTrait}; #[derive(Debug)] -pub struct RawSpectrumReader { +pub struct DDARawSpectrumReader { order: Vec, offsets: Vec, pasef_frames: Vec, frame_reader: FrameReader, } -impl RawSpectrumReader { +impl DDARawSpectrumReader { pub fn new(tdf_sql_reader: &SqlReader, frame_reader: FrameReader) -> Self { let pasef_frames = SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader).unwrap(); @@ -53,8 +53,10 @@ impl RawSpectrumReader { .iter() .map(|&x| &self.pasef_frames[x]) } +} - pub fn get_raw_spectrum(&self, index: usize) -> RawSpectrum { +impl RawSpectrumReaderTrait for DDARawSpectrumReader { + fn get(&self, index: usize) -> RawSpectrum { let mut collision_energy = 0.0; let mut isolation_mz = 0.0; let mut isolation_width = 0.0; diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs new file mode 100644 index 0000000..97d55dd --- /dev/null +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -0,0 +1,125 @@ +use crate::{ + io::readers::{ + file_readers::sql_reader::{ + frame_groups::SqlWindowGroup, quad_settings::SqlQuadSettings, + ReadableSqlTable, SqlReader, + }, + FrameReader, + }, + ms_data::QuadrupoleSettings, + utils::vec_utils::{argsort, group_and_sum}, +}; + +use super::raw_spectra::{RawSpectrum, RawSpectrumReaderTrait}; + +#[derive(Debug)] +pub struct DIARawSpectrumReader { + expanded_quadrupole_settings: Vec, + frame_reader: FrameReader, +} + +impl DIARawSpectrumReader { + pub fn new(tdf_sql_reader: &SqlReader, frame_reader: FrameReader) -> Self { + let window_groups = + SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap(); + let mut quadrupole_settings: Vec; + let sql_quadrupole_settings = + SqlQuadSettings::from_sql_reader(&tdf_sql_reader).unwrap(); + let window_group_count = + window_groups.iter().map(|x| x.window_group).max().unwrap() + as usize; + quadrupole_settings = (0..window_group_count) + .map(|window_group| { + let mut quad = QuadrupoleSettings::default(); + quad.index = window_group + 1; + quad + }) + .collect(); + for window_group in sql_quadrupole_settings { + let group = window_group.window_group - 1; + quadrupole_settings[group] + .scan_starts + .push(window_group.scan_start); + quadrupole_settings[group] + .scan_ends + .push(window_group.scan_end); + quadrupole_settings[group] + .collision_energy + .push(window_group.collision_energy); + quadrupole_settings[group] + .isolation_mz + .push(window_group.mz_center); + quadrupole_settings[group] + .isolation_width + .push(window_group.mz_width); + } + quadrupole_settings = quadrupole_settings + .into_iter() + .map(|mut window| { + let order = argsort(&window.scan_starts); + window.isolation_mz = + order.iter().map(|&i| window.isolation_mz[i]).collect(); + window.isolation_width = + order.iter().map(|&i| window.isolation_width[i]).collect(); + window.collision_energy = + order.iter().map(|&i| window.collision_energy[i]).collect(); + window.scan_starts = + order.iter().map(|&i| window.scan_starts[i]).collect(); + window.scan_ends = + order.iter().map(|&i| window.scan_ends[i]).collect(); + window + }) + .collect(); + let mut expanded_quadrupole_settings: Vec = vec![]; + for window_group in window_groups { + let window = window_group.window_group; + let frame = window_group.frame; + let group = &quadrupole_settings[window as usize - 1]; + for sub_window in 0..group.isolation_mz.len() { + let sub_quad_settings = QuadrupoleSettings { + index: frame, + scan_starts: vec![group.scan_starts[sub_window]], + scan_ends: vec![group.scan_ends[sub_window]], + isolation_mz: vec![group.isolation_mz[sub_window]], + isolation_width: vec![group.isolation_width[sub_window]], + collision_energy: vec![group.collision_energy[sub_window]], + }; + expanded_quadrupole_settings.push(sub_quad_settings) + } + } + Self { + expanded_quadrupole_settings, + frame_reader, + } + } +} + +impl RawSpectrumReaderTrait for DIARawSpectrumReader { + fn get(&self, index: usize) -> RawSpectrum { + let quad_settings = &self.expanded_quadrupole_settings[index]; + let collision_energy = quad_settings.collision_energy[0]; + let isolation_mz = quad_settings.isolation_mz[0]; + let isolation_width = quad_settings.isolation_width[0]; + let scan_start = quad_settings.scan_starts[0]; + let scan_end = quad_settings.scan_ends[0]; + let frame_index = quad_settings.index - 1; + let frame = self.frame_reader.get(frame_index); + let offset_start = frame.scan_offsets[scan_start] as usize; + let offset_end = frame.scan_offsets[scan_end] as usize; + let tof_indices = &frame.tof_indices[offset_start..offset_end]; + let intensities = &frame.intensities[offset_start..offset_end]; + let (raw_tof_indices, raw_intensities) = group_and_sum( + tof_indices.iter().map(|x| *x).collect(), + intensities.iter().map(|x| *x as u64).collect(), + ); + let raw_spectrum = RawSpectrum { + tof_indices: raw_tof_indices, + intensities: raw_intensities, + index: index, + collision_energy, + isolation_mz, + isolation_width, + }; + raw_spectrum + } +} diff --git a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs index 43534cd..156b6a4 100644 --- a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs +++ b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs @@ -1,9 +1,14 @@ +use core::fmt; + use crate::{ domain_converters::{ConvertableDomain, Tof2MzConverter}, - ms_data::{Precursor, Spectrum}, + io::readers::{file_readers::sql_reader::SqlReader, FrameReader}, + ms_data::{AcquisitionType, Precursor, Spectrum}, utils::vec_utils::{filter_with_mask, find_sparse_local_maxima_mask}, }; +use super::{dda::DDARawSpectrumReader, dia::DIARawSpectrumReader}; + #[derive(Debug, PartialEq, Default, Clone)] pub(crate) struct RawSpectrum { pub tof_indices: Vec, @@ -70,3 +75,43 @@ impl RawSpectrum { spectrum } } + +pub struct RawSpectrumReader { + raw_spectrum_reader: Box, +} + +impl fmt::Debug for RawSpectrumReader { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "RawSpectrumReader {{ /* fields omitted */ }}") + } +} + +impl RawSpectrumReader { + pub fn new( + tdf_sql_reader: &SqlReader, + frame_reader: FrameReader, + acquisition_type: AcquisitionType, + ) -> Self { + let raw_spectrum_reader: Box = + match acquisition_type { + AcquisitionType::DDAPASEF => Box::new( + DDARawSpectrumReader::new(tdf_sql_reader, frame_reader), + ), + AcquisitionType::DIAPASEF => Box::new( + DIARawSpectrumReader::new(tdf_sql_reader, frame_reader), + ), + _ => panic!(), + }; + Self { + raw_spectrum_reader, + } + } + + pub fn get(&self, index: usize) -> RawSpectrum { + self.raw_spectrum_reader.get(index) + } +} + +pub trait RawSpectrumReaderTrait: Sync { + fn get(&self, index: usize) -> RawSpectrum; +} From cd29750ee98d4ca49fa612fc7adbf87dc4ec6ae3 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 5 Jul 2024 11:31:03 +0200 Subject: [PATCH 103/109] FEAT: made proper quad_settings_reader that can be recycled --- src/io/readers.rs | 2 + src/io/readers/file_readers/sql_reader.rs | 12 +++- src/io/readers/frame_reader.rs | 69 ++++-------------- src/io/readers/precursor_reader/tdf/dia.rs | 57 ++------------- src/io/readers/quad_settings_reader.rs | 83 ++++++++++++++++++++++ src/io/readers/spectrum_reader/tdf/dia.rs | 57 ++------------- 6 files changed, 116 insertions(+), 164 deletions(-) create mode 100644 src/io/readers/quad_settings_reader.rs diff --git a/src/io/readers.rs b/src/io/readers.rs index b0d1ec0..03d5248 100644 --- a/src/io/readers.rs +++ b/src/io/readers.rs @@ -2,9 +2,11 @@ pub(crate) mod file_readers; mod frame_reader; mod metadata_reader; mod precursor_reader; +mod quad_settings_reader; mod spectrum_reader; pub use frame_reader::*; pub use metadata_reader::*; pub use precursor_reader::*; +pub use quad_settings_reader::*; pub use spectrum_reader::*; diff --git a/src/io/readers/file_readers/sql_reader.rs b/src/io/readers/file_readers/sql_reader.rs index 61cffc6..6704532 100644 --- a/src/io/readers/file_readers/sql_reader.rs +++ b/src/io/readers/file_readers/sql_reader.rs @@ -5,20 +5,24 @@ pub mod pasef_frame_msms; pub mod precursors; pub mod quad_settings; -use std::{collections::HashMap, path::Path}; +use std::{ + collections::HashMap, + path::{Path, PathBuf}, +}; use rusqlite::Connection; #[derive(Debug)] pub struct SqlReader { connection: Connection, + path: PathBuf, } impl SqlReader { pub fn open(file_name: impl AsRef) -> Result { let path = file_name.as_ref().to_path_buf(); let connection = Connection::open(&path)?; - Ok(Self { connection }) + Ok(Self { connection, path }) } pub fn read_column_from_table( @@ -35,6 +39,10 @@ impl SqlReader { let result = rows.collect::, _>>()?; Ok(result) } + + pub fn get_path(&self) -> PathBuf { + self.path.clone() + } } pub trait ReadableSqlTable { diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index 6ba2fd6..a74cda6 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -8,15 +8,18 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; use crate::{ ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}, - utils::{find_extension, vec_utils::argsort}, + utils::find_extension, }; -use super::file_readers::{ - sql_reader::{ - frame_groups::SqlWindowGroup, frames::SqlFrame, - quad_settings::SqlQuadSettings, ReadableSqlTable, SqlReader, +use super::{ + file_readers::{ + sql_reader::{ + frame_groups::SqlWindowGroup, frames::SqlFrame, ReadableSqlTable, + SqlReader, + }, + tdf_blob_reader::{TdfBlob, TdfBlobReader}, }, - tdf_blob_reader::{TdfBlob, TdfBlobReader}, + QuadrupoleSettingsReader, }; #[derive(Debug)] @@ -46,7 +49,7 @@ impl FrameReader { AcquisitionType::Unknown }; let mut window_groups = vec![0; sql_frames.len()]; - let mut quadrupole_settings: Vec; + let quadrupole_settings; if acquisition == AcquisitionType::DIAPASEF { for window_group in SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap() @@ -54,56 +57,8 @@ impl FrameReader { window_groups[window_group.frame - 1] = window_group.window_group; } - let sql_quadrupole_settings = - SqlQuadSettings::from_sql_reader(&tdf_sql_reader).unwrap(); - let window_group_count = - *window_groups.iter().max().unwrap() as usize; - quadrupole_settings = (0..window_group_count) - .map(|window_group| { - let mut quad = QuadrupoleSettings::default(); - quad.index = window_group + 1; - quad - }) - .collect(); - for window_group in sql_quadrupole_settings { - let group = window_group.window_group - 1; - quadrupole_settings[group] - .scan_starts - .push(window_group.scan_start); - quadrupole_settings[group] - .scan_ends - .push(window_group.scan_end); - quadrupole_settings[group] - .collision_energy - .push(window_group.collision_energy); - quadrupole_settings[group] - .isolation_mz - .push(window_group.mz_center); - quadrupole_settings[group] - .isolation_width - .push(window_group.mz_width); - } - quadrupole_settings = quadrupole_settings - .into_iter() - .map(|mut window| { - let order = argsort(&window.scan_starts); - window.isolation_mz = - order.iter().map(|&i| window.isolation_mz[i]).collect(); - window.isolation_width = order - .iter() - .map(|&i| window.isolation_width[i]) - .collect(); - window.collision_energy = order - .iter() - .map(|&i| window.collision_energy[i]) - .collect(); - window.scan_starts = - order.iter().map(|&i| window.scan_starts[i]).collect(); - window.scan_ends = - order.iter().map(|&i| window.scan_ends[i]).collect(); - window - }) - .collect(); + quadrupole_settings = + QuadrupoleSettingsReader::new(tdf_sql_reader.get_path()); } else { quadrupole_settings = vec![]; } diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index 91d82bd..ed556e1 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -6,13 +6,11 @@ use crate::{ }, io::readers::{ file_readers::sql_reader::{ - frame_groups::SqlWindowGroup, quad_settings::SqlQuadSettings, - ReadableSqlTable, SqlReader, + frame_groups::SqlWindowGroup, ReadableSqlTable, SqlReader, }, - MetadataReader, + MetadataReader, QuadrupoleSettingsReader, }, ms_data::{Precursor, QuadrupoleSettings}, - utils::vec_utils::argsort, }; use super::PrecursorReaderTrait; @@ -27,7 +25,6 @@ pub struct DIATDFPrecursorReader { impl DIATDFPrecursorReader { pub fn new(path: impl AsRef) -> Self { - // TODO: refactor or even better: recycle let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); let metadata = MetadataReader::new(&path); @@ -35,54 +32,8 @@ impl DIATDFPrecursorReader { let im_converter: Scan2ImConverter = metadata.im_converter; let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap(); - let mut quadrupole_settings: Vec; - let sql_quadrupole_settings = - SqlQuadSettings::from_sql_reader(&tdf_sql_reader).unwrap(); - let window_group_count = - window_groups.iter().map(|x| x.window_group).max().unwrap() - as usize; - quadrupole_settings = (0..window_group_count) - .map(|window_group| { - let mut quad = QuadrupoleSettings::default(); - quad.index = window_group + 1; - quad - }) - .collect(); - for window_group in sql_quadrupole_settings { - let group = window_group.window_group - 1; - quadrupole_settings[group] - .scan_starts - .push(window_group.scan_start); - quadrupole_settings[group] - .scan_ends - .push(window_group.scan_end); - quadrupole_settings[group] - .collision_energy - .push(window_group.collision_energy); - quadrupole_settings[group] - .isolation_mz - .push(window_group.mz_center); - quadrupole_settings[group] - .isolation_width - .push(window_group.mz_width); - } - quadrupole_settings = quadrupole_settings - .into_iter() - .map(|mut window| { - let order = argsort(&window.scan_starts); - window.isolation_mz = - order.iter().map(|&i| window.isolation_mz[i]).collect(); - window.isolation_width = - order.iter().map(|&i| window.isolation_width[i]).collect(); - window.collision_energy = - order.iter().map(|&i| window.collision_energy[i]).collect(); - window.scan_starts = - order.iter().map(|&i| window.scan_starts[i]).collect(); - window.scan_ends = - order.iter().map(|&i| window.scan_ends[i]).collect(); - window - }) - .collect(); + let quadrupole_settings = + QuadrupoleSettingsReader::new(tdf_sql_reader.get_path()); let mut expanded_quadrupole_settings: Vec = vec![]; for window_group in window_groups { let window = window_group.window_group; diff --git a/src/io/readers/quad_settings_reader.rs b/src/io/readers/quad_settings_reader.rs new file mode 100644 index 0000000..16367a5 --- /dev/null +++ b/src/io/readers/quad_settings_reader.rs @@ -0,0 +1,83 @@ +use std::path::Path; + +use crate::{ms_data::QuadrupoleSettings, utils::vec_utils::argsort}; + +use super::file_readers::sql_reader::{ + quad_settings::SqlQuadSettings, ReadableSqlTable, SqlReader, +}; + +pub struct QuadrupoleSettingsReader { + quadrupole_settings: Vec, + sql_quadrupole_settings: Vec, +} + +impl QuadrupoleSettingsReader { + pub fn new(path: impl AsRef) -> Vec { + let sql_path = path.as_ref(); + let tdf_sql_reader = SqlReader::open(&sql_path).unwrap(); + let sql_quadrupole_settings = + SqlQuadSettings::from_sql_reader(&tdf_sql_reader).unwrap(); + let window_group_count = sql_quadrupole_settings + .iter() + .map(|x| x.window_group) + .max() + .unwrap() as usize; + let quadrupole_settings = (0..window_group_count) + .map(|window_group| { + let mut quad = QuadrupoleSettings::default(); + quad.index = window_group + 1; + quad + }) + .collect(); + let mut quad_reader = Self { + quadrupole_settings, + sql_quadrupole_settings, + }; + quad_reader.update_from_sql_quadrupole_settings(); + quad_reader.resort_groups(); + quad_reader.quadrupole_settings + } + + fn update_from_sql_quadrupole_settings(&mut self) { + for window_group in self.sql_quadrupole_settings.iter() { + let group = window_group.window_group - 1; + self.quadrupole_settings[group] + .scan_starts + .push(window_group.scan_start); + self.quadrupole_settings[group] + .scan_ends + .push(window_group.scan_end); + self.quadrupole_settings[group] + .collision_energy + .push(window_group.collision_energy); + self.quadrupole_settings[group] + .isolation_mz + .push(window_group.mz_center); + self.quadrupole_settings[group] + .isolation_width + .push(window_group.mz_width); + } + } + + fn resort_groups(&mut self) { + self.quadrupole_settings = self + .quadrupole_settings + .iter() + .map(|_window| { + let mut window = _window.clone(); + let order = argsort(&window.scan_starts); + window.isolation_mz = + order.iter().map(|&i| window.isolation_mz[i]).collect(); + window.isolation_width = + order.iter().map(|&i| window.isolation_width[i]).collect(); + window.collision_energy = + order.iter().map(|&i| window.collision_energy[i]).collect(); + window.scan_starts = + order.iter().map(|&i| window.scan_starts[i]).collect(); + window.scan_ends = + order.iter().map(|&i| window.scan_ends[i]).collect(); + window + }) + .collect(); + } +} diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 97d55dd..493152b 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -1,13 +1,12 @@ use crate::{ io::readers::{ file_readers::sql_reader::{ - frame_groups::SqlWindowGroup, quad_settings::SqlQuadSettings, - ReadableSqlTable, SqlReader, + frame_groups::SqlWindowGroup, ReadableSqlTable, SqlReader, }, - FrameReader, + FrameReader, QuadrupoleSettingsReader, }, ms_data::QuadrupoleSettings, - utils::vec_utils::{argsort, group_and_sum}, + utils::vec_utils::group_and_sum, }; use super::raw_spectra::{RawSpectrum, RawSpectrumReaderTrait}; @@ -22,54 +21,8 @@ impl DIARawSpectrumReader { pub fn new(tdf_sql_reader: &SqlReader, frame_reader: FrameReader) -> Self { let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap(); - let mut quadrupole_settings: Vec; - let sql_quadrupole_settings = - SqlQuadSettings::from_sql_reader(&tdf_sql_reader).unwrap(); - let window_group_count = - window_groups.iter().map(|x| x.window_group).max().unwrap() - as usize; - quadrupole_settings = (0..window_group_count) - .map(|window_group| { - let mut quad = QuadrupoleSettings::default(); - quad.index = window_group + 1; - quad - }) - .collect(); - for window_group in sql_quadrupole_settings { - let group = window_group.window_group - 1; - quadrupole_settings[group] - .scan_starts - .push(window_group.scan_start); - quadrupole_settings[group] - .scan_ends - .push(window_group.scan_end); - quadrupole_settings[group] - .collision_energy - .push(window_group.collision_energy); - quadrupole_settings[group] - .isolation_mz - .push(window_group.mz_center); - quadrupole_settings[group] - .isolation_width - .push(window_group.mz_width); - } - quadrupole_settings = quadrupole_settings - .into_iter() - .map(|mut window| { - let order = argsort(&window.scan_starts); - window.isolation_mz = - order.iter().map(|&i| window.isolation_mz[i]).collect(); - window.isolation_width = - order.iter().map(|&i| window.isolation_width[i]).collect(); - window.collision_energy = - order.iter().map(|&i| window.collision_energy[i]).collect(); - window.scan_starts = - order.iter().map(|&i| window.scan_starts[i]).collect(); - window.scan_ends = - order.iter().map(|&i| window.scan_ends[i]).collect(); - window - }) - .collect(); + let quadrupole_settings = + QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path()); let mut expanded_quadrupole_settings: Vec = vec![]; for window_group in window_groups { let window = window_group.window_group; From 0eb51ddd183c0a991f56aeb0e1bc456aebaf7b84 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 5 Jul 2024 12:32:50 +0200 Subject: [PATCH 104/109] FEAT: implemente None option for precursor charge and intensity --- src/io/readers/frame_reader.rs | 1 - src/io/readers/precursor_reader/minitdf.rs | 4 ++-- src/io/readers/precursor_reader/tdf/dda.rs | 4 ++-- src/io/readers/precursor_reader/tdf/dia.rs | 4 ++-- src/io/writers/mgf.rs | 4 +++- src/ms_data/precursors.rs | 4 ++-- tests/spectrum_readers.rs | 20 ++++++++++---------- 7 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index a74cda6..e13b6be 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -33,7 +33,6 @@ pub struct FrameReader { } impl FrameReader { - // TODO refactor/simplify pub fn new(path: impl AsRef) -> Self { let sql_path = find_extension(&path, "analysis.tdf").unwrap(); let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); diff --git a/src/io/readers/precursor_reader/minitdf.rs b/src/io/readers/precursor_reader/minitdf.rs index f9ef67f..0d5ee06 100644 --- a/src/io/readers/precursor_reader/minitdf.rs +++ b/src/io/readers/precursor_reader/minitdf.rs @@ -33,8 +33,8 @@ impl PrecursorReaderTrait for MiniTDFPrecursorReader { mz: x.mz, rt: x.rt, im: x.im, - charge: x.charge, - intensity: x.intensity, + charge: Some(x.charge), + intensity: Some(x.intensity), index: x.index, frame_index: x.frame_index, } diff --git a/src/io/readers/precursor_reader/tdf/dda.rs b/src/io/readers/precursor_reader/tdf/dda.rs index c299793..763307a 100644 --- a/src/io/readers/precursor_reader/tdf/dda.rs +++ b/src/io/readers/precursor_reader/tdf/dda.rs @@ -50,8 +50,8 @@ impl PrecursorReaderTrait for DDATDFPrecursorReader { mz: sql_precursor.mz, rt: self.rt_converter.convert(frame_id as u32), im: self.im_converter.convert(scan_id), - charge: sql_precursor.charge, - intensity: sql_precursor.intensity, + charge: Some(sql_precursor.charge), + intensity: Some(sql_precursor.intensity), index: index + 1, frame_index: frame_id, } diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index ed556e1..d604769 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -70,8 +70,8 @@ impl PrecursorReaderTrait for DIATDFPrecursorReader { mz: quad_settings.isolation_mz[0], rt: self.rt_converter.convert(quad_settings.index as u32 - 1), im: self.im_converter.convert(scan_id), - charge: 0, //TODO - intensity: 0.0, //TODO + charge: None, + intensity: None, index: index, frame_index: quad_settings.index, } diff --git a/src/io/writers/mgf.rs b/src/io/writers/mgf.rs index 12470d6..30ab5d2 100644 --- a/src/io/writers/mgf.rs +++ b/src/io/writers/mgf.rs @@ -32,9 +32,11 @@ impl MGFEntry { pub fn write_header(spectrum: &Spectrum) -> String { let precursor = spectrum.precursor; let title = precursor.index; + let intensity = precursor.intensity.unwrap_or(0.0); + let charge = precursor.charge.unwrap_or(0); let ms2_data = format!( "TITLE=index:{}, im:{:.4}, intensity:{:.4}, frame:{}, ce:{:.4}\nPEPMASS={:.4}\nCHARGE={}\nRT={:.2}\n", - title, precursor.im, precursor.intensity, precursor.frame_index, spectrum.collision_energy, precursor.mz, precursor.charge, precursor.rt + title, precursor.im, intensity, precursor.frame_index, spectrum.collision_energy, precursor.mz, charge, precursor.rt ); ms2_data } diff --git a/src/ms_data/precursors.rs b/src/ms_data/precursors.rs index 726088a..4f99d59 100644 --- a/src/ms_data/precursors.rs +++ b/src/ms_data/precursors.rs @@ -4,8 +4,8 @@ pub struct Precursor { pub mz: f64, pub rt: f64, pub im: f64, - pub charge: usize, - pub intensity: f64, + pub charge: Option, + pub intensity: Option, pub index: usize, pub frame_index: usize, } diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 4696444..fb3b9e9 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -28,8 +28,8 @@ fn minitdf_reader() { mz: 123.4567, rt: 12.345, im: 1.234, - charge: 1, - intensity: 0.0, + charge: Some(1), + intensity: Some(0.0), index: 1, frame_index: 1, }, @@ -45,8 +45,8 @@ fn minitdf_reader() { mz: 987.6543, rt: 9.876, im: 0.9876, - charge: 2, - intensity: 0.0, + charge: Some(2), + intensity: Some(0.0), index: 2, frame_index: 2, }, @@ -79,8 +79,8 @@ fn tdf_reader_dda() { mz: 500.0, rt: 0.2, im: 1.25, - charge: 2, - intensity: 10.0, + charge: Some(2), + intensity: Some(10.0), index: 1, frame_index: 1, }, @@ -96,8 +96,8 @@ fn tdf_reader_dda() { mz: 501.0, rt: 0.2, im: 1.0, - charge: 3, - intensity: 10.0, + charge: Some(3), + intensity: Some(10.0), index: 2, frame_index: 1, }, @@ -113,8 +113,8 @@ fn tdf_reader_dda() { mz: 502.0, rt: 0.4, im: 1.25, - charge: 2, - intensity: 10.0, + charge: Some(2), + intensity: Some(10.0), index: 3, frame_index: 3, }, From 3b0636ee26a2613e4e76658bd41cfae4c5846c47 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 5 Jul 2024 12:54:47 +0200 Subject: [PATCH 105/109] FEAT: removed manual testing main in favor of showing timsrust is a library rather than executable --- src/main.rs | 44 -------------------------------------------- 1 file changed, 44 deletions(-) delete mode 100644 src/main.rs diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index 9f150ee..0000000 --- a/src/main.rs +++ /dev/null @@ -1,44 +0,0 @@ -// use rayon::iter::ParallelIterator; -use std::env; -use timsrust::io::readers::FrameReader; -use timsrust::io::writers::MGFEntry; -use timsrust::ms_data::Frame; -use timsrust::{ms_data::Spectrum, FileReader}; - -fn quick_test() { - // TODO move quick test out to separate program - let args: Vec = env::args().collect(); - let d_folder_name: &str = &args[1]; - let x = FileReader::new(d_folder_name.to_string()).unwrap(); - let spectrum_index: usize; - if args.len() >= 3 { - spectrum_index = args[2].parse().unwrap_or(0); - } else { - spectrum_index = 10; - } - let dda_spectra: Vec = x.read_all_spectra(); - let spectrum = &dda_spectra[spectrum_index]; - // let spectrum: &Spectrum = &x.read_single_spectrum(spectrum_index); - // // // println!("precursor {:?}", spectrum.precursor); - // // // _ = MGFEntry::write_header(spectrum); - println!("{}", MGFEntry::write(spectrum)); - // // // println!("{}", MGFEntry::write_header(spectrum)); - // // // println!("{}", MGFEntry::write_peaks(spectrum)); - // // // println!("mz values {:?}", spectrum.mz_values); - // // // println!( - // // // "intensity values {:?}", - // // // spectrum.intensities - // // // ); - // // // println!("{:?}", spectrum.as_mgf_entry()); - // // // MGFWriter::write_spectra(d_folder_name, &dda_spectra); - // let frame = x.read_single_frame(2); - let x = FrameReader::new(d_folder_name); - // let frames: Vec = x.parallel_filter(|x| x.msms_type != 0).collect(); - let frame: Frame = x.get(200); - // let frame = &frames[200 - 2]; - println!("{:?}", frame); -} - -fn main() { - quick_test(); -} From 2a6ff12a95cb031c132605f01fc4873beee54a49 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 5 Jul 2024 13:15:57 +0200 Subject: [PATCH 106/109] FEAT: option to filter spectra by top n peaks --- src/ms_data/spectra.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/ms_data/spectra.rs b/src/ms_data/spectra.rs index f02ddf6..364ce55 100644 --- a/src/ms_data/spectra.rs +++ b/src/ms_data/spectra.rs @@ -11,3 +11,34 @@ pub struct Spectrum { pub isolation_mz: f64, pub isolation_width: f64, } + +impl Spectrum { + pub fn get_top_n(&self, top_n: usize) -> Self { + let mut indexed: Vec<(f64, usize)> = + self.intensities.iter().cloned().zip(0..).collect(); + indexed.sort_by(|a, b| { + b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal) + }); + let mut top_indices: Vec = indexed + .iter() + .take(top_n) + .map(|&(_, index)| index) + .collect(); + top_indices.sort(); + Spectrum { + mz_values: top_indices + .iter() + .map(|&index| self.mz_values[index]) + .collect(), + intensities: top_indices + .iter() + .map(|&index| self.intensities[index]) + .collect(), + precursor: self.precursor, + index: self.index, + collision_energy: self.collision_energy, + isolation_mz: self.isolation_mz, + isolation_width: self.isolation_width, + } + } +} From 2ce3547e6e0ba651a75bc69e7e70a3906cc1f87f Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 5 Jul 2024 13:24:56 +0200 Subject: [PATCH 107/109] FIX: top_n for small values --- src/ms_data/spectra.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/ms_data/spectra.rs b/src/ms_data/spectra.rs index 364ce55..bd9cf25 100644 --- a/src/ms_data/spectra.rs +++ b/src/ms_data/spectra.rs @@ -13,7 +13,8 @@ pub struct Spectrum { } impl Spectrum { - pub fn get_top_n(&self, top_n: usize) -> Self { + pub fn get_top_n(&self, n: usize) -> Self { + let top_n = if n == 0 { self.len() } else { n }; let mut indexed: Vec<(f64, usize)> = self.intensities.iter().cloned().zip(0..).collect(); indexed.sort_by(|a, b| { @@ -41,4 +42,8 @@ impl Spectrum { isolation_width: self.isolation_width, } } + + pub fn len(&self) -> usize { + self.mz_values.len() + } } From 71a6536414e475814f25f3f633e4d38eb493ef2e Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 8 Jul 2024 15:35:42 +0200 Subject: [PATCH 108/109] FIX: implementation to take isoalation width and mz into account for minitdf. This is quite generix and could/should update in the future --- src/io/readers/spectrum_reader/minitdf.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 937f7b5..84a6f4a 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -78,8 +78,14 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { spectrum.precursor = precursor; spectrum.index = precursor.index; spectrum.collision_energy = self.collision_energies[index]; - spectrum.isolation_mz = 0.0; //TODO - spectrum.isolation_width = 0.0; //TODO + spectrum.isolation_mz = precursor.mz; //FIX? + spectrum.isolation_width = if precursor.mz <= 700.0 { + 2.0 + } else if precursor.mz >= 800.0 { + 3.0 + } else { + 2.0 + (precursor.mz - 700.0) / 100.0 + }; //FIX? spectrum } From 38b69d9d59bf1c166a459d4b74ea541611a07797 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 8 Jul 2024 15:42:23 +0200 Subject: [PATCH 109/109] FIX: failing tests --- tests/spectrum_readers.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index fb3b9e9..085013f 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -35,8 +35,8 @@ fn minitdf_reader() { }, index: 1, collision_energy: 0.0, - isolation_mz: 0.0, - isolation_width: 0.0, + isolation_mz: 123.4567, + isolation_width: 2.0, }, Spectrum { mz_values: vec![1100.0, 1200.002, 1300.03, 1400.4], @@ -52,8 +52,8 @@ fn minitdf_reader() { }, index: 2, collision_energy: 0.0, - isolation_mz: 0.0, - isolation_width: 0.0, + isolation_mz: 987.6543, + isolation_width: 3.0, }, ]; for i in 0..spectra.len() {