Commit

Merge pull request #36 from MannLabs/spectrumreader_cleanup
Spectrumreader cleanup
sander-willems-bruker authored Dec 19, 2024
2 parents 1de3aeb + d4b71dd, commit aca1ded
Showing 29 changed files with 471 additions and 361 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
@@ -16,7 +16,7 @@ jobs:
     steps:
     - uses: actions/checkout@v3
     - name: Build
-      run: cargo build --verbose
+      run: cargo build --release --verbose
     - name: File sizes
       run: find src/ -name '*.rs' | xargs wc -l | sort -nr
     - name: Run tests
2 changes: 1 addition & 1 deletion benches/speed_performance.rs
@@ -2,7 +2,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use rayon::iter::ParallelIterator;
 #[cfg(feature = "tdf")]
 use timsrust::readers::FrameReader;
-use timsrust::readers::{SpectrumReader, SpectrumReaderConfig};
+use timsrust::readers::SpectrumReader;
 
 const DDA_TEST: &str =
     "/mnt/d/data/mpib/tims05_300SPD/20230505_TIMS05_PaSk_MA_HeLa_6min_ddaP_S1-C10_1_2323.d/";
1 change: 0 additions & 1 deletion src/domain_converters.rs
@@ -10,6 +10,5 @@ pub use tof_to_mz::Tof2MzConverter;
 /// Convert from one domain (e.g. Time of Flight) to another (m/z).
 pub trait ConvertableDomain {
     fn convert<T: Into<f64> + Copy>(&self, value: T) -> f64;
-
     fn invert<T: Into<f64> + Copy>(&self, value: T) -> f64;
 }
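The only change here is a deleted blank line; the trait itself is untouched. For orientation, a hedged sketch of how a `ConvertableDomain` implementor such as the re-exported `Tof2MzConverter` is typically round-tripped; the `timsrust::converters` module path is an assumption, not something this diff shows:

```rust
// Hedged sketch, not part of this diff: round-tripping a value through a
// ConvertableDomain implementor. The module path below is an assumption.
use timsrust::converters::{ConvertableDomain, Tof2MzConverter};

fn tof_roundtrip(converter: &Tof2MzConverter, tof_index: u32) -> (f64, f64) {
    let mz = converter.convert(tof_index); // TOF index -> m/z
    let tof = converter.invert(mz);        // m/z -> fractional TOF index
    (mz, tof)
}
```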
2 changes: 1 addition & 1 deletion src/errors.rs
@@ -2,7 +2,7 @@
 use crate::io::readers::{
     FrameReaderError, MetadataReaderError, QuadrupoleSettingsReaderError,
 };
-use crate::io::readers::{PrecursorReaderError, SpectrumReaderError};
+use crate::{io::readers::PrecursorReaderError, readers::SpectrumReaderError};
 
 /// An error that is produced by timsrust (uses [thiserror]).
 #[derive(thiserror::Error, Debug)]
2 changes: 2 additions & 0 deletions src/io/readers.rs
@@ -7,6 +7,7 @@ mod precursor_reader;
 #[cfg(feature = "tdf")]
 mod quad_settings_reader;
 mod spectrum_reader;
+mod timstof;
 
 #[cfg(feature = "tdf")]
 pub use frame_reader::*;
@@ -16,3 +17,4 @@ pub use precursor_reader::*;
 #[cfg(feature = "tdf")]
 pub use quad_settings_reader::*;
 pub use spectrum_reader::*;
+pub use timstof::*;
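The new `timstof` module re-exported here carries the `.d` path handling that the rest of this commit migrates the readers onto. A minimal sketch of the API shape as inferred from the call sites in the files below; the exact type and method signatures are assumptions:

```rust
// Hedged sketch, inferred from call sites elsewhere in this commit; the exact
// signatures of the timstof path API are assumptions.
use timsrust::readers::TimsTofPathLike;

fn locate_files(
    path: impl TimsTofPathLike,
) -> Result<(), Box<dyn std::error::Error>> {
    let path = path.to_timstof_path()?; // validates the .d layout
    let tdf = path.tdf()?;              // analysis.tdf (SQLite metadata)
    let tdf_bin = path.tdf_bin()?;      // analysis.tdf_bin (binary blobs)
    println!("{:?} / {:?}", tdf, tdf_bin);
    Ok(())
}
```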
18 changes: 12 additions & 6 deletions src/io/readers/file_readers/parquet_reader.rs
@@ -1,7 +1,10 @@
 pub mod precursors;
 
+use std::{fs::File, io, str::FromStr};
+
 use parquet::file::reader::{FileReader, SerializedFileReader};
-use std::{fs::File, io, path::Path, str::FromStr};
+
+use crate::readers::TimsTofPathError;
 
 pub trait ReadableParquetTable {
     fn update_from_parquet_file(&mut self, key: &str, value: String);
@@ -11,12 +14,13 @@ pub trait ReadableParquetTable {
     }
 
     fn from_parquet_file(
-        file_name: impl AsRef<Path>,
-    ) -> Result<Vec<Self>, ParquetError>
+        path: impl crate::readers::TimsTofPathLike,
+    ) -> Result<Vec<Self>, ParquetReaderError>
     where
         Self: Sized + Default,
     {
-        let file: File = File::open(file_name)?;
+        let path = path.to_timstof_path()?;
+        let file: File = File::open(path.ms2_parquet()?)?;
         let reader: SerializedFileReader<File> =
             SerializedFileReader::new(file)?;
         reader
@@ -36,9 +40,11 @@ pub trait ReadableParquetTable {
 }
 
 #[derive(Debug, thiserror::Error)]
-pub enum ParquetError {
+pub enum ParquetReaderError {
     #[error("{0}")]
     IO(#[from] io::Error),
     #[error("Cannot iterate over row {0}")]
-    ParquetIO(#[from] parquet::errors::ParquetError),
+    ParquetError(#[from] parquet::errors::ParquetError),
+    #[error("{0}")]
+    TimsTofPathError(#[from] TimsTofPathError),
 }
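Callers no longer point at the parquet file itself: any `TimsTofPathLike` value is resolved to its ms2 parquet via `ms2_parquet()`. A hedged usage sketch; `ParquetPrecursor` is an assumed name for an implementor living in the `precursors` submodule:

```rust
// Hedged sketch, not part of this diff. ParquetPrecursor is an assumed name
// for a ReadableParquetTable implementor from the precursors submodule.
use crate::io::readers::file_readers::parquet_reader::{
    precursors::ParquetPrecursor, ReadableParquetTable,
};
use crate::readers::TimsTofPathLike;

fn count_precursors(
    path: impl TimsTofPathLike,
) -> Result<usize, Box<dyn std::error::Error>> {
    // The reader resolves the ms2 parquet file inside the .d folder itself.
    let rows = ParquetPrecursor::from_parquet_file(path)?;
    Ok(rows.len())
}
```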
40 changes: 20 additions & 20 deletions src/io/readers/file_readers/sql_reader.rs
@@ -5,31 +5,29 @@ pub mod pasef_frame_msms;
 pub mod precursors;
 pub mod quad_settings;
 
-use std::{
-    collections::HashMap,
-    path::{Path, PathBuf},
-};
+use std::collections::HashMap;
 
 use rusqlite::{types::FromSql, Connection};
 
+use crate::readers::{TimsTofPathError, TimsTofPathLike};
+
 #[derive(Debug)]
 pub struct SqlReader {
     connection: Connection,
-    path: PathBuf,
 }
 
 impl SqlReader {
-    pub fn open(file_name: impl AsRef<Path>) -> Result<Self, SqlError> {
-        let path = file_name.as_ref().to_path_buf();
-        let connection = Connection::open(&path)?;
-        Ok(Self { connection, path })
+    pub fn open(path: impl TimsTofPathLike) -> Result<Self, SqlReaderError> {
+        let path = path.to_timstof_path()?;
+        let connection = Connection::open(&path.tdf()?)?;
+        Ok(Self { connection })
     }
 
     pub fn read_column_from_table<T: rusqlite::types::FromSql + Default>(
         &self,
         column_name: &str,
         table_name: &str,
-    ) -> Result<Vec<T>, SqlError> {
+    ) -> Result<Vec<T>, SqlReaderError> {
         let query = format!("SELECT {} FROM {}", column_name, table_name);
         let mut stmt = self.connection.prepare(&query)?;
         let rows = stmt.query_map([], |row| match row.get::<usize, T>(0) {
@@ -39,18 +37,14 @@ impl SqlReader {
         let result = rows.collect::<Result<Vec<_>, _>>()?;
         Ok(result)
     }
-
-    pub fn get_path(&self) -> PathBuf {
-        self.path.clone()
-    }
 }
 
 pub trait ReadableSqlTable {
     fn get_sql_query() -> String;
 
     fn from_sql_row(row: &rusqlite::Row) -> Self;
 
-    fn from_sql_reader(reader: &SqlReader) -> Result<Vec<Self>, SqlError>
+    fn from_sql_reader(reader: &SqlReader) -> Result<Vec<Self>, SqlReaderError>
     where
         Self: Sized,
     {
@@ -59,7 +53,9 @@ pub trait ReadableSqlTable {
         let rows = stmt.query_map([], |row| Ok(Self::from_sql_row(row)))?;
         let result = rows.collect::<Result<Vec<_>, _>>()?;
         if result.len() == 0 {
-            Err(SqlError(rusqlite::Error::QueryReturnedNoRows))
+            Err(SqlReaderError::SqlError(
+                rusqlite::Error::QueryReturnedNoRows,
+            ))
         } else {
             Ok(result)
         }
@@ -71,7 +67,7 @@ pub trait ReadableSqlHashMap {
 
     fn from_sql_reader(
         reader: &SqlReader,
-    ) -> Result<HashMap<String, String>, SqlError>
+    ) -> Result<HashMap<String, String>, SqlReaderError>
     where
         Self: Sized,
     {
@@ -99,6 +95,10 @@ impl ParseDefault for rusqlite::Row<'_> {
     }
 }
 
-#[derive(thiserror::Error, Debug)]
-#[error("{0}")]
-pub struct SqlError(#[from] rusqlite::Error);
+#[derive(Debug, thiserror::Error)]
+pub enum SqlReaderError {
+    #[error("{0}")]
+    SqlError(#[from] rusqlite::Error),
+    #[error("{0}")]
+    TimsTofPathError(#[from] TimsTofPathError),
+}
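`SqlReader` now stores only the connection: the path is resolved once in `open` via `tdf()`, which is why the stored `PathBuf` and the `get_path` accessor are gone. A hedged usage sketch, under the assumption that a plain `.d` path satisfies `TimsTofPathLike`:

```rust
// Hedged sketch, not part of this diff: open the analysis.tdf SQLite database
// inside a .d directory and read a single column from it.
use crate::io::readers::file_readers::sql_reader::SqlReader;
use crate::readers::TimsTofPathLike;

fn frame_ids(
    path: impl TimsTofPathLike,
) -> Result<Vec<i64>, Box<dyn std::error::Error>> {
    let reader = SqlReader::open(path)?; // resolves analysis.tdf internally
    let ids: Vec<i64> = reader.read_column_from_table("Id", "Frames")?;
    Ok(ids)
}
```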
83 changes: 56 additions & 27 deletions src/io/readers/file_readers/tdf_blob_reader.rs
@@ -3,62 +3,87 @@ mod tdf_blobs;
 use memmap2::Mmap;
 use std::fs::File;
 use std::io;
-use std::path::Path;
 pub use tdf_blobs::*;
 use zstd::decode_all;
 
+use crate::readers::{TimsTofFileType, TimsTofPathError, TimsTofPathLike};
+
 const U32_SIZE: usize = std::mem::size_of::<u32>();
 const HEADER_SIZE: usize = 2;
 
 #[derive(Debug)]
 pub struct TdfBlobReader {
-    mmap: Mmap,
-    global_file_offset: usize,
+    bin_file_reader: TdfBinFileReader,
 }
 
 impl TdfBlobReader {
-    // TODO parse compression1
-    pub fn new(
-        file_name: impl AsRef<Path>,
-    ) -> Result<Self, TdfBlobReaderError> {
-        let path = file_name.as_ref().to_path_buf();
-        let file = File::open(&path)?;
-        let mmap = unsafe { Mmap::map(&file)? };
-        let reader = Self {
-            mmap,
-            global_file_offset: 0,
-        };
+    pub fn new(path: impl TimsTofPathLike) -> Result<Self, TdfBlobReaderError> {
+        let bin_file_reader = TdfBinFileReader::new(path)?;
+        let reader = Self { bin_file_reader };
         Ok(reader)
     }
 
     pub fn get(&self, offset: usize) -> Result<TdfBlob, TdfBlobReaderError> {
-        let offset = self.global_file_offset + offset;
+        let offset = self.bin_file_reader.global_file_offset + offset;
         let byte_count = self
+            .bin_file_reader
             .get_byte_count(offset)
             .ok_or(TdfBlobReaderError::InvalidOffset(offset))?;
-        let compressed_bytes = self
-            .get_compressed_bytes(offset, byte_count)
+        let data = self
+            .bin_file_reader
+            .get_data(offset, byte_count)
            .ok_or(TdfBlobReaderError::CorruptData)?;
-        let bytes = decode_all(compressed_bytes)
-            .map_err(|_| TdfBlobReaderError::Decompression)?;
+        let bytes =
+            decode_all(data).map_err(|_| TdfBlobReaderError::Decompression)?;
         let blob = TdfBlob::new(bytes)?;
         Ok(blob)
     }
+}
 
+#[derive(Debug)]
+struct TdfBinFileReader {
+    mmap: Mmap,
+    global_file_offset: usize,
+}
+
+impl TdfBinFileReader {
+    // TODO parse compression1
+    fn new(path: impl TimsTofPathLike) -> Result<Self, TdfBlobReaderError> {
+        let path = path.to_timstof_path()?;
+        let bin_path = match path.file_type() {
+            #[cfg(feature = "tdf")]
+            TimsTofFileType::TDF => path.tdf_bin()?,
+            #[cfg(feature = "minitdf")]
+            TimsTofFileType::MiniTDF => path.ms2_bin()?,
+        };
+        let file = File::open(bin_path)?;
+        let mmap = unsafe { Mmap::map(&file)? };
+        let reader = Self {
+            mmap,
+            global_file_offset: 0,
+        };
+        Ok(reader)
+    }
+
     fn get_byte_count(&self, offset: usize) -> Option<usize> {
         let start = offset as usize;
-        let end = (offset + U32_SIZE) as usize;
+        let end = start + U32_SIZE as usize;
         let raw_byte_count = self.mmap.get(start..end)?;
         let byte_count =
             u32::from_le_bytes(raw_byte_count.try_into().ok()?) as usize;
         Some(byte_count)
     }
 
-    fn get_compressed_bytes(
-        &self,
-        offset: usize,
-        byte_count: usize,
-    ) -> Option<&[u8]> {
+    // fn get_scan_count(&self, offset: usize) -> Option<usize> {
+    //     let start = (offset + U32_SIZE) as usize;
+    //     let end = start + U32_SIZE as usize;
+    //     let raw_scan_count = self.mmap.get(start..end)?;
+    //     let scan_count =
+    //         u32::from_le_bytes(raw_scan_count.try_into().ok()?) as usize;
+    //     Some(scan_count)
    // }
+
+    fn get_data(&self, offset: usize, byte_count: usize) -> Option<&[u8]> {
         let start = offset + HEADER_SIZE * U32_SIZE;
         let end = offset + byte_count;
         self.mmap.get(start..end)
@@ -75,10 +100,10 @@ pub struct IndexedTdfBlobReader {
 #[cfg(feature = "minitdf")]
 impl IndexedTdfBlobReader {
     pub fn new(
-        file_name: impl AsRef<Path>,
+        path: impl TimsTofPathLike,
         binary_offsets: Vec<usize>,
     ) -> Result<Self, IndexedTdfBlobReaderError> {
-        let blob_reader = TdfBlobReader::new(file_name)?;
+        let blob_reader = TdfBlobReader::new(path)?;
         let reader = Self {
             binary_offsets,
             blob_reader: blob_reader,
@@ -111,6 +136,10 @@ pub enum TdfBlobReaderError {
     Decompression,
     #[error("Invalid offset {0}")]
     InvalidOffset(usize),
+    #[error("{0}")]
+    TimsTofPathError(#[from] TimsTofPathError),
+    #[error("No binary file found")]
+    NoBinary,
 }
 
 #[derive(Debug, thiserror::Error)]
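The mmap and offset bookkeeping now live in the private `TdfBinFileReader`, which also selects the right binary (`analysis.tdf_bin` for TDF, the ms2 binary for MiniTDF). The per-blob layout the reader implements: a header of two little-endian u32s (total byte count, then scan count, as in the commented-out `get_scan_count`), followed by a zstd-compressed payload. A hedged standalone sketch of that decoding, independent of the crate's types:

```rust
// Hedged sketch, not part of this commit: decoding one blob from an in-memory
// buffer, mirroring the layout TdfBinFileReader reads via mmap.
use zstd::decode_all;

const U32_SIZE: usize = std::mem::size_of::<u32>();
const HEADER_SIZE: usize = 2; // two u32s: byte count, then scan count

fn decode_blob(buffer: &[u8], offset: usize) -> Option<Vec<u8>> {
    // First header u32: total blob size in bytes, header included.
    let raw = buffer.get(offset..offset + U32_SIZE)?;
    let byte_count = u32::from_le_bytes(raw.try_into().ok()?) as usize;
    // The compressed payload runs from the end of the header to the blob end.
    let data = buffer.get(offset + HEADER_SIZE * U32_SIZE..offset + byte_count)?;
    decode_all(data).ok()
}
```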
(Diffs for the remaining 21 changed files were not loaded.)
