Spectrumreader cleanup #36

Merged · 7 commits · Dec 19, 2024
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
@@ -16,7 +16,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Build
run: cargo build --verbose
run: cargo build --release --verbose
- name: File sizes
run: find src/ -name '*.rs' | xargs wc -l | sort -nr
- name: Run tests
2 changes: 1 addition & 1 deletion benches/speed_performance.rs
@@ -2,7 +2,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rayon::iter::ParallelIterator;
#[cfg(feature = "tdf")]
use timsrust::readers::FrameReader;
use timsrust::readers::{SpectrumReader, SpectrumReaderConfig};
use timsrust::readers::SpectrumReader;

const DDA_TEST: &str =
"/mnt/d/data/mpib/tims05_300SPD/20230505_TIMS05_PaSk_MA_HeLa_6min_ddaP_S1-C10_1_2323.d/";
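Reviewer note: the bench now imports only `SpectrumReader`, so construction presumably needs nothing beyond a path. A minimal sketch of what the (collapsed) benchmark body might look like after this change; `SpectrumReader::new` and the placeholder path are assumptions, since the hunk shows the import change but not the call sites:

```rust
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use timsrust::readers::SpectrumReader;

const DDA_TEST: &str = "/path/to/some.d/"; // placeholder, not the real test path

fn criterion_benchmark(c: &mut Criterion) {
    c.bench_function("spectrum_reader_open", |b| {
        b.iter(|| {
            // Hypothetical constructor: a path is now all the reader needs,
            // with no separate SpectrumReaderConfig.
            let reader = SpectrumReader::new(DDA_TEST).unwrap();
            black_box(reader);
        })
    });
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
```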
1 change: 0 additions & 1 deletion src/domain_converters.rs
@@ -10,6 +10,5 @@ pub use tof_to_mz::Tof2MzConverter;
/// Convert from one domain (e.g. Time of Flight) to another (m/z).
pub trait ConvertableDomain {
fn convert<T: Into<f64> + Copy>(&self, value: T) -> f64;

fn invert<T: Into<f64> + Copy>(&self, value: T) -> f64;
}
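The trait contract is unchanged apart from the dropped blank line. For context, a toy linear converter is enough to satisfy it; this is an illustrative sketch only (and the import path is assumed), while `Tof2MzConverter` is the crate's real implementation:

```rust
use timsrust::domain_converters::ConvertableDomain; // re-export path assumed

/// Toy converter: y = slope * x + intercept.
struct LinearConverter {
    slope: f64,
    intercept: f64,
}

impl ConvertableDomain for LinearConverter {
    fn convert<T: Into<f64> + Copy>(&self, value: T) -> f64 {
        self.slope * value.into() + self.intercept
    }

    fn invert<T: Into<f64> + Copy>(&self, value: T) -> f64 {
        (value.into() - self.intercept) / self.slope
    }
}

// Round trip: invert(convert(x)) recovers x up to float error.
// let c = LinearConverter { slope: 2.0, intercept: 1.0 };
// assert!((c.invert(c.convert(100u32)) - 100.0).abs() < 1e-9);
```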
2 changes: 1 addition & 1 deletion src/errors.rs
@@ -2,7 +2,7 @@
use crate::io::readers::{
FrameReaderError, MetadataReaderError, QuadrupoleSettingsReaderError,
};
use crate::io::readers::{PrecursorReaderError, SpectrumReaderError};
use crate::{io::readers::PrecursorReaderError, readers::SpectrumReaderError};

/// An error that is produced by timsrust (uses [thiserror]).
#[derive(thiserror::Error, Debug)]
2 changes: 2 additions & 0 deletions src/io/readers.rs
@@ -7,6 +7,7 @@ mod precursor_reader;
#[cfg(feature = "tdf")]
mod quad_settings_reader;
mod spectrum_reader;
mod timstof;

#[cfg(feature = "tdf")]
pub use frame_reader::*;
@@ -16,3 +17,4 @@ pub use precursor_reader::*;
#[cfg(feature = "tdf")]
pub use quad_settings_reader::*;
pub use spectrum_reader::*;
pub use timstof::*;
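The new `timstof` module is re-exported wholesale, and the rest of the PR consumes it through a `TimsTofPathLike` trait. Its source is not displayed on this page; judging only from the call sites below (`to_timstof_path()`, `file_type()`, `tdf()`, `tdf_bin()`, `ms2_parquet()`, `ms2_bin()`), its shape is roughly the following reconstruction, not the actual code:

```rust
// Reconstructed from usage elsewhere in this diff; details are elided.
use std::path::PathBuf;

#[derive(Debug, thiserror::Error)]
#[error("not a recognized TimsTOF path")]
pub struct TimsTofPathError;

pub enum TimsTofFileType {
    TDF,     // .d folder with analysis.tdf + analysis.tdf_bin
    MiniTDF, // folder with *.ms2 parquet + binary blobs
}

pub struct TimsTofPath {
    root: PathBuf, // resolved data directory
}

impl TimsTofPath {
    pub fn file_type(&self) -> TimsTofFileType { unimplemented!() }
    pub fn tdf(&self) -> Result<PathBuf, TimsTofPathError> { unimplemented!() }
    pub fn tdf_bin(&self) -> Result<PathBuf, TimsTofPathError> { unimplemented!() }
    pub fn ms2_parquet(&self) -> Result<PathBuf, TimsTofPathError> { unimplemented!() }
    pub fn ms2_bin(&self) -> Result<PathBuf, TimsTofPathError> { unimplemented!() }
}

/// Anything that can resolve itself into a validated TimsTofPath.
pub trait TimsTofPathLike {
    fn to_timstof_path(&self) -> Result<TimsTofPath, TimsTofPathError>;
}
```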
18 changes: 12 additions & 6 deletions src/io/readers/file_readers/parquet_reader.rs
@@ -1,7 +1,10 @@
pub mod precursors;

use std::{fs::File, io, str::FromStr};

use parquet::file::reader::{FileReader, SerializedFileReader};
use std::{fs::File, io, path::Path, str::FromStr};

use crate::readers::TimsTofPathError;

pub trait ReadableParquetTable {
fn update_from_parquet_file(&mut self, key: &str, value: String);
@@ -11,12 +14,13 @@ pub trait ReadableParquetTable {
}

fn from_parquet_file(
file_name: impl AsRef<Path>,
) -> Result<Vec<Self>, ParquetError>
path: impl crate::readers::TimsTofPathLike,
) -> Result<Vec<Self>, ParquetReaderError>
where
Self: Sized + Default,
{
let file: File = File::open(file_name)?;
let path = path.to_timstof_path()?;
let file: File = File::open(path.ms2_parquet()?)?;
let reader: SerializedFileReader<File> =
SerializedFileReader::new(file)?;
reader
@@ -36,9 +40,11 @@ pub trait ReadableParquetTable {
}

#[derive(Debug, thiserror::Error)]
pub enum ParquetError {
pub enum ParquetReaderError {
#[error("{0}")]
IO(#[from] io::Error),
#[error("Cannot iterate over row {0}")]
ParquetIO(#[from] parquet::errors::ParquetError),
ParquetError(#[from] parquet::errors::ParquetError),
#[error("{0}")]
TimsTofPathError(#[from] TimsTofPathError),
}
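For reviewers unfamiliar with the trait: a type opts in by implementing `update_from_parquet_file`, and the provided `from_parquet_file` (which now resolves the ms2 parquet file via `TimsTofPathLike` instead of taking a raw path) walks the rows and feeds each column name/stringified cell pair into it. A hedged toy implementer; `ToyRow` is hypothetical, and this assumes no further required methods hide in the collapsed lines:

```rust
#[derive(Debug, Default)]
struct ToyRow {
    mz: f64,
    charge: usize,
}

impl ReadableParquetTable for ToyRow {
    // Called once per (column name, stringified cell) as the default
    // from_parquet_file iterates the row groups.
    fn update_from_parquet_file(&mut self, key: &str, value: String) {
        match key {
            "Mz" => self.mz = value.parse().unwrap_or_default(),
            "Charge" => self.charge = value.parse().unwrap_or_default(),
            _ => {} // ignore columns this type does not model
        }
    }
}

// let rows = ToyRow::from_parquet_file("/data/sample.d")?;
// (assumes plain paths get a TimsTofPathLike impl; not shown in this diff)
```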
40 changes: 20 additions & 20 deletions src/io/readers/file_readers/sql_reader.rs
@@ -5,31 +5,29 @@ pub mod pasef_frame_msms;
pub mod precursors;
pub mod quad_settings;

use std::{
collections::HashMap,
path::{Path, PathBuf},
};
use std::collections::HashMap;

use rusqlite::{types::FromSql, Connection};

use crate::readers::{TimsTofPathError, TimsTofPathLike};

#[derive(Debug)]
pub struct SqlReader {
connection: Connection,
path: PathBuf,
}

impl SqlReader {
pub fn open(file_name: impl AsRef<Path>) -> Result<Self, SqlError> {
let path = file_name.as_ref().to_path_buf();
let connection = Connection::open(&path)?;
Ok(Self { connection, path })
pub fn open(path: impl TimsTofPathLike) -> Result<Self, SqlReaderError> {
let path = path.to_timstof_path()?;
let connection = Connection::open(&path.tdf()?)?;
Ok(Self { connection })
}

pub fn read_column_from_table<T: rusqlite::types::FromSql + Default>(
&self,
column_name: &str,
table_name: &str,
) -> Result<Vec<T>, SqlError> {
) -> Result<Vec<T>, SqlReaderError> {
let query = format!("SELECT {} FROM {}", column_name, table_name);
let mut stmt = self.connection.prepare(&query)?;
let rows = stmt.query_map([], |row| match row.get::<usize, T>(0) {
@@ -39,18 +37,14 @@ impl SqlReader {
let result = rows.collect::<Result<Vec<_>, _>>()?;
Ok(result)
}

pub fn get_path(&self) -> PathBuf {
self.path.clone()
}
}

pub trait ReadableSqlTable {
fn get_sql_query() -> String;

fn from_sql_row(row: &rusqlite::Row) -> Self;

fn from_sql_reader(reader: &SqlReader) -> Result<Vec<Self>, SqlError>
fn from_sql_reader(reader: &SqlReader) -> Result<Vec<Self>, SqlReaderError>
where
Self: Sized,
{
@@ -59,7 +53,9 @@ pub trait ReadableSqlTable {
let rows = stmt.query_map([], |row| Ok(Self::from_sql_row(row)))?;
let result = rows.collect::<Result<Vec<_>, _>>()?;
if result.len() == 0 {
Err(SqlError(rusqlite::Error::QueryReturnedNoRows))
Err(SqlReaderError::SqlError(
rusqlite::Error::QueryReturnedNoRows,
))
} else {
Ok(result)
}
@@ -71,7 +67,7 @@ pub trait ReadableSqlHashMap {

fn from_sql_reader(
reader: &SqlReader,
) -> Result<HashMap<String, String>, SqlError>
) -> Result<HashMap<String, String>, SqlReaderError>
where
Self: Sized,
{
@@ -99,6 +95,10 @@ impl ParseDefault for rusqlite::Row<'_> {
}
}

#[derive(thiserror::Error, Debug)]
#[error("{0}")]
pub struct SqlError(#[from] rusqlite::Error);
#[derive(Debug, thiserror::Error)]
pub enum SqlReaderError {
#[error("{0}")]
SqlError(#[from] rusqlite::Error),
#[error("{0}")]
TimsTofPathError(#[from] TimsTofPathError),
}
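Same pattern on the SQLite side: implementers supply the query and the per-row mapping, `SqlReader` no longer stores the path (the `Connection` owns it, so `get_path` could go), and the provided `from_sql_reader` maps an empty result set to `SqlReaderError::SqlError(QueryReturnedNoRows)`. A toy implementer, with `ToyFrame` as a hypothetical stand-in for the crate's real tables:

```rust
#[derive(Debug)]
struct ToyFrame {
    id: i64,
    time: f64,
}

impl ReadableSqlTable for ToyFrame {
    fn get_sql_query() -> String {
        "SELECT Id, Time FROM Frames".to_string()
    }

    // Infallible by design: bad cells fall back to defaults instead of
    // aborting the whole table read.
    fn from_sql_row(row: &rusqlite::Row) -> Self {
        Self {
            id: row.get(0).unwrap_or_default(),
            time: row.get(1).unwrap_or_default(),
        }
    }
}

// let reader = SqlReader::open("/data/sample.d")?; // resolves analysis.tdf
// let frames = ToyFrame::from_sql_reader(&reader)?; // errors on zero rows
```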
83 changes: 56 additions & 27 deletions src/io/readers/file_readers/tdf_blob_reader.rs
@@ -3,62 +3,87 @@ mod tdf_blobs;
use memmap2::Mmap;
use std::fs::File;
use std::io;
use std::path::Path;
pub use tdf_blobs::*;
use zstd::decode_all;

use crate::readers::{TimsTofFileType, TimsTofPathError, TimsTofPathLike};

const U32_SIZE: usize = std::mem::size_of::<u32>();
const HEADER_SIZE: usize = 2;

#[derive(Debug)]
pub struct TdfBlobReader {
mmap: Mmap,
global_file_offset: usize,
bin_file_reader: TdfBinFileReader,
}

impl TdfBlobReader {
// TODO parse compression1
pub fn new(
file_name: impl AsRef<Path>,
) -> Result<Self, TdfBlobReaderError> {
let path = file_name.as_ref().to_path_buf();
let file = File::open(&path)?;
let mmap = unsafe { Mmap::map(&file)? };
let reader = Self {
mmap,
global_file_offset: 0,
};
pub fn new(path: impl TimsTofPathLike) -> Result<Self, TdfBlobReaderError> {
let bin_file_reader = TdfBinFileReader::new(path)?;
let reader = Self { bin_file_reader };
Ok(reader)
}

pub fn get(&self, offset: usize) -> Result<TdfBlob, TdfBlobReaderError> {
let offset = self.global_file_offset + offset;
let offset = self.bin_file_reader.global_file_offset + offset;
let byte_count = self
.bin_file_reader
.get_byte_count(offset)
.ok_or(TdfBlobReaderError::InvalidOffset(offset))?;
let compressed_bytes = self
.get_compressed_bytes(offset, byte_count)
let data = self
.bin_file_reader
.get_data(offset, byte_count)
.ok_or(TdfBlobReaderError::CorruptData)?;
let bytes = decode_all(compressed_bytes)
.map_err(|_| TdfBlobReaderError::Decompression)?;
let bytes =
decode_all(data).map_err(|_| TdfBlobReaderError::Decompression)?;
let blob = TdfBlob::new(bytes)?;
Ok(blob)
}
}

#[derive(Debug)]
struct TdfBinFileReader {
mmap: Mmap,
global_file_offset: usize,
}

impl TdfBinFileReader {
// TODO parse compression1
fn new(path: impl TimsTofPathLike) -> Result<Self, TdfBlobReaderError> {
let path = path.to_timstof_path()?;
let bin_path = match path.file_type() {
#[cfg(feature = "tdf")]
TimsTofFileType::TDF => path.tdf_bin()?,
#[cfg(feature = "minitdf")]
TimsTofFileType::MiniTDF => path.ms2_bin()?,
};
let file = File::open(bin_path)?;
let mmap = unsafe { Mmap::map(&file)? };
let reader = Self {
mmap,
global_file_offset: 0,
};
Ok(reader)
}

fn get_byte_count(&self, offset: usize) -> Option<usize> {
let start = offset as usize;
let end = (offset + U32_SIZE) as usize;
let end = start + U32_SIZE as usize;
let raw_byte_count = self.mmap.get(start..end)?;
let byte_count =
u32::from_le_bytes(raw_byte_count.try_into().ok()?) as usize;
Some(byte_count)
}

fn get_compressed_bytes(
&self,
offset: usize,
byte_count: usize,
) -> Option<&[u8]> {
// fn get_scan_count(&self, offset: usize) -> Option<usize> {
// let start = (offset + U32_SIZE) as usize;
// let end = start + U32_SIZE as usize;
// let raw_scan_count = self.mmap.get(start..end)?;
// let scan_count =
// u32::from_le_bytes(raw_scan_count.try_into().ok()?) as usize;
// Some(scan_count)
// }

fn get_data(&self, offset: usize, byte_count: usize) -> Option<&[u8]> {
let start = offset + HEADER_SIZE * U32_SIZE;
let end = offset + byte_count;
self.mmap.get(start..end)
@@ -75,10 +100,10 @@ pub struct IndexedTdfBlobReader {
#[cfg(feature = "minitdf")]
impl IndexedTdfBlobReader {
pub fn new(
file_name: impl AsRef<Path>,
path: impl TimsTofPathLike,
binary_offsets: Vec<usize>,
) -> Result<Self, IndexedTdfBlobReaderError> {
let blob_reader = TdfBlobReader::new(file_name)?;
let blob_reader = TdfBlobReader::new(path)?;
let reader = Self {
binary_offsets,
blob_reader: blob_reader,
@@ -111,6 +136,10 @@ pub enum TdfBlobReaderError {
Decompression,
#[error("Invalid offset {0}")]
InvalidOffset(usize),
#[error("{0}")]
TimsTofPathError(#[from] TimsTofPathError),
#[error("No binary file found")]
NoBinary,
}

#[derive(Debug, thiserror::Error)]
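The split into `TdfBlobReader` and `TdfBinFileReader` makes the on-disk layout easier to see: each blob starts with a two-`u32` header, where bytes 0..4 hold the total byte count and bytes 4..8 the scan count (per the commented-out `get_scan_count`), followed by a zstd-compressed payload that `get()` decompresses into a `TdfBlob`. A minimal read sketch; the `&str`-as-path usage assumes a `TimsTofPathLike` impl for plain paths, and `offset` would normally come from the frame/precursor tables rather than a literal:

```rust
fn read_one_blob(
    path: &str,
    offset: usize,
) -> Result<TdfBlob, TdfBlobReaderError> {
    // Picks analysis.tdf_bin (TDF) or the minitdf binary, per file_type().
    let reader = TdfBlobReader::new(path)?;
    // Reads the u32 byte count, skips the 8-byte header, zstd-decodes.
    reader.get(offset)
}
```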