From 8b2588bbf6805af204011d25228420919bd78d8c Mon Sep 17 00:00:00 2001 From: Sebastian Wehner Date: Fri, 3 Nov 2023 12:00:09 +0100 Subject: [PATCH 1/3] feat: updated to miniTDF conventions. Adapted test-file names accordingly. --- README.md | 14 +++++++++---- .../spectrum_readers/mini_tdf_reader.rs | 19 ++++++++++++++++-- tests/test.ms2/converter.ms2spectrum.parquet | Bin 0 -> 10560 bytes ...rter.ms2.bin => converter.ms2spectrum.bin} | Bin ....parquet => converter.ms2spectrum.parquet} | Bin 5 files changed, 27 insertions(+), 6 deletions(-) create mode 100644 tests/test.ms2/converter.ms2spectrum.parquet rename tests/test2.ms2/{converter.ms2.bin => converter.ms2spectrum.bin} (100%) rename tests/test2.ms2/{converter.MS2Spectra.ms2.parquet => converter.ms2spectrum.parquet} (100%) diff --git a/README.md b/README.md index 73965a4..4427dd9 100644 --- a/README.md +++ b/README.md @@ -24,12 +24,18 @@ Two primary data types are exposed through TimsRust: ### File formats Two file formats are supported: -* Bruker .d folder containing: +* TDF - Bruker .d folder containing: * analysis.tdf * analysis.tdf_bin -* Bruker .ms2 folder containing: - * converter.ms2.bin - * converter.MS2Spectra.ms2.parquet + + +* miniTDF - ProteoScape optimized Bruker file-format. Similar to TDF, miniTDF consists of multiple files: a binary '.bin' + and an index '.parquet' file. The file names follow this convention: `<producing-engine-name>.<domain-name>.<extension>`. + e.g. for MS2 spectrum information: `<producing-engine-name>.ms2spectrum.<extension>`. 
Therefore the following files are expected + in the provided ms2 folder: + * *.ms2spectrum.bin + * *.ms2spectrum.parquet + ## Python bindings diff --git a/src/file_readers/spectrum_readers/mini_tdf_reader.rs b/src/file_readers/spectrum_readers/mini_tdf_reader.rs index ae3cac1..d1aad1f 100644 --- a/src/file_readers/spectrum_readers/mini_tdf_reader.rs +++ b/src/file_readers/spectrum_readers/mini_tdf_reader.rs @@ -1,3 +1,4 @@ +use std::fs; use { crate::{ file_readers::{ @@ -23,6 +24,18 @@ pub struct MiniTDFReader { frame_reader: BinFileReader, } +fn find_ms2spectrum_file(ms2_dir_path: &str, extension: String) -> String { + let files = fs::read_dir(ms2_dir_path).unwrap(); + for file in files { + let filename = file.unwrap().path().file_name().unwrap().to_str().unwrap().to_owned(); + if filename.ends_with( std::format!("ms2spectrum.{}", extension).as_str()) { + return filename + } + } + panic!("No '*.ms2spectrum.{}' file found in '{}'", extension, ms2_dir_path) +} + + impl MiniTDFReader { pub fn new(path_name: String) -> Self { let mut reader: MiniTDFReader = Self::default(); @@ -35,7 +48,8 @@ impl MiniTDFReader { fn read_parquet_file_name(&mut self) { let mut path: PathBuf = PathBuf::from(&self.path_name); - path.push("converter.MS2Spectra.ms2.parquet"); + let ms2_parquet_file = find_ms2spectrum_file(&self.path_name, "parquet".to_owned()); + path.push(ms2_parquet_file); self.parquet_file_name = path.to_string_lossy().into_owned(); } @@ -45,7 +59,8 @@ impl MiniTDFReader { } fn set_spectrum_reader(&mut self) { let mut path: PathBuf = PathBuf::from(&self.path_name); - path.push("converter.ms2.bin"); + let ms2_bin_file = find_ms2spectrum_file(&self.path_name, "bin".to_owned()); + path.push(ms2_bin_file); let file_name: String = path.to_string_lossy().into_owned(); self.frame_reader = BinFileReader::new(String::from(&file_name), self.offsets.clone()); diff --git a/tests/test.ms2/converter.ms2spectrum.parquet b/tests/test.ms2/converter.ms2spectrum.parquet new file mode 100644 
index 0000000000000000000000000000000000000000..d7cb7300b56bfbae9b5627ab97139592f69a6732 GIT binary patch literal 10560 zcmdTKTZkLib+o%EvTMiN)KazWCTtaFnl@{%q+K~tFxVsQN;}F*-qo&NQ;It?nw4gE zW~4|XX|={UpZP4Mlr)r*4}bJ4-|bH*Erfog#n4hp2>tR$DWMPuftJwr+&k}wY=_7y z9lWD^&$;)!?>XlV-lfWk@Ip8qzOoYzhKXkgBDxSD&IGQ_5!y4a|AvY1#c*^L7A?YG za5)+>FwsyzUlSo-^nn8M>?K4l7?_*CM34^x%i)Ek)FKgFiq6r^@LVJq`7jXqC_t9U z&=;Y*v-j4db-IJ2S& z9|#iUM+=thp?#iJik+r_HTY#9^6LQk!94l1dC1j)^Gt^M%u`R)&({Ij` zpPl>v(&_WBS~@*l>?j?v-%&eVk#{Ke_eJvOi(gtX$Eos3Z>|hsr8i?OiVQClBFM}T z^K;}oVJkxfvuzZ9IPA3J^uvqf{jY&0UNRGi`-Yt($KeQump3mM7(L2iYQ{E0=K;KshhdX}%0eU{Yt|hQfe3G28!bozT zTu02jP`fhVj`zN41=vET^IF{IjCa35{$mOB@DiCo`q6qAmCy(ouTu<+vub*wE~Zeieq4`qk)X)g#rOb#d-4uOR;kqQDJ-I zn}>&eLB$S&-sZyq`PuX2yU&ACUTPC~JlaA3(I0P+8JK9u7$GLc*n)y0_LKkCi#9#> z)thh`_S2J zDMGJh&tq?Wm;Bpz{Nij97e7^&KEIG6qQO4j7GyTO3@zvjbEvJ=24;^)gwF!p*=Ff5 zM~kAttn#kSU8PZ@tydRqVkK&A2hMf63`V-prk<0hfgN$HgX|%ok#$>>hy<6Gmp*=y zJ^d_t?wR$%p8~R&qv_kDOg)iu@}a z36i16{m4ZEn!RYS%PLK_AN}ox+c8mT3MaStjx>;!e(cqi@7<0a0}Mv79;>v3*y>75 zfvyYy%0QA})>qk{3ez|qce*fK$3p=P6L-E36OG<2OkB{(I;k9rg4Dd_glS^kQpa7y zE;dj^9zLmH2?la^Y$7_`k-)16E(A;intftoT~=eG(kZ!dnz0%)HWyo@cli=i33isM|F;D`B`HogLQ z`8iNiWQp$TUTy}|#mE+_PVH=nhv{c@$X@+pm|g--;;;V85k$ znY*!5WNg1vcp1B~Q)KMA2YPw?EO4>6MU&I#HiX;jxH7^`TJ7|3!IDI-2j2%!z3PtH zUavDklEBwobVWtbMDj}fN<0QNC8o2tBd=^+*@)dCqUWe``DX1^?b;cA=eafXB&Z#V zqW0hqJ+D#J29;@1If`mg*%pRnG7kI*0k=&2Zfjec!y8ktk0Ha3we8^|%tMq2EPiG6K&3tu)=oIUTCc-z8 z>3*}87}S#*Z>DbY95iYP39aRnYM;v@eoU9CogCMbK78{zkUe)=UoWTlJd%kLo3&II z=pQrtG~x|1CPqRv9T(~uOqZ6^A}d#qnpuojnR>YmG~$hYkaxQ;GPTqZmn?TVnaA|1 z)pCC4pd}uN~x_mSK-UI&e2Zg5aDZ6LF?YM;raP218^ z=?LPW1bS*dRWjgT@L|GkCp+I%f~#jzb~{6C#7jq-zFe&uZ`8`|W`4_zi^Kg<3u4H| zvjDPJ2hF@Rl0d03x6N$+IQd9~tgug*xkoKu*}WEnkcW@kr;uhfvOmxj%Jdc@XBhJC#8c$^q= za=ObW({XM+(`^_wpPj6mEcWHZm~`NIpS?4PCU00*ZQ8{Y1BBq z?^*{N*2_a))OsC|SA98#kIq=HMEdw<9(P3rjMr-G{zKFSuc+vpQr~(M&^@MEE7c={HY9Xw~Ggc zJXwEv5W`Fs>H_q}k`cF<%V<}Z2etd~tQqq?8j~(yC?4VwroZKzPcYuxuhYA^`?995 
zP7hratgD1>x-BawV)W+bOPYOk{qm>!T(>t|PT;ZG8_Ac!`5x~t?da=iN`4f7v~>&e zE%bvBJ8=Qc3oiaelfPzP4}W~0G3qkhXI%St&HWSN1;uB^+oOP*>=5o}I9{j`x~idt z)Cs&PibpS7ykJ+^dn|;P&Hb5{Yri|*N@%^Ok3_4A;6ohmG$rlA5XrMw#6jVXk4*&4 zdHQQ~pM-44+ANy#Xz}mX2j3Sf$x@qxo{Y&?@V*m+zaC!@1b}EEx)54OFgV4g zQMPn{VYn?`5a~>Tfj1moIA^ebgyhp7jTN2{(UmUSdwn3Nm-%k@GQN1P=ojl1?aICM U@iIX?=0D<}@V^Om;r~wl7lyMu*#H0l literal 0 HcmV?d00001 diff --git a/tests/test2.ms2/converter.ms2.bin b/tests/test2.ms2/converter.ms2spectrum.bin similarity index 100% rename from tests/test2.ms2/converter.ms2.bin rename to tests/test2.ms2/converter.ms2spectrum.bin diff --git a/tests/test2.ms2/converter.MS2Spectra.ms2.parquet b/tests/test2.ms2/converter.ms2spectrum.parquet similarity index 100% rename from tests/test2.ms2/converter.MS2Spectra.ms2.parquet rename to tests/test2.ms2/converter.ms2spectrum.parquet From 09fa88dbf2046e507b58a8df7e011d8810c196bb Mon Sep 17 00:00:00 2001 From: Sebastian Wehner Date: Mon, 6 Nov 2023 09:59:02 +0100 Subject: [PATCH 2/3] fix: throw error instead of panicking when expected minitdf file formats are not found in provided path. 
--- .../spectrum_readers/mini_tdf_reader.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/file_readers/spectrum_readers/mini_tdf_reader.rs b/src/file_readers/spectrum_readers/mini_tdf_reader.rs index d1aad1f..999bd1d 100644 --- a/src/file_readers/spectrum_readers/mini_tdf_reader.rs +++ b/src/file_readers/spectrum_readers/mini_tdf_reader.rs @@ -14,6 +14,7 @@ use { rayon::prelude::*, std::path::PathBuf, }; +use crate::file_readers::FileFormatError; #[derive(Debug, Default, Clone)] pub struct MiniTDFReader { @@ -24,15 +25,21 @@ pub struct MiniTDFReader { frame_reader: BinFileReader, } -fn find_ms2spectrum_file(ms2_dir_path: &str, extension: String) -> String { +fn find_ms2spectrum_file(ms2_dir_path: &str, extension: String) -> Result<String, FileFormatError> { let files = fs::read_dir(ms2_dir_path).unwrap(); for file in files { let filename = file.unwrap().path().file_name().unwrap().to_str().unwrap().to_owned(); if filename.ends_with( std::format!("ms2spectrum.{}", extension).as_str()) { - return filename + return Ok(filename) } } - panic!("No '*.ms2spectrum.{}' file found in '{}'", extension, ms2_dir_path) + let err = match extension.as_str() { + "parquet" => FileFormatError::MetadataFilesAreMissing, + "bin" => FileFormatError::BinaryFilesAreMissing, + _ => FileFormatError::BinaryFilesAreMissing + }; + println!("{}", format!("No '*.ms2spectrum.{}' file found in '{}'", extension, ms2_dir_path)); + return Err(err); } @@ -48,7 +55,7 @@ impl MiniTDFReader { fn read_parquet_file_name(&mut self) { let mut path: PathBuf = PathBuf::from(&self.path_name); - let ms2_parquet_file = find_ms2spectrum_file(&self.path_name, "parquet".to_owned()); + let ms2_parquet_file = find_ms2spectrum_file(&self.path_name, "parquet".to_owned()).unwrap(); path.push(ms2_parquet_file); self.parquet_file_name = path.to_string_lossy().into_owned(); } @@ -59,7 +66,7 @@ impl MiniTDFReader { } fn set_spectrum_reader(&mut self) { let mut path: PathBuf = PathBuf::from(&self.path_name); 
- let ms2_bin_file = find_ms2spectrum_file(&self.path_name, "bin".to_owned()); + let ms2_bin_file = find_ms2spectrum_file(&self.path_name, "bin".to_owned()).unwrap(); path.push(ms2_bin_file); let file_name: String = path.to_string_lossy().into_owned(); self.frame_reader = From 58589bdf2976da7a258bf5f0723ccde3a6f97bfe Mon Sep 17 00:00:00 2001 From: Sebastian Wehner Date: Mon, 6 Nov 2023 10:01:15 +0100 Subject: [PATCH 3/3] fix: provided directory is either .d for tdf or anything for minitdf. --- src/file_readers/file_formats.rs | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/file_readers/file_formats.rs b/src/file_readers/file_formats.rs index 808ed8b..82f3edf 100644 --- a/src/file_readers/file_formats.rs +++ b/src/file_readers/file_formats.rs @@ -20,15 +20,7 @@ impl FileFormat { .unwrap_or_default(); let format = match extension { "d" => Self::DFolder(path), - "ms2" => Self::MS2Folder(path), - _ => { - if let Some(path) = path.parent() { - // Only recurse if there is a valid parent section, - // otherwise we'll get a stack overflow - return Self::parse(path); - } - return Err(FileFormatError::NoParentWithBrukerExtension); - }, + _ => Self::MS2Folder(path) }; format.is_valid()?; Ok(format)