From 12ea3ade42d0859218d890a5331363a4ee0dc7f1 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 10 Jul 2024 15:27:17 +0200 Subject: [PATCH 01/69] FIX: correct RT in mgf export --- src/io/writers/mgf.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/io/writers/mgf.rs b/src/io/writers/mgf.rs index 30ab5d2..e7693d6 100644 --- a/src/io/writers/mgf.rs +++ b/src/io/writers/mgf.rs @@ -35,7 +35,7 @@ impl MGFEntry { let intensity = precursor.intensity.unwrap_or(0.0); let charge = precursor.charge.unwrap_or(0); let ms2_data = format!( - "TITLE=index:{}, im:{:.4}, intensity:{:.4}, frame:{}, ce:{:.4}\nPEPMASS={:.4}\nCHARGE={}\nRT={:.2}\n", + "TITLE=index:{}, im:{:.4}, intensity:{:.4}, frame:{}, ce:{:.4}\nPEPMASS={:.4}\nCHARGE={}\nRTINSECONDS={:.2}\n", title, precursor.im, intensity, precursor.frame_index, spectrum.collision_energy, precursor.mz, charge, precursor.rt ); ms2_data From b5450f4c119080e1f5f5f094819af75dd2f5cfb5 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 10 Jul 2024 15:54:53 +0200 Subject: [PATCH 02/69] FIX: update test suites --- tests/frame_readers.rs | 13 ++++++++----- tests/spectrum_readers.rs | 8 +++----- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index 8804a32..21ac3eb 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -1,7 +1,8 @@ +use rayon::iter::ParallelIterator; use std::{path::Path, sync::Arc}; use timsrust::{ + io::readers::FrameReader, ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}, - FileReader, }; fn get_local_directory() -> &'static Path { @@ -18,8 +19,9 @@ fn tdf_reader_frames1() { .to_str() .unwrap() .to_string(); - let frames: Vec = - FileReader::new(&file_path).unwrap().read_all_ms1_frames(); + let frames: Vec = FrameReader::new(&file_path) + .parallel_filter(|x| x.msms_type == 0) + .collect(); let expected: Vec = vec![ Frame { scan_offsets: vec![0, 1, 3, 6, 10], @@ -61,8 +63,9 @@ fn 
tdf_reader_frames2() { .to_str() .unwrap() .to_string(); - let frames: Vec = - FileReader::new(&file_path).unwrap().read_all_ms2_frames(); + let frames: Vec = FrameReader::new(&file_path) + .parallel_filter(|x| x.msms_type != 0) + .collect(); let expected: Vec = vec![ // Frame::default(), Frame { diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 085013f..78637e1 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -1,7 +1,7 @@ use std::path::Path; use timsrust::{ + io::readers::SpectrumReader, ms_data::{Precursor, Spectrum}, - FileReader, }; fn get_local_directory() -> &'static Path { @@ -18,8 +18,7 @@ fn minitdf_reader() { .to_str() .unwrap() .to_string(); - let spectra: Vec = - FileReader::new(file_path).unwrap().read_all_spectra(); + let spectra: Vec = SpectrumReader::new(file_path).get_all(); let expected: Vec = vec![ Spectrum { mz_values: vec![100.0, 200.002, 300.03, 400.4], @@ -69,8 +68,7 @@ fn tdf_reader_dda() { .to_str() .unwrap() .to_string(); - let spectra: Vec = - FileReader::new(file_path).unwrap().read_all_spectra(); + let spectra: Vec = SpectrumReader::new(file_path).get_all(); let expected: Vec = vec![ Spectrum { mz_values: vec![199.7633445943076], From 12124d9a76e55679cadf9ee356bab6b5a5055407 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 10 Jul 2024 15:55:20 +0200 Subject: [PATCH 03/69] CHORE: dependency updates --- Cargo.lock | 685 +++++++++++++++++++++++------------------------------ Cargo.toml | 10 +- 2 files changed, 300 insertions(+), 395 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1795706..32a722e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10,20 +10,9 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "ahash" -version = "0.7.7" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a824f2aa7e75a0c98c5a504fceb80649e9c35265d44525b5f94de4771a395cd" -dependencies = [ - "getrandom", - 
"once_cell", - "version_check", -] - -[[package]] -name = "ahash" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd7d5a2cecb58716e47d67d5703a249964b14c7be1ec3cad3affc295b2d1c35d" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", "const-random", @@ -35,9 +24,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] @@ -78,19 +67,25 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" +[[package]] +name = "anstyle" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" + [[package]] name = "arrow-array" version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea9a0fd21121304cad96f307c938d861cb1e7f0c151b93047462cd9817d760fb" dependencies = [ - "ahash 0.8.5", + "ahash", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "half 2.2.1", - "hashbrown 0.14.0", + "half", + "hashbrown", "num", ] @@ -100,7 +95,7 @@ version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30ce342ecf5971004e23cef8b5fb3bacd2bbc48a381464144925074e1472e9eb" dependencies = [ - "half 2.2.1", + "half", "num", ] @@ -116,7 +111,7 @@ dependencies = [ "arrow-schema", "arrow-select", "chrono", - "half 2.2.1", + "half", "lexical-core", "num", ] @@ -129,7 +124,7 @@ checksum = "1d9a83dad6a53d6907765106d3bc61d6d9d313cfe1751701b3ef0948e7283dc2" dependencies = [ "arrow-buffer", "arrow-schema", - "half 2.2.1", + "half", "num", 
] @@ -166,28 +161,17 @@ dependencies = [ "num", ] -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", -] - [[package]] name = "autocfg" -version = "1.1.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "base64" -version = "0.21.2" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "bitflags" @@ -197,15 +181,15 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.2.1" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24a6904aef64d73cf10ab17ebace7befb918b82164785cb89907993be7f83813" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" [[package]] name = "brotli" -version = "3.3.4" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" +checksum = "d640d25bc63c50fb1f0b545ffd80207d2e10a4c965530809b40ba3386825c391" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -214,9 +198,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "2.3.4" +version = "2.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b6561fd3f895a11e8f72af2cb7d22e08366bebc2b6b57f7744c4bda27034744" +checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" 
dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -224,27 +208,27 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.13.0" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytemuck" -version = "1.13.1" +version = "1.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea" +checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" [[package]] name = "byteorder" -version = "1.4.3" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.4.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" [[package]] name = "cast" @@ -254,11 +238,13 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.0.79" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +checksum = "eaff6f8ce506b9773fa786672d63fc7a191ffea1be33f72bbd4aeacefca9ffc8" dependencies = [ "jobserver", + "libc", + "once_cell", ] [[package]] @@ -269,21 +255,21 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.26" +version = "0.4.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ec837a71355b28f6556dbd569b37b3f363091c0bd4b2e735674521b4c5fd9bc5" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" dependencies = [ "android-tzdata", "iana-time-zone", "num-traits", - "winapi", + "windows-targets", ] [[package]] name = "ciborium" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" dependencies = [ "ciborium-io", "ciborium-ll", @@ -292,93 +278,95 @@ dependencies = [ [[package]] name = "ciborium-io" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" [[package]] name = "ciborium-ll" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" dependencies = [ "ciborium-io", - "half 1.8.2", + "half", ] [[package]] name = "clap" -version = "3.2.25" +version = "4.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" +checksum = "64acc1846d54c1fe936a78dc189c34e28d3f5afc348403f28ecf53660b9b8462" dependencies = [ - "bitflags 1.3.2", - "clap_lex", - "indexmap", - "textwrap", + "clap_builder", ] [[package]] -name = "clap_lex" -version = "0.2.4" +name = "clap_builder" +version = "4.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" +checksum = "6fb8393d67ba2e7bfaf28a23458e4e2b543cc73a99595511eb207fdb8aede942" dependencies = [ - 
"os_str_bytes", + "anstyle", + "clap_lex", ] +[[package]] +name = "clap_lex" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" + [[package]] name = "const-random" -version = "0.1.15" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368a7a772ead6ce7e1de82bfb04c485f3db8ec744f72925af5735e29a22cc18e" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" dependencies = [ "const-random-macro", - "proc-macro-hack", ] [[package]] name = "const-random-macro" -version = "0.1.15" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d7d6ab3c3a2282db210df5f02c4dab6e0a7057af0fb7ebd4070f30fe05c0ddb" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ "getrandom", "once_cell", - "proc-macro-hack", "tiny-keccak", ] [[package]] name = "core-foundation-sys" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" [[package]] name = "crc32fast" -version = "1.3.2" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" dependencies = [ "cfg-if", ] [[package]] name = "criterion" -version = "0.4.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" dependencies = [ "anes", - "atty", "cast", "ciborium", "clap", "criterion-plot", + "is-terminal", 
"itertools", - "lazy_static", "num-traits", + "once_cell", "oorandom", "plotters", "rayon", @@ -400,48 +388,30 @@ dependencies = [ "itertools", ] -[[package]] -name = "crossbeam-channel" -version = "0.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" -dependencies = [ - "cfg-if", - "crossbeam-utils", -] - [[package]] name = "crossbeam-deque" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" dependencies = [ - "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" -version = "0.9.14" +version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ - "autocfg", - "cfg-if", "crossbeam-utils", - "memoffset", - "scopeguard", ] [[package]] name = "crossbeam-utils" -version = "0.8.15" +version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" -dependencies = [ - "cfg-if", -] +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" [[package]] name = "crunchy" @@ -462,15 +432,15 @@ dependencies = [ [[package]] name = "either" -version = "1.8.1" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "fallible-iterator" -version = "0.2.0" +version = "0.3.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" [[package]] name = "fallible-streaming-iterator" @@ -490,9 +460,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.26" +version = "1.0.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743" +checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" dependencies = [ "crc32fast", "miniz_oxide", @@ -500,9 +470,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.9" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "libc", @@ -511,74 +481,51 @@ dependencies = [ [[package]] name = "half" -version = "1.8.2" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" - -[[package]] -name = "half" -version = "2.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b4af3693f1b705df946e9fe5631932443781d0aabb423b62fcd4d73f6d2fd0" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" dependencies = [ + "cfg-if", "crunchy", "num-traits", ] [[package]] name = "hashbrown" -version = "0.12.3" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ - "ahash 0.7.7", + "ahash", ] -[[package]] -name = "hashbrown" -version = "0.14.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" - [[package]] name = "hashlink" -version = "0.8.1" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69fe1fcf8b4278d860ad0548329f892a3631fb63f82574df68275f34cdbe0ffa" +checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" dependencies = [ - "hashbrown 0.12.3", + "hashbrown", ] [[package]] name = "hermit-abi" -version = "0.1.19" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "hermit-abi" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" -dependencies = [ - "libc", -] +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "iana-time-zone" -version = "0.1.57" +version = "0.1.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows", + "windows-core", ] [[package]] @@ -590,22 +537,23 @@ dependencies = [ "cc", ] -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - [[package]] name = "integer-encoding" version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" +[[package]] +name = "is-terminal" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys", +] + [[package]] name = "itertools" version = "0.10.5" @@ -617,34 +565,28 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "jobserver" -version = "0.1.26" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2" +checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" dependencies = [ "libc", ] [[package]] name = "js-sys" -version = "0.3.67" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a1d36f1235bc969acba30b7f5990b864423a6068a10f7c90ae8f0112e3a59d1" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" dependencies = [ "wasm-bindgen", ] -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - [[package]] name = "lexical-core" version = "0.8.5" @@ -711,21 +653,21 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.152" +version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" [[package]] name = "libm" -version = "0.2.7" +version = 
"0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "libsqlite3-sys" -version = "0.26.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc22eff61b133b115c6e8c74e818c628d6d5e7a502afea6f64dee076dd94326" +checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f" dependencies = [ "cc", "pkg-config", @@ -744,15 +686,15 @@ dependencies = [ [[package]] name = "log" -version = "0.4.19" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "lz4" -version = "1.24.0" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e9e2dd86df36ce760a60f6ff6ad526f7ba1f14ba0356f8254fb6905e6494df1" +checksum = "d6eab492fe7f8651add23237ea56dbf11b3c4ff762ab83d40a47f11433421f91" dependencies = [ "libc", "lz4-sys", @@ -760,9 +702,9 @@ dependencies = [ [[package]] name = "lz4-sys" -version = "1.9.4" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" +checksum = "e9764018d143cc854c9f17f0b907de70f14393b1f502da6375dce70f00514eb3" dependencies = [ "cc", "libc", @@ -770,42 +712,33 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.1" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "memmap2" -version = "0.9.3" +version = "0.9.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "45fd3a57831bf88bc63f8cebc0cf956116276e97fef3966103e96416209f7c92" +checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" dependencies = [ "libc", ] -[[package]] -name = "memoffset" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" -dependencies = [ - "autocfg", -] - [[package]] name = "miniz_oxide" -version = "0.7.1" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" dependencies = [ "adler", ] [[package]] name = "num" -version = "0.4.0" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" dependencies = [ "num-bigint", "num-complex", @@ -817,39 +750,37 @@ dependencies = [ [[package]] name = "num-bigint" -version = "0.4.3" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ - "autocfg", "num-integer", "num-traits", ] [[package]] name = "num-complex" -version = "0.4.3" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" dependencies = [ "num-traits", ] [[package]] name = "num-integer" -version = "0.1.45" +version = "0.1.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" dependencies = [ - "autocfg", "num-traits", ] [[package]] name = "num-iter" -version = "0.1.43" +version = "0.1.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" dependencies = [ "autocfg", "num-integer", @@ -858,11 +789,10 @@ dependencies = [ [[package]] name = "num-rational" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" dependencies = [ - "autocfg", "num-bigint", "num-integer", "num-traits", @@ -870,58 +800,42 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.15" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", "libm", ] -[[package]] -name = "num_cpus" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" -dependencies = [ - "hermit-abi 0.2.6", - "libc", -] - [[package]] name = "once_cell" -version = "1.17.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "oorandom" -version = "11.1.3" +version = "11.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" +checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" [[package]] name = "ordered-float" -version = "2.10.0" +version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7940cf2ca942593318d07fcf2596cdca60a85c9e7fab408a5e21a4f9dcd40d87" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" dependencies = [ "num-traits", ] -[[package]] -name = "os_str_bytes" -version = "6.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" - [[package]] name = "parquet" version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baab9c36b1c8300b81b4d577d306a0a733f9d34021363098d3548e37757ed6c8" dependencies = [ - "ahash 0.8.5", + "ahash", "arrow-array", "arrow-buffer", "arrow-cast", @@ -934,7 +848,7 @@ dependencies = [ "bytes", "chrono", "flate2", - "hashbrown 0.14.0", + "hashbrown", "lz4", "num", "num-bigint", @@ -943,26 +857,26 @@ dependencies = [ "snap", "thrift", "twox-hash", - "zstd", + "zstd 0.12.4", ] [[package]] name = "paste" -version = "1.0.12" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "pkg-config" -version = "0.3.26" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] name = "plotters" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45" +checksum = 
"a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3" dependencies = [ "num-traits", "plotters-backend", @@ -973,48 +887,42 @@ dependencies = [ [[package]] name = "plotters-backend" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609" +checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7" [[package]] name = "plotters-svg" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab" +checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705" dependencies = [ "plotters-backend", ] -[[package]] -name = "proc-macro-hack" -version = "0.5.20+deprecated" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" - [[package]] name = "proc-macro2" -version = "1.0.60" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dec2b086b7a862cf4de201096214fa870344cf922b2b30c167badb3af3195406" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.28" +version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] [[package]] name = "rayon" -version = "1.7.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" dependencies = [ "either", "rayon-core", @@ 
-1022,21 +930,19 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.11.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ - "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", - "num_cpus", ] [[package]] name = "regex" -version = "1.10.3" +version = "1.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" dependencies = [ "aho-corasick", "memchr", @@ -1046,9 +952,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.4" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b7fa1134405e2ec9353fd416b17f8dacd46c473d7d3fd1cf202706a14eb792a" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" dependencies = [ "aho-corasick", "memchr", @@ -1057,17 +963,17 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.2" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "rusqlite" -version = "0.29.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "549b9d036d571d42e6e85d1c1425e2ac83491075078ca9a15be021c56b1641f2" +checksum = "b838eba278d213a8beaf485bd313fd580ca4505a00d5871caeb1457c55322cae" dependencies = [ - "bitflags 2.2.1", + "bitflags 2.6.0", "fallible-iterator", "fallible-streaming-iterator", "hashlink", @@ -1086,9 +992,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.16" +version = "1.0.18" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "same-file" @@ -1099,49 +1005,43 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "scopeguard" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - [[package]] name = "semver" -version = "1.0.17" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = "seq-macro" -version = "0.3.3" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc" +checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.164" +version = "1.0.204" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d" +checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.164" +version = "1.0.204" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68" +checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.70", ] [[package]] name = "serde_json" -version = "1.0.99" +version = "1.0.120" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"46266871c240a00b8f503b877622fe33430b3c7d963bdc0f2adc511e54a1eae3" +checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" dependencies = [ "itoa", "ryu", @@ -1150,15 +1050,15 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.10.0" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "snap" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "static_assertions" @@ -1179,39 +1079,33 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.18" +version = "2.0.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e" +checksum = "2f0209b68b3613b093e0ec905354eccaedcfe83b8cb37cbdeae64026c3064c16" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] -[[package]] -name = "textwrap" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" - [[package]] name = "thiserror" -version = "1.0.40" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" +checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.40" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" +checksum = 
"46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.70", ] [[package]] @@ -1227,7 +1121,7 @@ dependencies = [ [[package]] name = "timsrust" -version = "0.3.0" +version = "0.3.1" dependencies = [ "bytemuck", "byteorder", @@ -1238,7 +1132,7 @@ dependencies = [ "rayon", "rusqlite", "thiserror", - "zstd", + "zstd 0.13.2", ] [[package]] @@ -1272,9 +1166,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.9" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "vcpkg" @@ -1290,9 +1184,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "walkdir" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", @@ -1306,9 +1200,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.90" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1223296a201415c7fad14792dbefaace9bd52b62d33453ade1c5b5f07555406" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -1316,24 +1210,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.90" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcdc935b63408d58a32f8cc9738a0bffd8f05cc7c002086c6ef20b7312ad9dcd" +checksum = 
"614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.70", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.90" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e4c238561b2d428924c49815533a8b9121c664599558a5d9ec51f8a1740a999" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1341,82 +1235,70 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.90" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bae1abb6806dc1ad9e560ed242107c0f6c84335f1749dd4e8ddb012ebd5e25a7" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.70", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.90" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" [[package]] name = "web-sys" -version = "0.3.67" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58cd2333b6e0be7a39605f0e255892fd7418a682d8da8fe042fe25128794d2ed" +checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" dependencies = [ "js-sys", "wasm-bindgen", ] [[package]] -name = "winapi" -version = "0.3.9" +name = "winapi-util" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" dependencies = [ 
- "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", + "windows-sys", ] [[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.6" +name = "windows-core" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "winapi", + "windows-targets", ] [[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows" -version = "0.48.0" +name = "windows-sys" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ "windows-targets", ] [[package]] name = "windows-targets" -version = "0.48.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", + "windows_i686_gnullvm", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", @@ -1425,92 +1307,115 @@ dependencies = [ [[package]] name = "windows_aarch64_gnullvm" -version = "0.48.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" 
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" -version = "0.48.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" -version = "0.48.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" -version = "0.48.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" -version = "0.48.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" -version = "0.48.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" -version = "0.48.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "zerocopy" -version = "0.7.3" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a7af71d8643341260a65f89fa60c0eeaa907f34544d8f6d9b0df72f069b5e74" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.3" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9731702e2f0617ad526794ae28fbc6f6ca8849b5ba729666c2a5bc4b6ddee2cd" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.18", + "syn 2.0.70", ] [[package]] name = "zstd" -version = "0.12.3+zstd.1.5.2" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806" +checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" dependencies = [ - "zstd-safe", + "zstd-safe 6.0.6", +] + +[[package]] +name = "zstd" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" +dependencies = [ + "zstd-safe 7.2.0", ] [[package]] name = "zstd-safe" -version = "6.0.5+zstd.1.5.4" +version = "6.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d56d9e60b4b1758206c238a10165fbcae3ca37b01744e394c463463f6529d23b" +checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" dependencies = [ "libc", "zstd-sys", ] +[[package]] +name = "zstd-safe" +version = "7.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa556e971e7b568dc775c136fc9de8c779b1c2fc3a63defaafadffdbd3181afa" +dependencies = [ 
+ "zstd-sys", +] + [[package]] name = "zstd-sys" -version = "2.0.8+zstd.1.5.5" +version = "2.0.12+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c" +checksum = "0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13" dependencies = [ "cc", - "libc", "pkg-config", ] diff --git a/Cargo.toml b/Cargo.toml index 932a754..a3b8509 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "timsrust" -version = "0.3.0" +version = "0.3.1" edition = "2021" description = "A crate to read Bruker timsTOF data" license = "Apache-2.0" @@ -15,9 +15,9 @@ keywords = ["MS", "LC-TIMS-TOF", "PASEF"] [dependencies] byteorder = "1.4.3" -zstd = "0.12.3" -rusqlite = { version = "0.29.0", features = ["bundled"] } -rayon = "1.5" +zstd = "0.13.2" +rusqlite = { version = "0.31.0", features = ["bundled"] } +rayon = "1.10.0" linreg = "0.2.0" bytemuck = "1.13.1" parquet = "42.0.0" @@ -25,7 +25,7 @@ thiserror = "1.0.0" memmap2 = "0.9.3" [dev-dependencies] -criterion = { version = "0.4", features = ["html_reports"] } +criterion = { version = "0.5.1", features = ["html_reports"] } [[bench]] name = "speed_performance" From 66b1e6f5e5f0bf077e62528754aa1891416e3909 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 10 Jul 2024 15:56:02 +0200 Subject: [PATCH 04/69] BENCH: update of benchmark suite --- benches/speed_performance.rs | 60 +++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/benches/speed_performance.rs b/benches/speed_performance.rs index 579b3e1..4bd5a4e 100644 --- a/benches/speed_performance.rs +++ b/benches/speed_performance.rs @@ -1,5 +1,9 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use timsrust::FileReader; +use rayon::iter::ParallelIterator; +use timsrust::{ + io::readers::{FrameReader, SpectrumReader}, + ms_data::Frame, +}; const DDA_TEST: &str = 
"/mnt/c/Users/Sander.Willems/Documents/data/tims05_300SPD/20230505_TIMS05_PaSk_MA_HeLa_6min_ddaP_S1-C10_1_2323.d/"; @@ -8,20 +12,26 @@ const DIA_TEST: &str = const SYP_TEST: &str = "/mnt/c/Users/Sander.Willems/Documents/data/20230505_TIMS05_PaSk_SA_HeLa_6min_syP_5scans_30Da_S1-D4_1_2330.d/"; -fn read_all_frames(file_reader: &FileReader) { - file_reader.read_all_frames(); +fn read_all_frames(frame_reader: &FrameReader) { + frame_reader + .parallel_filter(|x| true) + .collect::>(); } -fn read_all_ms1_frames(file_reader: &FileReader) { - file_reader.read_all_ms1_frames(); +fn read_all_ms1_frames(frame_reader: &FrameReader) { + frame_reader + .parallel_filter(|x| x.msms_type == 0) + .collect::>(); } -fn read_all_ms2_frames(file_reader: &FileReader) { - file_reader.read_all_ms2_frames(); +fn read_all_ms2_frames(frame_reader: &FrameReader) { + frame_reader + .parallel_filter(|x| x.msms_type != 0) + .collect::>(); } -fn read_all_spectra(file_reader: &FileReader) { - file_reader.read_all_spectra(); +fn read_all_spectra(spectrum_reader: &SpectrumReader) { + spectrum_reader.get_all(); } fn criterion_benchmark_dda(c: &mut Criterion) { @@ -29,19 +39,19 @@ fn criterion_benchmark_dda(c: &mut Criterion) { let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DDA_TEST; - let file_reader: FileReader = - FileReader::new(d_folder_name.to_string()).unwrap(); + let frame_reader = FrameReader::new(d_folder_name); + let spectrum_reader = SpectrumReader::new(d_folder_name); group.bench_function("DDA read_all_frames 6m", |b| { - b.iter(|| read_all_frames(black_box(&file_reader))) + b.iter(|| read_all_frames(black_box(&frame_reader))) }); group.bench_function("DDA read_all_ms1_frames 6m", |b| { - b.iter(|| read_all_ms1_frames(black_box(&file_reader))) + b.iter(|| read_all_ms1_frames(black_box(&frame_reader))) }); group.bench_function("DDA read_all_ms2_frames 6m", |b| { - b.iter(|| 
read_all_ms2_frames(black_box(&file_reader))) + b.iter(|| read_all_ms2_frames(black_box(&frame_reader))) }); group.bench_function("DDA read_all_spectra 6m", |b| { - b.iter(|| read_all_spectra(black_box(&file_reader))) + b.iter(|| read_all_spectra(black_box(&spectrum_reader))) }); group.finish(); } @@ -51,16 +61,16 @@ fn criterion_benchmark_dia(c: &mut Criterion) { let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DIA_TEST; - let file_reader: FileReader = - FileReader::new(d_folder_name.to_string()).unwrap(); + let frame_reader = FrameReader::new(d_folder_name); + let spectrum_reader = SpectrumReader::new(d_folder_name); group.bench_function("DIA read_all_frames 6m", |b| { - b.iter(|| read_all_frames(black_box(&file_reader))) + b.iter(|| read_all_frames(black_box(&frame_reader))) }); group.bench_function("DIA read_all_ms1_frames 6m", |b| { - b.iter(|| read_all_ms1_frames(black_box(&file_reader))) + b.iter(|| read_all_ms1_frames(black_box(&frame_reader))) }); group.bench_function("DIA read_all_ms2_frames 6m", |b| { - b.iter(|| read_all_ms2_frames(black_box(&file_reader))) + b.iter(|| read_all_ms2_frames(black_box(&frame_reader))) }); group.finish(); } @@ -70,16 +80,16 @@ fn criterion_benchmark_syp(c: &mut Criterion) { let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); let d_folder_name: &str = SYP_TEST; - let file_reader: FileReader = - FileReader::new(d_folder_name.to_string()).unwrap(); + let frame_reader = FrameReader::new(d_folder_name); + let spectrum_reader = SpectrumReader::new(d_folder_name); group.bench_function("SYP read_all_frames 6m", |b| { - b.iter(|| read_all_frames(black_box(&file_reader))) + b.iter(|| read_all_frames(black_box(&frame_reader))) }); group.bench_function("SYP read_all_ms1_frames 6m", |b| { - b.iter(|| read_all_ms1_frames(black_box(&file_reader))) + b.iter(|| 
read_all_ms1_frames(black_box(&frame_reader))) }); group.bench_function("SYP read_all_ms2_frames 6m", |b| { - b.iter(|| read_all_ms2_frames(black_box(&file_reader))) + b.iter(|| read_all_ms2_frames(black_box(&frame_reader))) }); group.finish(); } From 150d623a92fccd0d480fa6e76e6c2179ff171c5f Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 12 Jul 2024 16:18:36 +0200 Subject: [PATCH 05/69] FEAT: added derive options to converters --- src/domain_converters/frame_to_rt.rs | 2 +- src/domain_converters/scan_to_im.rs | 2 +- src/domain_converters/tof_to_mz.rs | 4 ++-- src/io/readers/spectrum_reader/tdf.rs | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/domain_converters/frame_to_rt.rs b/src/domain_converters/frame_to_rt.rs index 5c83388..eb7d1d1 100644 --- a/src/domain_converters/frame_to_rt.rs +++ b/src/domain_converters/frame_to_rt.rs @@ -1,5 +1,5 @@ /// A converter from Frame -> retention time. -#[derive(Debug, Clone)] +#[derive(Debug, Default, Clone, PartialEq)] pub struct Frame2RtConverter { rt_values: Vec, } diff --git a/src/domain_converters/scan_to_im.rs b/src/domain_converters/scan_to_im.rs index 68339f4..e7390ff 100644 --- a/src/domain_converters/scan_to_im.rs +++ b/src/domain_converters/scan_to_im.rs @@ -1,5 +1,5 @@ /// A converter from Scan -> (inversed) ion mobility. -#[derive(Debug, Clone)] +#[derive(Debug, Default, Clone, Copy, PartialEq)] pub struct Scan2ImConverter { scan_intercept: f64, scan_slope: f64, diff --git a/src/domain_converters/tof_to_mz.rs b/src/domain_converters/tof_to_mz.rs index e23ac18..c9a3abc 100644 --- a/src/domain_converters/tof_to_mz.rs +++ b/src/domain_converters/tof_to_mz.rs @@ -1,7 +1,7 @@ use linreg::linear_regression; /// A converter from TOF -> m/z. 
-#[derive(Debug, Clone)] +#[derive(Debug, Default, Clone, Copy, PartialEq)] pub struct Tof2MzConverter { tof_intercept: f64, tof_slope: f64, @@ -22,7 +22,7 @@ impl Tof2MzConverter { } } - pub fn from_pairs(data: &Vec<(f64, u32)>) -> Self { + pub fn regress_from_pairs(data: &Vec<(f64, u32)>) -> Self { let x: Vec = data.iter().map(|(_, x_val)| *x_val).collect(); let y: Vec = data.iter().map(|(y_val, _)| (*y_val).sqrt()).collect(); diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index fe1cbcc..58ffcd6 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -98,7 +98,7 @@ impl SpectrumReaderTrait for TDFSpectrumReader { acc }); if hits.len() >= 2 { - self.mz_reader = Tof2MzConverter::from_pairs(&hits); + self.mz_reader = Tof2MzConverter::regress_from_pairs(&hits); } } } From a3b254a1593c71935c2b1f6b5a25eddca5f5adc5 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Fri, 12 Jul 2024 13:09:25 -0700 Subject: [PATCH 06/69] (wip) added span-run and nsplit strategies --- src/io/readers.rs | 1 + src/io/readers/precursor_reader/tdf/dia.rs | 20 +-- src/io/readers/spectrum_reader/tdf.rs | 6 +- src/io/readers/spectrum_reader/tdf/dda.rs | 4 + src/io/readers/spectrum_reader/tdf/dia.rs | 35 +++-- .../spectrum_reader/tdf/raw_spectra.rs | 5 + src/io/readers/tdf_utils.rs | 124 ++++++++++++++++++ src/utils/vec_utils.rs | 4 +- 8 files changed, 161 insertions(+), 38 deletions(-) create mode 100644 src/io/readers/tdf_utils.rs diff --git a/src/io/readers.rs b/src/io/readers.rs index 03d5248..fd9f3ce 100644 --- a/src/io/readers.rs +++ b/src/io/readers.rs @@ -4,6 +4,7 @@ mod metadata_reader; mod precursor_reader; mod quad_settings_reader; mod spectrum_reader; +mod tdf_utils; pub use frame_reader::*; pub use metadata_reader::*; diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index d604769..46fdc37 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs 
+++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -1,5 +1,6 @@ use std::path::{Path, PathBuf}; +use crate::io::readers::tdf_utils::expand_quadrupole_settings; use crate::{ domain_converters::{ ConvertableDomain, Frame2RtConverter, Scan2ImConverter, @@ -34,23 +35,8 @@ impl DIATDFPrecursorReader { SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap(); let quadrupole_settings = QuadrupoleSettingsReader::new(tdf_sql_reader.get_path()); - let mut expanded_quadrupole_settings: Vec = vec![]; - for window_group in window_groups { - let window = window_group.window_group; - let frame = window_group.frame; - let group = &quadrupole_settings[window as usize - 1]; - for sub_window in 0..group.isolation_mz.len() { - let sub_quad_settings = QuadrupoleSettings { - index: frame, - scan_starts: vec![group.scan_starts[sub_window]], - scan_ends: vec![group.scan_ends[sub_window]], - isolation_mz: vec![group.isolation_mz[sub_window]], - isolation_width: vec![group.isolation_width[sub_window]], - collision_energy: vec![group.collision_energy[sub_window]], - }; - expanded_quadrupole_settings.push(sub_quad_settings) - } - } + let expanded_quadrupole_settings = + expand_quadrupole_settings(&window_groups, &quadrupole_settings); Self { path: path.as_ref().to_path_buf(), expanded_quadrupole_settings, diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index fe1cbcc..270102d 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -69,7 +69,11 @@ impl SpectrumReaderTrait for TDFSpectrumReader { } fn len(&self) -> usize { - self.precursor_reader.len() + debug_assert_eq!( + self.precursor_reader.len(), + self.raw_spectrum_reader.len() + ); + self.raw_spectrum_reader.len() } fn get_path(&self) -> PathBuf { diff --git a/src/io/readers/spectrum_reader/tdf/dda.rs b/src/io/readers/spectrum_reader/tdf/dda.rs index c5d9eb8..b309c69 100644 --- a/src/io/readers/spectrum_reader/tdf/dda.rs +++ 
b/src/io/readers/spectrum_reader/tdf/dda.rs @@ -96,4 +96,8 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader { }; raw_spectrum } + + fn len(&self) -> usize { + self.offsets.len() - 1 + } } diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 493152b..24386c8 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -1,3 +1,4 @@ +use crate::io::readers::tdf_utils::expand_quadrupole_settings; use crate::{ io::readers::{ file_readers::sql_reader::{ @@ -19,27 +20,12 @@ pub struct DIARawSpectrumReader { impl DIARawSpectrumReader { pub fn new(tdf_sql_reader: &SqlReader, frame_reader: FrameReader) -> Self { - let window_groups = + let window_groups: Vec = SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap(); let quadrupole_settings = QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path()); - let mut expanded_quadrupole_settings: Vec = vec![]; - for window_group in window_groups { - let window = window_group.window_group; - let frame = window_group.frame; - let group = &quadrupole_settings[window as usize - 1]; - for sub_window in 0..group.isolation_mz.len() { - let sub_quad_settings = QuadrupoleSettings { - index: frame, - scan_starts: vec![group.scan_starts[sub_window]], - scan_ends: vec![group.scan_ends[sub_window]], - isolation_mz: vec![group.isolation_mz[sub_window]], - isolation_width: vec![group.isolation_width[sub_window]], - collision_energy: vec![group.collision_energy[sub_window]], - }; - expanded_quadrupole_settings.push(sub_quad_settings) - } - } + let expanded_quadrupole_settings = + expand_quadrupole_settings(&window_groups, &quadrupole_settings); Self { expanded_quadrupole_settings, frame_reader, @@ -50,6 +36,15 @@ impl DIARawSpectrumReader { impl RawSpectrumReaderTrait for DIARawSpectrumReader { fn get(&self, index: usize) -> RawSpectrum { let quad_settings = &self.expanded_quadrupole_settings[index]; + if index < 10 { + println!("{}", index); + 
println!("{:?}", quad_settings); + } + if index > (self.expanded_quadrupole_settings.len() - 10) { + println!("{}", index); + println!("{:?}", quad_settings); + } + let collision_energy = quad_settings.collision_energy[0]; let isolation_mz = quad_settings.isolation_mz[0]; let isolation_width = quad_settings.isolation_width[0]; @@ -75,4 +70,8 @@ impl RawSpectrumReaderTrait for DIARawSpectrumReader { }; raw_spectrum } + + fn len(&self) -> usize { + self.expanded_quadrupole_settings.len() + } } diff --git a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs index 156b6a4..ea88d2b 100644 --- a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs +++ b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs @@ -110,8 +110,13 @@ impl RawSpectrumReader { pub fn get(&self, index: usize) -> RawSpectrum { self.raw_spectrum_reader.get(index) } + + pub fn len(&self) -> usize { + self.raw_spectrum_reader.len() + } } pub trait RawSpectrumReaderTrait: Sync { fn get(&self, index: usize) -> RawSpectrum; + fn len(&self) -> usize; } diff --git a/src/io/readers/tdf_utils.rs b/src/io/readers/tdf_utils.rs new file mode 100644 index 0000000..ff4d938 --- /dev/null +++ b/src/io/readers/tdf_utils.rs @@ -0,0 +1,124 @@ +use crate::io::readers::file_readers::sql_reader::frame_groups::SqlWindowGroup; +use crate::ms_data::QuadrupoleSettings; + +type SpanStep = (usize, usize); + +enum QuadWindowExpansionStrategy { + None, + Even(usize), + Uniform(SpanStep), +} + +fn scan_range_subsplit( + start: usize, + end: usize, + strategy: &QuadWindowExpansionStrategy, +) -> Vec<(usize, usize)> { + let out = match strategy { + QuadWindowExpansionStrategy::None => { + vec![(start, end)] + }, + QuadWindowExpansionStrategy::Even(num_splits) => { + let sub_subwindow_width = (end - start) / (num_splits + 1); + let mut out = Vec::new(); + for sub_subwindow in 0..num_splits.clone() { + let sub_subwindow_scan_start = + start + (sub_subwindow_width * sub_subwindow); + let 
sub_subwindow_scan_end = + start + (sub_subwindow_width * (sub_subwindow + 2)); + + out.push((sub_subwindow_scan_start, sub_subwindow_scan_end)) + } + out + }, + QuadWindowExpansionStrategy::Uniform((span, step)) => { + let mut curr_start = start.clone(); + let mut curr_end = start + span; + let mut out = Vec::new(); + while curr_end < end { + out.push((curr_start, curr_end)); + curr_start += step; + curr_end += step; + } + out + }, + }; + out +} + +pub fn expand_quadrupole_settings( + window_groups: &[SqlWindowGroup], + quadrupole_settings: &[QuadrupoleSettings], +) -> Vec { + // Read the 'NUM_SUB_SUB_SPLITS' from env variables ... default to 1 + // (for now) + + let splits = match std::env::var("NUM_SUB_SUB_SPLITS") { + Ok(s) => match s.parse::() { + Ok(n) => { + println!("Number of splits: {} from env", n); + QuadWindowExpansionStrategy::Even(n) + }, + Err(_) => { + println!("Invalid number of splits: {}", s); + QuadWindowExpansionStrategy::None + }, + }, + Err(_) => match std::env::var("SUB_SPLITS_SPAN") { + Ok(s) => match s.parse::() { + Ok(n) => { + println!("Number of scans per split: {} from env", n); + QuadWindowExpansionStrategy::Uniform((n, n / 2)) + }, + Err(_) => { + println!("Invalid number of splits: {}", s); + QuadWindowExpansionStrategy::None + }, + }, + Err(_) => QuadWindowExpansionStrategy::None, + }, + }; + + let mut expanded_quadrupole_settings: Vec = vec![]; + for window_group in window_groups { + let window = window_group.window_group; + let frame = window_group.frame; + let group = &quadrupole_settings[window as usize - 1]; + for sub_window in 0..group.isolation_mz.len() { + let subwindow_scan_start = group.scan_starts[sub_window]; + let subwindow_scan_end = group.scan_ends[sub_window]; + for (sws, swe) in scan_range_subsplit( + subwindow_scan_start, + subwindow_scan_end, + &splits, + ) { + assert!( + sws >= subwindow_scan_start, + "{} >= {} not true", + sws, + subwindow_scan_start + ); + assert!( + swe <= subwindow_scan_end, + "{} <= {} not 
true", + swe, + subwindow_scan_end + ); + let sub_quad_settings = QuadrupoleSettings { + index: frame, + scan_starts: vec![sws], + scan_ends: vec![swe], + isolation_mz: vec![group.isolation_mz[sub_window]], + isolation_width: vec![group.isolation_width[sub_window]], + collision_energy: vec![group.collision_energy[sub_window]], + }; + expanded_quadrupole_settings.push(sub_quad_settings) + } + } + } + println!( + "Number of expanded quad settings {}", + expanded_quadrupole_settings.len() + ); + expanded_quadrupole_settings +} diff --git a/src/utils/vec_utils.rs b/src/utils/vec_utils.rs index 724fc3c..3ee53c0 100644 --- a/src/utils/vec_utils.rs +++ b/src/utils/vec_utils.rs @@ -12,8 +12,8 @@ pub fn group_and_sum + Copy>( return (vec![], vec![]); } let order: Vec = argsort(&groups); - let mut new_groups: Vec = vec![]; - let mut new_values: Vec = vec![]; + let mut new_groups: Vec = Vec::with_capacity(order.len()); + let mut new_values: Vec = Vec::with_capacity(order.len()); let mut current_group: T = groups[order[0]]; let mut current_value: U = values[order[0]]; for &index in &order[1..] 
{ From b41919a5ad7fd14e5e039fcfdb30e1febe635428 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 15 Jul 2024 15:42:54 +0200 Subject: [PATCH 07/69] CHORE: removed file reader in favor of type specific readers --- src/errors.rs | 17 ++++++++++++----- src/lib.rs | 3 +-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/errors.rs b/src/errors.rs index 7af743c..44782f1 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,14 +1,21 @@ -use crate::{ - file_readers, - // io::readers::common::{sql_reader::SqlError, tdf_blobs::TdfBlobError}, -}; +#[derive(thiserror::Error, Debug)] +pub enum FileFormatError { + #[error("DirectoryDoesNotExist")] + DirectoryDoesNotExist, + #[error("NoParentWithBrukerExtension")] + NoParentWithBrukerExtension, + #[error("BinaryFilesAreMissing")] + BinaryFilesAreMissing, + #[error("MetadataFilesAreMissing")] + MetadataFilesAreMissing, +} /// An error that is produced by timsrust (uses [thiserror]). #[derive(thiserror::Error, Debug)] pub enum Error { /// An error to indicate a path is not a Bruker File Format. 
#[error("FileFormatError: {0}")] - FileFormatError(#[from] file_readers::FileFormatError), + FileFormatError(#[from] FileFormatError), // #[error("SqlError: {0}")] // SqlError(#[from] SqlError), // #[error("BinError: {0}")] diff --git a/src/lib.rs b/src/lib.rs index 085bee8..b519699 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,9 +23,8 @@ pub mod domain_converters; mod errors; -mod file_readers; pub mod io; pub mod ms_data; mod utils; -pub use crate::{errors::*, file_readers::FileReader}; +pub use crate::errors::*; From 88f7a1a1d3d9d7210377ed4cc0dedbf8d18825f8 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 15 Jul 2024 15:44:10 +0200 Subject: [PATCH 08/69] FEAT: made precursor an option for spectra --- src/io/readers/spectrum_reader.rs | 2 +- src/io/readers/spectrum_reader/minitdf.rs | 2 +- src/io/writers/mgf.rs | 2 +- src/ms_data/spectra.rs | 2 +- tests/spectrum_readers.rs | 20 ++++++++++---------- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index 0082ca3..336634a 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -47,7 +47,7 @@ impl SpectrumReader { .into_par_iter() .map(|index| self.get(index)) .collect(); - spectra.sort_by_key(|x| x.precursor.index); + spectra.sort_by_key(|x| x.precursor.unwrap().index); spectra } diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 84a6f4a..0afcde1 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -75,7 +75,7 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { spectrum.mz_values = mz_values.to_vec(); } let precursor = self.precursor_reader.get(index); - spectrum.precursor = precursor; + spectrum.precursor = Some(precursor); spectrum.index = precursor.index; spectrum.collision_energy = self.collision_energies[index]; spectrum.isolation_mz = precursor.mz; //FIX? 
diff --git a/src/io/writers/mgf.rs b/src/io/writers/mgf.rs index e7693d6..0ad5ef0 100644 --- a/src/io/writers/mgf.rs +++ b/src/io/writers/mgf.rs @@ -30,7 +30,7 @@ pub struct MGFEntry; impl MGFEntry { pub fn write_header(spectrum: &Spectrum) -> String { - let precursor = spectrum.precursor; + let precursor = spectrum.precursor.unwrap(); let title = precursor.index; let intensity = precursor.intensity.unwrap_or(0.0); let charge = precursor.charge.unwrap_or(0); diff --git a/src/ms_data/spectra.rs b/src/ms_data/spectra.rs index bd9cf25..7ffb9f7 100644 --- a/src/ms_data/spectra.rs +++ b/src/ms_data/spectra.rs @@ -5,7 +5,7 @@ use super::Precursor; pub struct Spectrum { pub mz_values: Vec, pub intensities: Vec, - pub precursor: Precursor, + pub precursor: Option, pub index: usize, pub collision_energy: f64, pub isolation_mz: f64, diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 78637e1..3473d18 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -23,7 +23,7 @@ fn minitdf_reader() { Spectrum { mz_values: vec![100.0, 200.002, 300.03, 400.4], intensities: vec![1.0, 2.0, 3.0, 4.0], - precursor: Precursor { + precursor: Some(Precursor { mz: 123.4567, rt: 12.345, im: 1.234, @@ -31,7 +31,7 @@ fn minitdf_reader() { intensity: Some(0.0), index: 1, frame_index: 1, - }, + }), index: 1, collision_energy: 0.0, isolation_mz: 123.4567, @@ -40,7 +40,7 @@ fn minitdf_reader() { Spectrum { mz_values: vec![1100.0, 1200.002, 1300.03, 1400.4], intensities: vec![10.0, 20.0, 30.0, 40.0], - precursor: Precursor { + precursor: Some(Precursor { mz: 987.6543, rt: 9.876, im: 0.9876, @@ -48,7 +48,7 @@ fn minitdf_reader() { intensity: Some(0.0), index: 2, frame_index: 2, - }, + }), index: 2, collision_energy: 0.0, isolation_mz: 987.6543, @@ -73,7 +73,7 @@ fn tdf_reader_dda() { Spectrum { mz_values: vec![199.7633445943076], intensities: vec![162.0], - precursor: Precursor { + precursor: Some(Precursor { mz: 500.0, rt: 0.2, im: 1.25, @@ -81,7 +81,7 @@ fn 
tdf_reader_dda() { intensity: Some(10.0), index: 1, frame_index: 1, - }, + }), index: 0, collision_energy: 0.0, isolation_mz: 500.5, @@ -90,7 +90,7 @@ fn tdf_reader_dda() { Spectrum { mz_values: vec![169.5419900362706, 695.6972509397959], intensities: vec![120.0, 624.0], - precursor: Precursor { + precursor: Some(Precursor { mz: 501.0, rt: 0.2, im: 1.0, @@ -98,7 +98,7 @@ fn tdf_reader_dda() { intensity: Some(10.0), index: 2, frame_index: 1, - }, + }), index: 1, collision_energy: 0.0, isolation_mz: 501.5, @@ -107,7 +107,7 @@ fn tdf_reader_dda() { Spectrum { mz_values: vec![827.1915846690921], intensities: vec![714.0], - precursor: Precursor { + precursor: Some(Precursor { mz: 502.0, rt: 0.4, im: 1.25, @@ -115,7 +115,7 @@ fn tdf_reader_dda() { intensity: Some(10.0), index: 3, frame_index: 3, - }, + }), index: 2, collision_energy: 0.0, isolation_mz: 502.5, From b3d1ca3b843b55976ed9851054229d511998c5cd Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 15 Jul 2024 15:44:35 +0200 Subject: [PATCH 09/69] FIX: added specific parallel filters for frame reader --- src/io/readers/frame_reader.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index e13b6be..29c07a9 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -43,7 +43,6 @@ impl FrameReader { AcquisitionType::DDAPASEF } else if sql_frames.iter().any(|x| x.msms_type == 9) { AcquisitionType::DIAPASEF - // TODO: can also be diagonalpasef } else { AcquisitionType::Unknown }; @@ -117,6 +116,18 @@ impl FrameReader { frame } + pub fn get_all(&self) -> Vec { + self.parallel_filter(|_| true).collect() + } + + pub fn get_all_ms1(&self) -> Vec { + self.parallel_filter(|x| x.msms_type == 0).collect() + } + + pub fn get_all_ms2(&self) -> Vec { + self.parallel_filter(|x| x.msms_type != 0).collect() + } + pub fn get_acquisition(&self) -> AcquisitionType { self.acquisition } From 
64275d35f113f0cc741091cb222a88ab5db54109 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 15 Jul 2024 15:45:31 +0200 Subject: [PATCH 10/69] CHORE: added derives for metadata --- src/ms_data/metadata.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ms_data/metadata.rs b/src/ms_data/metadata.rs index 55766a5..350d6ea 100644 --- a/src/ms_data/metadata.rs +++ b/src/ms_data/metadata.rs @@ -5,7 +5,7 @@ use crate::domain_converters::{ }; /// Metadata from a single run. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub struct Metadata { pub path: PathBuf, pub rt_converter: Frame2RtConverter, From a7d988667a0c3546c97bf3927be46df0137c4d16 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 15 Jul 2024 15:49:13 +0200 Subject: [PATCH 11/69] CHORE: updated derives of ms data --- src/ms_data/acquisition.rs | 2 +- src/ms_data/frames.rs | 2 +- src/ms_data/metadata.rs | 3 ++- src/ms_data/precursors.rs | 2 +- src/ms_data/quadrupole.rs | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/ms_data/acquisition.rs b/src/ms_data/acquisition.rs index b3e7be0..365823f 100644 --- a/src/ms_data/acquisition.rs +++ b/src/ms_data/acquisition.rs @@ -1,5 +1,5 @@ /// The kind of acquisition that was used. -#[derive(Debug, PartialEq, Clone, Copy, Default)] +#[derive(Clone, Copy, Debug, Default, PartialEq)] pub enum AcquisitionType { DDAPASEF, DIAPASEF, diff --git a/src/ms_data/frames.rs b/src/ms_data/frames.rs index c4858e0..6cf7fd1 100644 --- a/src/ms_data/frames.rs +++ b/src/ms_data/frames.rs @@ -2,7 +2,7 @@ use super::{AcquisitionType, QuadrupoleSettings}; use std::sync::Arc; /// A frame with all unprocessed data as it was acquired. 
-#[derive(Debug, PartialEq, Default, Clone)] +#[derive(Clone, Debug, Default, PartialEq)] pub struct Frame { pub scan_offsets: Vec, pub tof_indices: Vec, diff --git a/src/ms_data/metadata.rs b/src/ms_data/metadata.rs index 350d6ea..14d1a9a 100644 --- a/src/ms_data/metadata.rs +++ b/src/ms_data/metadata.rs @@ -5,7 +5,8 @@ use crate::domain_converters::{ }; /// Metadata from a single run. -#[derive(Debug, Clone, PartialEq)] +#[derive(Clone, Debug, Default, PartialEq)] + pub struct Metadata { pub path: PathBuf, pub rt_converter: Frame2RtConverter, diff --git a/src/ms_data/precursors.rs b/src/ms_data/precursors.rs index 4f99d59..5e39e1c 100644 --- a/src/ms_data/precursors.rs +++ b/src/ms_data/precursors.rs @@ -1,5 +1,5 @@ /// The MS1 precursor that got selected for fragmentation. -#[derive(Debug, Default, Clone, Copy, PartialEq)] +#[derive(Clone, Copy, Debug, Default, PartialEq)] pub struct Precursor { pub mz: f64, pub rt: f64, diff --git a/src/ms_data/quadrupole.rs b/src/ms_data/quadrupole.rs index 19c8d94..b9d2185 100644 --- a/src/ms_data/quadrupole.rs +++ b/src/ms_data/quadrupole.rs @@ -1,5 +1,5 @@ /// The quadrupole settings used for fragmentation. 
-#[derive(Debug, Default, Clone, PartialEq)] +#[derive(Clone, Debug, Default, PartialEq)] pub struct QuadrupoleSettings { pub index: usize, pub scan_starts: Vec, From feea54afa13dcb3b47b67e1458842d196651cc37 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 16 Jul 2024 10:22:59 +0200 Subject: [PATCH 12/69] FEAT: Made TDFBlob len more struct multiple of 4 --- src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs b/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs index b75d494..fbb2698 100644 --- a/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs +++ b/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs @@ -1,17 +1,18 @@ const U32_SIZE: usize = std::mem::size_of::(); -#[derive(Debug, Default)] +#[derive(Clone, Debug, Default, PartialEq)] pub struct TdfBlob { bytes: Vec, } impl TdfBlob { pub fn new(bytes: Vec) -> Self { + assert!(bytes.len() % U32_SIZE == 0); Self { bytes } } pub fn get(&self, index: usize) -> u32 { - debug_assert!(index < self.len()); + assert!(index < self.len()); Self::concatenate_bytes( self.bytes[index], self.bytes[index + self.len()], From ce505508193c98e468f836c4a3b3f8c6df177b59 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 16 Jul 2024 11:10:14 +0200 Subject: [PATCH 13/69] CHORE: implemented derives for sql structs --- src/io/readers/file_readers/parquet_reader/precursors.rs | 2 +- src/io/readers/file_readers/sql_reader/frame_groups.rs | 2 +- src/io/readers/file_readers/sql_reader/frames.rs | 2 +- src/io/readers/file_readers/sql_reader/metadata.rs | 1 + src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs | 2 +- src/io/readers/file_readers/sql_reader/precursors.rs | 2 +- src/io/readers/file_readers/sql_reader/quad_settings.rs | 2 +- 7 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/io/readers/file_readers/parquet_reader/precursors.rs 
b/src/io/readers/file_readers/parquet_reader/precursors.rs index 3ce5302..3f64dbe 100644 --- a/src/io/readers/file_readers/parquet_reader/precursors.rs +++ b/src/io/readers/file_readers/parquet_reader/precursors.rs @@ -1,6 +1,6 @@ use super::ReadableParquetTable; -#[derive(Default, Debug, PartialEq)] +#[derive(Clone, Debug, Default, PartialEq)] pub struct ParquetPrecursor { pub mz: f64, pub rt: f64, diff --git a/src/io/readers/file_readers/sql_reader/frame_groups.rs b/src/io/readers/file_readers/sql_reader/frame_groups.rs index a46e72e..add0b73 100644 --- a/src/io/readers/file_readers/sql_reader/frame_groups.rs +++ b/src/io/readers/file_readers/sql_reader/frame_groups.rs @@ -1,6 +1,6 @@ use super::ReadableSqlTable; -#[derive(Debug, PartialEq)] +#[derive(Clone, Debug, Default, PartialEq)] pub struct SqlWindowGroup { pub frame: usize, pub window_group: u8, diff --git a/src/io/readers/file_readers/sql_reader/frames.rs b/src/io/readers/file_readers/sql_reader/frames.rs index e1d7337..be5de7c 100644 --- a/src/io/readers/file_readers/sql_reader/frames.rs +++ b/src/io/readers/file_readers/sql_reader/frames.rs @@ -1,6 +1,6 @@ use super::ReadableSqlTable; -#[derive(Debug, PartialEq)] +#[derive(Clone, Debug, Default, PartialEq)] pub struct SqlFrame { pub id: usize, pub scan_mode: u8, diff --git a/src/io/readers/file_readers/sql_reader/metadata.rs b/src/io/readers/file_readers/sql_reader/metadata.rs index fb045ba..710a5df 100644 --- a/src/io/readers/file_readers/sql_reader/metadata.rs +++ b/src/io/readers/file_readers/sql_reader/metadata.rs @@ -1,5 +1,6 @@ use super::ReadableSqlHashMap; +#[derive(Clone, Debug, Default, PartialEq)] pub struct SqlMetadata; impl ReadableSqlHashMap for SqlMetadata { diff --git a/src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs b/src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs index 51e09cd..68508df 100644 --- a/src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs +++ 
b/src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs @@ -1,6 +1,6 @@ use super::ReadableSqlTable; -#[derive(Debug, PartialEq)] +#[derive(Clone, Debug, Default, PartialEq)] pub struct SqlPasefFrameMsMs { pub frame: usize, pub scan_start: usize, diff --git a/src/io/readers/file_readers/sql_reader/precursors.rs b/src/io/readers/file_readers/sql_reader/precursors.rs index c2b00aa..24647fa 100644 --- a/src/io/readers/file_readers/sql_reader/precursors.rs +++ b/src/io/readers/file_readers/sql_reader/precursors.rs @@ -1,6 +1,6 @@ use super::ReadableSqlTable; -#[derive(Debug, PartialEq)] +#[derive(Clone, Debug, Default, PartialEq)] pub struct SqlPrecursor { pub id: usize, pub mz: f64, diff --git a/src/io/readers/file_readers/sql_reader/quad_settings.rs b/src/io/readers/file_readers/sql_reader/quad_settings.rs index d7d69b4..5d15639 100644 --- a/src/io/readers/file_readers/sql_reader/quad_settings.rs +++ b/src/io/readers/file_readers/sql_reader/quad_settings.rs @@ -1,6 +1,6 @@ use super::ReadableSqlTable; -#[derive(Debug, PartialEq)] +#[derive(Clone, Debug, Default, PartialEq)] pub struct SqlQuadSettings { pub window_group: usize, pub scan_start: usize, From 477656a6fcac8fc017f21d4ea1e185ff5fbd3f55 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 16 Jul 2024 11:33:58 +0200 Subject: [PATCH 14/69] CHORE: made a parse default trait for sql to minimiza usage of unwrap occurences --- src/io/readers/file_readers/sql_reader.rs | 12 +++++++++++- .../file_readers/sql_reader/frame_groups.rs | 6 +++--- .../readers/file_readers/sql_reader/frames.rs | 18 +++++++++--------- .../sql_reader/pasef_frame_msms.rs | 16 ++++++++-------- .../file_readers/sql_reader/precursors.rs | 14 +++++++------- .../file_readers/sql_reader/quad_settings.rs | 14 +++++++------- 6 files changed, 45 insertions(+), 35 deletions(-) diff --git a/src/io/readers/file_readers/sql_reader.rs b/src/io/readers/file_readers/sql_reader.rs index 6704532..106e077 100644 --- 
a/src/io/readers/file_readers/sql_reader.rs +++ b/src/io/readers/file_readers/sql_reader.rs @@ -10,7 +10,7 @@ use std::{ path::{Path, PathBuf}, }; -use rusqlite::Connection; +use rusqlite::{types::FromSql, Connection}; #[derive(Debug)] pub struct SqlReader { @@ -85,6 +85,16 @@ pub trait ReadableSqlHashMap { } } +pub trait ParseDefault { + fn parse_default(&self, index: usize) -> T; +} + +impl ParseDefault for rusqlite::Row<'_> { + fn parse_default(&self, index: usize) -> T { + self.get(index).unwrap_or_default() + } +} + #[derive(thiserror::Error, Debug)] #[error("SqlError: {0}")] pub struct SqlError(#[from] rusqlite::Error); diff --git a/src/io/readers/file_readers/sql_reader/frame_groups.rs b/src/io/readers/file_readers/sql_reader/frame_groups.rs index add0b73..7f9a8fe 100644 --- a/src/io/readers/file_readers/sql_reader/frame_groups.rs +++ b/src/io/readers/file_readers/sql_reader/frame_groups.rs @@ -1,4 +1,4 @@ -use super::ReadableSqlTable; +use super::{ParseDefault, ReadableSqlTable}; #[derive(Clone, Debug, Default, PartialEq)] pub struct SqlWindowGroup { @@ -13,8 +13,8 @@ impl ReadableSqlTable for SqlWindowGroup { fn from_sql_row(row: &rusqlite::Row) -> Self { Self { - frame: row.get(0).unwrap_or_default(), - window_group: row.get(1).unwrap_or_default(), + frame: row.parse_default(0), + window_group: row.parse_default(1), } } } diff --git a/src/io/readers/file_readers/sql_reader/frames.rs b/src/io/readers/file_readers/sql_reader/frames.rs index be5de7c..56f45aa 100644 --- a/src/io/readers/file_readers/sql_reader/frames.rs +++ b/src/io/readers/file_readers/sql_reader/frames.rs @@ -1,4 +1,4 @@ -use super::ReadableSqlTable; +use super::{ParseDefault, ReadableSqlTable}; #[derive(Clone, Debug, Default, PartialEq)] pub struct SqlFrame { @@ -19,14 +19,14 @@ impl ReadableSqlTable for SqlFrame { fn from_sql_row(row: &rusqlite::Row) -> Self { Self { - id: row.get(0).unwrap_or_default(), - scan_mode: row.get(1).unwrap_or_default(), - msms_type: 
row.get(2).unwrap_or_default(), - peak_count: row.get(3).unwrap_or_default(), - rt: row.get(4).unwrap_or_default(), - scan_count: row.get(5).unwrap_or_default(), - binary_offset: row.get(6).unwrap_or_default(), - accumulation_time: row.get(7).unwrap_or_default(), + id: row.parse_default(0), + scan_mode: row.parse_default(1), + msms_type: row.parse_default(2), + peak_count: row.parse_default(3), + rt: row.parse_default(4), + scan_count: row.parse_default(5), + binary_offset: row.parse_default(6), + accumulation_time: row.parse_default(7), } } } diff --git a/src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs b/src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs index 68508df..6ce26a8 100644 --- a/src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs +++ b/src/io/readers/file_readers/sql_reader/pasef_frame_msms.rs @@ -1,4 +1,4 @@ -use super::ReadableSqlTable; +use super::{ParseDefault, ReadableSqlTable}; #[derive(Clone, Debug, Default, PartialEq)] pub struct SqlPasefFrameMsMs { @@ -18,13 +18,13 @@ impl ReadableSqlTable for SqlPasefFrameMsMs { fn from_sql_row(row: &rusqlite::Row) -> Self { Self { - frame: row.get(0).unwrap_or_default(), - scan_start: row.get(1).unwrap_or_default(), - scan_end: row.get(2).unwrap_or_default(), - isolation_mz: row.get(3).unwrap_or_default(), - isolation_width: row.get(4).unwrap_or_default(), - collision_energy: row.get(5).unwrap_or_default(), - precursor: row.get(6).unwrap_or_default(), + frame: row.parse_default(0), + scan_start: row.parse_default(1), + scan_end: row.parse_default(2), + isolation_mz: row.parse_default(3), + isolation_width: row.parse_default(4), + collision_energy: row.parse_default(5), + precursor: row.parse_default(6), } } } diff --git a/src/io/readers/file_readers/sql_reader/precursors.rs b/src/io/readers/file_readers/sql_reader/precursors.rs index 24647fa..105342b 100644 --- a/src/io/readers/file_readers/sql_reader/precursors.rs +++ b/src/io/readers/file_readers/sql_reader/precursors.rs @@ -1,4 
+1,4 @@ -use super::ReadableSqlTable; +use super::{ParseDefault, ReadableSqlTable}; #[derive(Clone, Debug, Default, PartialEq)] pub struct SqlPrecursor { @@ -17,12 +17,12 @@ impl ReadableSqlTable for SqlPrecursor { fn from_sql_row(row: &rusqlite::Row) -> Self { Self { - id: row.get(0).unwrap_or_default(), - mz: row.get(1).unwrap_or_default(), - charge: row.get(2).unwrap_or_default(), - scan_average: row.get(3).unwrap_or_default(), - intensity: row.get(4).unwrap_or_default(), - precursor_frame: row.get(5).unwrap_or_default(), + id: row.parse_default(0), + mz: row.parse_default(1), + charge: row.parse_default(2), + scan_average: row.parse_default(3), + intensity: row.parse_default(4), + precursor_frame: row.parse_default(5), } } } diff --git a/src/io/readers/file_readers/sql_reader/quad_settings.rs b/src/io/readers/file_readers/sql_reader/quad_settings.rs index 5d15639..ddc99f8 100644 --- a/src/io/readers/file_readers/sql_reader/quad_settings.rs +++ b/src/io/readers/file_readers/sql_reader/quad_settings.rs @@ -1,4 +1,4 @@ -use super::ReadableSqlTable; +use super::{ParseDefault, ReadableSqlTable}; #[derive(Clone, Debug, Default, PartialEq)] pub struct SqlQuadSettings { @@ -17,12 +17,12 @@ impl ReadableSqlTable for SqlQuadSettings { fn from_sql_row(row: &rusqlite::Row) -> Self { Self { - window_group: row.get(0).unwrap_or_default(), - scan_start: row.get(1).unwrap_or_default(), - scan_end: row.get(2).unwrap_or_default(), - mz_center: row.get(3).unwrap_or_default(), - mz_width: row.get(4).unwrap_or_default(), - collision_energy: row.get(5).unwrap_or_default(), + window_group: row.parse_default(0), + scan_start: row.parse_default(1), + scan_end: row.parse_default(2), + mz_center: row.parse_default(3), + mz_width: row.parse_default(4), + collision_energy: row.parse_default(5), } } } From bed5c18e3acf7ba71f2c3f8b9f54c22a25747080 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 16 Jul 2024 11:50:40 +0200 Subject: [PATCH 15/69] FIX: raw_spectrum precursor option --- 
src/io/readers/spectrum_reader/tdf/raw_spectra.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs index 156b6a4..7172940 100644 --- a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs +++ b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs @@ -66,7 +66,7 @@ impl RawSpectrum { .map(|&x| mz_reader.convert(x)) .collect(), intensities: self.intensities.iter().map(|x| *x as f64).collect(), - precursor: precursor, + precursor: Some(precursor), index: index, collision_energy: self.collision_energy, isolation_mz: self.isolation_mz, From 0e1198f46567ecaac7385883e797064d00e2e4b6 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 16 Jul 2024 13:32:29 +0200 Subject: [PATCH 16/69] FEAT: implemented error propagation for tdf blobs --- .../readers/file_readers/tdf_blob_reader.rs | 143 +++++++++--------- .../file_readers/tdf_blob_reader/tdf_blobs.rs | 45 ++++-- src/io/readers/frame_reader.rs | 49 +++--- src/io/readers/spectrum_reader/minitdf.rs | 6 +- src/io/readers/spectrum_reader/tdf/dda.rs | 2 +- src/io/readers/spectrum_reader/tdf/dia.rs | 2 +- 6 files changed, 140 insertions(+), 107 deletions(-) diff --git a/src/io/readers/file_readers/tdf_blob_reader.rs b/src/io/readers/file_readers/tdf_blob_reader.rs index fad363c..a6f6e93 100644 --- a/src/io/readers/file_readers/tdf_blob_reader.rs +++ b/src/io/readers/file_readers/tdf_blob_reader.rs @@ -7,7 +7,7 @@ use std::path::{Path, PathBuf}; pub use tdf_blobs::*; use zstd::decode_all; -const U32_SIZE: usize = std::mem::size_of::(); +const BLOB_TYPE_SIZE: usize = std::mem::size_of::(); const HEADER_SIZE: usize = 2; #[derive(Debug)] @@ -19,67 +19,64 @@ pub struct TdfBlobReader { impl TdfBlobReader { // TODO parse compression1 - pub fn new(file_name: impl AsRef) -> Result { - let path: PathBuf = file_name.as_ref().to_path_buf(); - let file: File = File::open(&path)?; - let mmap: Mmap = unsafe { Mmap::map(&file)? 
}; - Ok(Self { + pub fn new( + file_name: impl AsRef, + ) -> Result { + let path = file_name.as_ref().to_path_buf(); + let file = File::open(&path)?; + let mmap = unsafe { Mmap::map(&file)? }; + let reader = Self { path, mmap, global_file_offset: 0, - }) - } - - pub fn get_blob(&self, offset: usize) -> Result { - let offset: usize = self.get_offset(offset)?; - let byte_count: usize = self.get_byte_count(offset)?; - let compressed_bytes: &[u8] = - self.get_compressed_bytes(offset, byte_count); - match decode_all(compressed_bytes) { - Ok(bytes) => Ok(TdfBlob::new(bytes)), - Err(_) => Err(TdfBlobError::Decompression(self.path.clone())), - } + }; + Ok(reader) } - fn get_offset(&self, offset: usize) -> Result { + pub fn get_blob( + &self, + offset: usize, + ) -> Result { let offset = self.global_file_offset + offset; - self.check_valid_offset(offset) + let byte_count = self.get_byte_count(offset)?; + let compressed_bytes = self.get_compressed_bytes(offset, byte_count)?; + let bytes = decode_all(compressed_bytes)?; + let blob = TdfBlob::new(bytes)?; + Ok(blob) } - fn check_valid_offset(&self, offset: usize) -> Result { - if (offset + U32_SIZE) >= self.mmap.len() { - return Err(TdfBlobError::Offset(offset, self.path.clone())); - } - Ok(offset) - } - - fn get_byte_count(&self, offset: usize) -> Result { - let raw_byte_count: &[u8] = - &self.mmap[offset as usize..(offset + U32_SIZE) as usize]; + fn get_byte_count( + &self, + offset: usize, + ) -> Result { + let start = offset as usize; + let end = (offset + BLOB_TYPE_SIZE) as usize; + let raw_byte_count = self.mmap.get(start..end).ok_or( + TdfBlobReaderError::RangeOutOfBounds { + start, + end, + length: self.mmap.len(), + }, + )?; let byte_count = u32::from_le_bytes(raw_byte_count.try_into().unwrap()) as usize; - self.check_valid_byte_count(byte_count, offset) + Ok(byte_count) } - fn check_valid_byte_count( + fn get_compressed_bytes( &self, - byte_count: usize, offset: usize, - ) -> Result { - if (byte_count < (HEADER_SIZE 
* U32_SIZE)) - || ((offset + byte_count) > self.len()) - { - return Err(TdfBlobError::ByteCount( - byte_count, - offset, - self.path.clone(), - )); - } - Ok(byte_count) - } - - fn get_compressed_bytes(&self, offset: usize, byte_count: usize) -> &[u8] { - &self.mmap[(offset + HEADER_SIZE * U32_SIZE)..offset + byte_count] + byte_count: usize, + ) -> Result<&[u8], TdfBlobReaderError> { + let start = offset + HEADER_SIZE * BLOB_TYPE_SIZE; + let end = offset + byte_count; + self.mmap + .get(start..end) + .ok_or(TdfBlobReaderError::RangeOutOfBounds { + start, + end, + length: self.mmap.len(), + }) } pub fn len(&self) -> usize { @@ -97,27 +94,26 @@ impl IndexedTdfBlobReader { pub fn new( file_name: impl AsRef, binary_offsets: Vec, - ) -> Result { - Ok(Self { + ) -> Result { + let blob_reader = TdfBlobReader::new(file_name)?; + let reader = Self { binary_offsets, - blob_reader: TdfBlobReader::new(file_name)?, - }) + blob_reader: blob_reader, + }; + Ok(reader) } - pub fn get_blob(&self, index: usize) -> Result { - self.check_valid_index(index)?; - let offset = self.binary_offsets[index]; - self.blob_reader.get_blob(offset) - } - - fn check_valid_index(&self, index: usize) -> Result { - if index >= self.len() { - return Err(TdfBlobError::Index( + pub fn get_blob( + &self, + index: usize, + ) -> Result { + let offset = *self.binary_offsets.get(index).ok_or( + TdfBlobReaderError::IndexOutOfBounds { index, - self.blob_reader.path.clone(), - )); - } - Ok(index) + length: self.binary_offsets.len(), + }, + )?; + self.blob_reader.get_blob(offset) } pub fn len(&self) -> usize { @@ -126,15 +122,24 @@ impl IndexedTdfBlobReader { } #[derive(Debug, thiserror::Error)] -pub enum TdfBlobError { - #[error("Cannot read or mmap file {0}")] +pub enum TdfBlobReaderError { + #[error("{0}")] IO(#[from] io::Error), + #[error("{0}")] + TdfBlob(#[from] TdfBlobError), + #[error("Index {index} out of bounds for length {length})")] + IndexOutOfBounds { index: usize, length: usize }, + #[error("Range 
[{start}-{end}] out of bounds for length {length})")] + RangeOutOfBounds { + start: usize, + end: usize, + length: usize, + }, + #[error("Index {0} is invalid for file {1}")] Index(usize, PathBuf), #[error("Offset {0} is invalid for file {1}")] Offset(usize, PathBuf), #[error("Byte count {0} from offset {1} is invalid for file {2}")] ByteCount(usize, usize, PathBuf), - #[error("Zstd decompression failed for file {0}")] - Decompression(PathBuf), } diff --git a/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs b/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs index fbb2698..4dbf945 100644 --- a/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs +++ b/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs @@ -1,4 +1,4 @@ -const U32_SIZE: usize = std::mem::size_of::(); +const BLOB_TYPE_SIZE: usize = std::mem::size_of::(); #[derive(Clone, Debug, Default, PartialEq)] pub struct TdfBlob { @@ -6,19 +6,30 @@ pub struct TdfBlob { } impl TdfBlob { - pub fn new(bytes: Vec) -> Self { - assert!(bytes.len() % U32_SIZE == 0); - Self { bytes } + pub fn new(bytes: Vec) -> Result { + if bytes.len() % BLOB_TYPE_SIZE != 0 { + Err(TdfBlobError::InvalidLength { + length: bytes.len(), + }) + } else { + Ok(Self { bytes }) + } } - pub fn get(&self, index: usize) -> u32 { - assert!(index < self.len()); - Self::concatenate_bytes( - self.bytes[index], - self.bytes[index + self.len()], - self.bytes[index + 2 * self.len()], - self.bytes[index + 3 * self.len()], - ) + pub fn get(&self, index: usize) -> Result { + if index >= self.len() { + Err(TdfBlobError::IndexOutOfBounds { + length: self.len(), + index, + }) + } else { + Ok(Self::concatenate_bytes( + self.bytes[index], + self.bytes[index + self.len()], + self.bytes[index + 2 * self.len()], + self.bytes[index + 3 * self.len()], + )) + } } fn concatenate_bytes(b1: u8, b2: u8, b3: u8, b4: u8) -> u32 { @@ -29,10 +40,18 @@ impl TdfBlob { } pub fn len(&self) -> usize { - self.bytes.len() / U32_SIZE + self.bytes.len() / 
BLOB_TYPE_SIZE } pub fn is_empty(&self) -> bool { self.len() == 0 } } + +#[derive(Debug, thiserror::Error)] +pub enum TdfBlobError { + #[error("Length {length} not a multiple of {BLOB_TYPE_SIZE}")] + InvalidLength { length: usize }, + #[error("Index {index} out of bounds for length {length})")] + IndexOutOfBounds { index: usize, length: usize }, +} diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index 29c07a9..39c2026 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -17,7 +17,9 @@ use super::{ frame_groups::SqlWindowGroup, frames::SqlFrame, ReadableSqlTable, SqlReader, }, - tdf_blob_reader::{TdfBlob, TdfBlobReader}, + tdf_blob_reader::{ + TdfBlob, TdfBlobError, TdfBlobReader, TdfBlobReaderError, + }, }, QuadrupoleSettingsReader, }; @@ -80,27 +82,24 @@ impl FrameReader { (0..self.len()) .into_par_iter() .filter(move |x| predicate(&self.sql_frames[*x])) - .map(move |x| self.get(x)) + .map(move |x| self.get(x).unwrap()) } - pub fn get(&self, index: usize) -> Frame { + pub fn get(&self, index: usize) -> Result { let mut frame: Frame = Frame::default(); let sql_frame = &self.sql_frames[index]; frame.index = sql_frame.id; - let blob = match self.tdf_bin_reader.get_blob(sql_frame.binary_offset) { - Ok(blob) => blob, - Err(_) => return frame, - }; - let scan_count: usize = blob.get(0) as usize; + let blob = self.tdf_bin_reader.get_blob(sql_frame.binary_offset)?; + let scan_count: usize = blob.get(0)? 
as usize; let peak_count: usize = (blob.len() - scan_count) / 2; - frame.scan_offsets = read_scan_offsets(scan_count, peak_count, &blob); - frame.intensities = read_intensities(scan_count, peak_count, &blob); + frame.scan_offsets = read_scan_offsets(scan_count, peak_count, &blob)?; + frame.intensities = read_intensities(scan_count, peak_count, &blob)?; frame.tof_indices = read_tof_indices( scan_count, peak_count, &blob, &frame.scan_offsets, - ); + )?; frame.ms_level = MSLevel::read_from_msms_type(sql_frame.msms_type); frame.rt = sql_frame.rt; frame.acquisition_type = self.acquisition; @@ -113,7 +112,7 @@ impl FrameReader { frame.quadrupole_settings = self.quadrupole_settings[window_group as usize - 1].clone(); } - frame + Ok(frame) } pub fn get_all(&self) -> Vec { @@ -145,29 +144,29 @@ fn read_scan_offsets( scan_count: usize, peak_count: usize, blob: &TdfBlob, -) -> Vec { +) -> Result, FrameReaderError> { let mut scan_offsets: Vec = Vec::with_capacity(scan_count + 1); scan_offsets.push(0); for scan_index in 0..scan_count - 1 { let index = scan_index + 1; - let scan_size: usize = (blob.get(index) / 2) as usize; + let scan_size: usize = (blob.get(index)? 
/ 2) as usize; scan_offsets.push(scan_offsets[scan_index] + scan_size); } scan_offsets.push(peak_count); - scan_offsets + Ok(scan_offsets) } fn read_intensities( scan_count: usize, peak_count: usize, blob: &TdfBlob, -) -> Vec { +) -> Result, FrameReaderError> { let mut intensities: Vec = Vec::with_capacity(peak_count); for peak_index in 0..peak_count { let index: usize = scan_count + 1 + 2 * peak_index; - intensities.push(blob.get(index)); + intensities.push(blob.get(index)?); } - intensities + Ok(intensities) } fn read_tof_indices( @@ -175,7 +174,7 @@ fn read_tof_indices( peak_count: usize, blob: &TdfBlob, scan_offsets: &Vec, -) -> Vec { +) -> Result, FrameReaderError> { let mut tof_indices: Vec = Vec::with_capacity(peak_count); for scan_index in 0..scan_count { let start_offset: usize = scan_offsets[scan_index]; @@ -183,10 +182,18 @@ fn read_tof_indices( let mut current_sum: u32 = 0; for peak_index in start_offset..end_offset { let index = scan_count + 2 * peak_index; - let tof_index: u32 = blob.get(index); + let tof_index: u32 = blob.get(index)?; current_sum += tof_index; tof_indices.push(current_sum - 1); } } - tof_indices + Ok(tof_indices) +} + +#[derive(Debug, thiserror::Error)] +pub enum FrameReaderError { + #[error("{0}")] + TdfBlob(#[from] TdfBlobError), + #[error("{0}")] + TdfBlobReader(#[from] TdfBlobReaderError), } diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 0afcde1..3b89f2c 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -6,7 +6,9 @@ use crate::{ parquet_reader::{ precursors::ParquetPrecursor, ReadableParquetTable, }, - tdf_blob_reader::IndexedTdfBlobReader, + tdf_blob_reader::{ + IndexedTdfBlobReader, TdfBlobError, TdfBlobReaderError, + }, }, PrecursorReader, }, @@ -60,7 +62,7 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { if !blob.is_empty() { let size: usize = blob.len(); let spectrum_data: Vec = - (0..size).map(|i| 
blob.get(i)).collect(); + (0..size).map(|i| blob.get(i).unwrap()).collect(); let scan_count: usize = blob.len() / 3; let tof_indices_bytes: &[u32] = &spectrum_data[..scan_count as usize * 2]; diff --git a/src/io/readers/spectrum_reader/tdf/dda.rs b/src/io/readers/spectrum_reader/tdf/dda.rs index c5d9eb8..93ab962 100644 --- a/src/io/readers/spectrum_reader/tdf/dda.rs +++ b/src/io/readers/spectrum_reader/tdf/dda.rs @@ -67,7 +67,7 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader { isolation_mz = pasef_frame.isolation_mz; isolation_width = pasef_frame.isolation_width; let frame_index: usize = pasef_frame.frame - 1; - let frame = self.frame_reader.get(frame_index); + let frame = self.frame_reader.get(frame_index).unwrap(); if frame.intensities.len() == 0 { continue; } diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 493152b..6033bfe 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -56,7 +56,7 @@ impl RawSpectrumReaderTrait for DIARawSpectrumReader { let scan_start = quad_settings.scan_starts[0]; let scan_end = quad_settings.scan_ends[0]; let frame_index = quad_settings.index - 1; - let frame = self.frame_reader.get(frame_index); + let frame = self.frame_reader.get(frame_index).unwrap(); let offset_start = frame.scan_offsets[scan_start] as usize; let offset_end = frame.scan_offsets[scan_end] as usize; let tof_indices = &frame.tof_indices[offset_start..offset_end]; From f9836f447674639c7563ce85708ed628c46480f7 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 16 Jul 2024 13:47:04 +0200 Subject: [PATCH 17/69] FEAT: removed final unwrap from tdfblobreader --- src/io/readers/file_readers/tdf_blob_reader.rs | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/io/readers/file_readers/tdf_blob_reader.rs b/src/io/readers/file_readers/tdf_blob_reader.rs index a6f6e93..c562eff 100644 --- 
a/src/io/readers/file_readers/tdf_blob_reader.rs +++ b/src/io/readers/file_readers/tdf_blob_reader.rs @@ -59,7 +59,7 @@ impl TdfBlobReader { }, )?; let byte_count = - u32::from_le_bytes(raw_byte_count.try_into().unwrap()) as usize; + u32::from_le_bytes(raw_byte_count.try_into()?) as usize; Ok(byte_count) } @@ -135,11 +135,6 @@ pub enum TdfBlobReaderError { end: usize, length: usize, }, - - #[error("Index {0} is invalid for file {1}")] - Index(usize, PathBuf), - #[error("Offset {0} is invalid for file {1}")] - Offset(usize, PathBuf), - #[error("Byte count {0} from offset {1} is invalid for file {2}")] - ByteCount(usize, usize, PathBuf), + #[error("{0}")] + TryFromSliceError(#[from] std::array::TryFromSliceError), } From 66fc4eb51cd44b444f45b66727d945eb517a59d8 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 16 Jul 2024 14:55:26 +0200 Subject: [PATCH 18/69] FEAT: cleaned up tdf blob errors --- .../readers/file_readers/tdf_blob_reader.rs | 94 ++++++++----------- .../file_readers/tdf_blob_reader/tdf_blobs.rs | 21 ++--- src/io/readers/frame_reader.rs | 18 ++-- src/io/readers/spectrum_reader/minitdf.rs | 2 +- 4 files changed, 53 insertions(+), 82 deletions(-) diff --git a/src/io/readers/file_readers/tdf_blob_reader.rs b/src/io/readers/file_readers/tdf_blob_reader.rs index c562eff..7cecc35 100644 --- a/src/io/readers/file_readers/tdf_blob_reader.rs +++ b/src/io/readers/file_readers/tdf_blob_reader.rs @@ -3,7 +3,7 @@ mod tdf_blobs; use memmap2::Mmap; use std::fs::File; use std::io; -use std::path::{Path, PathBuf}; +use std::path::Path; pub use tdf_blobs::*; use zstd::decode_all; @@ -12,7 +12,6 @@ const HEADER_SIZE: usize = 2; #[derive(Debug)] pub struct TdfBlobReader { - path: PathBuf, mmap: Mmap, global_file_offset: usize, } @@ -26,61 +25,43 @@ impl TdfBlobReader { let file = File::open(&path)?; let mmap = unsafe { Mmap::map(&file)? 
}; let reader = Self { - path, mmap, global_file_offset: 0, }; Ok(reader) } - pub fn get_blob( - &self, - offset: usize, - ) -> Result { + pub fn get(&self, offset: usize) -> Result { let offset = self.global_file_offset + offset; - let byte_count = self.get_byte_count(offset)?; - let compressed_bytes = self.get_compressed_bytes(offset, byte_count)?; - let bytes = decode_all(compressed_bytes)?; + let byte_count = self + .get_byte_count(offset) + .ok_or(TdfBlobReaderError::InvalidOffset)?; + let compressed_bytes = self + .get_compressed_bytes(offset, byte_count) + .ok_or(TdfBlobReaderError::CorruptData)?; + let bytes = decode_all(compressed_bytes) + .map_err(|_| TdfBlobReaderError::Decompression)?; let blob = TdfBlob::new(bytes)?; Ok(blob) } - fn get_byte_count( - &self, - offset: usize, - ) -> Result { + fn get_byte_count(&self, offset: usize) -> Option { let start = offset as usize; let end = (offset + BLOB_TYPE_SIZE) as usize; - let raw_byte_count = self.mmap.get(start..end).ok_or( - TdfBlobReaderError::RangeOutOfBounds { - start, - end, - length: self.mmap.len(), - }, - )?; + let raw_byte_count = self.mmap.get(start..end)?; let byte_count = - u32::from_le_bytes(raw_byte_count.try_into()?) as usize; - Ok(byte_count) + u32::from_le_bytes(raw_byte_count.try_into().ok()?) 
as usize; + Some(byte_count) } fn get_compressed_bytes( &self, offset: usize, byte_count: usize, - ) -> Result<&[u8], TdfBlobReaderError> { + ) -> Option<&[u8]> { let start = offset + HEADER_SIZE * BLOB_TYPE_SIZE; let end = offset + byte_count; - self.mmap - .get(start..end) - .ok_or(TdfBlobReaderError::RangeOutOfBounds { - start, - end, - length: self.mmap.len(), - }) - } - - pub fn len(&self) -> usize { - self.mmap.len() + self.mmap.get(start..end) } } @@ -103,21 +84,16 @@ impl IndexedTdfBlobReader { Ok(reader) } - pub fn get_blob( + pub fn get( &self, index: usize, - ) -> Result { - let offset = *self.binary_offsets.get(index).ok_or( - TdfBlobReaderError::IndexOutOfBounds { - index, - length: self.binary_offsets.len(), - }, - )?; - self.blob_reader.get_blob(offset) - } - - pub fn len(&self) -> usize { - self.binary_offsets.len() + ) -> Result { + let offset = *self + .binary_offsets + .get(index) + .ok_or(IndexedTdfBlobReaderError::InvalidIndex)?; + let blob = self.blob_reader.get(offset)?; + Ok(blob) } } @@ -127,14 +103,18 @@ pub enum TdfBlobReaderError { IO(#[from] io::Error), #[error("{0}")] TdfBlob(#[from] TdfBlobError), - #[error("Index {index} out of bounds for length {length})")] - IndexOutOfBounds { index: usize, length: usize }, - #[error("Range [{start}-{end}] out of bounds for length {length})")] - RangeOutOfBounds { - start: usize, - end: usize, - length: usize, - }, + #[error("Data is corrupt")] + CorruptData, + #[error("Decompression fails")] + Decompression, + #[error("Invalid offset")] + InvalidOffset, +} + +#[derive(Debug, thiserror::Error)] +pub enum IndexedTdfBlobReaderError { #[error("{0}")] - TryFromSliceError(#[from] std::array::TryFromSliceError), + TdfBlobReaderError(#[from] TdfBlobReaderError), + #[error("Invalid index")] + InvalidIndex, } diff --git a/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs b/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs index 4dbf945..6445244 100644 --- 
a/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs +++ b/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs @@ -8,22 +8,17 @@ pub struct TdfBlob { impl TdfBlob { pub fn new(bytes: Vec) -> Result { if bytes.len() % BLOB_TYPE_SIZE != 0 { - Err(TdfBlobError::InvalidLength { - length: bytes.len(), - }) + Err(TdfBlobError(bytes.len())) } else { Ok(Self { bytes }) } } - pub fn get(&self, index: usize) -> Result { + pub fn get(&self, index: usize) -> Option { if index >= self.len() { - Err(TdfBlobError::IndexOutOfBounds { - length: self.len(), - index, - }) + None } else { - Ok(Self::concatenate_bytes( + Some(Self::concatenate_bytes( self.bytes[index], self.bytes[index + self.len()], self.bytes[index + 2 * self.len()], @@ -49,9 +44,5 @@ impl TdfBlob { } #[derive(Debug, thiserror::Error)] -pub enum TdfBlobError { - #[error("Length {length} not a multiple of {BLOB_TYPE_SIZE}")] - InvalidLength { length: usize }, - #[error("Index {index} out of bounds for length {length})")] - IndexOutOfBounds { index: usize, length: usize }, -} +#[error("Length {0} is not a multiple of {BLOB_TYPE_SIZE}")] +pub struct TdfBlobError(usize); diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index 39c2026..c5bac24 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -85,11 +85,11 @@ impl FrameReader { .map(move |x| self.get(x).unwrap()) } - pub fn get(&self, index: usize) -> Result { + pub fn get(&self, index: usize) -> Option { let mut frame: Frame = Frame::default(); let sql_frame = &self.sql_frames[index]; frame.index = sql_frame.id; - let blob = self.tdf_bin_reader.get_blob(sql_frame.binary_offset)?; + let blob = self.tdf_bin_reader.get(sql_frame.binary_offset).ok()?; let scan_count: usize = blob.get(0)? 
as usize; let peak_count: usize = (blob.len() - scan_count) / 2; frame.scan_offsets = read_scan_offsets(scan_count, peak_count, &blob)?; @@ -112,7 +112,7 @@ impl FrameReader { frame.quadrupole_settings = self.quadrupole_settings[window_group as usize - 1].clone(); } - Ok(frame) + Some(frame) } pub fn get_all(&self) -> Vec { @@ -144,7 +144,7 @@ fn read_scan_offsets( scan_count: usize, peak_count: usize, blob: &TdfBlob, -) -> Result, FrameReaderError> { +) -> Option> { let mut scan_offsets: Vec = Vec::with_capacity(scan_count + 1); scan_offsets.push(0); for scan_index in 0..scan_count - 1 { @@ -153,20 +153,20 @@ fn read_scan_offsets( scan_offsets.push(scan_offsets[scan_index] + scan_size); } scan_offsets.push(peak_count); - Ok(scan_offsets) + Some(scan_offsets) } fn read_intensities( scan_count: usize, peak_count: usize, blob: &TdfBlob, -) -> Result, FrameReaderError> { +) -> Option> { let mut intensities: Vec = Vec::with_capacity(peak_count); for peak_index in 0..peak_count { let index: usize = scan_count + 1 + 2 * peak_index; intensities.push(blob.get(index)?); } - Ok(intensities) + Some(intensities) } fn read_tof_indices( @@ -174,7 +174,7 @@ fn read_tof_indices( peak_count: usize, blob: &TdfBlob, scan_offsets: &Vec, -) -> Result, FrameReaderError> { +) -> Option> { let mut tof_indices: Vec = Vec::with_capacity(peak_count); for scan_index in 0..scan_count { let start_offset: usize = scan_offsets[scan_index]; @@ -187,7 +187,7 @@ fn read_tof_indices( tof_indices.push(current_sum - 1); } } - Ok(tof_indices) + Some(tof_indices) } #[derive(Debug, thiserror::Error)] diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 3b89f2c..ea622dc 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -58,7 +58,7 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { fn get(&self, index: usize) -> Spectrum { let mut spectrum = Spectrum::default(); spectrum.index = index; - let blob 
= self.blob_reader.get_blob(index).unwrap(); + let blob = self.blob_reader.get(index).unwrap(); if !blob.is_empty() { let size: usize = blob.len(); let spectrum_data: Vec = From a948d3ceb3090b8f41049c0a9b56fe2aeb4e6c5d Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 16 Jul 2024 14:57:01 +0200 Subject: [PATCH 19/69] CHORE: renamed to u32size in tdf blob reader --- src/io/readers/file_readers/tdf_blob_reader.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/io/readers/file_readers/tdf_blob_reader.rs b/src/io/readers/file_readers/tdf_blob_reader.rs index 7cecc35..55876ac 100644 --- a/src/io/readers/file_readers/tdf_blob_reader.rs +++ b/src/io/readers/file_readers/tdf_blob_reader.rs @@ -7,7 +7,7 @@ use std::path::Path; pub use tdf_blobs::*; use zstd::decode_all; -const BLOB_TYPE_SIZE: usize = std::mem::size_of::(); +const U32_SIZE: usize = std::mem::size_of::(); const HEADER_SIZE: usize = 2; #[derive(Debug)] @@ -47,7 +47,7 @@ impl TdfBlobReader { fn get_byte_count(&self, offset: usize) -> Option { let start = offset as usize; - let end = (offset + BLOB_TYPE_SIZE) as usize; + let end = (offset + U32_SIZE) as usize; let raw_byte_count = self.mmap.get(start..end)?; let byte_count = u32::from_le_bytes(raw_byte_count.try_into().ok()?) 
as usize; @@ -59,7 +59,7 @@ impl TdfBlobReader { offset: usize, byte_count: usize, ) -> Option<&[u8]> { - let start = offset + HEADER_SIZE * BLOB_TYPE_SIZE; + let start = offset + HEADER_SIZE * U32_SIZE; let end = offset + byte_count; self.mmap.get(start..end) } From 73d08ee26da559776a0559cf7369a9970b1ac6f3 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 16 Jul 2024 15:03:46 +0200 Subject: [PATCH 20/69] CHORE: cleaner sqlerror --- src/io/readers/file_readers/sql_reader.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/io/readers/file_readers/sql_reader.rs b/src/io/readers/file_readers/sql_reader.rs index 106e077..6da8a67 100644 --- a/src/io/readers/file_readers/sql_reader.rs +++ b/src/io/readers/file_readers/sql_reader.rs @@ -96,5 +96,5 @@ impl ParseDefault for rusqlite::Row<'_> { } #[derive(thiserror::Error, Debug)] -#[error("SqlError: {0}")] +#[error("{0}")] pub struct SqlError(#[from] rusqlite::Error); From 3ac2aaff04f7f0eeef90a0e33346d97057f8c4d9 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 16 Jul 2024 15:12:46 +0200 Subject: [PATCH 21/69] DOCS: parquet error --- src/io/readers/file_readers/parquet_reader.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/io/readers/file_readers/parquet_reader.rs b/src/io/readers/file_readers/parquet_reader.rs index c881761..a5c6927 100644 --- a/src/io/readers/file_readers/parquet_reader.rs +++ b/src/io/readers/file_readers/parquet_reader.rs @@ -38,7 +38,7 @@ pub trait ReadableParquetTable { #[derive(Debug, thiserror::Error)] pub enum ParquetError { - #[error("Cannot read file {0}")] + #[error("{0}")] IO(#[from] io::Error), #[error("Cannot iterate over row {0}")] ParquetIO(#[from] parquet::errors::ParquetError), From 125bdec9b01d83cf541075d9685da8dabdd87d3d Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 16 Jul 2024 15:33:43 +0200 Subject: [PATCH 22/69] CHORE: improved error propagation on framereader --- benches/speed_performance.rs | 18 +++---
src/io/readers/frame_reader.rs | 75 ++++++++++++++++----------- src/io/readers/spectrum_reader/tdf.rs | 2 +- tests/frame_readers.rs | 11 ++-- 4 files changed, 60 insertions(+), 46 deletions(-) diff --git a/benches/speed_performance.rs b/benches/speed_performance.rs index 4bd5a4e..d9bd813 100644 --- a/benches/speed_performance.rs +++ b/benches/speed_performance.rs @@ -13,21 +13,15 @@ const SYP_TEST: &str = "/mnt/c/Users/Sander.Willems/Documents/data/20230505_TIMS05_PaSk_SA_HeLa_6min_syP_5scans_30Da_S1-D4_1_2330.d/"; fn read_all_frames(frame_reader: &FrameReader) { - frame_reader - .parallel_filter(|x| true) - .collect::>(); + frame_reader.get_all(); } fn read_all_ms1_frames(frame_reader: &FrameReader) { - frame_reader - .parallel_filter(|x| x.msms_type == 0) - .collect::>(); + frame_reader.get_all_ms1(); } fn read_all_ms2_frames(frame_reader: &FrameReader) { - frame_reader - .parallel_filter(|x| x.msms_type != 0) - .collect::>(); + frame_reader.get_all_ms2(); } fn read_all_spectra(spectrum_reader: &SpectrumReader) { @@ -39,7 +33,7 @@ fn criterion_benchmark_dda(c: &mut Criterion) { let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DDA_TEST; - let frame_reader = FrameReader::new(d_folder_name); + let frame_reader = FrameReader::new(d_folder_name).unwrap(); let spectrum_reader = SpectrumReader::new(d_folder_name); group.bench_function("DDA read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) @@ -61,7 +55,7 @@ fn criterion_benchmark_dia(c: &mut Criterion) { let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DIA_TEST; - let frame_reader = FrameReader::new(d_folder_name); + let frame_reader = FrameReader::new(d_folder_name).unwrap(); let spectrum_reader = SpectrumReader::new(d_folder_name); group.bench_function("DIA read_all_frames 6m", |b| { b.iter(|| 
read_all_frames(black_box(&frame_reader))) @@ -80,7 +74,7 @@ fn criterion_benchmark_syp(c: &mut Criterion) { let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); let d_folder_name: &str = SYP_TEST; - let frame_reader = FrameReader::new(d_folder_name); + let frame_reader = FrameReader::new(d_folder_name).unwrap(); let spectrum_reader = SpectrumReader::new(d_folder_name); group.bench_function("SYP read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index c5bac24..f4bd239 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -15,7 +15,7 @@ use super::{ file_readers::{ sql_reader::{ frame_groups::SqlWindowGroup, frames::SqlFrame, ReadableSqlTable, - SqlReader, + SqlError, SqlReader, }, tdf_blob_reader::{ TdfBlob, TdfBlobError, TdfBlobReader, TdfBlobReaderError, @@ -35,12 +35,16 @@ pub struct FrameReader { } impl FrameReader { - pub fn new(path: impl AsRef) -> Self { - let sql_path = find_extension(&path, "analysis.tdf").unwrap(); - let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); - let sql_frames = SqlFrame::from_sql_reader(&tdf_sql_reader).unwrap(); - let bin_path = find_extension(&path, "analysis.tdf_bin").unwrap(); - let tdf_bin_reader = TdfBlobReader::new(bin_path).unwrap(); + pub fn new(path: impl AsRef) -> Result { + let sql_path = find_extension(&path, "analysis.tdf").ok_or( + FrameReaderError::FileNotFound("analysis.tdf".to_string()), + )?; + let tdf_sql_reader = SqlReader::open(sql_path)?; + let sql_frames = SqlFrame::from_sql_reader(&tdf_sql_reader)?; + let bin_path = find_extension(&path, "analysis.tdf_bin").ok_or( + FrameReaderError::FileNotFound("analysis.tdf_bin".to_string()), + )?; + let tdf_bin_reader = TdfBlobReader::new(bin_path)?; let acquisition = if sql_frames.iter().any(|x| x.msms_type == 8) { AcquisitionType::DDAPASEF } else if sql_frames.iter().any(|x| 
x.msms_type == 9) { @@ -52,7 +56,7 @@ impl FrameReader { let quadrupole_settings; if acquisition == AcquisitionType::DIAPASEF { for window_group in - SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap() + SqlWindowGroup::from_sql_reader(&tdf_sql_reader)? { window_groups[window_group.frame - 1] = window_group.window_group; @@ -62,7 +66,7 @@ impl FrameReader { } else { quadrupole_settings = vec![]; } - Self { + let reader = Self { path: path.as_ref().to_path_buf(), tdf_bin_reader, sql_frames, @@ -72,25 +76,28 @@ impl FrameReader { .into_iter() .map(|x| Arc::new(x)) .collect(), - } + }; + Ok(reader) } pub fn parallel_filter<'a, F: Fn(&SqlFrame) -> bool + Sync + Send + 'a>( &'a self, predicate: F, - ) -> impl ParallelIterator + 'a { + ) -> impl ParallelIterator> + 'a + { (0..self.len()) .into_par_iter() .filter(move |x| predicate(&self.sql_frames[*x])) - .map(move |x| self.get(x).unwrap()) + .map(move |x| self.get(x)) } - pub fn get(&self, index: usize) -> Option { + pub fn get(&self, index: usize) -> Result { let mut frame: Frame = Frame::default(); let sql_frame = &self.sql_frames[index]; frame.index = sql_frame.id; - let blob = self.tdf_bin_reader.get(sql_frame.binary_offset).ok()?; - let scan_count: usize = blob.get(0)? as usize; + let blob = self.tdf_bin_reader.get(sql_frame.binary_offset)?; + let scan_count: usize = + blob.get(0).ok_or(FrameReaderError::CorruptFrame)? 
as usize; let peak_count: usize = (blob.len() - scan_count) / 2; frame.scan_offsets = read_scan_offsets(scan_count, peak_count, &blob)?; frame.intensities = read_intensities(scan_count, peak_count, &blob)?; @@ -112,18 +119,18 @@ impl FrameReader { frame.quadrupole_settings = self.quadrupole_settings[window_group as usize - 1].clone(); } - Some(frame) + Ok(frame) } - pub fn get_all(&self) -> Vec { + pub fn get_all(&self) -> Vec> { self.parallel_filter(|_| true).collect() } - pub fn get_all_ms1(&self) -> Vec { + pub fn get_all_ms1(&self) -> Vec> { self.parallel_filter(|x| x.msms_type == 0).collect() } - pub fn get_all_ms2(&self) -> Vec { + pub fn get_all_ms2(&self) -> Vec> { self.parallel_filter(|x| x.msms_type != 0).collect() } @@ -144,29 +151,32 @@ fn read_scan_offsets( scan_count: usize, peak_count: usize, blob: &TdfBlob, -) -> Option> { +) -> Result, FrameReaderError> { let mut scan_offsets: Vec = Vec::with_capacity(scan_count + 1); scan_offsets.push(0); for scan_index in 0..scan_count - 1 { let index = scan_index + 1; - let scan_size: usize = (blob.get(index)? / 2) as usize; + let scan_size: usize = + (blob.get(index).ok_or(FrameReaderError::CorruptFrame)? 
/ 2) + as usize; scan_offsets.push(scan_offsets[scan_index] + scan_size); } scan_offsets.push(peak_count); - Some(scan_offsets) + Ok(scan_offsets) } fn read_intensities( scan_count: usize, peak_count: usize, blob: &TdfBlob, -) -> Option> { +) -> Result, FrameReaderError> { let mut intensities: Vec = Vec::with_capacity(peak_count); for peak_index in 0..peak_count { let index: usize = scan_count + 1 + 2 * peak_index; - intensities.push(blob.get(index)?); + intensities + .push(blob.get(index).ok_or(FrameReaderError::CorruptFrame)?); } - Some(intensities) + Ok(intensities) } fn read_tof_indices( @@ -174,7 +184,7 @@ fn read_tof_indices( peak_count: usize, blob: &TdfBlob, scan_offsets: &Vec, -) -> Option> { +) -> Result, FrameReaderError> { let mut tof_indices: Vec = Vec::with_capacity(peak_count); for scan_index in 0..scan_count { let start_offset: usize = scan_offsets[scan_index]; @@ -182,18 +192,23 @@ fn read_tof_indices( let mut current_sum: u32 = 0; for peak_index in start_offset..end_offset { let index = scan_count + 2 * peak_index; - let tof_index: u32 = blob.get(index)?; + let tof_index: u32 = + blob.get(index).ok_or(FrameReaderError::CorruptFrame)?; current_sum += tof_index; tof_indices.push(current_sum - 1); } } - Some(tof_indices) + Ok(tof_indices) } #[derive(Debug, thiserror::Error)] pub enum FrameReaderError { #[error("{0}")] - TdfBlob(#[from] TdfBlobError), + TdfBlobReaderError(#[from] TdfBlobReaderError), + #[error("{0}")] + FileNotFound(String), #[error("{0}")] - TdfBlobReader(#[from] TdfBlobReaderError), + SqlError(#[from] SqlError), + #[error("Corrupt Frame")] + CorruptFrame, } diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 58ffcd6..93b80be 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -32,7 +32,7 @@ pub struct TDFSpectrumReader { impl TDFSpectrumReader { pub fn new(path_name: impl AsRef) -> Self { - let frame_reader: FrameReader = 
FrameReader::new(&path_name); + let frame_reader: FrameReader = FrameReader::new(&path_name).unwrap(); let sql_path = find_extension(&path_name, "analysis.tdf").unwrap(); let metadata = MetadataReader::new(&sql_path); let mz_reader: Tof2MzConverter = metadata.mz_converter; diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index 21ac3eb..c67d7ab 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -1,4 +1,3 @@ -use rayon::iter::ParallelIterator; use std::{path::Path, sync::Arc}; use timsrust::{ io::readers::FrameReader, @@ -20,7 +19,10 @@ fn tdf_reader_frames1() { .unwrap() .to_string(); let frames: Vec = FrameReader::new(&file_path) - .parallel_filter(|x| x.msms_type == 0) + .unwrap() + .get_all_ms1() + .into_iter() + .map(|x| x.unwrap()) .collect(); let expected: Vec = vec![ Frame { @@ -64,7 +66,10 @@ fn tdf_reader_frames2() { .unwrap() .to_string(); let frames: Vec = FrameReader::new(&file_path) - .parallel_filter(|x| x.msms_type != 0) + .unwrap() + .get_all_ms2() + .into_iter() + .map(|x| x.unwrap()) .collect(); let expected: Vec = vec![ // Frame::default(), From f5f91ad3ec29caa68622523f750fad133f7faef9 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 16 Jul 2024 15:35:20 +0200 Subject: [PATCH 23/69] CHORE: cargo fmt --- src/io/readers/frame_reader.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index f4bd239..78c6e85 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -17,9 +17,7 @@ use super::{ frame_groups::SqlWindowGroup, frames::SqlFrame, ReadableSqlTable, SqlError, SqlReader, }, - tdf_blob_reader::{ - TdfBlob, TdfBlobError, TdfBlobReader, TdfBlobReaderError, - }, + tdf_blob_reader::{TdfBlob, TdfBlobReader, TdfBlobReaderError}, }, QuadrupoleSettingsReader, }; From e32b84407c94253dc74ae4201c11e896e20147f5 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 16 Jul 2024 16:04:21 +0200 Subject: [PATCH 
24/69] CHORE: metadata reader propagates errors --- src/io/readers/file_readers/sql_reader.rs | 6 +- src/io/readers/metadata_reader.rs | 125 ++++++++++++--------- src/io/readers/precursor_reader/tdf/dda.rs | 2 +- src/io/readers/precursor_reader/tdf/dia.rs | 2 +- src/io/readers/spectrum_reader/tdf.rs | 2 +- 5 files changed, 79 insertions(+), 58 deletions(-) diff --git a/src/io/readers/file_readers/sql_reader.rs b/src/io/readers/file_readers/sql_reader.rs index 6da8a67..279aa06 100644 --- a/src/io/readers/file_readers/sql_reader.rs +++ b/src/io/readers/file_readers/sql_reader.rs @@ -58,7 +58,11 @@ pub trait ReadableSqlTable { let mut stmt = reader.connection.prepare(&query)?; let rows = stmt.query_map([], |row| Ok(Self::from_sql_row(row)))?; let result = rows.collect::, _>>()?; - Ok(result) + if result.len() == 0 { + Err(SqlError(rusqlite::Error::QueryReturnedNoRows)) + } else { + Ok(result) + } } } diff --git a/src/io/readers/metadata_reader.rs b/src/io/readers/metadata_reader.rs index a439239..1e47668 100644 --- a/src/io/readers/metadata_reader.rs +++ b/src/io/readers/metadata_reader.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, path::Path}; +use std::{collections::HashMap, fmt::Debug, path::Path, str::FromStr}; use crate::{ domain_converters::{Frame2RtConverter, Scan2ImConverter, Tof2MzConverter}, @@ -6,7 +6,7 @@ use crate::{ }; use super::file_readers::sql_reader::{ - metadata::SqlMetadata, ReadableSqlHashMap, SqlReader, + metadata::SqlMetadata, ReadableSqlHashMap, SqlError, SqlReader, }; const OTOF_CONTROL: &str = "Bruker otofControl"; @@ -14,75 +14,92 @@ const OTOF_CONTROL: &str = "Bruker otofControl"; pub struct MetadataReader; impl MetadataReader { - pub fn new(path: impl AsRef) -> Metadata { + pub fn new( + path: impl AsRef, + ) -> Result { let sql_path = path.as_ref(); - let tdf_sql_reader = SqlReader::open(&sql_path).unwrap(); + let tdf_sql_reader = SqlReader::open(&sql_path)?; let sql_metadata: HashMap = - 
SqlMetadata::from_sql_reader(&tdf_sql_reader).unwrap(); - let compression_type = sql_metadata - .get("TimsCompressionType") - .unwrap() - .parse() - .unwrap(); - Metadata { + SqlMetadata::from_sql_reader(&tdf_sql_reader)?; + let compression_type = + parse_value(&sql_metadata, "TimsCompressionType")?; + let metadata = Metadata { path: path.as_ref().to_path_buf(), - rt_converter: get_rt_converter(&tdf_sql_reader), - im_converter: get_im_converter(&sql_metadata, &tdf_sql_reader), - mz_converter: get_mz_converter(&sql_metadata), + rt_converter: get_rt_converter(&tdf_sql_reader)?, + im_converter: get_im_converter(&sql_metadata, &tdf_sql_reader)?, + mz_converter: get_mz_converter(&sql_metadata)?, compression_type, - } + }; + Ok(metadata) } } -fn get_rt_converter(tdf_sql_reader: &SqlReader) -> Frame2RtConverter { - let rt_values: Vec = tdf_sql_reader - .read_column_from_table("Time", "Frames") - .unwrap(); - Frame2RtConverter::from_values(rt_values) +fn get_rt_converter( + tdf_sql_reader: &SqlReader, +) -> Result { + let rt_values: Vec = + tdf_sql_reader.read_column_from_table("Time", "Frames")?; + Ok(Frame2RtConverter::from_values(rt_values)) } -fn get_mz_converter(sql_metadata: &HashMap) -> Tof2MzConverter { - let software = sql_metadata.get("AcquisitionSoftware").unwrap(); - let tof_max_index: u32 = sql_metadata - .get("DigitizerNumSamples") - .unwrap() - .parse() - .unwrap(); - let mut mz_min: f64 = sql_metadata - .get("MzAcqRangeLower") - .unwrap() - .parse() - .unwrap(); - let mut mz_max: f64 = sql_metadata - .get("MzAcqRangeUpper") - .unwrap() - .parse() - .unwrap(); +fn get_mz_converter( + sql_metadata: &HashMap, +) -> Result { + let software = sql_metadata.get("AcquisitionSoftware").ok_or( + MetadataReaderError::KeyNotFound("AcquisitionSoftware".to_string()), + )?; + let tof_max_index: u32 = parse_value(sql_metadata, "DigitizerNumSamples")?; + let mut mz_min: f64 = parse_value(sql_metadata, "MzAcqRangeLower")?; + let mut mz_max: f64 = parse_value(sql_metadata, 
"MzAcqRangeUpper")?; if software == OTOF_CONTROL { mz_min -= 5.0; mz_max += 5.0; } - Tof2MzConverter::from_boundaries(mz_min, mz_max, tof_max_index) + Ok(Tof2MzConverter::from_boundaries( + mz_min, + mz_max, + tof_max_index, + )) } fn get_im_converter( sql_metadata: &HashMap, tdf_sql_reader: &SqlReader, -) -> Scan2ImConverter { - let scan_counts: Vec = tdf_sql_reader - .read_column_from_table("NumScans", "Frames") - .unwrap(); - let scan_max_index = *scan_counts.iter().max().unwrap(); - // let scan_max_index = 927; - let im_min: f64 = sql_metadata - .get("OneOverK0AcqRangeLower") - .unwrap() - .parse() - .unwrap(); - let im_max: f64 = sql_metadata - .get("OneOverK0AcqRangeUpper") - .unwrap() +) -> Result { + let scan_counts: Vec = + tdf_sql_reader.read_column_from_table("NumScans", "Frames")?; + let scan_max_index = *scan_counts.iter().max().unwrap(); // SqlReader cannot return empty vecs, so always succeeds + let im_min: f64 = parse_value(sql_metadata, "OneOverK0AcqRangeLower")?; + let im_max: f64 = parse_value(sql_metadata, "OneOverK0AcqRangeUpper")?; + Ok(Scan2ImConverter::from_boundaries( + im_min, + im_max, + scan_max_index, + )) +} + +fn parse_value( + hash_map: &HashMap, + key: &str, +) -> Result { + let value: T = hash_map + .get(key) + .ok_or(MetadataReaderError::KeyNotFound(key.to_string()))? 
.parse() - .unwrap(); - Scan2ImConverter::from_boundaries(im_min, im_max, scan_max_index) + .map_err(|_| MetadataReaderError::ParseError(key.to_string()))?; + Ok(value) +} + +#[derive(Debug, thiserror::Error)] +pub enum MetadataReaderError { + // #[error("{0}")] + // TdfBlobReaderError(#[from] TdfBlobReaderError), + // #[error("{0}")] + // FileNotFound(String), + #[error("{0}")] + SqlError(#[from] SqlError), + #[error("Key not found: {0}")] + KeyNotFound(String), + #[error("Key not parsable: {0}")] + ParseError(String), } diff --git a/src/io/readers/precursor_reader/tdf/dda.rs b/src/io/readers/precursor_reader/tdf/dda.rs index 763307a..f7b0e2f 100644 --- a/src/io/readers/precursor_reader/tdf/dda.rs +++ b/src/io/readers/precursor_reader/tdf/dda.rs @@ -27,7 +27,7 @@ impl DDATDFPrecursorReader { pub fn new(path: impl AsRef) -> Self { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); - let metadata = MetadataReader::new(&path); + let metadata = MetadataReader::new(&path).unwrap(); let rt_converter: Frame2RtConverter = metadata.rt_converter; let im_converter: Scan2ImConverter = metadata.im_converter; let sql_precursors = diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index d604769..e8463c5 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -27,7 +27,7 @@ impl DIATDFPrecursorReader { pub fn new(path: impl AsRef) -> Self { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); - let metadata = MetadataReader::new(&path); + let metadata = MetadataReader::new(&path).unwrap(); let rt_converter: Frame2RtConverter = metadata.rt_converter; let im_converter: Scan2ImConverter = metadata.im_converter; let window_groups = diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 93b80be..b42594e 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ 
b/src/io/readers/spectrum_reader/tdf.rs @@ -34,7 +34,7 @@ impl TDFSpectrumReader { pub fn new(path_name: impl AsRef) -> Self { let frame_reader: FrameReader = FrameReader::new(&path_name).unwrap(); let sql_path = find_extension(&path_name, "analysis.tdf").unwrap(); - let metadata = MetadataReader::new(&sql_path); + let metadata = MetadataReader::new(&sql_path).unwrap(); let mz_reader: Tof2MzConverter = metadata.mz_converter; let tdf_sql_reader = SqlReader::open(&sql_path).unwrap(); let precursor_reader = PrecursorReader::new(&sql_path); From 05982bb5af586865ee60a94b4d46f124f3edb756 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 16 Jul 2024 16:38:55 +0200 Subject: [PATCH 25/69] FIX: better error info for tdf blob reader --- src/io/readers/file_readers/tdf_blob_reader.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/io/readers/file_readers/tdf_blob_reader.rs b/src/io/readers/file_readers/tdf_blob_reader.rs index 55876ac..1c2fe97 100644 --- a/src/io/readers/file_readers/tdf_blob_reader.rs +++ b/src/io/readers/file_readers/tdf_blob_reader.rs @@ -35,7 +35,7 @@ impl TdfBlobReader { let offset = self.global_file_offset + offset; let byte_count = self .get_byte_count(offset) - .ok_or(TdfBlobReaderError::InvalidOffset)?; + .ok_or(TdfBlobReaderError::InvalidOffset(offset))?; let compressed_bytes = self .get_compressed_bytes(offset, byte_count) .ok_or(TdfBlobReaderError::CorruptData)?; @@ -91,7 +91,7 @@ impl IndexedTdfBlobReader { let offset = *self .binary_offsets .get(index) - .ok_or(IndexedTdfBlobReaderError::InvalidIndex)?; + .ok_or(IndexedTdfBlobReaderError::InvalidIndex(index))?; let blob = self.blob_reader.get(offset)?; Ok(blob) } @@ -107,14 +107,14 @@ pub enum TdfBlobReaderError { CorruptData, #[error("Decompression fails")] Decompression, - #[error("Invalid offset")] - InvalidOffset, + #[error("Invalid offset {0}")] + InvalidOffset(usize), } #[derive(Debug, thiserror::Error)] pub enum IndexedTdfBlobReaderError { 
#[error("{0}")] TdfBlobReaderError(#[from] TdfBlobReaderError), - #[error("Invalid index")] - InvalidIndex, + #[error("Invalid index {0}")] + InvalidIndex(usize), } From 6753a4659a0fed0a8fabf2b749d7be608338bee9 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 16 Jul 2024 16:57:57 +0200 Subject: [PATCH 26/69] FEAT: implemented error propagation for precursor reader --- src/io/readers/precursor_reader.rs | 32 +++++++------- src/io/readers/precursor_reader/minitdf.rs | 49 +++++++++++----------- src/io/readers/precursor_reader/tdf.rs | 48 +++++++++++++-------- src/io/readers/precursor_reader/tdf/dda.rs | 43 ++++++++++--------- src/io/readers/precursor_reader/tdf/dia.rs | 43 ++++++++++--------- src/io/readers/spectrum_reader/minitdf.rs | 9 ++-- src/io/readers/spectrum_reader/tdf.rs | 10 +++-- 7 files changed, 131 insertions(+), 103 deletions(-) diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs index 23a804b..5544dce 100644 --- a/src/io/readers/precursor_reader.rs +++ b/src/io/readers/precursor_reader.rs @@ -2,10 +2,10 @@ mod minitdf; mod tdf; use core::fmt; -use std::path::{Path, PathBuf}; +use std::path::Path; -use minitdf::MiniTDFPrecursorReader; -use tdf::TDFPrecursorReader; +use minitdf::{MiniTDFPrecursorReader, MiniTDFPrecursorReaderError}; +use tdf::{TDFPrecursorReader, TDFPrecursorReaderError}; use crate::ms_data::Precursor; @@ -20,31 +20,35 @@ impl fmt::Debug for PrecursorReader { } impl PrecursorReader { - pub fn new(path: impl AsRef) -> Self { + pub fn new(path: impl AsRef) -> Result { let precursor_reader: Box = match path.as_ref().extension().and_then(|e| e.to_str()) { - Some("parquet") => Box::new(MiniTDFPrecursorReader::new(path)), - Some("tdf") => Box::new(TDFPrecursorReader::new(path)), + Some("parquet") => Box::new(MiniTDFPrecursorReader::new(path)?), + Some("tdf") => Box::new(TDFPrecursorReader::new(path)?), _ => panic!(), }; - Self { precursor_reader } + let reader = Self { precursor_reader }; + Ok(reader) } 
- pub fn get(&self, index: usize) -> Precursor { + pub fn get(&self, index: usize) -> Option { self.precursor_reader.get(index) } - pub fn get_path(&self) -> PathBuf { - self.precursor_reader.get_path() - } - pub fn len(&self) -> usize { self.precursor_reader.len() } } trait PrecursorReaderTrait: Sync { - fn get(&self, index: usize) -> Precursor; - fn get_path(&self) -> PathBuf; + fn get(&self, index: usize) -> Option; fn len(&self) -> usize; } + +#[derive(Debug, thiserror::Error)] +pub enum PrecursorReaderError { + #[error("{0}")] + MiniTDFPrecursorReaderError(#[from] MiniTDFPrecursorReaderError), + #[error("{0}")] + TDFPrecursorReaderError(#[from] TDFPrecursorReaderError), +} diff --git a/src/io/readers/precursor_reader/minitdf.rs b/src/io/readers/precursor_reader/minitdf.rs index 0d5ee06..be11ee0 100644 --- a/src/io/readers/precursor_reader/minitdf.rs +++ b/src/io/readers/precursor_reader/minitdf.rs @@ -1,8 +1,8 @@ -use std::path::{Path, PathBuf}; +use std::path::Path; use crate::{ io::readers::file_readers::parquet_reader::{ - precursors::ParquetPrecursor, ReadableParquetTable, + precursors::ParquetPrecursor, ParquetError, ReadableParquetTable, }, ms_data::Precursor, }; @@ -11,40 +11,39 @@ use super::PrecursorReaderTrait; #[derive(Debug)] pub struct MiniTDFPrecursorReader { - path: PathBuf, parquet_precursors: Vec, } impl MiniTDFPrecursorReader { - pub fn new(path: impl AsRef) -> Self { - let parquet_precursors = - ParquetPrecursor::from_parquet_file(&path).unwrap(); - Self { - path: path.as_ref().to_path_buf(), - parquet_precursors, - } + pub fn new( + path: impl AsRef, + ) -> Result { + let parquet_precursors = ParquetPrecursor::from_parquet_file(&path)?; + let reader = Self { parquet_precursors }; + Ok(reader) } } impl PrecursorReaderTrait for MiniTDFPrecursorReader { - fn get(&self, index: usize) -> Precursor { - let x = &self.parquet_precursors[index]; - Precursor { - mz: x.mz, - rt: x.rt, - im: x.im, - charge: Some(x.charge), - intensity: 
Some(x.intensity), - index: x.index, - frame_index: x.frame_index, - } + fn get(&self, index: usize) -> Option { + let parquet_precursor = &self.parquet_precursors.get(index)?; + let precursor = Precursor { + mz: parquet_precursor.mz, + rt: parquet_precursor.rt, + im: parquet_precursor.im, + charge: Some(parquet_precursor.charge), + intensity: Some(parquet_precursor.intensity), + index: parquet_precursor.index, + frame_index: parquet_precursor.frame_index, + }; + Some(precursor) } fn len(&self) -> usize { self.parquet_precursors.len() } - - fn get_path(&self) -> PathBuf { - self.path.clone() - } } + +#[derive(thiserror::Error, Debug)] +#[error("{0}")] +pub struct MiniTDFPrecursorReaderError(#[from] ParquetError); diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs index 8619a1e..30e1dd5 100644 --- a/src/io/readers/precursor_reader/tdf.rs +++ b/src/io/readers/precursor_reader/tdf.rs @@ -1,13 +1,13 @@ mod dda; mod dia; -use std::path::{Path, PathBuf}; +use std::path::Path; -use dda::DDATDFPrecursorReader; -use dia::DIATDFPrecursorReader; +use dda::{DDATDFPrecursorReader, DDATDFPrecursorReaderError}; +use dia::{DIATDFPrecursorReader, DIATDFPrecursorReaderError}; use crate::{ - io::readers::file_readers::sql_reader::SqlReader, + io::readers::file_readers::sql_reader::{SqlError, SqlReader}, ms_data::{AcquisitionType, Precursor}, }; @@ -18,12 +18,13 @@ pub struct TDFPrecursorReader { } impl TDFPrecursorReader { - pub fn new(path: impl AsRef) -> Self { + pub fn new( + path: impl AsRef, + ) -> Result { let sql_path = path.as_ref(); - let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); - let sql_frames: Vec = tdf_sql_reader - .read_column_from_table("ScanMode", "Frames") - .unwrap(); + let tdf_sql_reader = SqlReader::open(sql_path)?; + let sql_frames: Vec = + tdf_sql_reader.read_column_from_table("ScanMode", "Frames")?; let acquisition_type = if sql_frames.iter().any(|&x| x == 8) { AcquisitionType::DDAPASEF } else if 
sql_frames.iter().any(|&x| x == 9) { @@ -34,27 +35,40 @@ impl TDFPrecursorReader { let precursor_reader: Box = match acquisition_type { AcquisitionType::DDAPASEF => { - Box::new(DDATDFPrecursorReader::new(path)) + Box::new(DDATDFPrecursorReader::new(path)?) }, AcquisitionType::DIAPASEF => { - Box::new(DIATDFPrecursorReader::new(path)) + Box::new(DIATDFPrecursorReader::new(path)?) + }, + acquisition_type => { + return Err(TDFPrecursorReaderError::UnknownPrecursorType( + format!("{:?}", acquisition_type), + )) }, - _ => panic!(), }; - Self { precursor_reader } + let reader = Self { precursor_reader }; + Ok(reader) } } impl PrecursorReaderTrait for TDFPrecursorReader { - fn get(&self, index: usize) -> Precursor { + fn get(&self, index: usize) -> Option { self.precursor_reader.get(index) } fn len(&self) -> usize { self.precursor_reader.len() } +} - fn get_path(&self) -> PathBuf { - self.precursor_reader.get_path() - } +#[derive(Debug, thiserror::Error)] +pub enum TDFPrecursorReaderError { + #[error("{0}")] + SqlError(#[from] SqlError), + #[error("{0}")] + DDATDFPrecursorReaderError(#[from] DDATDFPrecursorReaderError), + #[error("{0}")] + DIATDFPrecursorReaderError(#[from] DIATDFPrecursorReaderError), + #[error("Invalid acquistion type for precursor reader: {0}")] + UnknownPrecursorType(String), } diff --git a/src/io/readers/precursor_reader/tdf/dda.rs b/src/io/readers/precursor_reader/tdf/dda.rs index f7b0e2f..80cf641 100644 --- a/src/io/readers/precursor_reader/tdf/dda.rs +++ b/src/io/readers/precursor_reader/tdf/dda.rs @@ -1,4 +1,4 @@ -use std::path::{Path, PathBuf}; +use std::path::Path; use crate::{ domain_converters::{ @@ -6,9 +6,9 @@ use crate::{ }, io::readers::{ file_readers::sql_reader::{ - precursors::SqlPrecursor, ReadableSqlTable, SqlReader, + precursors::SqlPrecursor, ReadableSqlTable, SqlError, SqlReader, }, - MetadataReader, + MetadataReader, MetadataReaderError, }, ms_data::Precursor, }; @@ -17,36 +17,36 @@ use super::PrecursorReaderTrait; 
#[derive(Debug)] pub struct DDATDFPrecursorReader { - path: PathBuf, sql_precursors: Vec, rt_converter: Frame2RtConverter, im_converter: Scan2ImConverter, } impl DDATDFPrecursorReader { - pub fn new(path: impl AsRef) -> Self { + pub fn new( + path: impl AsRef, + ) -> Result { let sql_path = path.as_ref(); - let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); - let metadata = MetadataReader::new(&path).unwrap(); + let tdf_sql_reader = SqlReader::open(sql_path)?; + let metadata = MetadataReader::new(&path)?; let rt_converter: Frame2RtConverter = metadata.rt_converter; let im_converter: Scan2ImConverter = metadata.im_converter; - let sql_precursors = - SqlPrecursor::from_sql_reader(&tdf_sql_reader).unwrap(); - Self { - path: path.as_ref().to_path_buf(), + let sql_precursors = SqlPrecursor::from_sql_reader(&tdf_sql_reader)?; + let reader = Self { sql_precursors, rt_converter, im_converter, - } + }; + Ok(reader) } } impl PrecursorReaderTrait for DDATDFPrecursorReader { - fn get(&self, index: usize) -> Precursor { - let sql_precursor = &self.sql_precursors[index]; + fn get(&self, index: usize) -> Option { + let sql_precursor = &self.sql_precursors.get(index)?; let frame_id: usize = sql_precursor.precursor_frame; let scan_id: f64 = sql_precursor.scan_average; - Precursor { + let precursor = Precursor { mz: sql_precursor.mz, rt: self.rt_converter.convert(frame_id as u32), im: self.im_converter.convert(scan_id), @@ -54,14 +54,19 @@ impl PrecursorReaderTrait for DDATDFPrecursorReader { intensity: Some(sql_precursor.intensity), index: index + 1, frame_index: frame_id, - } + }; + Some(precursor) } fn len(&self) -> usize { self.sql_precursors.len() } +} - fn get_path(&self) -> PathBuf { - self.path.clone() - } +#[derive(Debug, thiserror::Error)] +pub enum DDATDFPrecursorReaderError { + #[error("{0}")] + SqlError(#[from] SqlError), + #[error("{0}")] + MetadataReaderError(#[from] MetadataReaderError), } diff --git a/src/io/readers/precursor_reader/tdf/dia.rs 
b/src/io/readers/precursor_reader/tdf/dia.rs index e8463c5..e02a941 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -1,4 +1,4 @@ -use std::path::{Path, PathBuf}; +use std::path::Path; use crate::{ domain_converters::{ @@ -6,9 +6,9 @@ use crate::{ }, io::readers::{ file_readers::sql_reader::{ - frame_groups::SqlWindowGroup, ReadableSqlTable, SqlReader, + frame_groups::SqlWindowGroup, ReadableSqlTable, SqlError, SqlReader, }, - MetadataReader, QuadrupoleSettingsReader, + MetadataReader, MetadataReaderError, QuadrupoleSettingsReader, }, ms_data::{Precursor, QuadrupoleSettings}, }; @@ -17,21 +17,21 @@ use super::PrecursorReaderTrait; #[derive(Debug)] pub struct DIATDFPrecursorReader { - path: PathBuf, expanded_quadrupole_settings: Vec, rt_converter: Frame2RtConverter, im_converter: Scan2ImConverter, } impl DIATDFPrecursorReader { - pub fn new(path: impl AsRef) -> Self { + pub fn new( + path: impl AsRef, + ) -> Result { let sql_path = path.as_ref(); - let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); - let metadata = MetadataReader::new(&path).unwrap(); + let tdf_sql_reader = SqlReader::open(sql_path)?; + let metadata = MetadataReader::new(&path)?; let rt_converter: Frame2RtConverter = metadata.rt_converter; let im_converter: Scan2ImConverter = metadata.im_converter; - let window_groups = - SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap(); + let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?; let quadrupole_settings = QuadrupoleSettingsReader::new(tdf_sql_reader.get_path()); let mut expanded_quadrupole_settings: Vec = vec![]; @@ -51,22 +51,22 @@ impl DIATDFPrecursorReader { expanded_quadrupole_settings.push(sub_quad_settings) } } - Self { - path: path.as_ref().to_path_buf(), + let reader = Self { expanded_quadrupole_settings, rt_converter, im_converter, - } + }; + Ok(reader) } } impl PrecursorReaderTrait for DIATDFPrecursorReader { - fn get(&self, index: usize) -> Precursor { 
- let quad_settings = &self.expanded_quadrupole_settings[index]; + fn get(&self, index: usize) -> Option { + let quad_settings = &self.expanded_quadrupole_settings.get(index)?; let scan_id = (quad_settings.scan_starts[0] + quad_settings.scan_ends[0]) as f32 / 2.0; - Precursor { + let precursor = Precursor { mz: quad_settings.isolation_mz[0], rt: self.rt_converter.convert(quad_settings.index as u32 - 1), im: self.im_converter.convert(scan_id), @@ -74,14 +74,19 @@ impl PrecursorReaderTrait for DIATDFPrecursorReader { intensity: None, index: index, frame_index: quad_settings.index, - } + }; + Some(precursor) } fn len(&self) -> usize { self.expanded_quadrupole_settings.len() } +} - fn get_path(&self) -> PathBuf { - self.path.clone() - } +#[derive(Debug, thiserror::Error)] +pub enum DIATDFPrecursorReaderError { + #[error("{0}")] + SqlError(#[from] SqlError), + #[error("{0}")] + MetadataReaderError(#[from] MetadataReaderError), } diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index ea622dc..83c50bd 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -6,9 +6,7 @@ use crate::{ parquet_reader::{ precursors::ParquetPrecursor, ReadableParquetTable, }, - tdf_blob_reader::{ - IndexedTdfBlobReader, TdfBlobError, TdfBlobReaderError, - }, + tdf_blob_reader::IndexedTdfBlobReader, }, PrecursorReader, }, @@ -30,7 +28,8 @@ impl MiniTDFSpectrumReader { pub fn new(path: impl AsRef) -> Self { let parquet_file_name = find_extension(&path, "ms2spectrum.parquet").unwrap(); - let precursor_reader = PrecursorReader::new(&parquet_file_name); + let precursor_reader = + PrecursorReader::new(&parquet_file_name).unwrap(); let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name) .unwrap() .iter() @@ -76,7 +75,7 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { intensity_values.iter().map(|&x| x as f64).collect(); spectrum.mz_values = mz_values.to_vec(); } - let precursor = 
self.precursor_reader.get(index); + let precursor = self.precursor_reader.get(index).unwrap(); spectrum.precursor = Some(precursor); spectrum.index = precursor.index; spectrum.collision_energy = self.collision_energies[index]; diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index b42594e..511730e 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -37,7 +37,7 @@ impl TDFSpectrumReader { let metadata = MetadataReader::new(&sql_path).unwrap(); let mz_reader: Tof2MzConverter = metadata.mz_converter; let tdf_sql_reader = SqlReader::open(&sql_path).unwrap(); - let precursor_reader = PrecursorReader::new(&sql_path); + let precursor_reader = PrecursorReader::new(&sql_path).unwrap(); let acquisition_type = frame_reader.get_acquisition(); let raw_spectrum_reader = RawSpectrumReader::new( &tdf_sql_reader, @@ -63,8 +63,10 @@ impl TDFSpectrumReader { impl SpectrumReaderTrait for TDFSpectrumReader { fn get(&self, index: usize) -> Spectrum { let raw_spectrum = self.read_single_raw_spectrum(index); - let spectrum = raw_spectrum - .finalize(self.precursor_reader.get(index), &self.mz_reader); + let spectrum = raw_spectrum.finalize( + self.precursor_reader.get(index).unwrap(), + &self.mz_reader, + ); spectrum } @@ -81,7 +83,7 @@ impl SpectrumReaderTrait for TDFSpectrumReader { .into_par_iter() .map(|index| { let spectrum = self.read_single_raw_spectrum(index); - let precursor = self.precursor_reader.get(index); + let precursor = self.precursor_reader.get(index).unwrap(); let precursor_mz: f64 = precursor.mz; let mut result: Vec<(f64, u32)> = vec![]; for &tof_index in spectrum.tof_indices.iter() { From b79d552ea2b93013e73c030c37e0931210c25f82 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 16 Jul 2024 17:03:46 +0200 Subject: [PATCH 27/69] FEAT: error propagation for quadSettingsReader --- src/io/readers/frame_reader.rs | 6 ++++-- src/io/readers/precursor_reader/tdf/dia.rs | 5 ++++- 
src/io/readers/quad_settings_reader.rs | 24 ++++++++++++++++------ src/io/readers/spectrum_reader/tdf/dia.rs | 2 +- 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index 78c6e85..af9b713 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -19,7 +19,7 @@ use super::{ }, tdf_blob_reader::{TdfBlob, TdfBlobReader, TdfBlobReaderError}, }, - QuadrupoleSettingsReader, + QuadrupoleSettingsReader, QuadrupoleSettingsReaderError, }; #[derive(Debug)] @@ -60,7 +60,7 @@ impl FrameReader { window_group.window_group; } quadrupole_settings = - QuadrupoleSettingsReader::new(tdf_sql_reader.get_path()); + QuadrupoleSettingsReader::new(tdf_sql_reader.get_path())?; } else { quadrupole_settings = vec![]; } @@ -209,4 +209,6 @@ pub enum FrameReaderError { SqlError(#[from] SqlError), #[error("Corrupt Frame")] CorruptFrame, + #[error("{0}")] + QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError), } diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index e02a941..9531fdd 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -9,6 +9,7 @@ use crate::{ frame_groups::SqlWindowGroup, ReadableSqlTable, SqlError, SqlReader, }, MetadataReader, MetadataReaderError, QuadrupoleSettingsReader, + QuadrupoleSettingsReaderError, }, ms_data::{Precursor, QuadrupoleSettings}, }; @@ -33,7 +34,7 @@ impl DIATDFPrecursorReader { let im_converter: Scan2ImConverter = metadata.im_converter; let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?; let quadrupole_settings = - QuadrupoleSettingsReader::new(tdf_sql_reader.get_path()); + QuadrupoleSettingsReader::new(tdf_sql_reader.get_path())?; let mut expanded_quadrupole_settings: Vec = vec![]; for window_group in window_groups { let window = window_group.window_group; @@ -89,4 +90,6 @@ pub enum DIATDFPrecursorReaderError { 
SqlError(#[from] SqlError), #[error("{0}")] MetadataReaderError(#[from] MetadataReaderError), + #[error("{0}")] + QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError), } diff --git a/src/io/readers/quad_settings_reader.rs b/src/io/readers/quad_settings_reader.rs index 16367a5..8a62d3c 100644 --- a/src/io/readers/quad_settings_reader.rs +++ b/src/io/readers/quad_settings_reader.rs @@ -3,7 +3,7 @@ use std::path::Path; use crate::{ms_data::QuadrupoleSettings, utils::vec_utils::argsort}; use super::file_readers::sql_reader::{ - quad_settings::SqlQuadSettings, ReadableSqlTable, SqlReader, + quad_settings::SqlQuadSettings, ReadableSqlTable, SqlError, SqlReader, }; pub struct QuadrupoleSettingsReader { @@ -12,16 +12,18 @@ pub struct QuadrupoleSettingsReader { } impl QuadrupoleSettingsReader { - pub fn new(path: impl AsRef) -> Vec { + pub fn new( + path: impl AsRef, + ) -> Result, QuadrupoleSettingsReaderError> { let sql_path = path.as_ref(); - let tdf_sql_reader = SqlReader::open(&sql_path).unwrap(); + let tdf_sql_reader = SqlReader::open(&sql_path)?; let sql_quadrupole_settings = - SqlQuadSettings::from_sql_reader(&tdf_sql_reader).unwrap(); + SqlQuadSettings::from_sql_reader(&tdf_sql_reader)?; let window_group_count = sql_quadrupole_settings .iter() .map(|x| x.window_group) .max() - .unwrap() as usize; + .unwrap() as usize; // SqlReader cannot return empty vecs, so always succeeds let quadrupole_settings = (0..window_group_count) .map(|window_group| { let mut quad = QuadrupoleSettings::default(); @@ -35,7 +37,7 @@ impl QuadrupoleSettingsReader { }; quad_reader.update_from_sql_quadrupole_settings(); quad_reader.resort_groups(); - quad_reader.quadrupole_settings + Ok(quad_reader.quadrupole_settings) } fn update_from_sql_quadrupole_settings(&mut self) { @@ -81,3 +83,13 @@ impl QuadrupoleSettingsReader { .collect(); } } + +#[derive(Debug, thiserror::Error)] +pub enum QuadrupoleSettingsReaderError { + // #[error("{0}")] + // MiniTDFPrecursorReaderError(#[from] 
MiniTDFPrecursorReaderError), + // #[error("{0}")] + // TDFPrecursorReaderError(#[from] TDFPrecursorReaderError), + #[error("{0}")] + SqlError(#[from] SqlError), +} diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 6033bfe..e0342e4 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -22,7 +22,7 @@ impl DIARawSpectrumReader { let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap(); let quadrupole_settings = - QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path()); + QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path()).unwrap(); let mut expanded_quadrupole_settings: Vec = vec![]; for window_group in window_groups { let window = window_group.window_group; From 4c34ec6b16756f19f05f20e09eb42d613a8d53e7 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 16 Jul 2024 17:04:32 +0200 Subject: [PATCH 28/69] CHORE: changed to minor version update rather than patch --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 32a722e..27b720b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1121,7 +1121,7 @@ dependencies = [ [[package]] name = "timsrust" -version = "0.3.1" +version = "0.4.0" dependencies = [ "bytemuck", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index a3b8509..a758bd6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "timsrust" -version = "0.3.1" +version = "0.4.0" edition = "2021" description = "A crate to read Bruker timsTOF data" license = "Apache-2.0" From cc83557b5dd6ed567abd2d72780726edd2c55b1a Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 16 Jul 2024 17:10:49 +0200 Subject: [PATCH 29/69] CHORE: remove unused file_readers --- src/file_readers.rs | 159 -------------------------------------------- 1 file changed, 159 deletions(-) delete mode 100644 src/file_readers.rs diff --git a/src/file_readers.rs 
b/src/file_readers.rs deleted file mode 100644 index b14d0fd..0000000 --- a/src/file_readers.rs +++ /dev/null @@ -1,159 +0,0 @@ -use std::{fs, path::PathBuf}; - -use crate::io::readers::file_readers::sql_reader::frames::SqlFrame; -use crate::io::readers::SpectrumReader; -use crate::{io::readers::FrameReader, Error}; - -use crate::ms_data::{Frame, Spectrum}; -use rayon::iter::ParallelIterator; - -/// A reader to read [frames](crate::ms_data::Frame) and [spectra](crate::ms_data::Spectrum). -pub struct FileReader { - frame_reader: Option, - spectrum_reader: Option, -} - -impl FileReader { - // TODO refactor out - // TODO proper error handling - // TODO update docs - pub fn new>(path_name: T) -> Result { - let format: FileFormat = FileFormat::parse(path_name)?; - let frame_reader = match &format { - FileFormat::DFolder(path) => Some(FrameReader::new(&path)), - FileFormat::MS2Folder(_) => None, - }; - let spectrum_reader = match &format { - FileFormat::DFolder(path) => { - let reader = SpectrumReader::new(path); - // reader.calibrate(); - Some(reader) - }, - FileFormat::MS2Folder(path) => Some(SpectrumReader::new(path)), - }; - Ok(Self { - frame_reader, - spectrum_reader, - }) - } - - pub fn read_single_frame(&self, index: usize) -> Frame { - self.frame_reader.as_ref().unwrap().get(index) - } - - fn read_multiple_frames<'a, F: Fn(&SqlFrame) -> bool + Sync + Send + 'a>( - &self, - predicate: F, - ) -> Vec { - self.frame_reader - .as_ref() - .unwrap() - .parallel_filter(|x| predicate(x)) - .collect() - } - - pub fn read_all_frames(&self) -> Vec { - self.read_multiple_frames(|_| true) - } - - pub fn read_all_ms1_frames(&self) -> Vec { - self.read_multiple_frames(|x| x.msms_type == 0) - } - - pub fn read_all_ms2_frames(&self) -> Vec { - self.read_multiple_frames(|x| x.msms_type != 0) - } - - pub fn read_single_spectrum(&self, index: usize) -> Spectrum { - self.spectrum_reader.as_ref().unwrap().get(index) - } - - pub fn read_all_spectra(&self) -> Vec { - 
self.spectrum_reader.as_ref().unwrap().get_all() - } -} - -pub enum FileFormat { - DFolder(PathBuf), - MS2Folder(PathBuf), -} - -impl FileFormat { - // TODO make into proper struct - pub fn parse( - input: impl AsRef, - ) -> Result { - let path: PathBuf = input.as_ref().to_path_buf(); - if !path.exists() { - return Err(FileFormatError::DirectoryDoesNotExist); - } - let extension: &str = path - .extension() - .unwrap_or_default() - .to_str() - .unwrap_or_default(); - let format = match extension { - "d" => Self::DFolder(path), - _ => Self::MS2Folder(path), - }; - format.is_valid()?; - Ok(format) - } - - /// FileFormat is guaranteed to be `valid` if it is constructed - fn is_valid(&self) -> Result<(), FileFormatError> { - match &self { - Self::DFolder(path) => { - if !folder_contains_extension(path, "tdf_bin") { - return Err(FileFormatError::BinaryFilesAreMissing); - } - if !folder_contains_extension(path, "tdf") { - return Err(FileFormatError::MetadataFilesAreMissing); - } - }, - Self::MS2Folder(path) => { - if !folder_contains_extension(path, "bin") { - return Err(FileFormatError::BinaryFilesAreMissing); - } - if !folder_contains_extension(path, "parquet") { - return Err(FileFormatError::MetadataFilesAreMissing); - } - }, - } - Ok(()) - } -} - -fn folder_contains_extension( - input: impl AsRef, - extension: &str, -) -> bool { - let folder_path: PathBuf = input.as_ref().to_path_buf(); - if !folder_path.is_dir() { - return false; - } - if let Ok(entries) = fs::read_dir(folder_path) { - for entry in entries { - if let Ok(entry) = entry { - if let Some(ext) = entry.path().extension() { - if ext == extension { - return true; - } - } - } - } - } - false -} - -#[derive(thiserror::Error, Debug)] -pub enum FileFormatError { - #[error("DirectoryDoesNotExist")] - DirectoryDoesNotExist, - #[error("NoParentWithBrukerExtension")] - NoParentWithBrukerExtension, - #[error("BinaryFilesAreMissing")] - BinaryFilesAreMissing, - #[error("MetadataFilesAreMissing")] - 
MetadataFilesAreMissing, -} From f5fb8d943b59683000dcc875863a04bb902db847 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Tue, 16 Jul 2024 23:42:19 -0700 Subject: [PATCH 30/69] propagated configs (untested) --- src/file_readers.rs | 18 +++- src/io/readers/frame_reader.rs | 9 +- src/io/readers/precursor_reader.rs | 18 +++- src/io/readers/precursor_reader/tdf.rs | 38 ++++++-- src/io/readers/precursor_reader/tdf/dia.rs | 25 ++++- src/io/readers/spectrum_reader.rs | 36 ++++++- src/io/readers/spectrum_reader/minitdf.rs | 2 +- src/io/readers/spectrum_reader/tdf.rs | 32 +++--- src/io/readers/spectrum_reader/tdf/dia.rs | 21 +++- src/io/readers/tdf_utils.rs | 108 +++++++++++++++++---- tests/frame_readers.rs | 9 +- tests/spectrum_readers.rs | 9 +- 12 files changed, 261 insertions(+), 64 deletions(-) diff --git a/src/file_readers.rs b/src/file_readers.rs index b14d0fd..785504f 100644 --- a/src/file_readers.rs +++ b/src/file_readers.rs @@ -1,7 +1,7 @@ use std::{fs, path::PathBuf}; use crate::io::readers::file_readers::sql_reader::frames::SqlFrame; -use crate::io::readers::SpectrumReader; +use crate::io::readers::{SpectrumReader, SpectrumReaderConfig}; use crate::{io::readers::FrameReader, Error}; use crate::ms_data::{Frame, Spectrum}; @@ -17,19 +17,27 @@ impl FileReader { // TODO refactor out // TODO proper error handling // TODO update docs - pub fn new>(path_name: T) -> Result { + pub fn new>( + path_name: T, + reader_config: SpectrumReaderConfig, + ) -> Result { let format: FileFormat = FileFormat::parse(path_name)?; let frame_reader = match &format { - FileFormat::DFolder(path) => Some(FrameReader::new(&path)), + FileFormat::DFolder(path) => Some(FrameReader::new( + &path, + reader_config.frame_splitting_params, + )), FileFormat::MS2Folder(_) => None, }; let spectrum_reader = match &format { FileFormat::DFolder(path) => { - let reader = SpectrumReader::new(path); + let reader = SpectrumReader::new(path, reader_config); // reader.calibrate(); Some(reader) }, - 
FileFormat::MS2Folder(path) => Some(SpectrumReader::new(path)), + FileFormat::MS2Folder(path) => { + Some(SpectrumReader::new(path, reader_config)) + }, }; Ok(Self { frame_reader, diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index e13b6be..187a221 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -19,7 +19,7 @@ use super::{ }, tdf_blob_reader::{TdfBlob, TdfBlobReader}, }, - QuadrupoleSettingsReader, + FrameWindowSplittingStrategy, QuadrupoleSettingsReader, }; #[derive(Debug)] @@ -30,10 +30,14 @@ pub struct FrameReader { acquisition: AcquisitionType, window_groups: Vec, quadrupole_settings: Vec>, + pub splitting_strategy: FrameWindowSplittingStrategy, } impl FrameReader { - pub fn new(path: impl AsRef) -> Self { + pub fn new( + path: impl AsRef, + config: FrameWindowSplittingStrategy, + ) -> Self { let sql_path = find_extension(&path, "analysis.tdf").unwrap(); let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); let sql_frames = SqlFrame::from_sql_reader(&tdf_sql_reader).unwrap(); @@ -71,6 +75,7 @@ impl FrameReader { .into_iter() .map(|x| Arc::new(x)) .collect(), + splitting_strategy: config, } } diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs index 23a804b..8c01afb 100644 --- a/src/io/readers/precursor_reader.rs +++ b/src/io/readers/precursor_reader.rs @@ -9,6 +9,8 @@ use tdf::TDFPrecursorReader; use crate::ms_data::Precursor; +use super::FrameWindowSplittingStrategy; + pub struct PrecursorReader { precursor_reader: Box, } @@ -20,11 +22,19 @@ impl fmt::Debug for PrecursorReader { } impl PrecursorReader { - pub fn new(path: impl AsRef) -> Self { + pub fn new( + path: impl AsRef, + config: Option, + ) -> Self { + let tmp = path.as_ref().extension().and_then(|e| e.to_str()); let precursor_reader: Box = - match path.as_ref().extension().and_then(|e| e.to_str()) { - Some("parquet") => Box::new(MiniTDFPrecursorReader::new(path)), - Some("tdf") => 
Box::new(TDFPrecursorReader::new(path)), + match (tmp, config) { + (Some("parquet"), None) => { + Box::new(MiniTDFPrecursorReader::new(path)) + }, + (Some("tdf"), strat) => { + Box::new(TDFPrecursorReader::new(path, strat)) + }, _ => panic!(), }; Self { precursor_reader } diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs index 8619a1e..2be9a24 100644 --- a/src/io/readers/precursor_reader/tdf.rs +++ b/src/io/readers/precursor_reader/tdf.rs @@ -7,7 +7,9 @@ use dda::DDATDFPrecursorReader; use dia::DIATDFPrecursorReader; use crate::{ - io::readers::file_readers::sql_reader::SqlReader, + io::readers::{ + file_readers::sql_reader::SqlReader, FrameWindowSplittingStrategy, + }, ms_data::{AcquisitionType, Precursor}, }; @@ -18,7 +20,10 @@ pub struct TDFPrecursorReader { } impl TDFPrecursorReader { - pub fn new(path: impl AsRef) -> Self { + pub fn new( + path: impl AsRef, + splitting_strategy: Option, + ) -> Self { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); let sql_frames: Vec = tdf_sql_reader @@ -32,14 +37,33 @@ impl TDFPrecursorReader { AcquisitionType::Unknown }; let precursor_reader: Box = - match acquisition_type { - AcquisitionType::DDAPASEF => { + match (acquisition_type, splitting_strategy) { + (AcquisitionType::DDAPASEF, None) => { Box::new(DDATDFPrecursorReader::new(path)) }, - AcquisitionType::DIAPASEF => { - Box::new(DIATDFPrecursorReader::new(path)) + ( + AcquisitionType::DDAPASEF, + Some(FrameWindowSplittingStrategy::None), + ) => { + // Not 100% sure when this happens ... + // By this I mean generating a Some(None) + // ./tests/frame_readers.rs:60:25 generates it. 
+ // JSPP - 2024-Jul-16 + Box::new(DDATDFPrecursorReader::new(path)) + }, + (AcquisitionType::DIAPASEF, Some(splitting_strat)) => { + Box::new(DIATDFPrecursorReader::new(path, splitting_strat)) + }, + (AcquisitionType::DIAPASEF, None) => { + Box::new(DIATDFPrecursorReader::new( + path, + FrameWindowSplittingStrategy::None, + )) }, - _ => panic!(), + _ => panic!( + "No idea how to handle {:?} - {:?}", + acquisition_type, splitting_strategy + ), }; Self { precursor_reader } } diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index 46fdc37..f480f7c 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -1,6 +1,9 @@ use std::path::{Path, PathBuf}; -use crate::io::readers::tdf_utils::expand_quadrupole_settings; +use crate::io::readers::tdf_utils::{ + expand_quadrupole_settings, expand_window_settings, +}; +use crate::io::readers::FrameWindowSplittingStrategy; use crate::{ domain_converters::{ ConvertableDomain, Frame2RtConverter, Scan2ImConverter, @@ -25,7 +28,10 @@ pub struct DIATDFPrecursorReader { } impl DIATDFPrecursorReader { - pub fn new(path: impl AsRef) -> Self { + pub fn new( + path: impl AsRef, + splitting_strat: FrameWindowSplittingStrategy, + ) -> Self { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(sql_path).unwrap(); let metadata = MetadataReader::new(&path); @@ -35,8 +41,19 @@ impl DIATDFPrecursorReader { SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap(); let quadrupole_settings = QuadrupoleSettingsReader::new(tdf_sql_reader.get_path()); - let expanded_quadrupole_settings = - expand_quadrupole_settings(&window_groups, &quadrupole_settings); + let expanded_quadrupole_settings = match splitting_strat { + FrameWindowSplittingStrategy::None => quadrupole_settings, + FrameWindowSplittingStrategy::Quadrupole(x) => { + expand_quadrupole_settings( + &window_groups, + &quadrupole_settings, + &x, + ) + }, + 
FrameWindowSplittingStrategy::Window(x) => { + expand_window_settings(&window_groups, &quadrupole_settings, &x) + }, + }; Self { path: path.as_ref().to_path_buf(), expanded_quadrupole_settings, diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index 0082ca3..bf45c97 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -7,12 +7,44 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::path::{Path, PathBuf}; use tdf::TDFSpectrumReader; +use crate::io::readers::tdf_utils::QuadWindowExpansionStrategy; use crate::ms_data::Spectrum; pub struct SpectrumReader { spectrum_reader: Box, } +#[derive(Debug)] +pub struct SpectrumProcessingParams { + smoothing_window: u32, + centroiding_window: u32, + calibration_tolerance: f64, +} + +impl Default for SpectrumProcessingParams { + fn default() -> Self { + Self { + smoothing_window: 1, + centroiding_window: 1, + calibration_tolerance: 0.1, + } + } +} + +#[derive(Debug, Clone, Copy, Default)] +pub enum FrameWindowSplittingStrategy { + #[default] + None, + Quadrupole(QuadWindowExpansionStrategy), + Window(QuadWindowExpansionStrategy), +} + +#[derive(Debug, Default)] +pub struct SpectrumReaderConfig { + pub spectrum_processing_params: SpectrumProcessingParams, + pub frame_splitting_params: FrameWindowSplittingStrategy, +} + impl fmt::Debug for SpectrumReader { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "SpectrumReader {{ /* fields omitted */ }}") @@ -20,11 +52,11 @@ impl fmt::Debug for SpectrumReader { } impl SpectrumReader { - pub fn new(path: impl AsRef) -> Self { + pub fn new(path: impl AsRef, config: SpectrumReaderConfig) -> Self { let spectrum_reader: Box = match path.as_ref().extension().and_then(|e| e.to_str()) { Some("ms2") => Box::new(MiniTDFSpectrumReader::new(path)), - Some("d") => Box::new(TDFSpectrumReader::new(path)), + Some("d") => Box::new(TDFSpectrumReader::new(path, config)), _ => panic!(), }; Self { 
spectrum_reader } diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 84a6f4a..6681a24 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -28,7 +28,7 @@ impl MiniTDFSpectrumReader { pub fn new(path: impl AsRef) -> Self { let parquet_file_name = find_extension(&path, "ms2spectrum.parquet").unwrap(); - let precursor_reader = PrecursorReader::new(&parquet_file_name); + let precursor_reader = PrecursorReader::new(&parquet_file_name, None); let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name) .unwrap() .iter() diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 270102d..c635c91 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -16,11 +16,7 @@ use crate::{ utils::find_extension, }; -use super::SpectrumReaderTrait; - -const SMOOTHING_WINDOW: u32 = 1; -const CENTROIDING_WINDOW: u32 = 1; -const CALIBRATION_TOLERANCE: f64 = 0.1; +use super::{SpectrumReaderConfig, SpectrumReaderTrait}; #[derive(Debug)] pub struct TDFSpectrumReader { @@ -28,16 +24,24 @@ pub struct TDFSpectrumReader { precursor_reader: PrecursorReader, mz_reader: Tof2MzConverter, raw_spectrum_reader: RawSpectrumReader, + config: SpectrumReaderConfig, } impl TDFSpectrumReader { - pub fn new(path_name: impl AsRef) -> Self { - let frame_reader: FrameReader = FrameReader::new(&path_name); + pub fn new( + path_name: impl AsRef, + config: SpectrumReaderConfig, + ) -> Self { + let frame_reader: FrameReader = + FrameReader::new(&path_name, config.frame_splitting_params); let sql_path = find_extension(&path_name, "analysis.tdf").unwrap(); let metadata = MetadataReader::new(&sql_path); let mz_reader: Tof2MzConverter = metadata.mz_converter; let tdf_sql_reader = SqlReader::open(&sql_path).unwrap(); - let precursor_reader = PrecursorReader::new(&sql_path); + let precursor_reader = PrecursorReader::new( + 
&sql_path, + Some(config.frame_splitting_params), + ); let acquisition_type = frame_reader.get_acquisition(); let raw_spectrum_reader = RawSpectrumReader::new( &tdf_sql_reader, @@ -49,14 +53,15 @@ impl TDFSpectrumReader { precursor_reader, mz_reader, raw_spectrum_reader, + config, } } pub fn read_single_raw_spectrum(&self, index: usize) -> RawSpectrum { let raw_spectrum = self.raw_spectrum_reader.get(index); raw_spectrum - .smooth(SMOOTHING_WINDOW) - .centroid(CENTROIDING_WINDOW) + .smooth(self.config.spectrum_processing_params.smoothing_window) + .centroid(self.config.spectrum_processing_params.centroiding_window) } } @@ -90,7 +95,12 @@ impl SpectrumReaderTrait for TDFSpectrumReader { let mut result: Vec<(f64, u32)> = vec![]; for &tof_index in spectrum.tof_indices.iter() { let mz = self.mz_reader.convert(tof_index); - if (mz - precursor_mz).abs() < CALIBRATION_TOLERANCE { + if (mz - precursor_mz).abs() + < self + .config + .spectrum_processing_params + .calibration_tolerance + { let hit = (precursor_mz, tof_index); result.push(hit); } diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 24386c8..ad836bf 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -1,4 +1,7 @@ -use crate::io::readers::tdf_utils::expand_quadrupole_settings; +use crate::io::readers::tdf_utils::{ + expand_quadrupole_settings, expand_window_settings, +}; +use crate::io::readers::FrameWindowSplittingStrategy; use crate::{ io::readers::{ file_readers::sql_reader::{ @@ -24,8 +27,20 @@ impl DIARawSpectrumReader { SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap(); let quadrupole_settings = QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path()); - let expanded_quadrupole_settings = - expand_quadrupole_settings(&window_groups, &quadrupole_settings); + let expanded_quadrupole_settings = match frame_reader.splitting_strategy + { + FrameWindowSplittingStrategy::None => quadrupole_settings, 
+ FrameWindowSplittingStrategy::Quadrupole(x) => { + expand_quadrupole_settings( + &window_groups, + &quadrupole_settings, + &x, + ) + }, + FrameWindowSplittingStrategy::Window(x) => { + expand_window_settings(&window_groups, &quadrupole_settings, &x) + }, + }; Self { expanded_quadrupole_settings, frame_reader, diff --git a/src/io/readers/tdf_utils.rs b/src/io/readers/tdf_utils.rs index ff4d938..6850aaf 100644 --- a/src/io/readers/tdf_utils.rs +++ b/src/io/readers/tdf_utils.rs @@ -3,7 +3,8 @@ use crate::ms_data::QuadrupoleSettings; type SpanStep = (usize, usize); -enum QuadWindowExpansionStrategy { +#[derive(Debug, Copy, Clone)] +pub enum QuadWindowExpansionStrategy { None, Even(usize), Uniform(SpanStep), @@ -43,16 +44,21 @@ fn scan_range_subsplit( out }, }; + + debug_assert!( + out.iter().all(|(s, e)| s < e), + "Invalid scan range: {:?}", + out + ); + debug_assert!( + out.iter().all(|(s, e)| *s >= start && *e <= end), + "Invalid scan range: {:?}", + out + ); out } -pub fn expand_quadrupole_settings( - window_groups: &[SqlWindowGroup], - quadrupole_settings: &[QuadrupoleSettings], -) -> Vec { - // Read the 'NUM_SUB_SUB_SPLITS' from env variables ... 
default to 1 - // (for now) - +fn expansion_strategy_from_env() -> QuadWindowExpansionStrategy { let splits = match std::env::var("NUM_SUB_SUB_SPLITS") { Ok(s) => match s.parse::() { Ok(n) => { @@ -79,6 +85,78 @@ pub fn expand_quadrupole_settings( }, }; + splits +} + +pub fn expand_window_settings( + window_groups: &[SqlWindowGroup], + quadrupole_settings: &[QuadrupoleSettings], + strategy: &QuadWindowExpansionStrategy, +) -> Vec { + let mut expanded_quadrupole_settings: Vec = vec![]; + for window_group in window_groups { + let window = window_group.window_group; + let frame = window_group.frame; + let group = &quadrupole_settings[window as usize - 1]; + let window_group_start = + group.scan_starts.iter().min().unwrap().clone(); + let window_group_end = group.scan_ends.iter().max().unwrap().clone(); + + for (sws, swe) in + scan_range_subsplit(window_group_start, window_group_end, &strategy) + { + let mut mz_sum = 0.0; + let mut mz_min = std::f64::MAX; + let mut mz_max = std::f64::MIN; + let mut nce_sum = 0.0; + let mut num_added = 0; + + for i in 0..group.isolation_mz.len() { + // Should I be checking here for overlap instead of full containment? 
+ if sws <= group.scan_starts[i] && swe >= group.scan_ends[i] { + mz_sum += group.isolation_mz[i]; + mz_min = mz_min.min( + group.isolation_mz[i] + - (group.isolation_width[i] / 2.0), + ); + mz_max = mz_max.max( + group.isolation_mz[i] + + (group.isolation_width[i] / 2.0), + ); + nce_sum += group.collision_energy[i]; + num_added += 1; + } + } + + let mz_mean = mz_sum / num_added as f64; + let mean_nce = nce_sum / num_added as f64; + + let sub_quad_settings = QuadrupoleSettings { + index: frame, + scan_starts: vec![sws], + scan_ends: vec![swe], + isolation_mz: vec![mz_mean], + isolation_width: vec![mz_min - mz_max], + collision_energy: vec![mean_nce], + }; + expanded_quadrupole_settings.push(sub_quad_settings) + } + } + println!( + "Number of expanded quad settings {}", + expanded_quadrupole_settings.len() + ); + expanded_quadrupole_settings +} + +pub fn expand_quadrupole_settings( + window_groups: &[SqlWindowGroup], + quadrupole_settings: &[QuadrupoleSettings], + strategy: &QuadWindowExpansionStrategy, +) -> Vec { + // Read the 'NUM_SUB_SUB_SPLITS' from env variables ... 
default to 1 + // (for now) + let mut expanded_quadrupole_settings: Vec = vec![]; for window_group in window_groups { let window = window_group.window_group; @@ -90,20 +168,8 @@ pub fn expand_quadrupole_settings( for (sws, swe) in scan_range_subsplit( subwindow_scan_start, subwindow_scan_end, - &splits, + &strategy, ) { - assert!( - sws >= subwindow_scan_start, - "{} >= {} not true", - sws, - subwindow_scan_start - ); - assert!( - swe <= subwindow_scan_end, - "{} <= {} not true", - swe, - subwindow_scan_end - ); let sub_quad_settings = QuadrupoleSettings { index: frame, scan_starts: vec![sws], diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index 8804a32..1b541eb 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -1,5 +1,6 @@ use std::{path::Path, sync::Arc}; use timsrust::{ + io::readers::SpectrumReaderConfig, ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}, FileReader, }; @@ -19,7 +20,9 @@ fn tdf_reader_frames1() { .unwrap() .to_string(); let frames: Vec = - FileReader::new(&file_path).unwrap().read_all_ms1_frames(); + FileReader::new(&file_path, SpectrumReaderConfig::default()) + .unwrap() + .read_all_ms1_frames(); let expected: Vec = vec![ Frame { scan_offsets: vec![0, 1, 3, 6, 10], @@ -62,7 +65,9 @@ fn tdf_reader_frames2() { .unwrap() .to_string(); let frames: Vec = - FileReader::new(&file_path).unwrap().read_all_ms2_frames(); + FileReader::new(&file_path, SpectrumReaderConfig::default()) + .unwrap() + .read_all_ms2_frames(); let expected: Vec = vec![ // Frame::default(), Frame { diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 085013f..133f0ce 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -1,5 +1,6 @@ use std::path::Path; use timsrust::{ + io::readers::SpectrumReaderConfig, ms_data::{Precursor, Spectrum}, FileReader, }; @@ -19,7 +20,9 @@ fn minitdf_reader() { .unwrap() .to_string(); let spectra: Vec = - FileReader::new(file_path).unwrap().read_all_spectra(); + 
FileReader::new(file_path, SpectrumReaderConfig::default()) + .unwrap() + .read_all_spectra(); let expected: Vec = vec![ Spectrum { mz_values: vec![100.0, 200.002, 300.03, 400.4], @@ -70,7 +73,9 @@ fn tdf_reader_dda() { .unwrap() .to_string(); let spectra: Vec = - FileReader::new(file_path).unwrap().read_all_spectra(); + FileReader::new(file_path, SpectrumReaderConfig::default()) + .unwrap() + .read_all_spectra(); let expected: Vec = vec![ Spectrum { mz_values: vec![199.7633445943076], From 903ed349a675678af7aed4a22b92af9cd4529f00 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 17 Jul 2024 11:17:23 +0200 Subject: [PATCH 31/69] FIX: typo in tdfblob reader --- src/io/readers/file_readers/tdf_blob_reader.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/io/readers/file_readers/tdf_blob_reader.rs b/src/io/readers/file_readers/tdf_blob_reader.rs index 1c2fe97..d8e64fc 100644 --- a/src/io/readers/file_readers/tdf_blob_reader.rs +++ b/src/io/readers/file_readers/tdf_blob_reader.rs @@ -75,7 +75,7 @@ impl IndexedTdfBlobReader { pub fn new( file_name: impl AsRef, binary_offsets: Vec, - ) -> Result { + ) -> Result { let blob_reader = TdfBlobReader::new(file_name)?; let reader = Self { binary_offsets, From c6f96de21116acb19f4f3bb31f8d308e32a0c8e0 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 17 Jul 2024 11:20:08 +0200 Subject: [PATCH 32/69] CHORE: rename unsuoppreted acquistion for precrsor reader --- src/io/readers/precursor_reader/tdf.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs index 30e1dd5..8c02020 100644 --- a/src/io/readers/precursor_reader/tdf.rs +++ b/src/io/readers/precursor_reader/tdf.rs @@ -41,9 +41,11 @@ impl TDFPrecursorReader { Box::new(DIATDFPrecursorReader::new(path)?) 
}, acquisition_type => { - return Err(TDFPrecursorReaderError::UnknownPrecursorType( - format!("{:?}", acquisition_type), - )) + return Err( + TDFPrecursorReaderError::UnsupportedAcquisition( + format!("{:?}", acquisition_type), + ), + ) }, }; let reader = Self { precursor_reader }; @@ -70,5 +72,5 @@ pub enum TDFPrecursorReaderError { #[error("{0}")] DIATDFPrecursorReaderError(#[from] DIATDFPrecursorReaderError), #[error("Invalid acquistion type for precursor reader: {0}")] - UnknownPrecursorType(String), + UnsupportedAcquisition(String), } From ea8be45b8f81c6522b53a7fc8fefafb5379d180f Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 17 Jul 2024 11:27:05 +0200 Subject: [PATCH 33/69] FEAT: implemented error propagation for spectrum readers --- benches/speed_performance.rs | 6 +- src/io/readers/spectrum_reader.rs | 21 +++++-- src/io/readers/spectrum_reader/minitdf.rs | 57 +++++++++++++------ src/io/readers/spectrum_reader/tdf.rs | 46 +++++++++++---- src/io/readers/spectrum_reader/tdf/dda.rs | 24 +++++--- src/io/readers/spectrum_reader/tdf/dia.rs | 27 ++++++--- .../spectrum_reader/tdf/raw_spectra.rs | 32 ++++++++--- tests/spectrum_readers.rs | 6 +- 8 files changed, 157 insertions(+), 62 deletions(-) diff --git a/benches/speed_performance.rs b/benches/speed_performance.rs index d9bd813..daab1cc 100644 --- a/benches/speed_performance.rs +++ b/benches/speed_performance.rs @@ -34,7 +34,7 @@ fn criterion_benchmark_dda(c: &mut Criterion) { group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DDA_TEST; let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = SpectrumReader::new(d_folder_name); + let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); group.bench_function("DDA read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); @@ -56,7 +56,7 @@ fn criterion_benchmark_dia(c: &mut Criterion) { group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DIA_TEST; 
let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = SpectrumReader::new(d_folder_name); + let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); group.bench_function("DIA read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); @@ -75,7 +75,7 @@ fn criterion_benchmark_syp(c: &mut Criterion) { group.significance_level(0.001).sample_size(10); let d_folder_name: &str = SYP_TEST; let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = SpectrumReader::new(d_folder_name); + let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); group.bench_function("SYP read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index 336634a..d8851f9 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -2,10 +2,10 @@ mod minitdf; mod tdf; use core::fmt; -use minitdf::MiniTDFSpectrumReader; +use minitdf::{MiniTDFSpectrumReader, MiniTDFSpectrumReaderError}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::path::{Path, PathBuf}; -use tdf::TDFSpectrumReader; +use tdf::{TDFSpectrumReader, TDFSpectrumReaderError}; use crate::ms_data::Spectrum; @@ -20,14 +20,15 @@ impl fmt::Debug for SpectrumReader { } impl SpectrumReader { - pub fn new(path: impl AsRef) -> Self { + pub fn new(path: impl AsRef) -> Result { let spectrum_reader: Box = match path.as_ref().extension().and_then(|e| e.to_str()) { - Some("ms2") => Box::new(MiniTDFSpectrumReader::new(path)), - Some("d") => Box::new(TDFSpectrumReader::new(path)), + Some("ms2") => Box::new(MiniTDFSpectrumReader::new(path)?), + Some("d") => Box::new(TDFSpectrumReader::new(path)?), _ => panic!(), }; - Self { spectrum_reader } + let reader = Self { spectrum_reader }; + Ok(reader) } pub fn get(&self, index: usize) -> Spectrum { @@ -62,3 +63,11 @@ trait SpectrumReaderTrait: Sync { fn 
len(&self) -> usize; fn calibrate(&mut self); } + +#[derive(Debug, thiserror::Error)] +pub enum SpectrumReaderError { + #[error("{0}")] + MiniTDFSpectrumReaderError(#[from] MiniTDFSpectrumReaderError), + #[error("{0}")] + TDFSpectrumReaderError(#[from] TDFSpectrumReaderError), +} diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 83c50bd..9d3938b 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -4,11 +4,15 @@ use crate::{ io::readers::{ file_readers::{ parquet_reader::{ - precursors::ParquetPrecursor, ReadableParquetTable, + precursors::ParquetPrecursor, ParquetError, + ReadableParquetTable, + }, + sql_reader::SqlError, + tdf_blob_reader::{ + IndexedTdfBlobReader, IndexedTdfBlobReaderError, }, - tdf_blob_reader::IndexedTdfBlobReader, }, - PrecursorReader, + PrecursorReader, PrecursorReaderError, }, ms_data::Spectrum, utils::find_extension, @@ -25,31 +29,36 @@ pub struct MiniTDFSpectrumReader { } impl MiniTDFSpectrumReader { - pub fn new(path: impl AsRef) -> Self { - let parquet_file_name = - find_extension(&path, "ms2spectrum.parquet").unwrap(); - let precursor_reader = - PrecursorReader::new(&parquet_file_name).unwrap(); - let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name) - .unwrap() + pub fn new( + path: impl AsRef, + ) -> Result { + let parquet_file_name = find_extension(&path, "ms2spectrum.parquet") + .ok_or(MiniTDFSpectrumReaderError::FileNotFound( + "analysis.tdf".to_string(), + ))?; + let precursor_reader = PrecursorReader::new(&parquet_file_name)?; + let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name)? .iter() .map(|x| x.offset as usize) .collect(); let collision_energies = - ParquetPrecursor::from_parquet_file(&parquet_file_name) - .unwrap() + ParquetPrecursor::from_parquet_file(&parquet_file_name)? 
.iter() .map(|x| x.collision_energy) .collect(); - let bin_file_name = find_extension(&path, "bin").unwrap(); - let blob_reader = - IndexedTdfBlobReader::new(&bin_file_name, offsets).unwrap(); - Self { + let bin_file_name = find_extension(&path, "bin").ok_or( + MiniTDFSpectrumReaderError::FileNotFound( + "analysis.tdf".to_string(), + ), + )?; + let blob_reader = IndexedTdfBlobReader::new(&bin_file_name, offsets)?; + let reader = Self { path: path.as_ref().to_path_buf(), precursor_reader, blob_reader, collision_energies, - } + }; + Ok(reader) } } @@ -100,3 +109,17 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { fn calibrate(&mut self) {} } + +#[derive(Debug, thiserror::Error)] +pub enum MiniTDFSpectrumReaderError { + #[error("{0}")] + SqlError(#[from] SqlError), + #[error("{0}")] + PrecursorReaderError(#[from] PrecursorReaderError), + #[error("{0}")] + ParquetError(#[from] ParquetError), + #[error("{0}")] + IndexedTdfBlobReaderError(#[from] IndexedTdfBlobReaderError), + #[error("{0}")] + FileNotFound(String), +} diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 511730e..559010d 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -2,15 +2,16 @@ mod dda; mod dia; mod raw_spectra; -use raw_spectra::{RawSpectrum, RawSpectrumReader}; +use raw_spectra::{RawSpectrum, RawSpectrumReader, RawSpectrumReaderError}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::path::{Path, PathBuf}; use crate::{ domain_converters::{ConvertableDomain, Tof2MzConverter}, io::readers::{ - file_readers::sql_reader::SqlReader, FrameReader, MetadataReader, - PrecursorReader, + file_readers::sql_reader::{SqlError, SqlReader}, + FrameReader, FrameReaderError, MetadataReader, MetadataReaderError, + PrecursorReader, PrecursorReaderError, }, ms_data::Spectrum, utils::find_extension, @@ -31,25 +32,30 @@ pub struct TDFSpectrumReader { } impl TDFSpectrumReader { - pub fn new(path_name: 
impl AsRef) -> Self { - let frame_reader: FrameReader = FrameReader::new(&path_name).unwrap(); - let sql_path = find_extension(&path_name, "analysis.tdf").unwrap(); - let metadata = MetadataReader::new(&sql_path).unwrap(); + pub fn new( + path_name: impl AsRef, + ) -> Result { + let frame_reader: FrameReader = FrameReader::new(&path_name)?; + let sql_path = find_extension(&path_name, "analysis.tdf").ok_or( + TDFSpectrumReaderError::FileNotFound("analysis.tdf".to_string()), + )?; + let metadata = MetadataReader::new(&sql_path)?; let mz_reader: Tof2MzConverter = metadata.mz_converter; - let tdf_sql_reader = SqlReader::open(&sql_path).unwrap(); - let precursor_reader = PrecursorReader::new(&sql_path).unwrap(); + let tdf_sql_reader = SqlReader::open(&sql_path)?; + let precursor_reader = PrecursorReader::new(&sql_path)?; let acquisition_type = frame_reader.get_acquisition(); let raw_spectrum_reader = RawSpectrumReader::new( &tdf_sql_reader, frame_reader, acquisition_type, - ); - Self { + )?; + let reader = Self { path: path_name.as_ref().to_path_buf(), precursor_reader, mz_reader, raw_spectrum_reader, - } + }; + Ok(reader) } pub fn read_single_raw_spectrum(&self, index: usize) -> RawSpectrum { @@ -104,3 +110,19 @@ impl SpectrumReaderTrait for TDFSpectrumReader { } } } + +#[derive(Debug, thiserror::Error)] +pub enum TDFSpectrumReaderError { + #[error("{0}")] + SqlError(#[from] SqlError), + #[error("{0}")] + PrecursorReaderError(#[from] PrecursorReaderError), + #[error("{0}")] + MetadaReaderError(#[from] MetadataReaderError), + #[error("{0}")] + FrameReaderError(#[from] FrameReaderError), + #[error("{0}")] + RawSpectrumReaderError(#[from] RawSpectrumReaderError), + #[error("{0}")] + FileNotFound(String), +} diff --git a/src/io/readers/spectrum_reader/tdf/dda.rs b/src/io/readers/spectrum_reader/tdf/dda.rs index 93ab962..be674ab 100644 --- a/src/io/readers/spectrum_reader/tdf/dda.rs +++ b/src/io/readers/spectrum_reader/tdf/dda.rs @@ -1,7 +1,8 @@ use crate::{ io::readers::{ 
file_readers::sql_reader::{ - pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlReader, + pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlError, + SqlReader, }, FrameReader, }, @@ -19,13 +20,15 @@ pub struct DDARawSpectrumReader { } impl DDARawSpectrumReader { - pub fn new(tdf_sql_reader: &SqlReader, frame_reader: FrameReader) -> Self { - let pasef_frames = - SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader).unwrap(); + pub fn new( + tdf_sql_reader: &SqlReader, + frame_reader: FrameReader, + ) -> Result { + let pasef_frames = SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader)?; let pasef_precursors = &pasef_frames.iter().map(|x| x.precursor).collect(); let order: Vec = argsort(&pasef_precursors); - let max_precursor = pasef_precursors.iter().max().unwrap(); + let max_precursor = pasef_precursors.iter().max().unwrap(); // SqlReader cannot return empty vecs, so always succeeds let mut offsets: Vec = Vec::with_capacity(max_precursor + 1); offsets.push(0); for (offset, &index) in order.iter().enumerate().take(order.len() - 1) { @@ -35,12 +38,13 @@ impl DDARawSpectrumReader { } } offsets.push(order.len()); - Self { + let reader = Self { order, offsets, pasef_frames, frame_reader, - } + }; + Ok(reader) } pub fn iterate_over_pasef_frames( @@ -97,3 +101,9 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader { raw_spectrum } } + +#[derive(Debug, thiserror::Error)] +pub enum DDARawSpectrumReaderError { + #[error("{0}")] + SqlError(#[from] SqlError), +} diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index e0342e4..13e4a6a 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -1,9 +1,9 @@ use crate::{ io::readers::{ file_readers::sql_reader::{ - frame_groups::SqlWindowGroup, ReadableSqlTable, SqlReader, + frame_groups::SqlWindowGroup, ReadableSqlTable, SqlError, SqlReader, }, - FrameReader, QuadrupoleSettingsReader, + FrameReader, 
QuadrupoleSettingsReader, QuadrupoleSettingsReaderError, }, ms_data::QuadrupoleSettings, utils::vec_utils::group_and_sum, @@ -18,11 +18,13 @@ pub struct DIARawSpectrumReader { } impl DIARawSpectrumReader { - pub fn new(tdf_sql_reader: &SqlReader, frame_reader: FrameReader) -> Self { - let window_groups = - SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap(); + pub fn new( + tdf_sql_reader: &SqlReader, + frame_reader: FrameReader, + ) -> Result { + let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?; let quadrupole_settings = - QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path()).unwrap(); + QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path())?; let mut expanded_quadrupole_settings: Vec = vec![]; for window_group in window_groups { let window = window_group.window_group; @@ -40,10 +42,11 @@ impl DIARawSpectrumReader { expanded_quadrupole_settings.push(sub_quad_settings) } } - Self { + let reader = Self { expanded_quadrupole_settings, frame_reader, - } + }; + Ok(reader) } } @@ -76,3 +79,11 @@ impl RawSpectrumReaderTrait for DIARawSpectrumReader { raw_spectrum } } + +#[derive(Debug, thiserror::Error)] +pub enum DIARawSpectrumReaderError { + #[error("{0}")] + SqlError(#[from] SqlError), + #[error("{0}")] + QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError), +} diff --git a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs index 7172940..8b78d65 100644 --- a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs +++ b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs @@ -7,7 +7,10 @@ use crate::{ utils::vec_utils::{filter_with_mask, find_sparse_local_maxima_mask}, }; -use super::{dda::DDARawSpectrumReader, dia::DIARawSpectrumReader}; +use super::{ + dda::{DDARawSpectrumReader, DDARawSpectrumReaderError}, + dia::{DIARawSpectrumReader, DIARawSpectrumReaderError}, +}; #[derive(Debug, PartialEq, Default, Clone)] pub(crate) struct RawSpectrum { @@ -91,20 +94,25 @@ impl 
RawSpectrumReader { tdf_sql_reader: &SqlReader, frame_reader: FrameReader, acquisition_type: AcquisitionType, - ) -> Self { + ) -> Result { let raw_spectrum_reader: Box = match acquisition_type { AcquisitionType::DDAPASEF => Box::new( - DDARawSpectrumReader::new(tdf_sql_reader, frame_reader), + DDARawSpectrumReader::new(tdf_sql_reader, frame_reader)?, ), AcquisitionType::DIAPASEF => Box::new( - DIARawSpectrumReader::new(tdf_sql_reader, frame_reader), + DIARawSpectrumReader::new(tdf_sql_reader, frame_reader)?, ), - _ => panic!(), + acquisition_type => { + return Err(RawSpectrumReaderError::UnsupportedAcquisition( + format!("{:?}", acquisition_type), + )) + }, }; - Self { + let reader = Self { raw_spectrum_reader, - } + }; + Ok(reader) } pub fn get(&self, index: usize) -> RawSpectrum { @@ -115,3 +123,13 @@ impl RawSpectrumReader { pub trait RawSpectrumReaderTrait: Sync { fn get(&self, index: usize) -> RawSpectrum; } + +#[derive(Debug, thiserror::Error)] +pub enum RawSpectrumReaderError { + #[error("{0}")] + DDARawSpectrumReaderError(#[from] DDARawSpectrumReaderError), + #[error("{0}")] + DIARawSpectrumReaderError(#[from] DIARawSpectrumReaderError), + #[error("Invalid acquistion type for Raw spectrum reader: {0}")] + UnsupportedAcquisition(String), +} diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 3473d18..43ff4b8 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -18,7 +18,8 @@ fn minitdf_reader() { .to_str() .unwrap() .to_string(); - let spectra: Vec = SpectrumReader::new(file_path).get_all(); + let spectra: Vec = + SpectrumReader::new(file_path).unwrap().get_all(); let expected: Vec = vec![ Spectrum { mz_values: vec![100.0, 200.002, 300.03, 400.4], @@ -68,7 +69,8 @@ fn tdf_reader_dda() { .to_str() .unwrap() .to_string(); - let spectra: Vec = SpectrumReader::new(file_path).get_all(); + let spectra: Vec = + SpectrumReader::new(file_path).unwrap().get_all(); let expected: Vec = vec![ Spectrum { mz_values: 
vec![199.7633445943076], From 0b7f4b8cfdbafeb182f11c8cf7b4a530456c2e1c Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 17 Jul 2024 14:47:37 +0200 Subject: [PATCH 34/69] FEAT: added error propagation macro --- src/errors.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/errors.rs b/src/errors.rs index 44782f1..82ffef8 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -21,3 +21,16 @@ pub enum Error { // #[error("BinError: {0}")] // BinError(#[from] TdfBlobError), } + +#[macro_export] +macro_rules! propagated_error_enum { + ($name:ident, $($variant:ident),+) => { + #[derive(Debug, thiserror::Error)] + pub enum $name { + $( + #[error(transparent)] + $variant(#[from] $variant), + )+ + } + }; +} From 2adb6857d6fb3953c3e2f014a697de9094a0df84 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Thu, 18 Jul 2024 11:38:42 -0700 Subject: [PATCH 35/69] CHORE(wip): Partial addition of tests and removal of debug prints --- src/io/readers.rs | 1 + src/io/readers/tdf_utils.rs | 38 -------------- tests/frame_readers.rs | 102 +++++++++++++++++++++++++++++++++++- 3 files changed, 101 insertions(+), 40 deletions(-) diff --git a/src/io/readers.rs b/src/io/readers.rs index fd9f3ce..7e350a5 100644 --- a/src/io/readers.rs +++ b/src/io/readers.rs @@ -11,3 +11,4 @@ pub use metadata_reader::*; pub use precursor_reader::*; pub use quad_settings_reader::*; pub use spectrum_reader::*; +pub use tdf_utils::QuadWindowExpansionStrategy; diff --git a/src/io/readers/tdf_utils.rs b/src/io/readers/tdf_utils.rs index 6850aaf..93b9601 100644 --- a/src/io/readers/tdf_utils.rs +++ b/src/io/readers/tdf_utils.rs @@ -58,36 +58,6 @@ fn scan_range_subsplit( out } -fn expansion_strategy_from_env() -> QuadWindowExpansionStrategy { - let splits = match std::env::var("NUM_SUB_SUB_SPLITS") { - Ok(s) => match s.parse::() { - Ok(n) => { - println!("Number of splits: {} from env", n); - QuadWindowExpansionStrategy::Even(n) - }, - Err(_) => { - println!("Invalid number of splits: {}", 
s); - QuadWindowExpansionStrategy::None - }, - }, - Err(_) => match std::env::var("SUB_SPLITS_SPAN") { - Ok(s) => match s.parse::() { - Ok(n) => { - println!("Number of scans per split: {} from env", n); - QuadWindowExpansionStrategy::Uniform((n, n / 2)) - }, - Err(_) => { - println!("Invalid number of splits: {}", s); - QuadWindowExpansionStrategy::None - }, - }, - Err(_) => QuadWindowExpansionStrategy::None, - }, - }; - - splits -} - pub fn expand_window_settings( window_groups: &[SqlWindowGroup], quadrupole_settings: &[QuadrupoleSettings], @@ -142,10 +112,6 @@ pub fn expand_window_settings( expanded_quadrupole_settings.push(sub_quad_settings) } } - println!( - "Number of expanded quad settings {}", - expanded_quadrupole_settings.len() - ); expanded_quadrupole_settings } @@ -182,9 +148,5 @@ pub fn expand_quadrupole_settings( } } } - println!( - "Number of expanded quad settings {}", - expanded_quadrupole_settings.len() - ); expanded_quadrupole_settings } diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index c11d2ec..aab0dca 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -1,7 +1,8 @@ use std::{path::Path, sync::Arc}; use timsrust::{ io::readers::{ - FrameReader, FrameWindowSplittingStrategy, SpectrumReaderConfig, + FrameReader, FrameWindowSplittingStrategy, QuadWindowExpansionStrategy, + SpectrumReaderConfig, }, ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}, }; @@ -108,4 +109,101 @@ fn tdf_reader_frames2() { } } -// TODO test for DIA +#[test] +fn tdf_reader_frames_dia() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + let frames: Vec = + FrameReader::new(&file_path, FrameWindowSplittingStrategy::default()) + .unwrap() + .get_all_ms2() + .into_iter() + .map(|x| x.unwrap()) + .collect(); + + assert_eq!(frames.len(), 4); + for i in 0..frames.len() { + assert_eq!(frames[i].scan_offsets.len(), 710); + 
assert_eq!(frames[i].scan_offsets[0], 0); + assert_eq!( + frames[i].scan_offsets.last().unwrap(), + &frames[i].intensities.len() + ); + assert_eq!(frames[i].tof_indices.len(), frames[i].intensities.len()); + } + assert_eq!(&frames[0].tof_indices[0], &251695u32); + assert_eq!(&frames[0].intensities[0], &503392u32); + assert_eq!(&frames[0].tof_indices.len(), &754376); + assert_eq!(&frames[0].intensities.len(), &754376); + + assert_eq!(&frames[1].tof_indices[0], &1006071u32); + assert_eq!(&frames[1].intensities[0], &2012144u32); + assert_eq!(&frames[1].tof_indices.len(), &1257057); + assert_eq!(&frames[1].intensities.len(), &1257057); + + assert_eq!(&frames[2].tof_indices[0], &4022866u32); + assert_eq!(&frames[2].intensities[0], &8045734u32); + assert_eq!(&frames[2].tof_indices.len(), &2262419); + assert_eq!(&frames[2].intensities.len(), &2262419); + + assert_eq!(&frames[3].tof_indices[0], &6285285u32); + assert_eq!(&frames[3].intensities[0], &12570572u32); + assert_eq!(&frames[3].tof_indices.len(), &2765100); + assert_eq!(&frames[3].intensities.len(), &2765100); +} + +#[test] +fn test_dia_even() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + + for i in 1..3 { + let frames: Vec = FrameReader::new( + &file_path, + FrameWindowSplittingStrategy::Quadrupole( + QuadWindowExpansionStrategy::Even(i), + ), + ) + .unwrap() + .get_all_ms2() + .into_iter() + .map(|x| x.unwrap()) + .collect(); + + assert_eq!(frames.len(), 4 * i); + } +} + +#[test] +fn test_dia_uniform() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + + for i in [100, 200, 300] { + let frames: Vec = FrameReader::new( + &file_path, + FrameWindowSplittingStrategy::Quadrupole( + QuadWindowExpansionStrategy::Uniform((i, i)), + ), + ) + .unwrap() + .get_all_ms2() + .into_iter() + .map(|x| x.unwrap()) + .collect(); + + assert_eq!(frames.len(), 4 * 
i); + } +} From 1261967ed200cd502b921134ded43a04d679ce2a Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Thu, 18 Jul 2024 12:19:00 -0700 Subject: [PATCH 36/69] CHORE: More work towards testing splitting methods --- src/io/readers/spectrum_reader/tdf/dia.rs | 8 --- src/io/readers/tdf_utils.rs | 21 +++++++ tests/frame_readers.rs | 52 ----------------- tests/spectrum_readers.rs | 69 ++++++++++++++++++++++- 4 files changed, 88 insertions(+), 62 deletions(-) diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index f3f9d19..bf016f9 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -51,14 +51,6 @@ impl DIARawSpectrumReader { impl RawSpectrumReaderTrait for DIARawSpectrumReader { fn get(&self, index: usize) -> RawSpectrum { let quad_settings = &self.expanded_quadrupole_settings[index]; - if index < 10 { - println!("{}", index); - println!("{:?}", quad_settings); - } - if index > (self.expanded_quadrupole_settings.len() - 10) { - println!("{}", index); - println!("{:?}", quad_settings); - } let collision_energy = quad_settings.collision_energy[0]; let isolation_mz = quad_settings.isolation_mz[0]; diff --git a/src/io/readers/tdf_utils.rs b/src/io/readers/tdf_utils.rs index 93b9601..4e59968 100644 --- a/src/io/readers/tdf_utils.rs +++ b/src/io/readers/tdf_utils.rs @@ -3,6 +3,24 @@ use crate::ms_data::QuadrupoleSettings; type SpanStep = (usize, usize); +/// Strategy for expanding quadrupole settings +/// +/// This enum is used to determine how to expand quadrupole settings +/// when reading in DIA data. And exporting spectra (not frames RN). +/// +/// # Variants +/// +/// For example if we have a window with scan start 50 and end 500 +/// +/// * `None` - Do not expand quadrupole settings; use the original settings +/// * `Even(usize)` - Split the quadrupole settings into `usize` evenly spaced +/// subwindows; e.g. 
if `usize` is 2, the window will be split into 2 subwindows +/// of equal width. +/// * `Uniform(SpanStep)` - Split the quadrupole settings into subwindows of +/// width `SpanStep.0` and step `SpanStep.1`; e.g. if `SpanStep` is (100, 50), +/// the window will be split into subwindows of width 100 and step 50 between their +/// scan start and end. +/// #[derive(Debug, Copy, Clone)] pub enum QuadWindowExpansionStrategy { None, @@ -41,6 +59,9 @@ fn scan_range_subsplit( curr_start += step; curr_end += step; } + if curr_start < end { + out.push((curr_start, end)); + } out }, }; diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index aab0dca..400ebf7 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -155,55 +155,3 @@ fn tdf_reader_frames_dia() { assert_eq!(&frames[3].tof_indices.len(), &2765100); assert_eq!(&frames[3].intensities.len(), &2765100); } - -#[test] -fn test_dia_even() { - let file_name = "dia_test.d"; - let file_path = get_local_directory() - .join(file_name) - .to_str() - .unwrap() - .to_string(); - - for i in 1..3 { - let frames: Vec = FrameReader::new( - &file_path, - FrameWindowSplittingStrategy::Quadrupole( - QuadWindowExpansionStrategy::Even(i), - ), - ) - .unwrap() - .get_all_ms2() - .into_iter() - .map(|x| x.unwrap()) - .collect(); - - assert_eq!(frames.len(), 4 * i); - } -} - -#[test] -fn test_dia_uniform() { - let file_name = "dia_test.d"; - let file_path = get_local_directory() - .join(file_name) - .to_str() - .unwrap() - .to_string(); - - for i in [100, 200, 300] { - let frames: Vec = FrameReader::new( - &file_path, - FrameWindowSplittingStrategy::Quadrupole( - QuadWindowExpansionStrategy::Uniform((i, i)), - ), - ) - .unwrap() - .get_all_ms2() - .into_iter() - .map(|x| x.unwrap()) - .collect(); - - assert_eq!(frames.len(), 4 * i); - } -} diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 7132d48..85488c8 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -1,7 +1,9 @@ use 
std::path::Path; use timsrust::{ - io::readers::SpectrumReader, - io::readers::SpectrumReaderConfig, + io::readers::{ + FrameWindowSplittingStrategy, QuadWindowExpansionStrategy, + SpectrumProcessingParams, SpectrumReader, SpectrumReaderConfig, + }, ms_data::{Precursor, Spectrum}, }; @@ -131,3 +133,66 @@ fn tdf_reader_dda() { assert_eq!(spectra[i], expected[i]); } } + +#[test] +fn test_dia_even() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + + for i in 1..3 { + let frames: Vec = SpectrumReader::new( + &file_path, + SpectrumReaderConfig { + frame_splitting_params: + FrameWindowSplittingStrategy::Quadrupole( + QuadWindowExpansionStrategy::Even(i), + ), + spectrum_processing_params: SpectrumProcessingParams::default(), + }, + ) + .get_all(); + + println!(">>>>> EVEN {:?}", frames.len()); + + // 4 frames, 2 windows in each, i splits/window + assert_eq!(frames.len(), 4 * 2 * i); + } +} + +#[test] +fn test_dia_uniform() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + + for i in [100, 200, 300] { + let frames: Vec = SpectrumReader::new( + &file_path, + SpectrumReaderConfig { + frame_splitting_params: FrameWindowSplittingStrategy::Window( + QuadWindowExpansionStrategy::Uniform((i, i)), + ), + spectrum_processing_params: SpectrumProcessingParams::default(), + }, + ) + .get_all(); + + println!(">>>>> UNIFORM {} > {:?}", i, frames.len()); + for f in frames.iter() { + println!("{:?}", f.precursor); + } + + // Not all frames have scan windows from 0 to 709 ... so ... 
I need to think + // on how to express this in the test + // assert_eq!(frames.len(), 4 * ((709 / i) + 1)); + assert!(frames.len() > (709 / i)); + assert!(frames.len() < 3 * ((709 / i) + 1)); + } +} From cfa0574664d4229c1cd42fd5d73310dfbb48ee8b Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 25 Jul 2024 15:29:31 +0200 Subject: [PATCH 37/69] CHORE: resolve merge conflicts --- src/io/readers.rs | 2 + src/io/readers/frame_reader.rs | 10 +- src/io/readers/precursor_reader.rs | 18 +- src/io/readers/precursor_reader/tdf.rs | 37 +++- src/io/readers/precursor_reader/tdf/dia.rs | 36 ++-- src/io/readers/spectrum_reader.rs | 39 +++- src/io/readers/spectrum_reader/minitdf.rs | 2 +- src/io/readers/spectrum_reader/tdf.rs | 34 ++-- src/io/readers/spectrum_reader/tdf/dda.rs | 4 + src/io/readers/spectrum_reader/tdf/dia.rs | 40 ++-- .../spectrum_reader/tdf/raw_spectra.rs | 5 + src/io/readers/tdf_utils.rs | 173 ++++++++++++++++++ src/utils/vec_utils.rs | 4 +- tests/frame_readers.rs | 78 ++++++-- tests/spectrum_readers.rs | 76 +++++++- 15 files changed, 478 insertions(+), 80 deletions(-) create mode 100644 src/io/readers/tdf_utils.rs diff --git a/src/io/readers.rs b/src/io/readers.rs index 03d5248..7e350a5 100644 --- a/src/io/readers.rs +++ b/src/io/readers.rs @@ -4,9 +4,11 @@ mod metadata_reader; mod precursor_reader; mod quad_settings_reader; mod spectrum_reader; +mod tdf_utils; pub use frame_reader::*; pub use metadata_reader::*; pub use precursor_reader::*; pub use quad_settings_reader::*; pub use spectrum_reader::*; +pub use tdf_utils::QuadWindowExpansionStrategy; diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index af9b713..f4b0d48 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -19,7 +19,8 @@ use super::{ }, tdf_blob_reader::{TdfBlob, TdfBlobReader, TdfBlobReaderError}, }, - QuadrupoleSettingsReader, QuadrupoleSettingsReaderError, + FrameWindowSplittingStrategy, QuadrupoleSettingsReader, + 
QuadrupoleSettingsReaderError, }; #[derive(Debug)] @@ -30,10 +31,14 @@ pub struct FrameReader { acquisition: AcquisitionType, window_groups: Vec, quadrupole_settings: Vec>, + pub splitting_strategy: FrameWindowSplittingStrategy, } impl FrameReader { - pub fn new(path: impl AsRef) -> Result { + pub fn new( + path: impl AsRef, + config: FrameWindowSplittingStrategy, + ) -> Result { let sql_path = find_extension(&path, "analysis.tdf").ok_or( FrameReaderError::FileNotFound("analysis.tdf".to_string()), )?; @@ -74,6 +79,7 @@ impl FrameReader { .into_iter() .map(|x| Arc::new(x)) .collect(), + splitting_strategy: config, }; Ok(reader) } diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs index 5544dce..b755a25 100644 --- a/src/io/readers/precursor_reader.rs +++ b/src/io/readers/precursor_reader.rs @@ -9,6 +9,8 @@ use tdf::{TDFPrecursorReader, TDFPrecursorReaderError}; use crate::ms_data::Precursor; +use super::FrameWindowSplittingStrategy; + pub struct PrecursorReader { precursor_reader: Box, } @@ -20,11 +22,19 @@ impl fmt::Debug for PrecursorReader { } impl PrecursorReader { - pub fn new(path: impl AsRef) -> Result { + pub fn new( + path: impl AsRef, + config: Option, + ) -> Result { + let tmp = path.as_ref().extension().and_then(|e| e.to_str()); let precursor_reader: Box = - match path.as_ref().extension().and_then(|e| e.to_str()) { - Some("parquet") => Box::new(MiniTDFPrecursorReader::new(path)?), - Some("tdf") => Box::new(TDFPrecursorReader::new(path)?), + match (tmp, config) { + (Some("parquet"), None) => { + Box::new(MiniTDFPrecursorReader::new(path)?) + }, + (Some("tdf"), strat) => { + Box::new(TDFPrecursorReader::new(path, strat)?) 
+ }, _ => panic!(), }; let reader = Self { precursor_reader }; diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs index 8c02020..1795e4c 100644 --- a/src/io/readers/precursor_reader/tdf.rs +++ b/src/io/readers/precursor_reader/tdf.rs @@ -7,7 +7,10 @@ use dda::{DDATDFPrecursorReader, DDATDFPrecursorReaderError}; use dia::{DIATDFPrecursorReader, DIATDFPrecursorReaderError}; use crate::{ - io::readers::file_readers::sql_reader::{SqlError, SqlReader}, + io::readers::{ + file_readers::sql_reader::{SqlError, SqlReader}, + FrameWindowSplittingStrategy, + }, ms_data::{AcquisitionType, Precursor}, }; @@ -20,6 +23,7 @@ pub struct TDFPrecursorReader { impl TDFPrecursorReader { pub fn new( path: impl AsRef, + splitting_strategy: Option, ) -> Result { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(sql_path)?; @@ -33,17 +37,36 @@ impl TDFPrecursorReader { AcquisitionType::Unknown }; let precursor_reader: Box = - match acquisition_type { - AcquisitionType::DDAPASEF => { + match (acquisition_type, splitting_strategy) { + (AcquisitionType::DDAPASEF, None) => { Box::new(DDATDFPrecursorReader::new(path)?) }, - AcquisitionType::DIAPASEF => { - Box::new(DIATDFPrecursorReader::new(path)?) + ( + AcquisitionType::DDAPASEF, + Some(FrameWindowSplittingStrategy::None), + ) => { + // Not 100% sure when this happens ... + // By this I mean generating a Some(None) + // ./tests/frame_readers.rs:60:25 generates it. + // JSPP - 2024-Jul-16 + Box::new(DDATDFPrecursorReader::new(path)?) + }, + (AcquisitionType::DIAPASEF, Some(splitting_strat)) => { + Box::new(DIATDFPrecursorReader::new(path, splitting_strat)?) + }, + (AcquisitionType::DIAPASEF, None) => { + Box::new(DIATDFPrecursorReader::new( + path, + FrameWindowSplittingStrategy::None, + )?) 
}, - acquisition_type => { + (acq_type, acq_config) => { return Err( TDFPrecursorReaderError::UnsupportedAcquisition( - format!("{:?}", acquisition_type), + format!( + "{:?} + {:?}", + acquisition_type, acq_config + ), ), ) }, diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index 9531fdd..2b4dae0 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -1,5 +1,9 @@ use std::path::Path; +use crate::io::readers::tdf_utils::{ + expand_quadrupole_settings, expand_window_settings, +}; +use crate::io::readers::FrameWindowSplittingStrategy; use crate::{ domain_converters::{ ConvertableDomain, Frame2RtConverter, Scan2ImConverter, @@ -26,6 +30,7 @@ pub struct DIATDFPrecursorReader { impl DIATDFPrecursorReader { pub fn new( path: impl AsRef, + splitting_strat: FrameWindowSplittingStrategy, ) -> Result { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(sql_path)?; @@ -35,23 +40,20 @@ impl DIATDFPrecursorReader { let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?; let quadrupole_settings = QuadrupoleSettingsReader::new(tdf_sql_reader.get_path())?; - let mut expanded_quadrupole_settings: Vec = vec![]; - for window_group in window_groups { - let window = window_group.window_group; - let frame = window_group.frame; - let group = &quadrupole_settings[window as usize - 1]; - for sub_window in 0..group.isolation_mz.len() { - let sub_quad_settings = QuadrupoleSettings { - index: frame, - scan_starts: vec![group.scan_starts[sub_window]], - scan_ends: vec![group.scan_ends[sub_window]], - isolation_mz: vec![group.isolation_mz[sub_window]], - isolation_width: vec![group.isolation_width[sub_window]], - collision_energy: vec![group.collision_energy[sub_window]], - }; - expanded_quadrupole_settings.push(sub_quad_settings) - } - } + let expanded_quadrupole_settings = match splitting_strat { + FrameWindowSplittingStrategy::None => quadrupole_settings, + 
FrameWindowSplittingStrategy::Quadrupole(x) => { + expand_quadrupole_settings( + &window_groups, + &quadrupole_settings, + &x, + ) + }, + FrameWindowSplittingStrategy::Window(x) => { + expand_window_settings(&window_groups, &quadrupole_settings, &x) + }, + }; + let reader = Self { expanded_quadrupole_settings, rt_converter, diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index d8851f9..b158afc 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -7,12 +7,44 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::path::{Path, PathBuf}; use tdf::{TDFSpectrumReader, TDFSpectrumReaderError}; +use crate::io::readers::tdf_utils::QuadWindowExpansionStrategy; use crate::ms_data::Spectrum; pub struct SpectrumReader { spectrum_reader: Box, } +#[derive(Debug)] +pub struct SpectrumProcessingParams { + smoothing_window: u32, + centroiding_window: u32, + calibration_tolerance: f64, +} + +impl Default for SpectrumProcessingParams { + fn default() -> Self { + Self { + smoothing_window: 1, + centroiding_window: 1, + calibration_tolerance: 0.1, + } + } +} + +#[derive(Debug, Clone, Copy, Default)] +pub enum FrameWindowSplittingStrategy { + #[default] + None, + Quadrupole(QuadWindowExpansionStrategy), + Window(QuadWindowExpansionStrategy), +} + +#[derive(Debug, Default)] +pub struct SpectrumReaderConfig { + pub spectrum_processing_params: SpectrumProcessingParams, + pub frame_splitting_params: FrameWindowSplittingStrategy, +} + impl fmt::Debug for SpectrumReader { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "SpectrumReader {{ /* fields omitted */ }}") @@ -20,11 +52,14 @@ impl fmt::Debug for SpectrumReader { } impl SpectrumReader { - pub fn new(path: impl AsRef) -> Result { + pub fn new( + path: impl AsRef, + config: SpectrumReaderConfig, + ) -> Result { let spectrum_reader: Box = match path.as_ref().extension().and_then(|e| e.to_str()) { Some("ms2") => 
Box::new(MiniTDFSpectrumReader::new(path)?), - Some("d") => Box::new(TDFSpectrumReader::new(path)?), + Some("d") => Box::new(TDFSpectrumReader::new(path, config)?), _ => panic!(), }; let reader = Self { spectrum_reader }; diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 9d3938b..5c24e77 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -36,7 +36,7 @@ impl MiniTDFSpectrumReader { .ok_or(MiniTDFSpectrumReaderError::FileNotFound( "analysis.tdf".to_string(), ))?; - let precursor_reader = PrecursorReader::new(&parquet_file_name)?; + let precursor_reader = PrecursorReader::new(&parquet_file_name, None)?; let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name)? .iter() .map(|x| x.offset as usize) diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 559010d..2f18147 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -17,11 +17,7 @@ use crate::{ utils::find_extension, }; -use super::SpectrumReaderTrait; - -const SMOOTHING_WINDOW: u32 = 1; -const CENTROIDING_WINDOW: u32 = 1; -const CALIBRATION_TOLERANCE: f64 = 0.1; +use super::{SpectrumReaderConfig, SpectrumReaderTrait}; #[derive(Debug)] pub struct TDFSpectrumReader { @@ -29,20 +25,26 @@ pub struct TDFSpectrumReader { precursor_reader: PrecursorReader, mz_reader: Tof2MzConverter, raw_spectrum_reader: RawSpectrumReader, + config: SpectrumReaderConfig, } impl TDFSpectrumReader { pub fn new( path_name: impl AsRef, + config: SpectrumReaderConfig, ) -> Result { - let frame_reader: FrameReader = FrameReader::new(&path_name)?; + let frame_reader: FrameReader = + FrameReader::new(&path_name, config.frame_splitting_params)?; let sql_path = find_extension(&path_name, "analysis.tdf").ok_or( TDFSpectrumReaderError::FileNotFound("analysis.tdf".to_string()), )?; let metadata = MetadataReader::new(&sql_path)?; let mz_reader: 
Tof2MzConverter = metadata.mz_converter; let tdf_sql_reader = SqlReader::open(&sql_path)?; - let precursor_reader = PrecursorReader::new(&sql_path)?; + let precursor_reader = PrecursorReader::new( + &sql_path, + Some(config.frame_splitting_params), + )?; let acquisition_type = frame_reader.get_acquisition(); let raw_spectrum_reader = RawSpectrumReader::new( &tdf_sql_reader, @@ -54,6 +56,7 @@ impl TDFSpectrumReader { precursor_reader, mz_reader, raw_spectrum_reader, + config, }; Ok(reader) } @@ -61,8 +64,8 @@ impl TDFSpectrumReader { pub fn read_single_raw_spectrum(&self, index: usize) -> RawSpectrum { let raw_spectrum = self.raw_spectrum_reader.get(index); raw_spectrum - .smooth(SMOOTHING_WINDOW) - .centroid(CENTROIDING_WINDOW) + .smooth(self.config.spectrum_processing_params.smoothing_window) + .centroid(self.config.spectrum_processing_params.centroiding_window) } } @@ -77,7 +80,11 @@ impl SpectrumReaderTrait for TDFSpectrumReader { } fn len(&self) -> usize { - self.precursor_reader.len() + debug_assert_eq!( + self.precursor_reader.len(), + self.raw_spectrum_reader.len() + ); + self.raw_spectrum_reader.len() } fn get_path(&self) -> PathBuf { @@ -94,7 +101,12 @@ impl SpectrumReaderTrait for TDFSpectrumReader { let mut result: Vec<(f64, u32)> = vec![]; for &tof_index in spectrum.tof_indices.iter() { let mz = self.mz_reader.convert(tof_index); - if (mz - precursor_mz).abs() < CALIBRATION_TOLERANCE { + if (mz - precursor_mz).abs() + < self + .config + .spectrum_processing_params + .calibration_tolerance + { let hit = (precursor_mz, tof_index); result.push(hit); } diff --git a/src/io/readers/spectrum_reader/tdf/dda.rs b/src/io/readers/spectrum_reader/tdf/dda.rs index be674ab..be27104 100644 --- a/src/io/readers/spectrum_reader/tdf/dda.rs +++ b/src/io/readers/spectrum_reader/tdf/dda.rs @@ -100,6 +100,10 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader { }; raw_spectrum } + + fn len(&self) -> usize { + self.offsets.len() - 1 + } } #[derive(Debug, 
thiserror::Error)] diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 13e4a6a..5962aa0 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -1,3 +1,7 @@ +use crate::io::readers::tdf_utils::{ + expand_quadrupole_settings, expand_window_settings, +}; +use crate::io::readers::FrameWindowSplittingStrategy; use crate::{ io::readers::{ file_readers::sql_reader::{ @@ -25,23 +29,20 @@ impl DIARawSpectrumReader { let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?; let quadrupole_settings = QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path())?; - let mut expanded_quadrupole_settings: Vec = vec![]; - for window_group in window_groups { - let window = window_group.window_group; - let frame = window_group.frame; - let group = &quadrupole_settings[window as usize - 1]; - for sub_window in 0..group.isolation_mz.len() { - let sub_quad_settings = QuadrupoleSettings { - index: frame, - scan_starts: vec![group.scan_starts[sub_window]], - scan_ends: vec![group.scan_ends[sub_window]], - isolation_mz: vec![group.isolation_mz[sub_window]], - isolation_width: vec![group.isolation_width[sub_window]], - collision_energy: vec![group.collision_energy[sub_window]], - }; - expanded_quadrupole_settings.push(sub_quad_settings) - } - } + let expanded_quadrupole_settings = match frame_reader.splitting_strategy + { + FrameWindowSplittingStrategy::None => quadrupole_settings, + FrameWindowSplittingStrategy::Quadrupole(x) => { + expand_quadrupole_settings( + &window_groups, + &quadrupole_settings, + &x, + ) + }, + FrameWindowSplittingStrategy::Window(x) => { + expand_window_settings(&window_groups, &quadrupole_settings, &x) + }, + }; let reader = Self { expanded_quadrupole_settings, frame_reader, @@ -53,6 +54,7 @@ impl DIARawSpectrumReader { impl RawSpectrumReaderTrait for DIARawSpectrumReader { fn get(&self, index: usize) -> RawSpectrum { let quad_settings = 
&self.expanded_quadrupole_settings[index]; + let collision_energy = quad_settings.collision_energy[0]; let isolation_mz = quad_settings.isolation_mz[0]; let isolation_width = quad_settings.isolation_width[0]; @@ -78,6 +80,10 @@ impl RawSpectrumReaderTrait for DIARawSpectrumReader { }; raw_spectrum } + + fn len(&self) -> usize { + self.expanded_quadrupole_settings.len() + } } #[derive(Debug, thiserror::Error)] diff --git a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs index 8b78d65..d2239b0 100644 --- a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs +++ b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs @@ -118,10 +118,15 @@ impl RawSpectrumReader { pub fn get(&self, index: usize) -> RawSpectrum { self.raw_spectrum_reader.get(index) } + + pub fn len(&self) -> usize { + self.raw_spectrum_reader.len() + } } pub trait RawSpectrumReaderTrait: Sync { fn get(&self, index: usize) -> RawSpectrum; + fn len(&self) -> usize; } #[derive(Debug, thiserror::Error)] diff --git a/src/io/readers/tdf_utils.rs b/src/io/readers/tdf_utils.rs new file mode 100644 index 0000000..4e59968 --- /dev/null +++ b/src/io/readers/tdf_utils.rs @@ -0,0 +1,173 @@ +use crate::io::readers::file_readers::sql_reader::frame_groups::SqlWindowGroup; +use crate::ms_data::QuadrupoleSettings; + +type SpanStep = (usize, usize); + +/// Strategy for expanding quadrupole settings +/// +/// This enum is used to determine how to expand quadrupole settings +/// when reading in DIA data. And exporting spectra (not frames RN). +/// +/// # Variants +/// +/// For example if we have a window with scan start 50 and end 500 +/// +/// * `None` - Do not expand quadrupole settings; use the original settings +/// * `Even(usize)` - Split the quadrupole settings into `usize` evenly spaced +/// subwindows; e.g. if `usize` is 2, the window will be split into 2 subwindows +/// of equal width. 
+/// * `Uniform(SpanStep)` - Split the quadrupole settings into subwindows of +/// width `SpanStep.0` and step `SpanStep.1`; e.g. if `SpanStep` is (100, 50), +/// the window will be split into subwindows of width 100 and step 50 between their +/// scan start and end. +/// +#[derive(Debug, Copy, Clone)] +pub enum QuadWindowExpansionStrategy { + None, + Even(usize), + Uniform(SpanStep), +} + +fn scan_range_subsplit( + start: usize, + end: usize, + strategy: &QuadWindowExpansionStrategy, +) -> Vec<(usize, usize)> { + let out = match strategy { + QuadWindowExpansionStrategy::None => { + vec![(start, end)] + }, + QuadWindowExpansionStrategy::Even(num_splits) => { + let sub_subwindow_width = (end - start) / (num_splits + 1); + let mut out = Vec::new(); + for sub_subwindow in 0..num_splits.clone() { + let sub_subwindow_scan_start = + start + (sub_subwindow_width * sub_subwindow); + let sub_subwindow_scan_end = + start + (sub_subwindow_width * (sub_subwindow + 2)); + + out.push((sub_subwindow_scan_start, sub_subwindow_scan_end)) + } + out + }, + QuadWindowExpansionStrategy::Uniform((span, step)) => { + let mut curr_start = start.clone(); + let mut curr_end = start + span; + let mut out = Vec::new(); + while curr_end < end { + out.push((curr_start, curr_end)); + curr_start += step; + curr_end += step; + } + if curr_start < end { + out.push((curr_start, end)); + } + out + }, + }; + + debug_assert!( + out.iter().all(|(s, e)| s < e), + "Invalid scan range: {:?}", + out + ); + debug_assert!( + out.iter().all(|(s, e)| *s >= start && *e <= end), + "Invalid scan range: {:?}", + out + ); + out +} + +pub fn expand_window_settings( + window_groups: &[SqlWindowGroup], + quadrupole_settings: &[QuadrupoleSettings], + strategy: &QuadWindowExpansionStrategy, +) -> Vec { + let mut expanded_quadrupole_settings: Vec = vec![]; + for window_group in window_groups { + let window = window_group.window_group; + let frame = window_group.frame; + let group = &quadrupole_settings[window as usize - 
1]; + let window_group_start = + group.scan_starts.iter().min().unwrap().clone(); + let window_group_end = group.scan_ends.iter().max().unwrap().clone(); + + for (sws, swe) in + scan_range_subsplit(window_group_start, window_group_end, &strategy) + { + let mut mz_sum = 0.0; + let mut mz_min = std::f64::MAX; + let mut mz_max = std::f64::MIN; + let mut nce_sum = 0.0; + let mut num_added = 0; + + for i in 0..group.isolation_mz.len() { + // Should I be checking here for overlap instead of full containment? + if sws <= group.scan_starts[i] && swe >= group.scan_ends[i] { + mz_sum += group.isolation_mz[i]; + mz_min = mz_min.min( + group.isolation_mz[i] + - (group.isolation_width[i] / 2.0), + ); + mz_max = mz_max.max( + group.isolation_mz[i] + + (group.isolation_width[i] / 2.0), + ); + nce_sum += group.collision_energy[i]; + num_added += 1; + } + } + + let mz_mean = mz_sum / num_added as f64; + let mean_nce = nce_sum / num_added as f64; + + let sub_quad_settings = QuadrupoleSettings { + index: frame, + scan_starts: vec![sws], + scan_ends: vec![swe], + isolation_mz: vec![mz_mean], + isolation_width: vec![mz_min - mz_max], + collision_energy: vec![mean_nce], + }; + expanded_quadrupole_settings.push(sub_quad_settings) + } + } + expanded_quadrupole_settings +} + +pub fn expand_quadrupole_settings( + window_groups: &[SqlWindowGroup], + quadrupole_settings: &[QuadrupoleSettings], + strategy: &QuadWindowExpansionStrategy, +) -> Vec { + // Read the 'NUM_SUB_SUB_SPLITS' from env variables ... 
default to 1 + // (for now) + + let mut expanded_quadrupole_settings: Vec = vec![]; + for window_group in window_groups { + let window = window_group.window_group; + let frame = window_group.frame; + let group = &quadrupole_settings[window as usize - 1]; + for sub_window in 0..group.isolation_mz.len() { + let subwindow_scan_start = group.scan_starts[sub_window]; + let subwindow_scan_end = group.scan_ends[sub_window]; + for (sws, swe) in scan_range_subsplit( + subwindow_scan_start, + subwindow_scan_end, + &strategy, + ) { + let sub_quad_settings = QuadrupoleSettings { + index: frame, + scan_starts: vec![sws], + scan_ends: vec![swe], + isolation_mz: vec![group.isolation_mz[sub_window]], + isolation_width: vec![group.isolation_width[sub_window]], + collision_energy: vec![group.collision_energy[sub_window]], + }; + expanded_quadrupole_settings.push(sub_quad_settings) + } + } + } + expanded_quadrupole_settings +} diff --git a/src/utils/vec_utils.rs b/src/utils/vec_utils.rs index 724fc3c..3ee53c0 100644 --- a/src/utils/vec_utils.rs +++ b/src/utils/vec_utils.rs @@ -12,8 +12,8 @@ pub fn group_and_sum + Copy>( return (vec![], vec![]); } let order: Vec = argsort(&groups); - let mut new_groups: Vec = vec![]; - let mut new_values: Vec = vec![]; + let mut new_groups: Vec = Vec::with_capacity(order.len()); + let mut new_values: Vec = Vec::with_capacity(order.len()); let mut current_group: T = groups[order[0]]; let mut current_value: U = values[order[0]]; for &index in &order[1..] 
{ diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index c67d7ab..400ebf7 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -1,6 +1,9 @@ use std::{path::Path, sync::Arc}; use timsrust::{ - io::readers::FrameReader, + io::readers::{ + FrameReader, FrameWindowSplittingStrategy, QuadWindowExpansionStrategy, + SpectrumReaderConfig, + }, ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}, }; @@ -18,12 +21,13 @@ fn tdf_reader_frames1() { .to_str() .unwrap() .to_string(); - let frames: Vec = FrameReader::new(&file_path) - .unwrap() - .get_all_ms1() - .into_iter() - .map(|x| x.unwrap()) - .collect(); + let frames: Vec = + FrameReader::new(&file_path, FrameWindowSplittingStrategy::default()) + .unwrap() + .get_all_ms1() + .into_iter() + .map(|x| x.unwrap()) + .collect(); let expected: Vec = vec![ Frame { scan_offsets: vec![0, 1, 3, 6, 10], @@ -65,12 +69,13 @@ fn tdf_reader_frames2() { .to_str() .unwrap() .to_string(); - let frames: Vec = FrameReader::new(&file_path) - .unwrap() - .get_all_ms2() - .into_iter() - .map(|x| x.unwrap()) - .collect(); + let frames: Vec = + FrameReader::new(&file_path, FrameWindowSplittingStrategy::default()) + .unwrap() + .get_all_ms2() + .into_iter() + .map(|x| x.unwrap()) + .collect(); let expected: Vec = vec![ // Frame::default(), Frame { @@ -104,4 +109,49 @@ fn tdf_reader_frames2() { } } -// TODO test for DIA +#[test] +fn tdf_reader_frames_dia() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + let frames: Vec = + FrameReader::new(&file_path, FrameWindowSplittingStrategy::default()) + .unwrap() + .get_all_ms2() + .into_iter() + .map(|x| x.unwrap()) + .collect(); + + assert_eq!(frames.len(), 4); + for i in 0..frames.len() { + assert_eq!(frames[i].scan_offsets.len(), 710); + assert_eq!(frames[i].scan_offsets[0], 0); + assert_eq!( + frames[i].scan_offsets.last().unwrap(), + &frames[i].intensities.len() + ); + 
assert_eq!(frames[i].tof_indices.len(), frames[i].intensities.len()); + } + assert_eq!(&frames[0].tof_indices[0], &251695u32); + assert_eq!(&frames[0].intensities[0], &503392u32); + assert_eq!(&frames[0].tof_indices.len(), &754376); + assert_eq!(&frames[0].intensities.len(), &754376); + + assert_eq!(&frames[1].tof_indices[0], &1006071u32); + assert_eq!(&frames[1].intensities[0], &2012144u32); + assert_eq!(&frames[1].tof_indices.len(), &1257057); + assert_eq!(&frames[1].intensities.len(), &1257057); + + assert_eq!(&frames[2].tof_indices[0], &4022866u32); + assert_eq!(&frames[2].intensities[0], &8045734u32); + assert_eq!(&frames[2].tof_indices.len(), &2262419); + assert_eq!(&frames[2].intensities.len(), &2262419); + + assert_eq!(&frames[3].tof_indices[0], &6285285u32); + assert_eq!(&frames[3].intensities[0], &12570572u32); + assert_eq!(&frames[3].tof_indices.len(), &2765100); + assert_eq!(&frames[3].intensities.len(), &2765100); +} diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 43ff4b8..0586059 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -1,6 +1,9 @@ use std::path::Path; use timsrust::{ - io::readers::SpectrumReader, + io::readers::{ + FrameWindowSplittingStrategy, QuadWindowExpansionStrategy, + SpectrumProcessingParams, SpectrumReader, SpectrumReaderConfig, + }, ms_data::{Precursor, Spectrum}, }; @@ -19,7 +22,9 @@ fn minitdf_reader() { .unwrap() .to_string(); let spectra: Vec = - SpectrumReader::new(file_path).unwrap().get_all(); + SpectrumReader::new(file_path, SpectrumReaderConfig::default()) + .unwrap() + .get_all(); let expected: Vec = vec![ Spectrum { mz_values: vec![100.0, 200.002, 300.03, 400.4], @@ -70,7 +75,9 @@ fn tdf_reader_dda() { .unwrap() .to_string(); let spectra: Vec = - SpectrumReader::new(file_path).unwrap().get_all(); + SpectrumReader::new(file_path, SpectrumReaderConfig::default()) + .unwrap() + .get_all(); let expected: Vec = vec![ Spectrum { mz_values: vec![199.7633445943076], @@ -128,3 
+135,66 @@ fn tdf_reader_dda() { assert_eq!(spectra[i], expected[i]); } } + +#[test] +fn test_dia_even() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + + for i in 1..3 { + let frames: Vec = SpectrumReader::new( + &file_path, + SpectrumReaderConfig { + frame_splitting_params: + FrameWindowSplittingStrategy::Quadrupole( + QuadWindowExpansionStrategy::Even(i), + ), + spectrum_processing_params: SpectrumProcessingParams::default(), + }, + ) + .get_all(); + + println!(">>>>> EVEN {:?}", frames.len()); + + // 4 frames, 2 windows in each, i splits/window + assert_eq!(frames.len(), 4 * 2 * i); + } +} + +#[test] +fn test_dia_uniform() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + + for i in [100, 200, 300] { + let frames: Vec = SpectrumReader::new( + &file_path, + SpectrumReaderConfig { + frame_splitting_params: FrameWindowSplittingStrategy::Window( + QuadWindowExpansionStrategy::Uniform((i, i)), + ), + spectrum_processing_params: SpectrumProcessingParams::default(), + }, + ) + .get_all(); + + println!(">>>>> UNIFORM {} > {:?}", i, frames.len()); + for f in frames.iter() { + println!("{:?}", f.precursor); + } + + // Not all frames have scan windows from 0 to 709 ... so ... 
I need to think + // on how to express this in the test + // assert_eq!(frames.len(), 4 * ((709 / i) + 1)); + assert!(frames.len() > (709 / i)); + assert!(frames.len() < 3 * ((709 / i) + 1)); + } +} From a5fd90d59013af5b04a5616f2e6271c137a4702f Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 25 Jul 2024 15:30:00 +0200 Subject: [PATCH 38/69] FIX: resolve test and bench conflicts --- benches/speed_performance.rs | 29 ++++++++++++++++++++++------- tests/spectrum_readers.rs | 2 ++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/benches/speed_performance.rs b/benches/speed_performance.rs index daab1cc..e3f1b5a 100644 --- a/benches/speed_performance.rs +++ b/benches/speed_performance.rs @@ -1,7 +1,10 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rayon::iter::ParallelIterator; use timsrust::{ - io::readers::{FrameReader, SpectrumReader}, + io::readers::{ + FrameReader, FrameWindowSplittingStrategy, SpectrumReader, + SpectrumReaderConfig, + }, ms_data::Frame, }; @@ -33,8 +36,12 @@ fn criterion_benchmark_dda(c: &mut Criterion) { let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DDA_TEST; - let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); + let frame_reader = + FrameReader::new(d_folder_name, FrameWindowSplittingStrategy::None) + .unwrap(); + let spectrum_reader = + SpectrumReader::new(d_folder_name, SpectrumReaderConfig::default()) + .unwrap(); group.bench_function("DDA read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); @@ -55,8 +62,12 @@ fn criterion_benchmark_dia(c: &mut Criterion) { let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DIA_TEST; - let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = 
SpectrumReader::new(d_folder_name).unwrap(); + let frame_reader = + FrameReader::new(d_folder_name, FrameWindowSplittingStrategy::None) + .unwrap(); + let spectrum_reader = + SpectrumReader::new(d_folder_name, SpectrumReaderConfig::default()) + .unwrap(); group.bench_function("DIA read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); @@ -74,8 +85,12 @@ fn criterion_benchmark_syp(c: &mut Criterion) { let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); let d_folder_name: &str = SYP_TEST; - let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); + let frame_reader = + FrameReader::new(d_folder_name, FrameWindowSplittingStrategy::None) + .unwrap(); + let spectrum_reader = + SpectrumReader::new(d_folder_name, SpectrumReaderConfig::default()) + .unwrap(); group.bench_function("SYP read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 0586059..4d44e31 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -156,6 +156,7 @@ fn test_dia_even() { spectrum_processing_params: SpectrumProcessingParams::default(), }, ) + .unwrap() .get_all(); println!(">>>>> EVEN {:?}", frames.len()); @@ -184,6 +185,7 @@ fn test_dia_uniform() { spectrum_processing_params: SpectrumProcessingParams::default(), }, ) + .unwrap() .get_all(); println!(">>>>> UNIFORM {} > {:?}", i, frames.len()); From db6a217d7b75241703bd024154a32005a8fdf66b Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 25 Jul 2024 16:18:41 +0200 Subject: [PATCH 39/69] CHORE: compartmentalize how to build a spectrumreader --- src/io/readers/spectrum_reader.rs | 36 ++++++++++++++++++++++-- tests/spectrum_readers.rs | 46 ++++++++++++++++--------------- 2 files changed, 58 insertions(+), 24 deletions(-) diff --git a/src/io/readers/spectrum_reader.rs 
b/src/io/readers/spectrum_reader.rs index b158afc..8ba2c89 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -14,7 +14,7 @@ pub struct SpectrumReader { spectrum_reader: Box, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct SpectrumProcessingParams { smoothing_window: u32, centroiding_window: u32, @@ -39,7 +39,7 @@ pub enum FrameWindowSplittingStrategy { Window(QuadWindowExpansionStrategy), } -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone)] pub struct SpectrumReaderConfig { pub spectrum_processing_params: SpectrumProcessingParams, pub frame_splitting_params: FrameWindowSplittingStrategy, @@ -51,7 +51,39 @@ impl fmt::Debug for SpectrumReader { } } +#[derive(Debug, Default, Clone)] +pub struct SpectrumReaderBuilder { + path: PathBuf, + config: SpectrumReaderConfig, +} + +impl SpectrumReaderBuilder { + pub fn with_path(&self, path: impl AsRef) -> Self { + Self { + path: path.as_ref().to_path_buf(), + config: self.config.clone(), + } + } + + pub fn with_config(&self, config: SpectrumReaderConfig) -> Self { + Self { + path: self.path.clone(), + config: config, + } + } + + pub fn finalize(&self) -> Result { + let reader = + SpectrumReader::new(self.path.clone(), self.config.clone())?; + Ok(reader) + } +} + impl SpectrumReader { + pub fn build() -> SpectrumReaderBuilder { + SpectrumReaderBuilder::default() + } + pub fn new( path: impl AsRef, config: SpectrumReaderConfig, diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 4d44e31..e931b78 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -21,10 +21,11 @@ fn minitdf_reader() { .to_str() .unwrap() .to_string(); - let spectra: Vec = - SpectrumReader::new(file_path, SpectrumReaderConfig::default()) - .unwrap() - .get_all(); + let spectra: Vec = SpectrumReader::build() + .with_path(file_path) + .finalize() + .unwrap() + .get_all(); let expected: Vec = vec![ Spectrum { mz_values: vec![100.0, 200.002, 300.03, 400.4], @@ -74,10 
+75,11 @@ fn tdf_reader_dda() { .to_str() .unwrap() .to_string(); - let spectra: Vec = - SpectrumReader::new(file_path, SpectrumReaderConfig::default()) - .unwrap() - .get_all(); + let spectra: Vec = SpectrumReader::build() + .with_path(file_path) + .finalize() + .unwrap() + .get_all(); let expected: Vec = vec![ Spectrum { mz_values: vec![199.7633445943076], @@ -146,18 +148,18 @@ fn test_dia_even() { .to_string(); for i in 1..3 { - let frames: Vec = SpectrumReader::new( - &file_path, - SpectrumReaderConfig { + let frames: Vec = SpectrumReader::build() + .with_path(&file_path) + .with_config(SpectrumReaderConfig { frame_splitting_params: FrameWindowSplittingStrategy::Quadrupole( QuadWindowExpansionStrategy::Even(i), ), spectrum_processing_params: SpectrumProcessingParams::default(), - }, - ) - .unwrap() - .get_all(); + }) + .finalize() + .unwrap() + .get_all(); println!(">>>>> EVEN {:?}", frames.len()); @@ -176,17 +178,17 @@ fn test_dia_uniform() { .to_string(); for i in [100, 200, 300] { - let frames: Vec = SpectrumReader::new( - &file_path, - SpectrumReaderConfig { + let frames: Vec = SpectrumReader::build() + .with_path(&file_path) + .with_config(SpectrumReaderConfig { frame_splitting_params: FrameWindowSplittingStrategy::Window( QuadWindowExpansionStrategy::Uniform((i, i)), ), spectrum_processing_params: SpectrumProcessingParams::default(), - }, - ) - .unwrap() - .get_all(); + }) + .finalize() + .unwrap() + .get_all(); println!(">>>>> UNIFORM {} > {:?}", i, frames.len()); for f in frames.iter() { From c0039d8a50ce353c75eedb93389165953fd39552 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 25 Jul 2024 16:24:37 +0200 Subject: [PATCH 40/69] FEAT: simplified frame reader creation --- src/io/readers/frame_reader.rs | 14 +++++--- src/io/readers/precursor_reader/tdf.rs | 5 +-- src/io/readers/spectrum_reader/tdf.rs | 4 +-- tests/frame_readers.rs | 44 +++++++++++--------------- 4 files changed, 31 insertions(+), 36 deletions(-) diff --git 
a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index f4b0d48..a5d09a0 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -35,10 +35,7 @@ pub struct FrameReader { } impl FrameReader { - pub fn new( - path: impl AsRef, - config: FrameWindowSplittingStrategy, - ) -> Result { + pub fn new(path: impl AsRef) -> Result { let sql_path = find_extension(&path, "analysis.tdf").ok_or( FrameReaderError::FileNotFound("analysis.tdf".to_string()), )?; @@ -79,11 +76,18 @@ impl FrameReader { .into_iter() .map(|x| Arc::new(x)) .collect(), - splitting_strategy: config, + splitting_strategy: FrameWindowSplittingStrategy::default(), }; Ok(reader) } + pub fn set_splitting_strategy( + &mut self, + config: &FrameWindowSplittingStrategy, + ) { + self.splitting_strategy = *config; + } + pub fn parallel_filter<'a, F: Fn(&SqlFrame) -> bool + Sync + Send + 'a>( &'a self, predicate: F, diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs index 1795e4c..901c320 100644 --- a/src/io/readers/precursor_reader/tdf.rs +++ b/src/io/readers/precursor_reader/tdf.rs @@ -63,10 +63,7 @@ impl TDFPrecursorReader { (acq_type, acq_config) => { return Err( TDFPrecursorReaderError::UnsupportedAcquisition( - format!( - "{:?} + {:?}", - acquisition_type, acq_config - ), + format!("{:?} + {:?}", acq_type, acq_config), ), ) }, diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 2f18147..b569f33 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -33,8 +33,8 @@ impl TDFSpectrumReader { path_name: impl AsRef, config: SpectrumReaderConfig, ) -> Result { - let frame_reader: FrameReader = - FrameReader::new(&path_name, config.frame_splitting_params)?; + let mut frame_reader: FrameReader = FrameReader::new(&path_name)?; + frame_reader.set_splitting_strategy(&config.frame_splitting_params); let sql_path = find_extension(&path_name, 
"analysis.tdf").ok_or( TDFSpectrumReaderError::FileNotFound("analysis.tdf".to_string()), )?; diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index 400ebf7..b6fa001 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -1,9 +1,6 @@ use std::{path::Path, sync::Arc}; use timsrust::{ - io::readers::{ - FrameReader, FrameWindowSplittingStrategy, QuadWindowExpansionStrategy, - SpectrumReaderConfig, - }, + io::readers::FrameReader, ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}, }; @@ -21,13 +18,12 @@ fn tdf_reader_frames1() { .to_str() .unwrap() .to_string(); - let frames: Vec = - FrameReader::new(&file_path, FrameWindowSplittingStrategy::default()) - .unwrap() - .get_all_ms1() - .into_iter() - .map(|x| x.unwrap()) - .collect(); + let frames: Vec = FrameReader::new(&file_path) + .unwrap() + .get_all_ms1() + .into_iter() + .map(|x| x.unwrap()) + .collect(); let expected: Vec = vec![ Frame { scan_offsets: vec![0, 1, 3, 6, 10], @@ -69,13 +65,12 @@ fn tdf_reader_frames2() { .to_str() .unwrap() .to_string(); - let frames: Vec = - FrameReader::new(&file_path, FrameWindowSplittingStrategy::default()) - .unwrap() - .get_all_ms2() - .into_iter() - .map(|x| x.unwrap()) - .collect(); + let frames: Vec = FrameReader::new(&file_path) + .unwrap() + .get_all_ms2() + .into_iter() + .map(|x| x.unwrap()) + .collect(); let expected: Vec = vec![ // Frame::default(), Frame { @@ -117,13 +112,12 @@ fn tdf_reader_frames_dia() { .to_str() .unwrap() .to_string(); - let frames: Vec = - FrameReader::new(&file_path, FrameWindowSplittingStrategy::default()) - .unwrap() - .get_all_ms2() - .into_iter() - .map(|x| x.unwrap()) - .collect(); + let frames: Vec = FrameReader::new(&file_path) + .unwrap() + .get_all_ms2() + .into_iter() + .map(|x| x.unwrap()) + .collect(); assert_eq!(frames.len(), 4); for i in 0..frames.len() { From e5a69f1d076cc9646b17c55ae5e2b3d82fcf7827 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 25 Jul 2024 16:26:10 +0200 Subject: 
[PATCH 41/69] FIX: rebuild bench tests --- benches/speed_performance.rs | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/benches/speed_performance.rs b/benches/speed_performance.rs index e3f1b5a..e1e6533 100644 --- a/benches/speed_performance.rs +++ b/benches/speed_performance.rs @@ -1,11 +1,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rayon::iter::ParallelIterator; -use timsrust::{ - io::readers::{ - FrameReader, FrameWindowSplittingStrategy, SpectrumReader, - SpectrumReaderConfig, - }, - ms_data::Frame, +use timsrust::io::readers::{ + FrameReader, SpectrumReader, SpectrumReaderConfig, }; const DDA_TEST: &str = @@ -36,9 +32,7 @@ fn criterion_benchmark_dda(c: &mut Criterion) { let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DDA_TEST; - let frame_reader = - FrameReader::new(d_folder_name, FrameWindowSplittingStrategy::None) - .unwrap(); + let frame_reader = FrameReader::new(d_folder_name).unwrap(); let spectrum_reader = SpectrumReader::new(d_folder_name, SpectrumReaderConfig::default()) .unwrap(); @@ -62,9 +56,7 @@ fn criterion_benchmark_dia(c: &mut Criterion) { let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DIA_TEST; - let frame_reader = - FrameReader::new(d_folder_name, FrameWindowSplittingStrategy::None) - .unwrap(); + let frame_reader = FrameReader::new(d_folder_name).unwrap(); let spectrum_reader = SpectrumReader::new(d_folder_name, SpectrumReaderConfig::default()) .unwrap(); @@ -85,9 +77,7 @@ fn criterion_benchmark_syp(c: &mut Criterion) { let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); let d_folder_name: &str = SYP_TEST; - let frame_reader = - FrameReader::new(d_folder_name, FrameWindowSplittingStrategy::None) - .unwrap(); + let frame_reader = 
FrameReader::new(d_folder_name).unwrap(); let spectrum_reader = SpectrumReader::new(d_folder_name, SpectrumReaderConfig::default()) .unwrap(); From e53b95807433ef1a6059f3282272d0c2932f4692 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Jul 2024 11:17:34 +0200 Subject: [PATCH 42/69] FEAT: implemented domain inverters --- src/domain_converters.rs | 2 ++ src/domain_converters/frame_to_rt.rs | 16 ++++++++++++++++ src/domain_converters/scan_to_im.rs | 9 +++++++-- src/domain_converters/tof_to_mz.rs | 8 ++++++-- 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/domain_converters.rs b/src/domain_converters.rs index 11c1387..e38bbf4 100644 --- a/src/domain_converters.rs +++ b/src/domain_converters.rs @@ -10,4 +10,6 @@ pub use tof_to_mz::Tof2MzConverter; /// Convert from one domain (e.g. Time of Flight) to another (m/z). pub trait ConvertableDomain { fn convert + Copy>(&self, value: T) -> f64; + + fn invert + Copy>(&self, value: T) -> f64; } diff --git a/src/domain_converters/frame_to_rt.rs b/src/domain_converters/frame_to_rt.rs index eb7d1d1..090b8b4 100644 --- a/src/domain_converters/frame_to_rt.rs +++ b/src/domain_converters/frame_to_rt.rs @@ -16,4 +16,20 @@ impl super::ConvertableDomain for Frame2RtConverter { let upper_value: f64 = self.rt_values[value.into().ceil() as usize]; (lower_value + upper_value) / 2. 
} + fn invert + Copy>(&self, value: T) -> f64 { + let rt_value = value.into(); + match self.rt_values.binary_search_by(|probe| { + probe.partial_cmp(&rt_value).expect("Cannot handle NaNs") + }) { + Ok(index) => index as f64, + Err(index) => match index { + _ if (index > 0) && (index < self.rt_values.len()) => { + let start = self.rt_values[index - 1]; + let end = self.rt_values[index]; + index as f64 + (rt_value - start) / (end - start) + }, + _ => index as f64, + }, + } + } } diff --git a/src/domain_converters/scan_to_im.rs b/src/domain_converters/scan_to_im.rs index e7390ff..2a9d8bd 100644 --- a/src/domain_converters/scan_to_im.rs +++ b/src/domain_converters/scan_to_im.rs @@ -22,7 +22,12 @@ impl Scan2ImConverter { impl super::ConvertableDomain for Scan2ImConverter { fn convert + Copy>(&self, value: T) -> f64 { - let scan_index_f64: f64 = value.into(); - self.scan_intercept + self.scan_slope * scan_index_f64 + let scan_index: f64 = value.into(); + self.scan_intercept + self.scan_slope * scan_index + } + + fn invert + Copy>(&self, value: T) -> f64 { + let im_value: f64 = value.into(); + (im_value - self.scan_intercept) / self.scan_slope } } diff --git a/src/domain_converters/tof_to_mz.rs b/src/domain_converters/tof_to_mz.rs index c9a3abc..c42078b 100644 --- a/src/domain_converters/tof_to_mz.rs +++ b/src/domain_converters/tof_to_mz.rs @@ -36,7 +36,11 @@ impl Tof2MzConverter { impl super::ConvertableDomain for Tof2MzConverter { fn convert + Copy>(&self, value: T) -> f64 { - let tof_index_f64: f64 = value.into(); - (self.tof_intercept + self.tof_slope * tof_index_f64).powi(2) + let tof_index: f64 = value.into(); + (self.tof_intercept + self.tof_slope * tof_index).powi(2) + } + fn invert + Copy>(&self, value: T) -> f64 { + let mz_value: f64 = value.into(); + (mz_value.sqrt() - self.tof_intercept) / self.tof_slope } } From b5879eb92a4d092f81adefe2ea9d8b8326a21d43 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Jul 2024 11:27:09 +0200 Subject: [PATCH 43/69] 
DOCS: add future plans to readme --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index 83300ee..1283485 100644 --- a/README.md +++ b/README.md @@ -43,3 +43,12 @@ Two file formats are supported: ## Python bindings The [timsrust_pyo3](https://github.com/jspaezp/timsrust_pyo3) package is an example of how the performance of TimsRust can be utilized in Python + +## Planned changes for future versions + +* Improve docs +* Improve tests +* Parse CompressionType1 +* Error propagation for SpectrumReader(Trait).get +* Make Path of TimsTOF data into special type +* ... From 64f1cad8f837ecc5003436b6fe397e0bf3650b31 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Jul 2024 11:29:52 +0200 Subject: [PATCH 44/69] FEAT: Made spectrum reader more user friendly to create and set proper defaults for dia data --- src/io/readers/precursor_reader.rs | 13 +++------ src/io/readers/precursor_reader/tdf.rs | 32 ++++++---------------- src/io/readers/precursor_reader/tdf/dia.rs | 1 - src/io/readers/spectrum_reader.rs | 10 +++++-- src/io/readers/spectrum_reader/minitdf.rs | 7 +++-- src/io/readers/spectrum_reader/tdf.rs | 6 ++-- src/io/readers/spectrum_reader/tdf/dia.rs | 1 - 7 files changed, 26 insertions(+), 44 deletions(-) diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs index b755a25..b5e4aef 100644 --- a/src/io/readers/precursor_reader.rs +++ b/src/io/readers/precursor_reader.rs @@ -24,17 +24,12 @@ impl fmt::Debug for PrecursorReader { impl PrecursorReader { pub fn new( path: impl AsRef, - config: Option, + config: FrameWindowSplittingStrategy, ) -> Result { - let tmp = path.as_ref().extension().and_then(|e| e.to_str()); let precursor_reader: Box = - match (tmp, config) { - (Some("parquet"), None) => { - Box::new(MiniTDFPrecursorReader::new(path)?) - }, - (Some("tdf"), strat) => { - Box::new(TDFPrecursorReader::new(path, strat)?) 
- }, + match path.as_ref().extension().and_then(|e| e.to_str()) { + Some("parquet") => Box::new(MiniTDFPrecursorReader::new(path)?), + Some("tdf") => Box::new(TDFPrecursorReader::new(path, config)?), _ => panic!(), }; let reader = Self { precursor_reader }; diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs index 901c320..2f92a78 100644 --- a/src/io/readers/precursor_reader/tdf.rs +++ b/src/io/readers/precursor_reader/tdf.rs @@ -23,7 +23,7 @@ pub struct TDFPrecursorReader { impl TDFPrecursorReader { pub fn new( path: impl AsRef, - splitting_strategy: Option, + splitting_strategy: FrameWindowSplittingStrategy, ) -> Result { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(sql_path)?; @@ -37,33 +37,17 @@ impl TDFPrecursorReader { AcquisitionType::Unknown }; let precursor_reader: Box = - match (acquisition_type, splitting_strategy) { - (AcquisitionType::DDAPASEF, None) => { + match acquisition_type { + AcquisitionType::DDAPASEF => { Box::new(DDATDFPrecursorReader::new(path)?) }, - ( - AcquisitionType::DDAPASEF, - Some(FrameWindowSplittingStrategy::None), - ) => { - // Not 100% sure when this happens ... - // By this I mean generating a Some(None) - // ./tests/frame_readers.rs:60:25 generates it. - // JSPP - 2024-Jul-16 - Box::new(DDATDFPrecursorReader::new(path)?) - }, - (AcquisitionType::DIAPASEF, Some(splitting_strat)) => { - Box::new(DIATDFPrecursorReader::new(path, splitting_strat)?) - }, - (AcquisitionType::DIAPASEF, None) => { - Box::new(DIATDFPrecursorReader::new( - path, - FrameWindowSplittingStrategy::None, - )?) 
- }, - (acq_type, acq_config) => { + AcquisitionType::DIAPASEF => Box::new( + DIATDFPrecursorReader::new(path, splitting_strategy)?, + ), + acquisition_type => { return Err( TDFPrecursorReaderError::UnsupportedAcquisition( - format!("{:?} + {:?}", acq_type, acq_config), + format!("{:?}", acquisition_type), ), ) }, diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index 2b4dae0..c7b23df 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -41,7 +41,6 @@ impl DIATDFPrecursorReader { let quadrupole_settings = QuadrupoleSettingsReader::new(tdf_sql_reader.get_path())?; let expanded_quadrupole_settings = match splitting_strat { - FrameWindowSplittingStrategy::None => quadrupole_settings, FrameWindowSplittingStrategy::Quadrupole(x) => { expand_quadrupole_settings( &window_groups, diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index 8ba2c89..0920e52 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -31,14 +31,18 @@ impl Default for SpectrumProcessingParams { } } -#[derive(Debug, Clone, Copy, Default)] +#[derive(Debug, Clone, Copy)] pub enum FrameWindowSplittingStrategy { - #[default] - None, Quadrupole(QuadWindowExpansionStrategy), Window(QuadWindowExpansionStrategy), } +impl Default for FrameWindowSplittingStrategy { + fn default() -> Self { + Self::Quadrupole(QuadWindowExpansionStrategy::Even(1)) + } +} + #[derive(Debug, Default, Clone)] pub struct SpectrumReaderConfig { pub spectrum_processing_params: SpectrumProcessingParams, diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 5c24e77..16f5b39 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -18,7 +18,7 @@ use crate::{ utils::find_extension, }; -use super::SpectrumReaderTrait; +use super::{FrameWindowSplittingStrategy, 
SpectrumReaderTrait}; #[derive(Debug)] pub struct MiniTDFSpectrumReader { @@ -36,7 +36,10 @@ impl MiniTDFSpectrumReader { .ok_or(MiniTDFSpectrumReaderError::FileNotFound( "analysis.tdf".to_string(), ))?; - let precursor_reader = PrecursorReader::new(&parquet_file_name, None)?; + let precursor_reader = PrecursorReader::new( + &parquet_file_name, + FrameWindowSplittingStrategy::default(), + )?; let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name)? .iter() .map(|x| x.offset as usize) diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index b569f33..36437bd 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -41,10 +41,8 @@ impl TDFSpectrumReader { let metadata = MetadataReader::new(&sql_path)?; let mz_reader: Tof2MzConverter = metadata.mz_converter; let tdf_sql_reader = SqlReader::open(&sql_path)?; - let precursor_reader = PrecursorReader::new( - &sql_path, - Some(config.frame_splitting_params), - )?; + let precursor_reader = + PrecursorReader::new(&sql_path, config.frame_splitting_params)?; let acquisition_type = frame_reader.get_acquisition(); let raw_spectrum_reader = RawSpectrumReader::new( &tdf_sql_reader, diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 5962aa0..1f0c9d0 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -31,7 +31,6 @@ impl DIARawSpectrumReader { QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path())?; let expanded_quadrupole_settings = match frame_reader.splitting_strategy { - FrameWindowSplittingStrategy::None => quadrupole_settings, FrameWindowSplittingStrategy::Quadrupole(x) => { expand_quadrupole_settings( &window_groups, From 1f950232eca5f91fa70e978079eb05c156f36e5b Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Jul 2024 11:55:08 +0200 Subject: [PATCH 45/69] FEAT: simplified spectrumreader new and build --- 
benches/speed_performance.rs | 12 +-- src/io/readers/spectrum_reader.rs | 150 +++++++++++++++--------------- 2 files changed, 78 insertions(+), 84 deletions(-) diff --git a/benches/speed_performance.rs b/benches/speed_performance.rs index e1e6533..3beeeac 100644 --- a/benches/speed_performance.rs +++ b/benches/speed_performance.rs @@ -33,9 +33,7 @@ fn criterion_benchmark_dda(c: &mut Criterion) { group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DDA_TEST; let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = - SpectrumReader::new(d_folder_name, SpectrumReaderConfig::default()) - .unwrap(); + let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); group.bench_function("DDA read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); @@ -57,9 +55,7 @@ fn criterion_benchmark_dia(c: &mut Criterion) { group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DIA_TEST; let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = - SpectrumReader::new(d_folder_name, SpectrumReaderConfig::default()) - .unwrap(); + let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); group.bench_function("DIA read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); @@ -78,9 +74,7 @@ fn criterion_benchmark_syp(c: &mut Criterion) { group.significance_level(0.001).sample_size(10); let d_folder_name: &str = SYP_TEST; let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = - SpectrumReader::new(d_folder_name, SpectrumReaderConfig::default()) - .unwrap(); + let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); group.bench_function("SYP read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index 0920e52..d7e8f34 100644 --- a/src/io/readers/spectrum_reader.rs +++ 
b/src/io/readers/spectrum_reader.rs @@ -14,92 +14,19 @@ pub struct SpectrumReader { spectrum_reader: Box, } -#[derive(Debug, Clone)] -pub struct SpectrumProcessingParams { - smoothing_window: u32, - centroiding_window: u32, - calibration_tolerance: f64, -} - -impl Default for SpectrumProcessingParams { - fn default() -> Self { - Self { - smoothing_window: 1, - centroiding_window: 1, - calibration_tolerance: 0.1, - } - } -} - -#[derive(Debug, Clone, Copy)] -pub enum FrameWindowSplittingStrategy { - Quadrupole(QuadWindowExpansionStrategy), - Window(QuadWindowExpansionStrategy), -} - -impl Default for FrameWindowSplittingStrategy { - fn default() -> Self { - Self::Quadrupole(QuadWindowExpansionStrategy::Even(1)) - } -} - -#[derive(Debug, Default, Clone)] -pub struct SpectrumReaderConfig { - pub spectrum_processing_params: SpectrumProcessingParams, - pub frame_splitting_params: FrameWindowSplittingStrategy, -} - impl fmt::Debug for SpectrumReader { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "SpectrumReader {{ /* fields omitted */ }}") } } -#[derive(Debug, Default, Clone)] -pub struct SpectrumReaderBuilder { - path: PathBuf, - config: SpectrumReaderConfig, -} - -impl SpectrumReaderBuilder { - pub fn with_path(&self, path: impl AsRef) -> Self { - Self { - path: path.as_ref().to_path_buf(), - config: self.config.clone(), - } - } - - pub fn with_config(&self, config: SpectrumReaderConfig) -> Self { - Self { - path: self.path.clone(), - config: config, - } - } - - pub fn finalize(&self) -> Result { - let reader = - SpectrumReader::new(self.path.clone(), self.config.clone())?; - Ok(reader) - } -} - impl SpectrumReader { pub fn build() -> SpectrumReaderBuilder { SpectrumReaderBuilder::default() } - pub fn new( - path: impl AsRef, - config: SpectrumReaderConfig, - ) -> Result { - let spectrum_reader: Box = - match path.as_ref().extension().and_then(|e| e.to_str()) { - Some("ms2") => Box::new(MiniTDFSpectrumReader::new(path)?), - Some("d") => 
Box::new(TDFSpectrumReader::new(path, config)?), - _ => panic!(), - }; - let reader = Self { spectrum_reader }; - Ok(reader) + pub fn new(path: impl AsRef) -> Result { + Ok(Self::build().with_path(path).finalize()?) } pub fn get(&self, index: usize) -> Spectrum { @@ -128,6 +55,44 @@ impl SpectrumReader { } } +#[derive(Debug, Default, Clone)] +pub struct SpectrumReaderBuilder { + path: PathBuf, + config: SpectrumReaderConfig, +} + +impl SpectrumReaderBuilder { + pub fn with_path(&self, path: impl AsRef) -> Self { + Self { + path: path.as_ref().to_path_buf(), + ..self.clone() + } + } + + pub fn with_config(&self, config: SpectrumReaderConfig) -> Self { + Self { + config: config, + ..self.clone() + } + } + + pub fn finalize(&self) -> Result { + let spectrum_reader: Box = + match self.path.extension().and_then(|e| e.to_str()) { + Some("ms2") => { + Box::new(MiniTDFSpectrumReader::new(self.path.clone())?) + }, + Some("d") => Box::new(TDFSpectrumReader::new( + self.path.clone(), + self.config.clone(), + )?), + _ => panic!(), + }; + let reader = SpectrumReader { spectrum_reader }; + Ok(reader) + } +} + trait SpectrumReaderTrait: Sync { fn get(&self, index: usize) -> Spectrum; fn get_path(&self) -> PathBuf; @@ -142,3 +107,38 @@ pub enum SpectrumReaderError { #[error("{0}")] TDFSpectrumReaderError(#[from] TDFSpectrumReaderError), } + +#[derive(Debug, Clone)] +pub struct SpectrumProcessingParams { + smoothing_window: u32, + centroiding_window: u32, + calibration_tolerance: f64, +} + +impl Default for SpectrumProcessingParams { + fn default() -> Self { + Self { + smoothing_window: 1, + centroiding_window: 1, + calibration_tolerance: 0.1, + } + } +} + +#[derive(Debug, Clone, Copy)] +pub enum FrameWindowSplittingStrategy { + Quadrupole(QuadWindowExpansionStrategy), + Window(QuadWindowExpansionStrategy), +} + +impl Default for FrameWindowSplittingStrategy { + fn default() -> Self { + Self::Quadrupole(QuadWindowExpansionStrategy::Even(1)) + } +} + +#[derive(Debug, Default, 
Clone)] +pub struct SpectrumReaderConfig { + pub spectrum_processing_params: SpectrumProcessingParams, + pub frame_splitting_params: FrameWindowSplittingStrategy, +} From 9c75d2a22034215824352a8262f4191f12a644ae Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Jul 2024 12:12:24 +0200 Subject: [PATCH 46/69] FEAT: cleaned up precursorreader --- src/io/readers/precursor_reader.rs | 64 ++++++++++++++++++----- src/io/readers/spectrum_reader/minitdf.rs | 9 ++-- src/io/readers/spectrum_reader/tdf.rs | 6 ++- 3 files changed, 59 insertions(+), 20 deletions(-) diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs index b5e4aef..4cb3357 100644 --- a/src/io/readers/precursor_reader.rs +++ b/src/io/readers/precursor_reader.rs @@ -2,7 +2,7 @@ mod minitdf; mod tdf; use core::fmt; -use std::path::Path; +use std::path::{Path, PathBuf}; use minitdf::{MiniTDFPrecursorReader, MiniTDFPrecursorReaderError}; use tdf::{TDFPrecursorReader, TDFPrecursorReaderError}; @@ -22,18 +22,12 @@ impl fmt::Debug for PrecursorReader { } impl PrecursorReader { - pub fn new( - path: impl AsRef, - config: FrameWindowSplittingStrategy, - ) -> Result { - let precursor_reader: Box = - match path.as_ref().extension().and_then(|e| e.to_str()) { - Some("parquet") => Box::new(MiniTDFPrecursorReader::new(path)?), - Some("tdf") => Box::new(TDFPrecursorReader::new(path, config)?), - _ => panic!(), - }; - let reader = Self { precursor_reader }; - Ok(reader) + pub fn build() -> PrecursorReaderBuilder { + PrecursorReaderBuilder::default() + } + + pub fn new(path: impl AsRef) -> Result { + Ok(Self::build().with_path(path).finalize()?) 
} pub fn get(&self, index: usize) -> Option { @@ -45,6 +39,48 @@ impl PrecursorReader { } } +#[derive(Debug, Default, Clone)] +pub struct PrecursorReaderBuilder { + path: PathBuf, + config: FrameWindowSplittingStrategy, +} + +impl PrecursorReaderBuilder { + pub fn with_path(&self, path: impl AsRef) -> Self { + Self { + path: path.as_ref().to_path_buf(), + ..self.clone() + } + } + + pub fn with_config(&self, config: FrameWindowSplittingStrategy) -> Self { + Self { + config: config, + ..self.clone() + } + } + + pub fn finalize(&self) -> Result { + let precursor_reader: Box = + match self.path.extension().and_then(|e| e.to_str()) { + Some("parquet") => { + Box::new(MiniTDFPrecursorReader::new(self.path.clone())?) + }, + Some("tdf") => Box::new(TDFPrecursorReader::new( + self.path.clone(), + self.config.clone(), + )?), + _ => { + return Err(PrecursorReaderError::PrecursorReaderFileError( + self.path.clone(), + )) + }, + }; + let reader = PrecursorReader { precursor_reader }; + Ok(reader) + } +} + trait PrecursorReaderTrait: Sync { fn get(&self, index: usize) -> Option; fn len(&self) -> usize; @@ -56,4 +92,6 @@ pub enum PrecursorReaderError { MiniTDFPrecursorReaderError(#[from] MiniTDFPrecursorReaderError), #[error("{0}")] TDFPrecursorReaderError(#[from] TDFPrecursorReaderError), + #[error("File {0} not valid")] + PrecursorReaderFileError(PathBuf), } diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 16f5b39..6cbccaf 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -18,7 +18,7 @@ use crate::{ utils::find_extension, }; -use super::{FrameWindowSplittingStrategy, SpectrumReaderTrait}; +use super::SpectrumReaderTrait; #[derive(Debug)] pub struct MiniTDFSpectrumReader { @@ -36,10 +36,9 @@ impl MiniTDFSpectrumReader { .ok_or(MiniTDFSpectrumReaderError::FileNotFound( "analysis.tdf".to_string(), ))?; - let precursor_reader = PrecursorReader::new( - &parquet_file_name, 
- FrameWindowSplittingStrategy::default(), - )?; + let precursor_reader = PrecursorReader::build() + .with_path(&parquet_file_name) + .finalize()?; let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name)? .iter() .map(|x| x.offset as usize) diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 36437bd..ac56eff 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -41,8 +41,10 @@ impl TDFSpectrumReader { let metadata = MetadataReader::new(&sql_path)?; let mz_reader: Tof2MzConverter = metadata.mz_converter; let tdf_sql_reader = SqlReader::open(&sql_path)?; - let precursor_reader = - PrecursorReader::new(&sql_path, config.frame_splitting_params)?; + let precursor_reader = PrecursorReader::build() + .with_path(&sql_path) + .with_config(config.frame_splitting_params) + .finalize()?; let acquisition_type = frame_reader.get_acquisition(); let raw_spectrum_reader = RawSpectrumReader::new( &tdf_sql_reader, From a404bcd0937973d52cb591c5d3874797359be3a2 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Jul 2024 17:10:05 +0200 Subject: [PATCH 47/69] FEAT: propagating spectrum reader errrors (pt1) --- src/io/readers/spectrum_reader.rs | 21 ++++++++++------ src/io/readers/spectrum_reader/minitdf.rs | 6 ++--- src/io/readers/spectrum_reader/tdf.rs | 25 ++++++++++++------- src/io/readers/spectrum_reader/tdf/dda.rs | 8 +++--- src/io/readers/spectrum_reader/tdf/dia.rs | 8 +++--- .../spectrum_reader/tdf/raw_spectra.rs | 7 ++++-- tests/spectrum_readers.rs | 20 ++++++++++++--- 7 files changed, 64 insertions(+), 31 deletions(-) diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index d7e8f34..d0c4249 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -26,10 +26,10 @@ impl SpectrumReader { } pub fn new(path: impl AsRef) -> Result { - Ok(Self::build().with_path(path).finalize()?) 
+ Self::build().with_path(path).finalize() } - pub fn get(&self, index: usize) -> Spectrum { + pub fn get(&self, index: usize) -> Result { self.spectrum_reader.get(index) } @@ -41,12 +41,13 @@ impl SpectrumReader { self.spectrum_reader.len() } - pub fn get_all(&self) -> Vec { - let mut spectra: Vec = (0..self.len()) + pub fn get_all(&self) -> Vec> { + let mut spectra: Vec> = (0..self + .len()) .into_par_iter() .map(|index| self.get(index)) .collect(); - spectra.sort_by_key(|x| x.precursor.unwrap().index); + spectra.sort_by_key(|x| x.as_ref().unwrap().precursor.unwrap().index); spectra } @@ -86,7 +87,11 @@ impl SpectrumReaderBuilder { self.path.clone(), self.config.clone(), )?), - _ => panic!(), + _ => { + return Err(SpectrumReaderError::SpectrumReaderFileError( + self.path.clone(), + )) + }, }; let reader = SpectrumReader { spectrum_reader }; Ok(reader) @@ -94,7 +99,7 @@ impl SpectrumReaderBuilder { } trait SpectrumReaderTrait: Sync { - fn get(&self, index: usize) -> Spectrum; + fn get(&self, index: usize) -> Result; fn get_path(&self) -> PathBuf; fn len(&self) -> usize; fn calibrate(&mut self); @@ -106,6 +111,8 @@ pub enum SpectrumReaderError { MiniTDFSpectrumReaderError(#[from] MiniTDFSpectrumReaderError), #[error("{0}")] TDFSpectrumReaderError(#[from] TDFSpectrumReaderError), + #[error("File {0} not valid")] + SpectrumReaderFileError(PathBuf), } #[derive(Debug, Clone)] diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 6cbccaf..03adce4 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -18,7 +18,7 @@ use crate::{ utils::find_extension, }; -use super::SpectrumReaderTrait; +use super::{SpectrumReaderError, SpectrumReaderTrait}; #[derive(Debug)] pub struct MiniTDFSpectrumReader { @@ -65,7 +65,7 @@ impl MiniTDFSpectrumReader { } impl SpectrumReaderTrait for MiniTDFSpectrumReader { - fn get(&self, index: usize) -> Spectrum { + fn get(&self, index: usize) -> 
Result { let mut spectrum = Spectrum::default(); spectrum.index = index; let blob = self.blob_reader.get(index).unwrap(); @@ -98,7 +98,7 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { } else { 2.0 + (precursor.mz - 700.0) / 100.0 }; //FIX? - spectrum + Ok(spectrum) } fn len(&self) -> usize { diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index ac56eff..9ee78c5 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -17,7 +17,7 @@ use crate::{ utils::find_extension, }; -use super::{SpectrumReaderConfig, SpectrumReaderTrait}; +use super::{SpectrumReaderConfig, SpectrumReaderError, SpectrumReaderTrait}; #[derive(Debug)] pub struct TDFSpectrumReader { @@ -61,22 +61,29 @@ impl TDFSpectrumReader { Ok(reader) } - pub fn read_single_raw_spectrum(&self, index: usize) -> RawSpectrum { - let raw_spectrum = self.raw_spectrum_reader.get(index); - raw_spectrum + pub fn read_single_raw_spectrum( + &self, + index: usize, + ) -> Result { + let raw_spectrum = self + .raw_spectrum_reader + .get(index)? 
.smooth(self.config.spectrum_processing_params.smoothing_window) - .centroid(self.config.spectrum_processing_params.centroiding_window) + .centroid( + self.config.spectrum_processing_params.centroiding_window, + ); + Ok(raw_spectrum) } } impl SpectrumReaderTrait for TDFSpectrumReader { - fn get(&self, index: usize) -> Spectrum { - let raw_spectrum = self.read_single_raw_spectrum(index); + fn get(&self, index: usize) -> Result { + let raw_spectrum = self.read_single_raw_spectrum(index).unwrap(); let spectrum = raw_spectrum.finalize( self.precursor_reader.get(index).unwrap(), &self.mz_reader, ); - spectrum + Ok(spectrum) } fn len(&self) -> usize { @@ -95,7 +102,7 @@ impl SpectrumReaderTrait for TDFSpectrumReader { let hits: Vec<(f64, u32)> = (0..self.precursor_reader.len()) .into_par_iter() .map(|index| { - let spectrum = self.read_single_raw_spectrum(index); + let spectrum = self.read_single_raw_spectrum(index).unwrap(); let precursor = self.precursor_reader.get(index).unwrap(); let precursor_mz: f64 = precursor.mz; let mut result: Vec<(f64, u32)> = vec![]; diff --git a/src/io/readers/spectrum_reader/tdf/dda.rs b/src/io/readers/spectrum_reader/tdf/dda.rs index be27104..15cb08f 100644 --- a/src/io/readers/spectrum_reader/tdf/dda.rs +++ b/src/io/readers/spectrum_reader/tdf/dda.rs @@ -9,7 +9,9 @@ use crate::{ utils::vec_utils::{argsort, group_and_sum}, }; -use super::raw_spectra::{RawSpectrum, RawSpectrumReaderTrait}; +use super::raw_spectra::{ + RawSpectrum, RawSpectrumReaderError, RawSpectrumReaderTrait, +}; #[derive(Debug)] pub struct DDARawSpectrumReader { @@ -60,7 +62,7 @@ impl DDARawSpectrumReader { } impl RawSpectrumReaderTrait for DDARawSpectrumReader { - fn get(&self, index: usize) -> RawSpectrum { + fn get(&self, index: usize) -> Result { let mut collision_energy = 0.0; let mut isolation_mz = 0.0; let mut isolation_width = 0.0; @@ -98,7 +100,7 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader { isolation_mz, isolation_width, }; - raw_spectrum + 
Ok(raw_spectrum) } fn len(&self) -> usize { diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 1f0c9d0..2934442 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -13,7 +13,9 @@ use crate::{ utils::vec_utils::group_and_sum, }; -use super::raw_spectra::{RawSpectrum, RawSpectrumReaderTrait}; +use super::raw_spectra::{ + RawSpectrum, RawSpectrumReaderError, RawSpectrumReaderTrait, +}; #[derive(Debug)] pub struct DIARawSpectrumReader { @@ -51,7 +53,7 @@ impl DIARawSpectrumReader { } impl RawSpectrumReaderTrait for DIARawSpectrumReader { - fn get(&self, index: usize) -> RawSpectrum { + fn get(&self, index: usize) -> Result { let quad_settings = &self.expanded_quadrupole_settings[index]; let collision_energy = quad_settings.collision_energy[0]; @@ -77,7 +79,7 @@ impl RawSpectrumReaderTrait for DIARawSpectrumReader { isolation_mz, isolation_width, }; - raw_spectrum + Ok(raw_spectrum) } fn len(&self) -> usize { diff --git a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs index d2239b0..ac7e441 100644 --- a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs +++ b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs @@ -115,7 +115,10 @@ impl RawSpectrumReader { Ok(reader) } - pub fn get(&self, index: usize) -> RawSpectrum { + pub fn get( + &self, + index: usize, + ) -> Result { self.raw_spectrum_reader.get(index) } @@ -125,7 +128,7 @@ impl RawSpectrumReader { } pub trait RawSpectrumReaderTrait: Sync { - fn get(&self, index: usize) -> RawSpectrum; + fn get(&self, index: usize) -> Result; fn len(&self) -> usize; } diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index e931b78..32ae31a 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -25,7 +25,10 @@ fn minitdf_reader() { .with_path(file_path) .finalize() .unwrap() - .get_all(); + .get_all() + .into_iter() + .map(|x| x.unwrap()) + 
.collect(); let expected: Vec = vec![ Spectrum { mz_values: vec![100.0, 200.002, 300.03, 400.4], @@ -79,7 +82,10 @@ fn tdf_reader_dda() { .with_path(file_path) .finalize() .unwrap() - .get_all(); + .get_all() + .into_iter() + .map(|x| x.unwrap()) + .collect(); let expected: Vec = vec![ Spectrum { mz_values: vec![199.7633445943076], @@ -159,7 +165,10 @@ fn test_dia_even() { }) .finalize() .unwrap() - .get_all(); + .get_all() + .into_iter() + .map(|x| x.unwrap()) + .collect(); println!(">>>>> EVEN {:?}", frames.len()); @@ -188,7 +197,10 @@ fn test_dia_uniform() { }) .finalize() .unwrap() - .get_all(); + .get_all() + .into_iter() + .map(|x| x.unwrap()) + .collect(); println!(">>>>> UNIFORM {} > {:?}", i, frames.len()); for f in frames.iter() { From 7f4ab57d54521e91e56cc81f8e6107e0ef0fc8d0 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 26 Jul 2024 17:41:39 +0200 Subject: [PATCH 48/69] FIX: More error propagation for spectrumreaders --- src/io/readers/spectrum_reader/minitdf.rs | 22 +++++++++++++++++----- src/io/readers/spectrum_reader/tdf.rs | 18 +++++++++++++----- src/io/readers/spectrum_reader/tdf/dda.rs | 19 ++++++++++++++----- src/io/readers/spectrum_reader/tdf/dia.rs | 19 ++++++++++++++----- 4 files changed, 58 insertions(+), 20 deletions(-) diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 03adce4..80246b3 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -62,13 +62,14 @@ impl MiniTDFSpectrumReader { }; Ok(reader) } -} -impl SpectrumReaderTrait for MiniTDFSpectrumReader { - fn get(&self, index: usize) -> Result { + fn _get( + &self, + index: usize, + ) -> Result { let mut spectrum = Spectrum::default(); spectrum.index = index; - let blob = self.blob_reader.get(index).unwrap(); + let blob = self.blob_reader.get(index)?; if !blob.is_empty() { let size: usize = blob.len(); let spectrum_data: Vec = @@ -86,7 +87,10 @@ impl SpectrumReaderTrait for 
MiniTDFSpectrumReader { intensity_values.iter().map(|&x| x as f64).collect(); spectrum.mz_values = mz_values.to_vec(); } - let precursor = self.precursor_reader.get(index).unwrap(); + let precursor = self + .precursor_reader + .get(index) + .ok_or(MiniTDFSpectrumReaderError::NoPrecursor)?; spectrum.precursor = Some(precursor); spectrum.index = precursor.index; spectrum.collision_energy = self.collision_energies[index]; @@ -100,6 +104,12 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { }; //FIX? Ok(spectrum) } +} + +impl SpectrumReaderTrait for MiniTDFSpectrumReader { + fn get(&self, index: usize) -> Result { + Ok(self._get(index)?) + } fn len(&self) -> usize { self.precursor_reader.len() @@ -124,4 +134,6 @@ pub enum MiniTDFSpectrumReaderError { IndexedTdfBlobReaderError(#[from] IndexedTdfBlobReaderError), #[error("{0}")] FileNotFound(String), + #[error("No precursor")] + NoPrecursor, } diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 9ee78c5..06cffb7 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -74,17 +74,23 @@ impl TDFSpectrumReader { ); Ok(raw_spectrum) } -} -impl SpectrumReaderTrait for TDFSpectrumReader { - fn get(&self, index: usize) -> Result { - let raw_spectrum = self.read_single_raw_spectrum(index).unwrap(); + fn _get(&self, index: usize) -> Result { + let raw_spectrum = self.read_single_raw_spectrum(index)?; let spectrum = raw_spectrum.finalize( - self.precursor_reader.get(index).unwrap(), + self.precursor_reader + .get(index) + .ok_or(TDFSpectrumReaderError::NoPrecursor)?, &self.mz_reader, ); Ok(spectrum) } +} + +impl SpectrumReaderTrait for TDFSpectrumReader { + fn get(&self, index: usize) -> Result { + Ok(self._get(index)?) 
+ } fn len(&self) -> usize { debug_assert_eq!( @@ -144,4 +150,6 @@ pub enum TDFSpectrumReaderError { RawSpectrumReaderError(#[from] RawSpectrumReaderError), #[error("{0}")] FileNotFound(String), + #[error("No precursor")] + NoPrecursor, } diff --git a/src/io/readers/spectrum_reader/tdf/dda.rs b/src/io/readers/spectrum_reader/tdf/dda.rs index 15cb08f..2434192 100644 --- a/src/io/readers/spectrum_reader/tdf/dda.rs +++ b/src/io/readers/spectrum_reader/tdf/dda.rs @@ -4,7 +4,7 @@ use crate::{ pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlError, SqlReader, }, - FrameReader, + FrameReader, FrameReaderError, }, utils::vec_utils::{argsort, group_and_sum}, }; @@ -59,10 +59,11 @@ impl DDARawSpectrumReader { .iter() .map(|&x| &self.pasef_frames[x]) } -} -impl RawSpectrumReaderTrait for DDARawSpectrumReader { - fn get(&self, index: usize) -> Result { + fn _get( + &self, + index: usize, + ) -> Result { let mut collision_energy = 0.0; let mut isolation_mz = 0.0; let mut isolation_width = 0.0; @@ -73,7 +74,7 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader { isolation_mz = pasef_frame.isolation_mz; isolation_width = pasef_frame.isolation_width; let frame_index: usize = pasef_frame.frame - 1; - let frame = self.frame_reader.get(frame_index).unwrap(); + let frame = self.frame_reader.get(frame_index)?; if frame.intensities.len() == 0 { continue; } @@ -102,6 +103,12 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader { }; Ok(raw_spectrum) } +} + +impl RawSpectrumReaderTrait for DDARawSpectrumReader { + fn get(&self, index: usize) -> Result { + Ok(self._get(index)?) 
+ } fn len(&self) -> usize { self.offsets.len() - 1 @@ -112,4 +119,6 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader { pub enum DDARawSpectrumReaderError { #[error("{0}")] SqlError(#[from] SqlError), + #[error("{0}")] + FrameReaderError(#[from] FrameReaderError), } diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 2934442..7fd9ded 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -1,7 +1,7 @@ use crate::io::readers::tdf_utils::{ expand_quadrupole_settings, expand_window_settings, }; -use crate::io::readers::FrameWindowSplittingStrategy; +use crate::io::readers::{FrameReaderError, FrameWindowSplittingStrategy}; use crate::{ io::readers::{ file_readers::sql_reader::{ @@ -50,10 +50,11 @@ impl DIARawSpectrumReader { }; Ok(reader) } -} -impl RawSpectrumReaderTrait for DIARawSpectrumReader { - fn get(&self, index: usize) -> Result { + fn _get( + &self, + index: usize, + ) -> Result { let quad_settings = &self.expanded_quadrupole_settings[index]; let collision_energy = quad_settings.collision_energy[0]; @@ -62,7 +63,7 @@ impl RawSpectrumReaderTrait for DIARawSpectrumReader { let scan_start = quad_settings.scan_starts[0]; let scan_end = quad_settings.scan_ends[0]; let frame_index = quad_settings.index - 1; - let frame = self.frame_reader.get(frame_index).unwrap(); + let frame = self.frame_reader.get(frame_index)?; let offset_start = frame.scan_offsets[scan_start] as usize; let offset_end = frame.scan_offsets[scan_end] as usize; let tof_indices = &frame.tof_indices[offset_start..offset_end]; @@ -81,6 +82,12 @@ impl RawSpectrumReaderTrait for DIARawSpectrumReader { }; Ok(raw_spectrum) } +} + +impl RawSpectrumReaderTrait for DIARawSpectrumReader { + fn get(&self, index: usize) -> Result { + Ok(self._get(index)?) 
+ } fn len(&self) -> usize { self.expanded_quadrupole_settings.len() @@ -93,4 +100,6 @@ pub enum DIARawSpectrumReaderError { SqlError(#[from] SqlError), #[error("{0}")] QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError), + #[error("{0}")] + FrameReaderError(#[from] FrameReaderError), } From ed57539fef9a3a281c1bfc8c368df01c7926f267 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 29 Jul 2024 13:17:25 +0200 Subject: [PATCH 49/69] FIX: More error propagation for SpectrumReaders --- src/io/readers/spectrum_reader.rs | 11 +++++------ src/io/readers/spectrum_reader/minitdf.rs | 9 +++++++-- tests/spectrum_readers.rs | 16 ++++------------ 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index d0c4249..f0b1d6a 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -41,14 +41,13 @@ impl SpectrumReader { self.spectrum_reader.len() } - pub fn get_all(&self) -> Vec> { - let mut spectra: Vec> = (0..self - .len()) + pub fn get_all(&self) -> Result, SpectrumReaderError> { + let mut spectra: Vec = (0..self.len()) .into_par_iter() .map(|index| self.get(index)) - .collect(); - spectra.sort_by_key(|x| x.as_ref().unwrap().precursor.unwrap().index); - spectra + .collect::, _>>()?; + spectra.sort_by_key(|x| x.precursor.unwrap_or_default().index); + Ok(spectra) } pub fn calibrate(&mut self) { diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index 80246b3..e5cc23c 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -72,8 +72,11 @@ impl MiniTDFSpectrumReader { let blob = self.blob_reader.get(index)?; if !blob.is_empty() { let size: usize = blob.len(); - let spectrum_data: Vec = - (0..size).map(|i| blob.get(i).unwrap()).collect(); + let spectrum_data: Vec = (0..size) + .map(|i| { + blob.get(i).ok_or(MiniTDFSpectrumReaderError::BlobError) + }) + 
.collect::, _>>()?; let scan_count: usize = blob.len() / 3; let tof_indices_bytes: &[u32] = &spectrum_data[..scan_count as usize * 2]; @@ -136,4 +139,6 @@ pub enum MiniTDFSpectrumReaderError { FileNotFound(String), #[error("No precursor")] NoPrecursor, + #[error("BlobError")] + BlobError, } diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 32ae31a..8d6966e 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -26,9 +26,7 @@ fn minitdf_reader() { .finalize() .unwrap() .get_all() - .into_iter() - .map(|x| x.unwrap()) - .collect(); + .unwrap(); let expected: Vec = vec![ Spectrum { mz_values: vec![100.0, 200.002, 300.03, 400.4], @@ -83,9 +81,7 @@ fn tdf_reader_dda() { .finalize() .unwrap() .get_all() - .into_iter() - .map(|x| x.unwrap()) - .collect(); + .unwrap(); let expected: Vec = vec![ Spectrum { mz_values: vec![199.7633445943076], @@ -166,9 +162,7 @@ fn test_dia_even() { .finalize() .unwrap() .get_all() - .into_iter() - .map(|x| x.unwrap()) - .collect(); + .unwrap(); println!(">>>>> EVEN {:?}", frames.len()); @@ -198,9 +192,7 @@ fn test_dia_uniform() { .finalize() .unwrap() .get_all() - .into_iter() - .map(|x| x.unwrap()) - .collect(); + .unwrap(); println!(">>>>> UNIFORM {} > {:?}", i, frames.len()); for f in frames.iter() { From bea23c250799d22fd37584ab58f5d85b3bd14849 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 29 Jul 2024 13:17:53 +0200 Subject: [PATCH 50/69] CHORE: flagged todo items for error propagation --- src/io/readers/spectrum_reader/tdf.rs | 1 + src/io/readers/tdf_utils.rs | 4 ++-- src/io/writers/mgf.rs | 1 + src/ms_data/spectra.rs | 1 + 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 06cffb7..0891b8c 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -108,6 +108,7 @@ impl SpectrumReaderTrait for TDFSpectrumReader { let hits: Vec<(f64, u32)> = 
(0..self.precursor_reader.len()) .into_par_iter() .map(|index| { + // TODO let spectrum = self.read_single_raw_spectrum(index).unwrap(); let precursor = self.precursor_reader.get(index).unwrap(); let precursor_mz: f64 = precursor.mz; diff --git a/src/io/readers/tdf_utils.rs b/src/io/readers/tdf_utils.rs index 4e59968..5ea7064 100644 --- a/src/io/readers/tdf_utils.rs +++ b/src/io/readers/tdf_utils.rs @@ -90,8 +90,8 @@ pub fn expand_window_settings( let frame = window_group.frame; let group = &quadrupole_settings[window as usize - 1]; let window_group_start = - group.scan_starts.iter().min().unwrap().clone(); - let window_group_end = group.scan_ends.iter().max().unwrap().clone(); + group.scan_starts.iter().min().unwrap().clone(); // SqlReader cannot return empty vecs, so always succeeds + let window_group_end = group.scan_ends.iter().max().unwrap().clone(); // SqlReader cannot return empty vecs, so always succeeds for (sws, swe) in scan_range_subsplit(window_group_start, window_group_end, &strategy) diff --git a/src/io/writers/mgf.rs b/src/io/writers/mgf.rs index 0ad5ef0..715a5ed 100644 --- a/src/io/writers/mgf.rs +++ b/src/io/writers/mgf.rs @@ -30,6 +30,7 @@ pub struct MGFEntry; impl MGFEntry { pub fn write_header(spectrum: &Spectrum) -> String { + // TODO let precursor = spectrum.precursor.unwrap(); let title = precursor.index; let intensity = precursor.intensity.unwrap_or(0.0); diff --git a/src/ms_data/spectra.rs b/src/ms_data/spectra.rs index 7ffb9f7..36e2c33 100644 --- a/src/ms_data/spectra.rs +++ b/src/ms_data/spectra.rs @@ -17,6 +17,7 @@ impl Spectrum { let top_n = if n == 0 { self.len() } else { n }; let mut indexed: Vec<(f64, usize)> = self.intensities.iter().cloned().zip(0..).collect(); + // TODO indexed.sort_by(|a, b| { b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal) }); From 3fbfca37fc7925fb87340160190378a9e34415a7 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 29 Jul 2024 13:24:10 +0200 Subject: [PATCH 51/69] FEAT: added calibration 
option to spectrumreaderconfig --- src/io/readers/spectrum_reader.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index f0b1d6a..53bfa1f 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -92,7 +92,10 @@ impl SpectrumReaderBuilder { )) }, }; - let reader = SpectrumReader { spectrum_reader }; + let mut reader = SpectrumReader { spectrum_reader }; + if self.config.spectrum_processing_params.calibrate { + reader.calibrate(); + } Ok(reader) } } @@ -119,6 +122,7 @@ pub struct SpectrumProcessingParams { smoothing_window: u32, centroiding_window: u32, calibration_tolerance: f64, + calibrate: bool, } impl Default for SpectrumProcessingParams { @@ -127,6 +131,7 @@ impl Default for SpectrumProcessingParams { smoothing_window: 1, centroiding_window: 1, calibration_tolerance: 0.1, + calibrate: false, } } } From b4dc3bacdc4e21760dd2bfd4dc111f6c2bc9fb21 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 29 Jul 2024 13:24:31 +0200 Subject: [PATCH 52/69] FEAT: improved error options --- src/errors.rs | 45 ++++++++------------------ src/io/readers/quad_settings_reader.rs | 4 --- 2 files changed, 14 insertions(+), 35 deletions(-) diff --git a/src/errors.rs b/src/errors.rs index 82ffef8..f8c713b 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,36 +1,19 @@ -#[derive(thiserror::Error, Debug)] -pub enum FileFormatError { - #[error("DirectoryDoesNotExist")] - DirectoryDoesNotExist, - #[error("NoParentWithBrukerExtension")] - NoParentWithBrukerExtension, - #[error("BinaryFilesAreMissing")] - BinaryFilesAreMissing, - #[error("MetadataFilesAreMissing")] - MetadataFilesAreMissing, -} +use crate::io::readers::{ + FrameReaderError, MetadataReaderError, PrecursorReaderError, + QuadrupoleSettingsReaderError, SpectrumReaderError, +}; /// An error that is produced by timsrust (uses [thiserror]). 
#[derive(thiserror::Error, Debug)] pub enum Error { - /// An error to indicate a path is not a Bruker File Format. - #[error("FileFormatError: {0}")] - FileFormatError(#[from] FileFormatError), - // #[error("SqlError: {0}")] - // SqlError(#[from] SqlError), - // #[error("BinError: {0}")] - // BinError(#[from] TdfBlobError), -} - -#[macro_export] -macro_rules! propagated_error_enum { - ($name:ident, $($variant:ident),+) => { - #[derive(Debug, thiserror::Error)] - pub enum $name { - $( - #[error(transparent)] - $variant(#[from] $variant), - )+ - } - }; + #[error("{0}")] + FrameReaderError(#[from] FrameReaderError), + #[error("{0}")] + SpectrumReaderError(#[from] SpectrumReaderError), + #[error("{0}")] + MetadataReaderError(#[from] MetadataReaderError), + #[error("{0}")] + PrecursorReaderError(#[from] PrecursorReaderError), + #[error("{0}")] + QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError), } diff --git a/src/io/readers/quad_settings_reader.rs b/src/io/readers/quad_settings_reader.rs index 8a62d3c..8f71932 100644 --- a/src/io/readers/quad_settings_reader.rs +++ b/src/io/readers/quad_settings_reader.rs @@ -86,10 +86,6 @@ impl QuadrupoleSettingsReader { #[derive(Debug, thiserror::Error)] pub enum QuadrupoleSettingsReaderError { - // #[error("{0}")] - // MiniTDFPrecursorReaderError(#[from] MiniTDFPrecursorReaderError), - // #[error("{0}")] - // TDFPrecursorReaderError(#[from] TDFPrecursorReaderError), #[error("{0}")] SqlError(#[from] SqlError), } From d9e98c12dfa73925971cfda5e58696c80703ba89 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 29 Jul 2024 16:52:29 +0200 Subject: [PATCH 53/69] FEAT: moved tdf_utils to quadrupole_reader and cleaned up --- src/io/readers.rs | 2 - src/io/readers/frame_reader.rs | 12 +- src/io/readers/precursor_reader.rs | 2 +- src/io/readers/precursor_reader/tdf.rs | 2 +- src/io/readers/precursor_reader/tdf/dia.rs | 32 +-- src/io/readers/quad_settings_reader.rs | 203 +++++++++++++++++- 
src/io/readers/spectrum_reader.rs | 15 +- src/io/readers/spectrum_reader/tdf.rs | 4 +- src/io/readers/spectrum_reader/tdf/dia.rs | 32 +-- .../spectrum_reader/tdf/raw_spectra.rs | 16 +- src/io/readers/tdf_utils.rs | 173 --------------- src/ms_data/quadrupole.rs | 6 + 12 files changed, 244 insertions(+), 255 deletions(-) delete mode 100644 src/io/readers/tdf_utils.rs diff --git a/src/io/readers.rs b/src/io/readers.rs index 7e350a5..03d5248 100644 --- a/src/io/readers.rs +++ b/src/io/readers.rs @@ -4,11 +4,9 @@ mod metadata_reader; mod precursor_reader; mod quad_settings_reader; mod spectrum_reader; -mod tdf_utils; pub use frame_reader::*; pub use metadata_reader::*; pub use precursor_reader::*; pub use quad_settings_reader::*; pub use spectrum_reader::*; -pub use tdf_utils::QuadWindowExpansionStrategy; diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index a5d09a0..af9b713 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -19,8 +19,7 @@ use super::{ }, tdf_blob_reader::{TdfBlob, TdfBlobReader, TdfBlobReaderError}, }, - FrameWindowSplittingStrategy, QuadrupoleSettingsReader, - QuadrupoleSettingsReaderError, + QuadrupoleSettingsReader, QuadrupoleSettingsReaderError, }; #[derive(Debug)] @@ -31,7 +30,6 @@ pub struct FrameReader { acquisition: AcquisitionType, window_groups: Vec, quadrupole_settings: Vec>, - pub splitting_strategy: FrameWindowSplittingStrategy, } impl FrameReader { @@ -76,18 +74,10 @@ impl FrameReader { .into_iter() .map(|x| Arc::new(x)) .collect(), - splitting_strategy: FrameWindowSplittingStrategy::default(), }; Ok(reader) } - pub fn set_splitting_strategy( - &mut self, - config: &FrameWindowSplittingStrategy, - ) { - self.splitting_strategy = *config; - } - pub fn parallel_filter<'a, F: Fn(&SqlFrame) -> bool + Sync + Send + 'a>( &'a self, predicate: F, diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs index c91d615..e4750c5 100644 --- 
a/src/io/readers/precursor_reader.rs +++ b/src/io/readers/precursor_reader.rs @@ -9,7 +9,7 @@ use tdf::{TDFPrecursorReader, TDFPrecursorReaderError}; use crate::ms_data::Precursor; -use super::FrameWindowSplittingStrategy; +use super::quad_settings_reader::FrameWindowSplittingStrategy; pub struct PrecursorReader { precursor_reader: Box, diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs index 2f92a78..34efef1 100644 --- a/src/io/readers/precursor_reader/tdf.rs +++ b/src/io/readers/precursor_reader/tdf.rs @@ -9,7 +9,7 @@ use dia::{DIATDFPrecursorReader, DIATDFPrecursorReaderError}; use crate::{ io::readers::{ file_readers::sql_reader::{SqlError, SqlReader}, - FrameWindowSplittingStrategy, + quad_settings_reader::FrameWindowSplittingStrategy, }, ms_data::{AcquisitionType, Precursor}, }; diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index c7b23df..9eacaa7 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -1,17 +1,12 @@ use std::path::Path; -use crate::io::readers::tdf_utils::{ - expand_quadrupole_settings, expand_window_settings, -}; -use crate::io::readers::FrameWindowSplittingStrategy; +use crate::io::readers::quad_settings_reader::FrameWindowSplittingStrategy; use crate::{ domain_converters::{ ConvertableDomain, Frame2RtConverter, Scan2ImConverter, }, io::readers::{ - file_readers::sql_reader::{ - frame_groups::SqlWindowGroup, ReadableSqlTable, SqlError, SqlReader, - }, + file_readers::sql_reader::{SqlError, SqlReader}, MetadataReader, MetadataReaderError, QuadrupoleSettingsReader, QuadrupoleSettingsReaderError, }, @@ -30,29 +25,18 @@ pub struct DIATDFPrecursorReader { impl DIATDFPrecursorReader { pub fn new( path: impl AsRef, - splitting_strat: FrameWindowSplittingStrategy, + splitting_strategy: FrameWindowSplittingStrategy, ) -> Result { let sql_path = path.as_ref(); let tdf_sql_reader = 
SqlReader::open(sql_path)?; let metadata = MetadataReader::new(&path)?; let rt_converter: Frame2RtConverter = metadata.rt_converter; let im_converter: Scan2ImConverter = metadata.im_converter; - let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?; - let quadrupole_settings = - QuadrupoleSettingsReader::new(tdf_sql_reader.get_path())?; - let expanded_quadrupole_settings = match splitting_strat { - FrameWindowSplittingStrategy::Quadrupole(x) => { - expand_quadrupole_settings( - &window_groups, - &quadrupole_settings, - &x, - ) - }, - FrameWindowSplittingStrategy::Window(x) => { - expand_window_settings(&window_groups, &quadrupole_settings, &x) - }, - }; - + let expanded_quadrupole_settings = + QuadrupoleSettingsReader::from_splitting( + tdf_sql_reader.get_path(), + splitting_strategy, + )?; let reader = Self { expanded_quadrupole_settings, rt_converter, diff --git a/src/io/readers/quad_settings_reader.rs b/src/io/readers/quad_settings_reader.rs index 8f71932..6f5f398 100644 --- a/src/io/readers/quad_settings_reader.rs +++ b/src/io/readers/quad_settings_reader.rs @@ -3,7 +3,8 @@ use std::path::Path; use crate::{ms_data::QuadrupoleSettings, utils::vec_utils::argsort}; use super::file_readers::sql_reader::{ - quad_settings::SqlQuadSettings, ReadableSqlTable, SqlError, SqlReader, + frame_groups::SqlWindowGroup, quad_settings::SqlQuadSettings, + ReadableSqlTable, SqlError, SqlReader, }; pub struct QuadrupoleSettingsReader { @@ -17,6 +18,12 @@ impl QuadrupoleSettingsReader { ) -> Result, QuadrupoleSettingsReaderError> { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(&sql_path)?; + Self::from_sql_settings(&tdf_sql_reader) + } + + pub fn from_sql_settings( + tdf_sql_reader: &SqlReader, + ) -> Result, QuadrupoleSettingsReaderError> { let sql_quadrupole_settings = SqlQuadSettings::from_sql_reader(&tdf_sql_reader)?; let window_group_count = sql_quadrupole_settings @@ -40,6 +47,29 @@ impl QuadrupoleSettingsReader { 
Ok(quad_reader.quadrupole_settings) } + pub fn from_splitting( + path: impl AsRef, + splitting_strat: FrameWindowSplittingStrategy, + ) -> Result, QuadrupoleSettingsReaderError> { + let sql_path = path.as_ref(); + let tdf_sql_reader = SqlReader::open(&sql_path)?; + let quadrupole_settings = Self::from_sql_settings(&tdf_sql_reader)?; + let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?; + let expanded_quadrupole_settings = match splitting_strat { + FrameWindowSplittingStrategy::Quadrupole(x) => { + expand_quadrupole_settings( + &window_groups, + &quadrupole_settings, + &x, + ) + }, + FrameWindowSplittingStrategy::Window(x) => { + expand_window_settings(&window_groups, &quadrupole_settings, &x) + }, + }; + Ok(expanded_quadrupole_settings) + } + fn update_from_sql_quadrupole_settings(&mut self) { for window_group in self.sql_quadrupole_settings.iter() { let group = window_group.window_group - 1; @@ -89,3 +119,174 @@ pub enum QuadrupoleSettingsReaderError { #[error("{0}")] SqlError(#[from] SqlError), } + +type SpanStep = (usize, usize); + +/// Strategy for expanding quadrupole settings +/// +/// This enum is used to determine how to expand quadrupole settings +/// when reading in DIA data. And exporting spectra (not frames RN). +/// +/// # Variants +/// +/// For example if we have a window with scan start 50 and end 500 +/// +/// * `None` - Do not expand quadrupole settings; use the original settings +/// * `Even(usize)` - Split the quadrupole settings into `usize` evenly spaced +/// subwindows; e.g. if `usize` is 2, the window will be split into 2 subwindows +/// of equal width. +/// * `Uniform(SpanStep)` - Split the quadrupole settings into subwindows of +/// width `SpanStep.0` and step `SpanStep.1`; e.g. if `SpanStep` is (100, 50), +/// the window will be split into subwindows of width 100 and step 50 between their +/// scan start and end. 
+/// +#[derive(Debug, Copy, Clone)] +pub enum QuadWindowExpansionStrategy { + None, + Even(usize), + Uniform(SpanStep), +} + +#[derive(Debug, Clone, Copy)] +pub enum FrameWindowSplittingStrategy { + Quadrupole(QuadWindowExpansionStrategy), + Window(QuadWindowExpansionStrategy), +} + +impl Default for FrameWindowSplittingStrategy { + fn default() -> Self { + Self::Quadrupole(QuadWindowExpansionStrategy::Even(1)) + } +} + +fn scan_range_subsplit( + start: usize, + end: usize, + strategy: &QuadWindowExpansionStrategy, +) -> Vec<(usize, usize)> { + let out = match strategy { + QuadWindowExpansionStrategy::None => { + vec![(start, end)] + }, + QuadWindowExpansionStrategy::Even(num_splits) => { + let sub_subwindow_width = (end - start) / (num_splits + 1); + let mut out = Vec::new(); + for sub_subwindow in 0..num_splits.clone() { + let sub_subwindow_scan_start = + start + (sub_subwindow_width * sub_subwindow); + let sub_subwindow_scan_end = + start + (sub_subwindow_width * (sub_subwindow + 2)); + + out.push((sub_subwindow_scan_start, sub_subwindow_scan_end)) + } + out + }, + QuadWindowExpansionStrategy::Uniform((span, step)) => { + let mut curr_start = start.clone(); + let mut curr_end = start + span; + let mut out = Vec::new(); + while curr_end < end { + out.push((curr_start, curr_end)); + curr_start += step; + curr_end += step; + } + if curr_start < end { + out.push((curr_start, end)); + } + out + }, + }; + + debug_assert!( + out.iter().all(|(s, e)| s < e), + "Invalid scan range: {:?}", + out + ); + debug_assert!( + out.iter().all(|(s, e)| *s >= start && *e <= end), + "Invalid scan range: {:?}", + out + ); + out +} + +fn expand_window_settings( + window_groups: &[SqlWindowGroup], + quadrupole_settings: &[QuadrupoleSettings], + strategy: &QuadWindowExpansionStrategy, +) -> Vec { + let mut expanded_quadrupole_settings: Vec = vec![]; + for window_group in window_groups { + let window = window_group.window_group; + let frame = window_group.frame; + let group = 
&quadrupole_settings[window as usize - 1]; + let window_group_start = + group.scan_starts.iter().min().unwrap().clone(); // SqlReader cannot return empty vecs, so always succeeds + let window_group_end = group.scan_ends.iter().max().unwrap().clone(); // SqlReader cannot return empty vecs, so always succeeds + for (sws, swe) in + scan_range_subsplit(window_group_start, window_group_end, &strategy) + { + let mut mz_min = std::f64::MAX; + let mut mz_max = std::f64::MIN; + let mut nce_sum = 0.0; + let mut total_scan_width = 0.0; + for i in 0..group.len() { + let gss = group.scan_starts[i]; + let gse = group.scan_ends[i]; + if (swe <= gse) || (gss <= sws) { + continue; + } + let half_isolation_width = group.isolation_width[i] / 2.0; + let isolation_mz = group.isolation_mz[i]; + mz_min = mz_min.min(isolation_mz - half_isolation_width); + mz_max = mz_max.max(isolation_mz + half_isolation_width); + let scan_width = (gse.min(swe) - gss.max(sws)) as f64; + nce_sum += group.collision_energy[i] * scan_width; + total_scan_width += scan_width + } + let sub_quad_settings = QuadrupoleSettings { + index: frame, + scan_starts: vec![sws], + scan_ends: vec![swe], + isolation_mz: vec![(mz_min + mz_max) / 2.0], + isolation_width: vec![mz_min - mz_max], + collision_energy: vec![nce_sum / total_scan_width], + }; + expanded_quadrupole_settings.push(sub_quad_settings) + } + } + expanded_quadrupole_settings +} + +fn expand_quadrupole_settings( + window_groups: &[SqlWindowGroup], + quadrupole_settings: &[QuadrupoleSettings], + strategy: &QuadWindowExpansionStrategy, +) -> Vec { + let mut expanded_quadrupole_settings: Vec = vec![]; + for window_group in window_groups { + let window = window_group.window_group; + let frame = window_group.frame; + let group = &quadrupole_settings[window as usize - 1]; + for sub_window in 0..group.isolation_mz.len() { + let subwindow_scan_start = group.scan_starts[sub_window]; + let subwindow_scan_end = group.scan_ends[sub_window]; + for (sws, swe) in 
scan_range_subsplit( + subwindow_scan_start, + subwindow_scan_end, + &strategy, + ) { + let sub_quad_settings = QuadrupoleSettings { + index: frame, + scan_starts: vec![sws], + scan_ends: vec![swe], + isolation_mz: vec![group.isolation_mz[sub_window]], + isolation_width: vec![group.isolation_width[sub_window]], + collision_energy: vec![group.collision_energy[sub_window]], + }; + expanded_quadrupole_settings.push(sub_quad_settings) + } + } + } + expanded_quadrupole_settings +} diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index 53bfa1f..ef5f070 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -7,9 +7,10 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::path::{Path, PathBuf}; use tdf::{TDFSpectrumReader, TDFSpectrumReaderError}; -use crate::io::readers::tdf_utils::QuadWindowExpansionStrategy; use crate::ms_data::Spectrum; +use super::FrameWindowSplittingStrategy; + pub struct SpectrumReader { spectrum_reader: Box, } @@ -136,18 +137,6 @@ impl Default for SpectrumProcessingParams { } } -#[derive(Debug, Clone, Copy)] -pub enum FrameWindowSplittingStrategy { - Quadrupole(QuadWindowExpansionStrategy), - Window(QuadWindowExpansionStrategy), -} - -impl Default for FrameWindowSplittingStrategy { - fn default() -> Self { - Self::Quadrupole(QuadWindowExpansionStrategy::Even(1)) - } -} - #[derive(Debug, Default, Clone)] pub struct SpectrumReaderConfig { pub spectrum_processing_params: SpectrumProcessingParams, diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 0891b8c..7f29748 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -33,8 +33,7 @@ impl TDFSpectrumReader { path_name: impl AsRef, config: SpectrumReaderConfig, ) -> Result { - let mut frame_reader: FrameReader = FrameReader::new(&path_name)?; - frame_reader.set_splitting_strategy(&config.frame_splitting_params); + let frame_reader: 
FrameReader = FrameReader::new(&path_name)?; let sql_path = find_extension(&path_name, "analysis.tdf").ok_or( TDFSpectrumReaderError::FileNotFound("analysis.tdf".to_string()), )?; @@ -50,6 +49,7 @@ impl TDFSpectrumReader { &tdf_sql_reader, frame_reader, acquisition_type, + config.frame_splitting_params, )?; let reader = Self { path: path_name.as_ref().to_path_buf(), diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 7fd9ded..3313acf 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -1,12 +1,8 @@ -use crate::io::readers::tdf_utils::{ - expand_quadrupole_settings, expand_window_settings, -}; -use crate::io::readers::{FrameReaderError, FrameWindowSplittingStrategy}; +use crate::io::readers::quad_settings_reader::FrameWindowSplittingStrategy; +use crate::io::readers::FrameReaderError; use crate::{ io::readers::{ - file_readers::sql_reader::{ - frame_groups::SqlWindowGroup, ReadableSqlTable, SqlError, SqlReader, - }, + file_readers::sql_reader::{SqlError, SqlReader}, FrameReader, QuadrupoleSettingsReader, QuadrupoleSettingsReaderError, }, ms_data::QuadrupoleSettings, @@ -27,23 +23,13 @@ impl DIARawSpectrumReader { pub fn new( tdf_sql_reader: &SqlReader, frame_reader: FrameReader, + splitting_strategy: FrameWindowSplittingStrategy, ) -> Result { - let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?; - let quadrupole_settings = - QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path())?; - let expanded_quadrupole_settings = match frame_reader.splitting_strategy - { - FrameWindowSplittingStrategy::Quadrupole(x) => { - expand_quadrupole_settings( - &window_groups, - &quadrupole_settings, - &x, - ) - }, - FrameWindowSplittingStrategy::Window(x) => { - expand_window_settings(&window_groups, &quadrupole_settings, &x) - }, - }; + let expanded_quadrupole_settings = + QuadrupoleSettingsReader::from_splitting( + tdf_sql_reader.get_path(), + 
splitting_strategy, + )?; let reader = Self { expanded_quadrupole_settings, frame_reader, diff --git a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs index ac7e441..95b4a46 100644 --- a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs +++ b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs @@ -2,7 +2,10 @@ use core::fmt; use crate::{ domain_converters::{ConvertableDomain, Tof2MzConverter}, - io::readers::{file_readers::sql_reader::SqlReader, FrameReader}, + io::readers::{ + file_readers::sql_reader::SqlReader, + quad_settings_reader::FrameWindowSplittingStrategy, FrameReader, + }, ms_data::{AcquisitionType, Precursor, Spectrum}, utils::vec_utils::{filter_with_mask, find_sparse_local_maxima_mask}, }; @@ -94,15 +97,20 @@ impl RawSpectrumReader { tdf_sql_reader: &SqlReader, frame_reader: FrameReader, acquisition_type: AcquisitionType, + splitting_strategy: FrameWindowSplittingStrategy, ) -> Result { let raw_spectrum_reader: Box = match acquisition_type { AcquisitionType::DDAPASEF => Box::new( DDARawSpectrumReader::new(tdf_sql_reader, frame_reader)?, ), - AcquisitionType::DIAPASEF => Box::new( - DIARawSpectrumReader::new(tdf_sql_reader, frame_reader)?, - ), + AcquisitionType::DIAPASEF => { + Box::new(DIARawSpectrumReader::new( + tdf_sql_reader, + frame_reader, + splitting_strategy, + )?) + }, acquisition_type => { return Err(RawSpectrumReaderError::UnsupportedAcquisition( format!("{:?}", acquisition_type), diff --git a/src/io/readers/tdf_utils.rs b/src/io/readers/tdf_utils.rs deleted file mode 100644 index 5ea7064..0000000 --- a/src/io/readers/tdf_utils.rs +++ /dev/null @@ -1,173 +0,0 @@ -use crate::io::readers::file_readers::sql_reader::frame_groups::SqlWindowGroup; -use crate::ms_data::QuadrupoleSettings; - -type SpanStep = (usize, usize); - -/// Strategy for expanding quadrupole settings -/// -/// This enum is used to determine how to expand quadrupole settings -/// when reading in DIA data. 
And exporting spectra (not frames RN). -/// -/// # Variants -/// -/// For example if we have a window with scan start 50 and end 500 -/// -/// * `None` - Do not expand quadrupole settings; use the original settings -/// * `Even(usize)` - Split the quadrupole settings into `usize` evenly spaced -/// subwindows; e.g. if `usize` is 2, the window will be split into 2 subwindows -/// of equal width. -/// * `Uniform(SpanStep)` - Split the quadrupole settings into subwindows of -/// width `SpanStep.0` and step `SpanStep.1`; e.g. if `SpanStep` is (100, 50), -/// the window will be split into subwindows of width 100 and step 50 between their -/// scan start and end. -/// -#[derive(Debug, Copy, Clone)] -pub enum QuadWindowExpansionStrategy { - None, - Even(usize), - Uniform(SpanStep), -} - -fn scan_range_subsplit( - start: usize, - end: usize, - strategy: &QuadWindowExpansionStrategy, -) -> Vec<(usize, usize)> { - let out = match strategy { - QuadWindowExpansionStrategy::None => { - vec![(start, end)] - }, - QuadWindowExpansionStrategy::Even(num_splits) => { - let sub_subwindow_width = (end - start) / (num_splits + 1); - let mut out = Vec::new(); - for sub_subwindow in 0..num_splits.clone() { - let sub_subwindow_scan_start = - start + (sub_subwindow_width * sub_subwindow); - let sub_subwindow_scan_end = - start + (sub_subwindow_width * (sub_subwindow + 2)); - - out.push((sub_subwindow_scan_start, sub_subwindow_scan_end)) - } - out - }, - QuadWindowExpansionStrategy::Uniform((span, step)) => { - let mut curr_start = start.clone(); - let mut curr_end = start + span; - let mut out = Vec::new(); - while curr_end < end { - out.push((curr_start, curr_end)); - curr_start += step; - curr_end += step; - } - if curr_start < end { - out.push((curr_start, end)); - } - out - }, - }; - - debug_assert!( - out.iter().all(|(s, e)| s < e), - "Invalid scan range: {:?}", - out - ); - debug_assert!( - out.iter().all(|(s, e)| *s >= start && *e <= end), - "Invalid scan range: {:?}", - out - ); - 
out -} - -pub fn expand_window_settings( - window_groups: &[SqlWindowGroup], - quadrupole_settings: &[QuadrupoleSettings], - strategy: &QuadWindowExpansionStrategy, -) -> Vec { - let mut expanded_quadrupole_settings: Vec = vec![]; - for window_group in window_groups { - let window = window_group.window_group; - let frame = window_group.frame; - let group = &quadrupole_settings[window as usize - 1]; - let window_group_start = - group.scan_starts.iter().min().unwrap().clone(); // SqlReader cannot return empty vecs, so always succeeds - let window_group_end = group.scan_ends.iter().max().unwrap().clone(); // SqlReader cannot return empty vecs, so always succeeds - - for (sws, swe) in - scan_range_subsplit(window_group_start, window_group_end, &strategy) - { - let mut mz_sum = 0.0; - let mut mz_min = std::f64::MAX; - let mut mz_max = std::f64::MIN; - let mut nce_sum = 0.0; - let mut num_added = 0; - - for i in 0..group.isolation_mz.len() { - // Should I be checking here for overlap instead of full containment? 
- if sws <= group.scan_starts[i] && swe >= group.scan_ends[i] { - mz_sum += group.isolation_mz[i]; - mz_min = mz_min.min( - group.isolation_mz[i] - - (group.isolation_width[i] / 2.0), - ); - mz_max = mz_max.max( - group.isolation_mz[i] - + (group.isolation_width[i] / 2.0), - ); - nce_sum += group.collision_energy[i]; - num_added += 1; - } - } - - let mz_mean = mz_sum / num_added as f64; - let mean_nce = nce_sum / num_added as f64; - - let sub_quad_settings = QuadrupoleSettings { - index: frame, - scan_starts: vec![sws], - scan_ends: vec![swe], - isolation_mz: vec![mz_mean], - isolation_width: vec![mz_min - mz_max], - collision_energy: vec![mean_nce], - }; - expanded_quadrupole_settings.push(sub_quad_settings) - } - } - expanded_quadrupole_settings -} - -pub fn expand_quadrupole_settings( - window_groups: &[SqlWindowGroup], - quadrupole_settings: &[QuadrupoleSettings], - strategy: &QuadWindowExpansionStrategy, -) -> Vec { - // Read the 'NUM_SUB_SUB_SPLITS' from env variables ... default to 1 - // (for now) - - let mut expanded_quadrupole_settings: Vec = vec![]; - for window_group in window_groups { - let window = window_group.window_group; - let frame = window_group.frame; - let group = &quadrupole_settings[window as usize - 1]; - for sub_window in 0..group.isolation_mz.len() { - let subwindow_scan_start = group.scan_starts[sub_window]; - let subwindow_scan_end = group.scan_ends[sub_window]; - for (sws, swe) in scan_range_subsplit( - subwindow_scan_start, - subwindow_scan_end, - &strategy, - ) { - let sub_quad_settings = QuadrupoleSettings { - index: frame, - scan_starts: vec![sws], - scan_ends: vec![swe], - isolation_mz: vec![group.isolation_mz[sub_window]], - isolation_width: vec![group.isolation_width[sub_window]], - collision_energy: vec![group.collision_energy[sub_window]], - }; - expanded_quadrupole_settings.push(sub_quad_settings) - } - } - } - expanded_quadrupole_settings -} diff --git a/src/ms_data/quadrupole.rs b/src/ms_data/quadrupole.rs index 
b9d2185..0d96f61 100644 --- a/src/ms_data/quadrupole.rs +++ b/src/ms_data/quadrupole.rs @@ -8,3 +8,9 @@ pub struct QuadrupoleSettings { pub isolation_width: Vec, pub collision_energy: Vec, } + +impl QuadrupoleSettings { + pub fn len(&self) -> usize { + self.isolation_mz.len() + } +} From 6bcd6328c072a72d7c6c31ac2207585862dfcc32 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 31 Jul 2024 10:21:40 +0200 Subject: [PATCH 54/69] FEAT: reverted spectrum reader get all to vec of result instead of result of vec --- src/io/readers/spectrum_reader.rs | 17 ++++++++---- tests/spectrum_readers.rs | 46 ++++++++++++------------------- 2 files changed, 29 insertions(+), 34 deletions(-) diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index ef5f070..7f905cc 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -42,13 +42,20 @@ impl SpectrumReader { self.spectrum_reader.len() } - pub fn get_all(&self) -> Result, SpectrumReaderError> { - let mut spectra: Vec = (0..self.len()) + pub fn get_all(&self) -> Vec> { + let mut spectra: Vec> = (0..self + .len()) .into_par_iter() .map(|index| self.get(index)) - .collect::, _>>()?; - spectra.sort_by_key(|x| x.precursor.unwrap_or_default().index); - Ok(spectra) + .collect(); + spectra.sort_by_key(|x| match x { + Ok(spectrum) => match spectrum.precursor { + Some(precursor) => precursor.index, + None => spectrum.index, + }, + Err(_) => 0, + }); + spectra } pub fn calibrate(&mut self) { diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 8d6966e..8f6f198 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -21,12 +21,11 @@ fn minitdf_reader() { .to_str() .unwrap() .to_string(); - let spectra: Vec = SpectrumReader::build() + let spectra: Vec> = SpectrumReader::build() .with_path(file_path) .finalize() .unwrap() - .get_all() - .unwrap(); + .get_all(); let expected: Vec = vec![ Spectrum { mz_values: vec![100.0, 200.002, 300.03, 
400.4], @@ -63,8 +62,8 @@ fn minitdf_reader() { isolation_width: 3.0, }, ]; - for i in 0..spectra.len() { - assert_eq!(spectra[i], expected[i]); + for (i, spectrum) in spectra.into_iter().enumerate() { + assert_eq!(spectrum.unwrap(), expected[i]); } } @@ -76,12 +75,11 @@ fn tdf_reader_dda() { .to_str() .unwrap() .to_string(); - let spectra: Vec = SpectrumReader::build() + let spectra: Vec> = SpectrumReader::build() .with_path(file_path) .finalize() .unwrap() - .get_all() - .unwrap(); + .get_all(); let expected: Vec = vec![ Spectrum { mz_values: vec![199.7633445943076], @@ -135,8 +133,8 @@ fn tdf_reader_dda() { isolation_width: 2.0, }, ]; - for i in 0..spectra.len() { - assert_eq!(spectra[i], expected[i]); + for (i, spectrum) in spectra.into_iter().enumerate() { + assert_eq!(spectrum.unwrap(), expected[i]); } } @@ -148,9 +146,8 @@ fn test_dia_even() { .to_str() .unwrap() .to_string(); - for i in 1..3 { - let frames: Vec = SpectrumReader::build() + let spectra = SpectrumReader::build() .with_path(&file_path) .with_config(SpectrumReaderConfig { frame_splitting_params: @@ -161,13 +158,9 @@ fn test_dia_even() { }) .finalize() .unwrap() - .get_all() - .unwrap(); - - println!(">>>>> EVEN {:?}", frames.len()); - + .get_all(); // 4 frames, 2 windows in each, i splits/window - assert_eq!(frames.len(), 4 * 2 * i); + assert_eq!(spectra.len(), 4 * 2 * i); } } @@ -179,9 +172,8 @@ fn test_dia_uniform() { .to_str() .unwrap() .to_string(); - for i in [100, 200, 300] { - let frames: Vec = SpectrumReader::build() + let spectra = SpectrumReader::build() .with_path(&file_path) .with_config(SpectrumReaderConfig { frame_splitting_params: FrameWindowSplittingStrategy::Window( @@ -191,18 +183,14 @@ fn test_dia_uniform() { }) .finalize() .unwrap() - .get_all() - .unwrap(); - - println!(">>>>> UNIFORM {} > {:?}", i, frames.len()); - for f in frames.iter() { - println!("{:?}", f.precursor); + .get_all(); + for f in spectra.iter() { + println!("{:?}", f.as_ref().unwrap().precursor); } - // Not 
all frames have scan windows from 0 to 709 ... so ... I need to think // on how to express this in the test // assert_eq!(frames.len(), 4 * ((709 / i) + 1)); - assert!(frames.len() > (709 / i)); - assert!(frames.len() < 3 * ((709 / i) + 1)); + assert!(spectra.len() > (709 / i)); + assert!(spectra.len() < 3 * ((709 / i) + 1)); } } From 23f43cb7b6d8c229d2ba526be29d801946d49c98 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 31 Jul 2024 10:22:07 +0200 Subject: [PATCH 55/69] DOCS: readme update --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 1283485..9b01172 100644 --- a/README.md +++ b/README.md @@ -45,10 +45,9 @@ Two file formats are supported: The [timsrust_pyo3](https://github.com/jspaezp/timsrust_pyo3) package is an example of how the performance of TimsRust can be utilized in Python ## Planned changes for future versions - +TODO * Improve docs * Improve tests * Pase CompressionType1 -* Error propagation for SpectrumReader(Trait).get * Make Path of TimsTOF data into special type * ... 
From 266fa86b206a2998e61e6c0acce23953864dbc9e Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 1 Aug 2024 11:22:25 +0200 Subject: [PATCH 56/69] FEAT: provided actual frames to FrameReader, rather than SqlFrames --- src/io/readers/frame_reader.rs | 88 ++++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 30 deletions(-) diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index af9b713..83b6efd 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -26,10 +26,9 @@ use super::{ pub struct FrameReader { path: PathBuf, tdf_bin_reader: TdfBlobReader, - sql_frames: Vec, + frames: Vec, acquisition: AcquisitionType, - window_groups: Vec, - quadrupole_settings: Vec>, + offsets: Vec, } impl FrameReader { @@ -50,6 +49,7 @@ impl FrameReader { } else { AcquisitionType::Unknown }; + // TODO should be refactored out to quadrupole reader let mut window_groups = vec![0; sql_frames.len()]; let quadrupole_settings; if acquisition == AcquisitionType::DIAPASEF { @@ -64,36 +64,48 @@ impl FrameReader { } else { quadrupole_settings = vec![]; } + let quadrupole_settings = quadrupole_settings + .into_iter() + .map(|x| Arc::new(x)) + .collect(); + let frames = (0..sql_frames.len()) + .into_par_iter() + .map(|index| { + get_frame_without_data( + index, + &sql_frames, + acquisition, + &window_groups, + &quadrupole_settings, + ) + }) + .collect(); + let offsets = sql_frames.iter().map(|x| x.binary_offset).collect(); let reader = Self { path: path.as_ref().to_path_buf(), tdf_bin_reader, - sql_frames, + frames, acquisition, - window_groups, - quadrupole_settings: quadrupole_settings - .into_iter() - .map(|x| Arc::new(x)) - .collect(), + offsets, }; Ok(reader) } - pub fn parallel_filter<'a, F: Fn(&SqlFrame) -> bool + Sync + Send + 'a>( + pub fn parallel_filter<'a, F: Fn(&Frame) -> bool + Sync + Send + 'a>( &'a self, predicate: F, ) -> impl ParallelIterator> + 'a { (0..self.len()) .into_par_iter() - .filter(move |x| 
predicate(&self.sql_frames[*x])) + .filter(move |x| predicate(&self.frames[*x])) .map(move |x| self.get(x)) } pub fn get(&self, index: usize) -> Result { - let mut frame: Frame = Frame::default(); - let sql_frame = &self.sql_frames[index]; - frame.index = sql_frame.id; - let blob = self.tdf_bin_reader.get(sql_frame.binary_offset)?; + let mut frame = self.frames[index].clone(); + let offset = self.offsets[index]; + let blob = self.tdf_bin_reader.get(offset)?; let scan_count: usize = blob.get(0).ok_or(FrameReaderError::CorruptFrame)? as usize; let peak_count: usize = (blob.len() - scan_count) / 2; @@ -105,18 +117,6 @@ impl FrameReader { &blob, &frame.scan_offsets, )?; - frame.ms_level = MSLevel::read_from_msms_type(sql_frame.msms_type); - frame.rt = sql_frame.rt; - frame.acquisition_type = self.acquisition; - frame.intensity_correction_factor = 1.0 / sql_frame.accumulation_time; - if (self.acquisition == AcquisitionType::DIAPASEF) - & (frame.ms_level == MSLevel::MS2) - { - let window_group = self.window_groups[index]; - frame.window_group = window_group; - frame.quadrupole_settings = - self.quadrupole_settings[window_group as usize - 1].clone(); - } Ok(frame) } @@ -125,11 +125,13 @@ impl FrameReader { } pub fn get_all_ms1(&self) -> Vec> { - self.parallel_filter(|x| x.msms_type == 0).collect() + self.parallel_filter(|x| x.ms_level == MSLevel::MS1) + .collect() } pub fn get_all_ms2(&self) -> Vec> { - self.parallel_filter(|x| x.msms_type != 0).collect() + self.parallel_filter(|x| x.ms_level == MSLevel::MS2) + .collect() } pub fn get_acquisition(&self) -> AcquisitionType { @@ -137,7 +139,7 @@ impl FrameReader { } pub fn len(&self) -> usize { - self.sql_frames.len() + self.frames.len() } pub fn get_path(&self) -> PathBuf { @@ -199,6 +201,32 @@ fn read_tof_indices( Ok(tof_indices) } +fn get_frame_without_data( + index: usize, + sql_frames: &Vec, + acquisition: AcquisitionType, + window_groups: &Vec, + quadrupole_settings: &Vec>, +) -> Frame { + let mut frame: Frame = 
Frame::default(); + let sql_frame = &sql_frames[index]; + frame.index = sql_frame.id; + frame.ms_level = MSLevel::read_from_msms_type(sql_frame.msms_type); + frame.rt = sql_frame.rt; + frame.acquisition_type = acquisition; + frame.intensity_correction_factor = 1.0 / sql_frame.accumulation_time; + if (acquisition == AcquisitionType::DIAPASEF) + & (frame.ms_level == MSLevel::MS2) + { + // TODO should be refactored out to quadrupole reader + let window_group = window_groups[index]; + frame.window_group = window_group; + frame.quadrupole_settings = + quadrupole_settings[window_group as usize - 1].clone(); + } + frame +} + #[derive(Debug, thiserror::Error)] pub enum FrameReaderError { #[error("{0}")] From 4b8d04b501006361d9fda803ae4a3ba666a3cc48 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 1 Aug 2024 11:27:49 +0200 Subject: [PATCH 57/69] CHORE: removed unused deps --- Cargo.lock | 1 - Cargo.toml | 1 - 2 files changed, 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 27b720b..5119052 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1124,7 +1124,6 @@ name = "timsrust" version = "0.4.0" dependencies = [ "bytemuck", - "byteorder", "criterion", "linreg", "memmap2", diff --git a/Cargo.toml b/Cargo.toml index a758bd6..eec9e16 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,6 @@ keywords = ["MS", "LC-TIMS-TOF", "PASEF"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -byteorder = "1.4.3" zstd = "0.13.2" rusqlite = { version = "0.31.0", features = ["bundled"] } rayon = "1.10.0" From a673946e24c05735fd49192b5804642e3f66b2d2 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Thu, 1 Aug 2024 06:51:26 -0700 Subject: [PATCH 58/69] (feature) reparametrized splitting to 1/k0 terms --- src/io/readers/precursor_reader/tdf/dia.rs | 3 +- src/io/readers/quad_settings_reader.rs | 74 +++++++++++++------ src/io/readers/spectrum_reader/tdf.rs | 1 + src/io/readers/spectrum_reader/tdf/dia.rs | 5 +- .../spectrum_reader/tdf/raw_spectra.rs | 4 +- tests/spectrum_readers.rs | 17 +++-- 6 files changed, 72 insertions(+), 32 deletions(-) diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index 9eacaa7..c0986de 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -34,8 +34,9 @@ impl DIATDFPrecursorReader { let im_converter: Scan2ImConverter = metadata.im_converter; let expanded_quadrupole_settings = QuadrupoleSettingsReader::from_splitting( - tdf_sql_reader.get_path(), + &tdf_sql_reader, splitting_strategy, + Some(&im_converter), )?; let reader = Self { expanded_quadrupole_settings, diff --git a/src/io/readers/quad_settings_reader.rs b/src/io/readers/quad_settings_reader.rs index 6f5f398..a18bce3 100644 --- a/src/io/readers/quad_settings_reader.rs +++ b/src/io/readers/quad_settings_reader.rs @@ -1,6 +1,10 @@ use std::path::Path; -use crate::{ms_data::QuadrupoleSettings, utils::vec_utils::argsort}; +use crate::{ + domain_converters::{ConvertableDomain, Scan2ImConverter}, + ms_data::QuadrupoleSettings, + utils::vec_utils::argsort, +}; use super::file_readers::sql_reader::{ frame_groups::SqlWindowGroup, quad_settings::SqlQuadSettings, @@ -48,11 +52,11 @@ impl QuadrupoleSettingsReader { } pub fn from_splitting( - path: impl AsRef, + tdf_sql_reader: &SqlReader, splitting_strat: FrameWindowSplittingStrategy, + scan_converter: Option<&Scan2ImConverter>, ) -> Result, QuadrupoleSettingsReaderError> { - let sql_path = path.as_ref(); - let tdf_sql_reader = SqlReader::open(&sql_path)?; + let sql_path = tdf_sql_reader.get_path(); let 
quadrupole_settings = Self::from_sql_settings(&tdf_sql_reader)?; let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?; let expanded_quadrupole_settings = match splitting_strat { @@ -61,11 +65,15 @@ impl QuadrupoleSettingsReader { &window_groups, &quadrupole_settings, &x, + scan_converter, ) }, - FrameWindowSplittingStrategy::Window(x) => { - expand_window_settings(&window_groups, &quadrupole_settings, &x) - }, + FrameWindowSplittingStrategy::Window(x) => expand_window_settings( + &window_groups, + &quadrupole_settings, + &x, + scan_converter, + ), }; Ok(expanded_quadrupole_settings) } @@ -120,7 +128,7 @@ pub enum QuadrupoleSettingsReaderError { SqlError(#[from] SqlError), } -type SpanStep = (usize, usize); +type SpanStep = (f64, f64); /// Strategy for expanding quadrupole settings /// @@ -136,9 +144,9 @@ type SpanStep = (usize, usize); /// subwindows; e.g. if `usize` is 2, the window will be split into 2 subwindows /// of equal width. /// * `Uniform(SpanStep)` - Split the quadrupole settings into subwindows of -/// width `SpanStep.0` and step `SpanStep.1`; e.g. if `SpanStep` is (100, 50), -/// the window will be split into subwindows of width 100 and step 50 between their -/// scan start and end. +/// width `SpanStep.0` and step `SpanStep.1`; e.g. if `SpanStep` is (0.05, 0.02), +/// the window will be split into subwindows of width 0.05 and step 0.02 between their +/// starts in the mobility dimension. 
/// #[derive(Debug, Copy, Clone)] pub enum QuadWindowExpansionStrategy { @@ -163,8 +171,9 @@ fn scan_range_subsplit( start: usize, end: usize, strategy: &QuadWindowExpansionStrategy, + converter: Option<&Scan2ImConverter>, ) -> Vec<(usize, usize)> { - let out = match strategy { + let out: Vec<(usize, usize)> = match strategy { QuadWindowExpansionStrategy::None => { vec![(start, end)] }, @@ -182,16 +191,29 @@ fn scan_range_subsplit( out }, QuadWindowExpansionStrategy::Uniform((span, step)) => { - let mut curr_start = start.clone(); - let mut curr_end = start + span; + let converter = converter + .as_ref() + .expect("Uniform expansion requires a scan to IM converter"); + + // Since scan start < scan end but low scans are high IMs, we need to + // subtract instead of adding. + let mut curr_start_offset = start.clone(); + let mut curr_start_im = converter.convert(curr_start_offset as f64); + + let mut curr_end_im = curr_start_im - span; + let mut curr_end_offset = converter.invert(curr_end_im) as usize; let mut out = Vec::new(); - while curr_end < end { - out.push((curr_start, curr_end)); - curr_start += step; - curr_end += step; + while curr_end_offset < end { + out.push((curr_start_offset, curr_end_offset)); + + curr_start_im = curr_start_im - step; + curr_start_offset = converter.invert(curr_start_im) as usize; + + curr_end_im = curr_start_im - span; + curr_end_offset = converter.invert(curr_end_im) as usize; } - if curr_start < end { - out.push((curr_start, end)); + if curr_start_offset < end { + out.push((curr_start_offset, end)); } out }, @@ -214,6 +236,7 @@ fn expand_window_settings( window_groups: &[SqlWindowGroup], quadrupole_settings: &[QuadrupoleSettings], strategy: &QuadWindowExpansionStrategy, + converter: Option<&Scan2ImConverter>, ) -> Vec { let mut expanded_quadrupole_settings: Vec = vec![]; for window_group in window_groups { @@ -223,9 +246,12 @@ fn expand_window_settings( let window_group_start = group.scan_starts.iter().min().unwrap().clone(); // 
SqlReader cannot return empty vecs, so always succeeds let window_group_end = group.scan_ends.iter().max().unwrap().clone(); // SqlReader cannot return empty vecs, so always succeeds - for (sws, swe) in - scan_range_subsplit(window_group_start, window_group_end, &strategy) - { + for (sws, swe) in scan_range_subsplit( + window_group_start, + window_group_end, + &strategy, + converter, + ) { let mut mz_min = std::f64::MAX; let mut mz_max = std::f64::MIN; let mut nce_sum = 0.0; @@ -262,6 +288,7 @@ fn expand_quadrupole_settings( window_groups: &[SqlWindowGroup], quadrupole_settings: &[QuadrupoleSettings], strategy: &QuadWindowExpansionStrategy, + converter: Option<&Scan2ImConverter>, ) -> Vec { let mut expanded_quadrupole_settings: Vec = vec![]; for window_group in window_groups { @@ -275,6 +302,7 @@ fn expand_quadrupole_settings( subwindow_scan_start, subwindow_scan_end, &strategy, + converter, ) { let sub_quad_settings = QuadrupoleSettings { index: frame, diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index 7f29748..f3d0d8c 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -50,6 +50,7 @@ impl TDFSpectrumReader { frame_reader, acquisition_type, config.frame_splitting_params, + Some(&metadata.im_converter), )?; let reader = Self { path: path_name.as_ref().to_path_buf(), diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 3313acf..1cb1e37 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -1,3 +1,4 @@ +use crate::domain_converters::Scan2ImConverter; use crate::io::readers::quad_settings_reader::FrameWindowSplittingStrategy; use crate::io::readers::FrameReaderError; use crate::{ @@ -24,11 +25,13 @@ impl DIARawSpectrumReader { tdf_sql_reader: &SqlReader, frame_reader: FrameReader, splitting_strategy: FrameWindowSplittingStrategy, + im_converter: Option<&Scan2ImConverter>, ) 
-> Result { let expanded_quadrupole_settings = QuadrupoleSettingsReader::from_splitting( - tdf_sql_reader.get_path(), + &tdf_sql_reader, splitting_strategy, + im_converter, )?; let reader = Self { expanded_quadrupole_settings, diff --git a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs index 95b4a46..40edfa1 100644 --- a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs +++ b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs @@ -1,7 +1,7 @@ use core::fmt; use crate::{ - domain_converters::{ConvertableDomain, Tof2MzConverter}, + domain_converters::{ConvertableDomain, Scan2ImConverter, Tof2MzConverter}, io::readers::{ file_readers::sql_reader::SqlReader, quad_settings_reader::FrameWindowSplittingStrategy, FrameReader, @@ -98,6 +98,7 @@ impl RawSpectrumReader { frame_reader: FrameReader, acquisition_type: AcquisitionType, splitting_strategy: FrameWindowSplittingStrategy, + converter: Option<&Scan2ImConverter>, ) -> Result { let raw_spectrum_reader: Box = match acquisition_type { @@ -109,6 +110,7 @@ impl RawSpectrumReader { tdf_sql_reader, frame_reader, splitting_strategy, + converter, )?) }, acquisition_type => { diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 8f6f198..2c77759 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -172,7 +172,7 @@ fn test_dia_uniform() { .to_str() .unwrap() .to_string(); - for i in [100, 200, 300] { + for i in [0.02, 0.05, 0.1] { let spectra = SpectrumReader::build() .with_path(&file_path) .with_config(SpectrumReaderConfig { @@ -185,12 +185,17 @@ fn test_dia_uniform() { .unwrap() .get_all(); for f in spectra.iter() { - println!("{:?}", f.as_ref().unwrap().precursor); + println!("i={} -> {:?}", i, f.as_ref().unwrap().precursor); } - // Not all frames have scan windows from 0 to 709 ... so ... I need to think + // Not all frames have scan windows from 0.5 to 1.5 ... so ... 
I need to think // on how to express this in the test - // assert_eq!(frames.len(), 4 * ((709 / i) + 1)); - assert!(spectra.len() > (709 / i)); - assert!(spectra.len() < 3 * ((709 / i) + 1)); + assert!(spectra.len() >= (1.0 / i) as usize); + + // 4 frames, each split in 1.0/i chunks max, 1.0 is the IMS width of a frame + // but not all frames span windows in that range + assert!(spectra.len() < 4 * (1.0 / i) as usize,); + + // TODO make a more accurate test where we measure the differences in ion + // mobilities and see if they are within the expected range } } From 345623a3ff27a74198a486539722318825ce8d85 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 26 Aug 2024 11:03:01 +0200 Subject: [PATCH 59/69] FEAT: Provided option for slim versions that only use tdf or minitdf --- Cargo.toml | 9 +- benches/speed_performance.rs | 26 +- src/errors.rs | 10 +- src/io/readers.rs | 6 + src/io/readers/file_readers.rs | 2 + src/io/readers/precursor_reader.rs | 11 + src/io/readers/spectrum_reader.rs | 19 +- src/io/readers/spectrum_reader/minitdf.rs | 3 - src/lib.rs | 23 +- tests/frame_readers.rs | 286 +++++++++++----------- tests/spectrum_readers.rs | 15 +- 11 files changed, 242 insertions(+), 168 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index eec9e16..f69025d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,13 +15,18 @@ keywords = ["MS", "LC-TIMS-TOF", "PASEF"] [dependencies] zstd = "0.13.2" -rusqlite = { version = "0.31.0", features = ["bundled"] } rayon = "1.10.0" linreg = "0.2.0" bytemuck = "1.13.1" -parquet = "42.0.0" thiserror = "1.0.0" memmap2 = "0.9.3" +rusqlite = { version = "0.31.0", features = ["bundled"], optional = true} +parquet = { version = "42.0.0", optional = true } + +[features] +tdf = ["rusqlite"] +minitdf = ["parquet"] +default = ["tdf", "minitdf"] [dev-dependencies] criterion = { version = "0.5.1", features = ["html_reports"] } diff --git a/benches/speed_performance.rs b/benches/speed_performance.rs index 3beeeac..b0778fb 100644 --- 
a/benches/speed_performance.rs +++ b/benches/speed_performance.rs @@ -1,8 +1,8 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rayon::iter::ParallelIterator; -use timsrust::io::readers::{ - FrameReader, SpectrumReader, SpectrumReaderConfig, -}; +#[cfg(feature = "tdf")] +use timsrust::readers::FrameReader; +use timsrust::readers::{SpectrumReader, SpectrumReaderConfig}; const DDA_TEST: &str = "/mnt/c/Users/Sander.Willems/Documents/data/tims05_300SPD/20230505_TIMS05_PaSk_MA_HeLa_6min_ddaP_S1-C10_1_2323.d/"; @@ -11,14 +11,17 @@ const DIA_TEST: &str = const SYP_TEST: &str = "/mnt/c/Users/Sander.Willems/Documents/data/20230505_TIMS05_PaSk_SA_HeLa_6min_syP_5scans_30Da_S1-D4_1_2330.d/"; +#[cfg(feature = "tdf")] fn read_all_frames(frame_reader: &FrameReader) { frame_reader.get_all(); } +#[cfg(feature = "tdf")] fn read_all_ms1_frames(frame_reader: &FrameReader) { frame_reader.get_all_ms1(); } +#[cfg(feature = "tdf")] fn read_all_ms2_frames(frame_reader: &FrameReader) { frame_reader.get_all_ms2(); } @@ -27,13 +30,13 @@ fn read_all_spectra(spectrum_reader: &SpectrumReader) { spectrum_reader.get_all(); } -fn criterion_benchmark_dda(c: &mut Criterion) { +#[cfg(feature = "tdf")] +fn criterion_benchmark_dda_frames(c: &mut Criterion) { // c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20)))); let mut group = c.benchmark_group("sample-size-example"); group.significance_level(0.001).sample_size(10); let d_folder_name: &str = DDA_TEST; let frame_reader = FrameReader::new(d_folder_name).unwrap(); - let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); group.bench_function("DDA read_all_frames 6m", |b| { b.iter(|| read_all_frames(black_box(&frame_reader))) }); @@ -43,12 +46,22 @@ fn criterion_benchmark_dda(c: &mut Criterion) { group.bench_function("DDA read_all_ms2_frames 6m", |b| { b.iter(|| read_all_ms2_frames(black_box(&frame_reader))) }); + group.finish(); +} + +fn criterion_benchmark_dda_spectra(c: &mut Criterion) { + // 
c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20)))); + let mut group = c.benchmark_group("sample-size-example"); + group.significance_level(0.001).sample_size(10); + let d_folder_name: &str = DDA_TEST; + let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap(); group.bench_function("DDA read_all_spectra 6m", |b| { b.iter(|| read_all_spectra(black_box(&spectrum_reader))) }); group.finish(); } +#[cfg(feature = "tdf")] fn criterion_benchmark_dia(c: &mut Criterion) { // c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20)))); let mut group = c.benchmark_group("sample-size-example"); @@ -68,6 +81,7 @@ fn criterion_benchmark_dia(c: &mut Criterion) { group.finish(); } +#[cfg(feature = "tdf")] fn criterion_benchmark_syp(c: &mut Criterion) { // c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20)))); let mut group = c.benchmark_group("sample-size-example"); @@ -89,7 +103,7 @@ fn criterion_benchmark_syp(c: &mut Criterion) { criterion_group!( benches, - criterion_benchmark_dda, + criterion_benchmark_dda_spectra, // criterion_benchmark_dia, // criterion_benchmark_syp ); diff --git a/src/errors.rs b/src/errors.rs index f8c713b..7758b0b 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,19 +1,23 @@ +#[cfg(feature = "tdf")] use crate::io::readers::{ - FrameReaderError, MetadataReaderError, PrecursorReaderError, - QuadrupoleSettingsReaderError, SpectrumReaderError, + FrameReaderError, MetadataReaderError, QuadrupoleSettingsReaderError, }; +use crate::io::readers::{PrecursorReaderError, SpectrumReaderError}; /// An error that is produced by timsrust (uses [thiserror]). 
#[derive(thiserror::Error, Debug)] -pub enum Error { +pub enum TimsRustError { + #[cfg(feature = "tdf")] #[error("{0}")] FrameReaderError(#[from] FrameReaderError), #[error("{0}")] SpectrumReaderError(#[from] SpectrumReaderError), + #[cfg(feature = "tdf")] #[error("{0}")] MetadataReaderError(#[from] MetadataReaderError), #[error("{0}")] PrecursorReaderError(#[from] PrecursorReaderError), + #[cfg(feature = "tdf")] #[error("{0}")] QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError), } diff --git a/src/io/readers.rs b/src/io/readers.rs index 03d5248..c13f808 100644 --- a/src/io/readers.rs +++ b/src/io/readers.rs @@ -1,12 +1,18 @@ pub(crate) mod file_readers; +#[cfg(feature = "tdf")] mod frame_reader; +#[cfg(feature = "tdf")] mod metadata_reader; mod precursor_reader; +#[cfg(feature = "tdf")] mod quad_settings_reader; mod spectrum_reader; +#[cfg(feature = "tdf")] pub use frame_reader::*; +#[cfg(feature = "tdf")] pub use metadata_reader::*; pub use precursor_reader::*; +#[cfg(feature = "tdf")] pub use quad_settings_reader::*; pub use spectrum_reader::*; diff --git a/src/io/readers/file_readers.rs b/src/io/readers/file_readers.rs index 38aa955..9d6b37e 100644 --- a/src/io/readers/file_readers.rs +++ b/src/io/readers/file_readers.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "minitdf")] pub mod parquet_reader; +#[cfg(feature = "tdf")] pub mod sql_reader; pub mod tdf_blob_reader; diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs index e4750c5..05bb1e7 100644 --- a/src/io/readers/precursor_reader.rs +++ b/src/io/readers/precursor_reader.rs @@ -1,14 +1,19 @@ +#[cfg(feature = "minitdf")] mod minitdf; +#[cfg(feature = "tdf")] mod tdf; use core::fmt; use std::path::{Path, PathBuf}; +#[cfg(feature = "minitdf")] use minitdf::{MiniTDFPrecursorReader, MiniTDFPrecursorReaderError}; +#[cfg(feature = "tdf")] use tdf::{TDFPrecursorReader, TDFPrecursorReaderError}; use crate::ms_data::Precursor; +#[cfg(feature = "tdf")] use 
super::quad_settings_reader::FrameWindowSplittingStrategy; pub struct PrecursorReader { @@ -42,6 +47,7 @@ impl PrecursorReader { #[derive(Debug, Default, Clone)] pub struct PrecursorReaderBuilder { path: PathBuf, + #[cfg(feature = "tdf")] config: FrameWindowSplittingStrategy, } @@ -53,6 +59,7 @@ impl PrecursorReaderBuilder { } } + #[cfg(feature = "tdf")] pub fn with_config(&self, config: FrameWindowSplittingStrategy) -> Self { Self { config: config, @@ -63,9 +70,11 @@ impl PrecursorReaderBuilder { pub fn finalize(&self) -> Result { let precursor_reader: Box = match self.path.extension().and_then(|e| e.to_str()) { + #[cfg(feature = "minitdf")] Some("parquet") => { Box::new(MiniTDFPrecursorReader::new(self.path.clone())?) }, + #[cfg(feature = "tdf")] Some("tdf") => Box::new(TDFPrecursorReader::new( self.path.clone(), self.config.clone(), @@ -88,8 +97,10 @@ trait PrecursorReaderTrait: Sync { #[derive(Debug, thiserror::Error)] pub enum PrecursorReaderError { + #[cfg(feature = "minitdf")] #[error("{0}")] MiniTDFPrecursorReaderError(#[from] MiniTDFPrecursorReaderError), + #[cfg(feature = "tdf")] #[error("{0}")] TDFPrecursorReaderError(#[from] TDFPrecursorReaderError), #[error("File {0} not valid")] diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index 7f905cc..56fa327 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -1,14 +1,20 @@ +#[cfg(feature = "minitdf")] mod minitdf; +#[cfg(feature = "tdf")] mod tdf; use core::fmt; + +#[cfg(feature = "minitdf")] use minitdf::{MiniTDFSpectrumReader, MiniTDFSpectrumReaderError}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::path::{Path, PathBuf}; +#[cfg(feature = "tdf")] use tdf::{TDFSpectrumReader, TDFSpectrumReaderError}; use crate::ms_data::Spectrum; +#[cfg(feature = "tdf")] use super::FrameWindowSplittingStrategy; pub struct SpectrumReader { @@ -87,9 +93,11 @@ impl SpectrumReaderBuilder { pub fn finalize(&self) -> Result { let 
spectrum_reader: Box = match self.path.extension().and_then(|e| e.to_str()) { + #[cfg(feature = "minitdf")] Some("ms2") => { Box::new(MiniTDFSpectrumReader::new(self.path.clone())?) }, + #[cfg(feature = "tdf")] Some("d") => Box::new(TDFSpectrumReader::new( self.path.clone(), self.config.clone(), @@ -117,8 +125,10 @@ trait SpectrumReaderTrait: Sync { #[derive(Debug, thiserror::Error)] pub enum SpectrumReaderError { + #[cfg(feature = "minitdf")] #[error("{0}")] MiniTDFSpectrumReaderError(#[from] MiniTDFSpectrumReaderError), + #[cfg(feature = "tdf")] #[error("{0}")] TDFSpectrumReaderError(#[from] TDFSpectrumReaderError), #[error("File {0} not valid")] @@ -127,10 +137,10 @@ pub enum SpectrumReaderError { #[derive(Debug, Clone)] pub struct SpectrumProcessingParams { - smoothing_window: u32, - centroiding_window: u32, - calibration_tolerance: f64, - calibrate: bool, + pub smoothing_window: u32, + pub centroiding_window: u32, + pub calibration_tolerance: f64, + pub calibrate: bool, } impl Default for SpectrumProcessingParams { @@ -147,5 +157,6 @@ impl Default for SpectrumProcessingParams { #[derive(Debug, Default, Clone)] pub struct SpectrumReaderConfig { pub spectrum_processing_params: SpectrumProcessingParams, + #[cfg(feature = "tdf")] pub frame_splitting_params: FrameWindowSplittingStrategy, } diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index e5cc23c..e1b5da9 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -7,7 +7,6 @@ use crate::{ precursors::ParquetPrecursor, ParquetError, ReadableParquetTable, }, - sql_reader::SqlError, tdf_blob_reader::{ IndexedTdfBlobReader, IndexedTdfBlobReaderError, }, @@ -127,8 +126,6 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { #[derive(Debug, thiserror::Error)] pub enum MiniTDFSpectrumReaderError { - #[error("{0}")] - SqlError(#[from] SqlError), #[error("{0}")] PrecursorReaderError(#[from] PrecursorReaderError), 
#[error("{0}")] diff --git a/src/lib.rs b/src/lib.rs index b519699..61bdd28 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,10 +21,23 @@ //! * *.ms2spectrum.bin //! * *.ms2spectrum.parquet -pub mod domain_converters; -mod errors; -pub mod io; -pub mod ms_data; -mod utils; +pub(crate) mod domain_converters; +pub(crate) mod errors; +pub(crate) mod io; +pub(crate) mod ms_data; +pub(crate) mod utils; +pub mod converters { + //! Allows conversions between domains (e.g. Time of Flight and m/z) + pub use crate::domain_converters::*; +} +pub mod readers { + //! Readers for all data from Bruker compatible files. + pub use crate::io::readers::*; +} +pub mod writers { + //! Writers to generic file formats. + pub use crate::io::writers::*; +} pub use crate::errors::*; +pub use crate::ms_data::*; diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index b6fa001..5324360 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -1,151 +1,157 @@ -use std::{path::Path, sync::Arc}; -use timsrust::{ - io::readers::FrameReader, - ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}, -}; +#[cfg(feature = "tdf")] +mod tests { + use std::{path::Path, sync::Arc}; + use timsrust::{ + readers::FrameReader, AcquisitionType, Frame, MSLevel, + QuadrupoleSettings, + }; -fn get_local_directory() -> &'static Path { - Path::new(std::file!()) - .parent() - .expect("Failed to get parent directory") -} + fn get_local_directory() -> &'static Path { + Path::new(std::file!()) + .parent() + .expect("Failed to get parent directory") + } -#[test] -fn tdf_reader_frames1() { - let file_name = "test.d"; - let file_path = get_local_directory() - .join(file_name) - .to_str() - .unwrap() - .to_string(); - let frames: Vec = FrameReader::new(&file_path) - .unwrap() - .get_all_ms1() - .into_iter() - .map(|x| x.unwrap()) - .collect(); - let expected: Vec = vec![ - Frame { - scan_offsets: vec![0, 1, 3, 6, 10], - tof_indices: (0..10).collect(), - intensities: (0..10).map(|x| (x + 1) * 
2).collect(), - index: 1, - rt: 0.1, - ms_level: MSLevel::MS1, - quadrupole_settings: Arc::new(QuadrupoleSettings::default()), - acquisition_type: AcquisitionType::DDAPASEF, - intensity_correction_factor: 1.0 / 100.0, - window_group: 0, - }, - // Frame::default(), - Frame { - scan_offsets: vec![0, 9, 19, 30, 42], - tof_indices: (36..78).collect(), - intensities: (36..78).map(|x| (x + 1) * 2).collect(), - index: 3, - rt: 0.3, - ms_level: MSLevel::MS1, - quadrupole_settings: Arc::new(QuadrupoleSettings::default()), - acquisition_type: AcquisitionType::DDAPASEF, - intensity_correction_factor: 1.0 / 100.0, - window_group: 0, - }, - // Frame::default(), - ]; - for i in 0..expected.len() { - assert_eq!(&frames[i], &expected[i]) + #[test] + fn tdf_reader_frames1() { + let file_name = "test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + let frames: Vec = FrameReader::new(&file_path) + .unwrap() + .get_all_ms1() + .into_iter() + .map(|x| x.unwrap()) + .collect(); + let expected: Vec = vec![ + Frame { + scan_offsets: vec![0, 1, 3, 6, 10], + tof_indices: (0..10).collect(), + intensities: (0..10).map(|x| (x + 1) * 2).collect(), + index: 1, + rt: 0.1, + ms_level: MSLevel::MS1, + quadrupole_settings: Arc::new(QuadrupoleSettings::default()), + acquisition_type: AcquisitionType::DDAPASEF, + intensity_correction_factor: 1.0 / 100.0, + window_group: 0, + }, + // Frame::default(), + Frame { + scan_offsets: vec![0, 9, 19, 30, 42], + tof_indices: (36..78).collect(), + intensities: (36..78).map(|x| (x + 1) * 2).collect(), + index: 3, + rt: 0.3, + ms_level: MSLevel::MS1, + quadrupole_settings: Arc::new(QuadrupoleSettings::default()), + acquisition_type: AcquisitionType::DDAPASEF, + intensity_correction_factor: 1.0 / 100.0, + window_group: 0, + }, + // Frame::default(), + ]; + for i in 0..expected.len() { + assert_eq!(&frames[i], &expected[i]) + } } -} -#[test] -fn tdf_reader_frames2() { - let file_name = "test.d"; - let file_path 
= get_local_directory() - .join(file_name) - .to_str() - .unwrap() - .to_string(); - let frames: Vec = FrameReader::new(&file_path) - .unwrap() - .get_all_ms2() - .into_iter() - .map(|x| x.unwrap()) - .collect(); - let expected: Vec = vec![ - // Frame::default(), - Frame { - scan_offsets: vec![0, 5, 11, 18, 26], - tof_indices: (10..36).collect(), - intensities: (10..36).map(|x| (x + 1) * 2).collect(), - index: 2, - rt: 0.2, - ms_level: MSLevel::MS2, - quadrupole_settings: Arc::new(QuadrupoleSettings::default()), - acquisition_type: AcquisitionType::DDAPASEF, - intensity_correction_factor: 1.0 / 100.0, - window_group: 0, - }, - // Frame::default(), - Frame { - scan_offsets: vec![0, 13, 27, 42, 58], - tof_indices: (78..136).collect(), - intensities: (78..136).map(|x| (x + 1) * 2).collect(), - index: 4, - rt: 0.4, - ms_level: MSLevel::MS2, - quadrupole_settings: Arc::new(QuadrupoleSettings::default()), - acquisition_type: AcquisitionType::DDAPASEF, - intensity_correction_factor: 1.0 / 100.0, - window_group: 0, - }, - ]; - for i in 0..expected.len() { - assert_eq!(&frames[i], &expected[i]) + #[test] + fn tdf_reader_frames2() { + let file_name = "test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + let frames: Vec = FrameReader::new(&file_path) + .unwrap() + .get_all_ms2() + .into_iter() + .map(|x| x.unwrap()) + .collect(); + let expected: Vec = vec![ + // Frame::default(), + Frame { + scan_offsets: vec![0, 5, 11, 18, 26], + tof_indices: (10..36).collect(), + intensities: (10..36).map(|x| (x + 1) * 2).collect(), + index: 2, + rt: 0.2, + ms_level: MSLevel::MS2, + quadrupole_settings: Arc::new(QuadrupoleSettings::default()), + acquisition_type: AcquisitionType::DDAPASEF, + intensity_correction_factor: 1.0 / 100.0, + window_group: 0, + }, + // Frame::default(), + Frame { + scan_offsets: vec![0, 13, 27, 42, 58], + tof_indices: (78..136).collect(), + intensities: (78..136).map(|x| (x + 1) * 2).collect(), + index: 4, + 
rt: 0.4, + ms_level: MSLevel::MS2, + quadrupole_settings: Arc::new(QuadrupoleSettings::default()), + acquisition_type: AcquisitionType::DDAPASEF, + intensity_correction_factor: 1.0 / 100.0, + window_group: 0, + }, + ]; + for i in 0..expected.len() { + assert_eq!(&frames[i], &expected[i]) + } } -} -#[test] -fn tdf_reader_frames_dia() { - let file_name = "dia_test.d"; - let file_path = get_local_directory() - .join(file_name) - .to_str() - .unwrap() - .to_string(); - let frames: Vec = FrameReader::new(&file_path) - .unwrap() - .get_all_ms2() - .into_iter() - .map(|x| x.unwrap()) - .collect(); + #[test] + fn tdf_reader_frames_dia() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + let frames: Vec = FrameReader::new(&file_path) + .unwrap() + .get_all_ms2() + .into_iter() + .map(|x| x.unwrap()) + .collect(); - assert_eq!(frames.len(), 4); - for i in 0..frames.len() { - assert_eq!(frames[i].scan_offsets.len(), 710); - assert_eq!(frames[i].scan_offsets[0], 0); - assert_eq!( - frames[i].scan_offsets.last().unwrap(), - &frames[i].intensities.len() - ); - assert_eq!(frames[i].tof_indices.len(), frames[i].intensities.len()); - } - assert_eq!(&frames[0].tof_indices[0], &251695u32); - assert_eq!(&frames[0].intensities[0], &503392u32); - assert_eq!(&frames[0].tof_indices.len(), &754376); - assert_eq!(&frames[0].intensities.len(), &754376); + assert_eq!(frames.len(), 4); + for i in 0..frames.len() { + assert_eq!(frames[i].scan_offsets.len(), 710); + assert_eq!(frames[i].scan_offsets[0], 0); + assert_eq!( + frames[i].scan_offsets.last().unwrap(), + &frames[i].intensities.len() + ); + assert_eq!( + frames[i].tof_indices.len(), + frames[i].intensities.len() + ); + } + assert_eq!(&frames[0].tof_indices[0], &251695u32); + assert_eq!(&frames[0].intensities[0], &503392u32); + assert_eq!(&frames[0].tof_indices.len(), &754376); + assert_eq!(&frames[0].intensities.len(), &754376); - 
assert_eq!(&frames[1].tof_indices[0], &1006071u32); - assert_eq!(&frames[1].intensities[0], &2012144u32); - assert_eq!(&frames[1].tof_indices.len(), &1257057); - assert_eq!(&frames[1].intensities.len(), &1257057); + assert_eq!(&frames[1].tof_indices[0], &1006071u32); + assert_eq!(&frames[1].intensities[0], &2012144u32); + assert_eq!(&frames[1].tof_indices.len(), &1257057); + assert_eq!(&frames[1].intensities.len(), &1257057); - assert_eq!(&frames[2].tof_indices[0], &4022866u32); - assert_eq!(&frames[2].intensities[0], &8045734u32); - assert_eq!(&frames[2].tof_indices.len(), &2262419); - assert_eq!(&frames[2].intensities.len(), &2262419); + assert_eq!(&frames[2].tof_indices[0], &4022866u32); + assert_eq!(&frames[2].intensities[0], &8045734u32); + assert_eq!(&frames[2].tof_indices.len(), &2262419); + assert_eq!(&frames[2].intensities.len(), &2262419); - assert_eq!(&frames[3].tof_indices[0], &6285285u32); - assert_eq!(&frames[3].intensities[0], &12570572u32); - assert_eq!(&frames[3].tof_indices.len(), &2765100); - assert_eq!(&frames[3].intensities.len(), &2765100); + assert_eq!(&frames[3].tof_indices[0], &6285285u32); + assert_eq!(&frames[3].intensities[0], &12570572u32); + assert_eq!(&frames[3].tof_indices.len(), &2765100); + assert_eq!(&frames[3].intensities.len(), &2765100); + } } diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 8f6f198..9546105 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -1,10 +1,11 @@ use std::path::Path; +#[cfg(feature = "tdf")] +use timsrust::readers::{ + FrameWindowSplittingStrategy, QuadWindowExpansionStrategy, +}; use timsrust::{ - io::readers::{ - FrameWindowSplittingStrategy, QuadWindowExpansionStrategy, - SpectrumProcessingParams, SpectrumReader, SpectrumReaderConfig, - }, - ms_data::{Precursor, Spectrum}, + readers::{SpectrumProcessingParams, SpectrumReader, SpectrumReaderConfig}, + Precursor, Spectrum, }; fn get_local_directory() -> &'static Path { @@ -13,6 +14,7 @@ fn 
get_local_directory() -> &'static Path { .expect("Failed to get parent directory") } +#[cfg(feature = "minitdf")] #[test] fn minitdf_reader() { let file_name = "test2.ms2"; @@ -67,6 +69,7 @@ fn minitdf_reader() { } } +#[cfg(feature = "tdf")] #[test] fn tdf_reader_dda() { let file_name = "test.d"; @@ -138,6 +141,7 @@ fn tdf_reader_dda() { } } +#[cfg(feature = "tdf")] #[test] fn test_dia_even() { let file_name = "dia_test.d"; @@ -164,6 +168,7 @@ fn test_dia_even() { } } +#[cfg(feature = "tdf")] #[test] fn test_dia_uniform() { let file_name = "dia_test.d"; From 7f3d716c94823c02fc9613493c8dc101c18b7e89 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 26 Aug 2024 13:01:59 +0200 Subject: [PATCH 60/69] FEAT: expose dia_windows through frame reader --- src/io/readers/frame_reader.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index 83b6efd..6460a83 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -29,6 +29,7 @@ pub struct FrameReader { frames: Vec, acquisition: AcquisitionType, offsets: Vec, + dia_windows: Option>>, } impl FrameReader { @@ -87,6 +88,10 @@ impl FrameReader { frames, acquisition, offsets, + dia_windows: match acquisition { + AcquisitionType::DIAPASEF => Some(quadrupole_settings), + _ => None, + }, }; Ok(reader) } @@ -102,6 +107,10 @@ impl FrameReader { .map(move |x| self.get(x)) } + pub fn get_dia_windows(&self) -> Option>> { + self.dia_windows.clone() + } + pub fn get(&self, index: usize) -> Result { let mut frame = self.frames[index].clone(); let offset = self.offsets[index]; From a0f86f1efc1b4d5ebbf94f5f29f243fb39a5e63c Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Mon, 26 Aug 2024 13:02:19 +0200 Subject: [PATCH 61/69] FEAT: added rt, im and mz min/mx values to metadata --- src/io/readers/metadata_reader.rs | 56 +++++++++++++++++++++++-------- src/ms_data/metadata.rs | 6 ++++ 2 files changed, 48 insertions(+), 14 deletions(-) 
diff --git a/src/io/readers/metadata_reader.rs b/src/io/readers/metadata_reader.rs index 1e47668..8944001 100644 --- a/src/io/readers/metadata_reader.rs +++ b/src/io/readers/metadata_reader.rs @@ -23,38 +23,59 @@ impl MetadataReader { SqlMetadata::from_sql_reader(&tdf_sql_reader)?; let compression_type = parse_value(&sql_metadata, "TimsCompressionType")?; + let (mz_min, mz_max) = get_mz_bounds(&sql_metadata)?; + let (im_min, im_max) = get_im_bounds(&sql_metadata)?; + let rt_values: Vec = + tdf_sql_reader.read_column_from_table("Time", "Frames")?; + let rt_min = rt_values + .iter() + .filter(|&&v| !v.is_nan()) // Filter out NaN values + .cloned() + .min_by(|a, b| a.partial_cmp(b).unwrap()) + .unwrap(); + let rt_max = rt_values + .iter() + .filter(|&&v| !v.is_nan()) // Filter out NaN values + .cloned() + .max_by(|a, b| a.partial_cmp(b).unwrap()) + .unwrap(); let metadata = Metadata { path: path.as_ref().to_path_buf(), - rt_converter: get_rt_converter(&tdf_sql_reader)?, + rt_converter: Frame2RtConverter::from_values(rt_values), im_converter: get_im_converter(&sql_metadata, &tdf_sql_reader)?, mz_converter: get_mz_converter(&sql_metadata)?, + lower_rt: rt_min, + upper_rt: rt_max, + lower_im: im_min, + upper_im: im_max, + lower_mz: mz_min, + upper_mz: mz_max, compression_type, }; Ok(metadata) } } -fn get_rt_converter( - tdf_sql_reader: &SqlReader, -) -> Result { - let rt_values: Vec = - tdf_sql_reader.read_column_from_table("Time", "Frames")?; - Ok(Frame2RtConverter::from_values(rt_values)) -} - -fn get_mz_converter( +fn get_mz_bounds( sql_metadata: &HashMap, -) -> Result { +) -> Result<(f64, f64), MetadataReaderError> { let software = sql_metadata.get("AcquisitionSoftware").ok_or( MetadataReaderError::KeyNotFound("AcquisitionSoftware".to_string()), )?; - let tof_max_index: u32 = parse_value(sql_metadata, "DigitizerNumSamples")?; let mut mz_min: f64 = parse_value(sql_metadata, "MzAcqRangeLower")?; let mut mz_max: f64 = parse_value(sql_metadata, "MzAcqRangeUpper")?; if 
software == OTOF_CONTROL { mz_min -= 5.0; mz_max += 5.0; } + Ok((mz_min, mz_max)) +} + +fn get_mz_converter( + sql_metadata: &HashMap, +) -> Result { + let (mz_min, mz_max) = get_mz_bounds(sql_metadata)?; + let tof_max_index: u32 = parse_value(sql_metadata, "DigitizerNumSamples")?; Ok(Tof2MzConverter::from_boundaries( mz_min, mz_max, @@ -62,6 +83,14 @@ fn get_mz_converter( )) } +fn get_im_bounds( + sql_metadata: &HashMap, +) -> Result<(f64, f64), MetadataReaderError> { + let im_min: f64 = parse_value(sql_metadata, "OneOverK0AcqRangeLower")?; + let im_max: f64 = parse_value(sql_metadata, "OneOverK0AcqRangeUpper")?; + Ok((im_min, im_max)) +} + fn get_im_converter( sql_metadata: &HashMap, tdf_sql_reader: &SqlReader, @@ -69,8 +98,7 @@ fn get_im_converter( let scan_counts: Vec = tdf_sql_reader.read_column_from_table("NumScans", "Frames")?; let scan_max_index = *scan_counts.iter().max().unwrap(); // SqlReader cannot return empty vecs, so always succeeds - let im_min: f64 = parse_value(sql_metadata, "OneOverK0AcqRangeLower")?; - let im_max: f64 = parse_value(sql_metadata, "OneOverK0AcqRangeUpper")?; + let (im_min, im_max) = get_im_bounds(sql_metadata)?; Ok(Scan2ImConverter::from_boundaries( im_min, im_max, diff --git a/src/ms_data/metadata.rs b/src/ms_data/metadata.rs index 14d1a9a..8e78364 100644 --- a/src/ms_data/metadata.rs +++ b/src/ms_data/metadata.rs @@ -13,4 +13,10 @@ pub struct Metadata { pub im_converter: Scan2ImConverter, pub mz_converter: Tof2MzConverter, pub compression_type: u8, + pub lower_rt: f64, + pub upper_rt: f64, + pub lower_im: f64, + pub upper_im: f64, + pub lower_mz: f64, + pub upper_mz: f64, } From 9b63805592d16dcdeee9eab11897122d59ab69c6 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Mon, 26 Aug 2024 17:20:22 -0700 Subject: [PATCH 62/69] (feat,wip) Adding back flat span and upstream query of spectrum converter --- src/io/readers/precursor_reader.rs | 9 +- src/io/readers/precursor_reader/tdf.rs | 4 +- src/io/readers/precursor_reader/tdf/dia.rs | 5 +- src/io/readers/quad_settings_reader.rs | 131 ++++++++++++++---- src/io/readers/spectrum_reader.rs | 4 +- src/io/readers/spectrum_reader/tdf.rs | 6 +- src/io/readers/spectrum_reader/tdf/dia.rs | 2 - .../spectrum_reader/tdf/raw_spectra.rs | 2 - tests/spectrum_readers.rs | 46 +++++- 9 files changed, 157 insertions(+), 52 deletions(-) diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs index e4750c5..8f477d0 100644 --- a/src/io/readers/precursor_reader.rs +++ b/src/io/readers/precursor_reader.rs @@ -9,7 +9,7 @@ use tdf::{TDFPrecursorReader, TDFPrecursorReaderError}; use crate::ms_data::Precursor; -use super::quad_settings_reader::FrameWindowSplittingStrategy; +use super::FrameWindowSplittingConfiguration; pub struct PrecursorReader { precursor_reader: Box, @@ -42,7 +42,7 @@ impl PrecursorReader { #[derive(Debug, Default, Clone)] pub struct PrecursorReaderBuilder { path: PathBuf, - config: FrameWindowSplittingStrategy, + config: FrameWindowSplittingConfiguration, } impl PrecursorReaderBuilder { @@ -53,7 +53,10 @@ impl PrecursorReaderBuilder { } } - pub fn with_config(&self, config: FrameWindowSplittingStrategy) -> Self { + pub fn with_config( + &self, + config: FrameWindowSplittingConfiguration, + ) -> Self { Self { config: config, ..self.clone() diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs index 34efef1..60d179d 100644 --- a/src/io/readers/precursor_reader/tdf.rs +++ b/src/io/readers/precursor_reader/tdf.rs @@ -9,7 +9,7 @@ use dia::{DIATDFPrecursorReader, DIATDFPrecursorReaderError}; use crate::{ io::readers::{ file_readers::sql_reader::{SqlError, SqlReader}, - 
quad_settings_reader::FrameWindowSplittingStrategy, + FrameWindowSplittingConfiguration, }, ms_data::{AcquisitionType, Precursor}, }; @@ -23,7 +23,7 @@ pub struct TDFPrecursorReader { impl TDFPrecursorReader { pub fn new( path: impl AsRef, - splitting_strategy: FrameWindowSplittingStrategy, + splitting_strategy: FrameWindowSplittingConfiguration, ) -> Result { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(sql_path)?; diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index c0986de..b259f05 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -1,6 +1,7 @@ use std::path::Path; use crate::io::readers::quad_settings_reader::FrameWindowSplittingStrategy; +use crate::io::readers::FrameWindowSplittingConfiguration; use crate::{ domain_converters::{ ConvertableDomain, Frame2RtConverter, Scan2ImConverter, @@ -25,18 +26,18 @@ pub struct DIATDFPrecursorReader { impl DIATDFPrecursorReader { pub fn new( path: impl AsRef, - splitting_strategy: FrameWindowSplittingStrategy, + splitting_config: FrameWindowSplittingConfiguration, ) -> Result { let sql_path = path.as_ref(); let tdf_sql_reader = SqlReader::open(sql_path)?; let metadata = MetadataReader::new(&path)?; let rt_converter: Frame2RtConverter = metadata.rt_converter; let im_converter: Scan2ImConverter = metadata.im_converter; + let splitting_strategy = splitting_config.finalize(im_converter); let expanded_quadrupole_settings = QuadrupoleSettingsReader::from_splitting( &tdf_sql_reader, splitting_strategy, - Some(&im_converter), )?; let reader = Self { expanded_quadrupole_settings, diff --git a/src/io/readers/quad_settings_reader.rs b/src/io/readers/quad_settings_reader.rs index a18bce3..ffc22f9 100644 --- a/src/io/readers/quad_settings_reader.rs +++ b/src/io/readers/quad_settings_reader.rs @@ -54,9 +54,7 @@ impl QuadrupoleSettingsReader { pub fn from_splitting( tdf_sql_reader: &SqlReader, 
splitting_strat: FrameWindowSplittingStrategy, - scan_converter: Option<&Scan2ImConverter>, ) -> Result, QuadrupoleSettingsReaderError> { - let sql_path = tdf_sql_reader.get_path(); let quadrupole_settings = Self::from_sql_settings(&tdf_sql_reader)?; let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?; let expanded_quadrupole_settings = match splitting_strat { @@ -65,15 +63,11 @@ impl QuadrupoleSettingsReader { &window_groups, &quadrupole_settings, &x, - scan_converter, ) }, - FrameWindowSplittingStrategy::Window(x) => expand_window_settings( - &window_groups, - &quadrupole_settings, - &x, - scan_converter, - ), + FrameWindowSplittingStrategy::Window(x) => { + expand_window_settings(&window_groups, &quadrupole_settings, &x) + }, }; Ok(expanded_quadrupole_settings) } @@ -128,7 +122,8 @@ pub enum QuadrupoleSettingsReaderError { SqlError(#[from] SqlError), } -type SpanStep = (f64, f64); +type MobilitySpanStep = (f64, f64); +type ScanSpanStep = (usize, usize); /// Strategy for expanding quadrupole settings /// @@ -143,16 +138,62 @@ type SpanStep = (f64, f64); /// * `Even(usize)` - Split the quadrupole settings into `usize` evenly spaced /// subwindows; e.g. if `usize` is 2, the window will be split into 2 subwindows /// of equal width. -/// * `Uniform(SpanStep)` - Split the quadrupole settings into subwindows of -/// width `SpanStep.0` and step `SpanStep.1`; e.g. if `SpanStep` is (0.05, 0.02), +/// * `UniformMobility(SpanStep)` - Split the quadrupole settings into subwindows of +/// width `SpanStep.0` and step `SpanStep.1` in ion mobility space. +/// e.g. if `SpanStep` is (0.05, 0.02), /// the window will be split into subwindows of width 0.05 and step 0.02 between their /// in the mobility dimension. +/// * `UniformScan(SpanStep)` - Split the quadrupole settings into subwindows of +/// width `SpanStep.0` and step `SpanStep.1` in scan number space. +/// e.g. 
if `SpanStep` is (100, 80), +/// the window will be split into subwindows of width +/// 100 and step 80 between their in the scan number. /// #[derive(Debug, Copy, Clone)] pub enum QuadWindowExpansionStrategy { None, Even(usize), - Uniform(SpanStep), + UniformMobility(MobilitySpanStep, Scan2ImConverter), + UniformScan(ScanSpanStep), +} + +#[derive(Debug, Copy, Clone)] +pub enum QuadWindowExpansionConfiguration { + None, + Even(usize), + UniformMobility(MobilitySpanStep), + UniformScan(ScanSpanStep), +} + +impl Default for QuadWindowExpansionConfiguration { + fn default() -> Self { + Self::Even(1) + } +} + +impl QuadWindowExpansionConfiguration { + pub fn finalize( + self, + scan_converter: Scan2ImConverter, + ) -> QuadWindowExpansionStrategy { + match self { + QuadWindowExpansionConfiguration::None => { + QuadWindowExpansionStrategy::None + }, + QuadWindowExpansionConfiguration::Even(x) => { + QuadWindowExpansionStrategy::Even(x) + }, + QuadWindowExpansionConfiguration::UniformMobility((span, step)) => { + QuadWindowExpansionStrategy::UniformMobility( + (span, step), + scan_converter, + ) + }, + QuadWindowExpansionConfiguration::UniformScan((span, step)) => { + QuadWindowExpansionStrategy::UniformScan((span, step)) + }, + } + } } #[derive(Debug, Clone, Copy)] @@ -161,9 +202,33 @@ pub enum FrameWindowSplittingStrategy { Window(QuadWindowExpansionStrategy), } -impl Default for FrameWindowSplittingStrategy { +#[derive(Debug, Clone, Copy)] +pub enum FrameWindowSplittingConfiguration { + Quadrupole(QuadWindowExpansionConfiguration), + Window(QuadWindowExpansionConfiguration), +} + +impl Default for FrameWindowSplittingConfiguration { fn default() -> Self { - Self::Quadrupole(QuadWindowExpansionStrategy::Even(1)) + Self::Quadrupole(QuadWindowExpansionConfiguration::Even(1)) + } +} + +impl FrameWindowSplittingConfiguration { + pub fn finalize( + self, + scan_converter: Scan2ImConverter, + ) -> FrameWindowSplittingStrategy { + match self { + 
FrameWindowSplittingConfiguration::Quadrupole(x) => { + FrameWindowSplittingStrategy::Quadrupole( + x.finalize(scan_converter), + ) + }, + FrameWindowSplittingConfiguration::Window(x) => { + FrameWindowSplittingStrategy::Window(x.finalize(scan_converter)) + }, + } } } @@ -171,7 +236,6 @@ fn scan_range_subsplit( start: usize, end: usize, strategy: &QuadWindowExpansionStrategy, - converter: Option<&Scan2ImConverter>, ) -> Vec<(usize, usize)> { let out: Vec<(usize, usize)> = match strategy { QuadWindowExpansionStrategy::None => { @@ -190,11 +254,10 @@ fn scan_range_subsplit( } out }, - QuadWindowExpansionStrategy::Uniform((span, step)) => { - let converter = converter - .as_ref() - .expect("Uniform expansion requires a scan to IM converter"); - + QuadWindowExpansionStrategy::UniformMobility( + (span, step), + converter, + ) => { // Since scan start < scan end but low scans are high IMs, we need to // subtract instead of adding. let mut curr_start_offset = start.clone(); @@ -217,6 +280,20 @@ fn scan_range_subsplit( } out }, + QuadWindowExpansionStrategy::UniformScan((span, step)) => { + let mut curr_start_offset = start; + let mut curr_end_offset = end + span; + let mut out = Vec::new(); + while curr_end_offset < end { + out.push((curr_start_offset, curr_end_offset)); + curr_start_offset += step; + curr_end_offset += step; + } + if curr_start_offset > end { + out.push((curr_start_offset, end)); + } + out + }, }; debug_assert!( @@ -236,7 +313,6 @@ fn expand_window_settings( window_groups: &[SqlWindowGroup], quadrupole_settings: &[QuadrupoleSettings], strategy: &QuadWindowExpansionStrategy, - converter: Option<&Scan2ImConverter>, ) -> Vec { let mut expanded_quadrupole_settings: Vec = vec![]; for window_group in window_groups { @@ -246,12 +322,9 @@ fn expand_window_settings( let window_group_start = group.scan_starts.iter().min().unwrap().clone(); // SqlReader cannot return empty vecs, so always succeeds let window_group_end = 
group.scan_ends.iter().max().unwrap().clone(); // SqlReader cannot return empty vecs, so always succeeds - for (sws, swe) in scan_range_subsplit( - window_group_start, - window_group_end, - &strategy, - converter, - ) { + for (sws, swe) in + scan_range_subsplit(window_group_start, window_group_end, &strategy) + { let mut mz_min = std::f64::MAX; let mut mz_max = std::f64::MIN; let mut nce_sum = 0.0; @@ -288,7 +361,6 @@ fn expand_quadrupole_settings( window_groups: &[SqlWindowGroup], quadrupole_settings: &[QuadrupoleSettings], strategy: &QuadWindowExpansionStrategy, - converter: Option<&Scan2ImConverter>, ) -> Vec { let mut expanded_quadrupole_settings: Vec = vec![]; for window_group in window_groups { @@ -302,7 +374,6 @@ fn expand_quadrupole_settings( subwindow_scan_start, subwindow_scan_end, &strategy, - converter, ) { let sub_quad_settings = QuadrupoleSettings { index: frame, diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index 7f905cc..3e5a533 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -9,7 +9,7 @@ use tdf::{TDFSpectrumReader, TDFSpectrumReaderError}; use crate::ms_data::Spectrum; -use super::FrameWindowSplittingStrategy; +use super::FrameWindowSplittingConfiguration; pub struct SpectrumReader { spectrum_reader: Box, @@ -147,5 +147,5 @@ impl Default for SpectrumProcessingParams { #[derive(Debug, Default, Clone)] pub struct SpectrumReaderConfig { pub spectrum_processing_params: SpectrumProcessingParams, - pub frame_splitting_params: FrameWindowSplittingStrategy, + pub frame_splitting_params: FrameWindowSplittingConfiguration, } diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index f3d0d8c..f166dc4 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -45,12 +45,14 @@ impl TDFSpectrumReader { .with_config(config.frame_splitting_params) .finalize()?; let acquisition_type = 
frame_reader.get_acquisition(); + let splitting_strategy = config + .frame_splitting_params + .finalize(metadata.im_converter); let raw_spectrum_reader = RawSpectrumReader::new( &tdf_sql_reader, frame_reader, acquisition_type, - config.frame_splitting_params, - Some(&metadata.im_converter), + splitting_strategy, )?; let reader = Self { path: path_name.as_ref().to_path_buf(), diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 1cb1e37..46c26ee 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -25,13 +25,11 @@ impl DIARawSpectrumReader { tdf_sql_reader: &SqlReader, frame_reader: FrameReader, splitting_strategy: FrameWindowSplittingStrategy, - im_converter: Option<&Scan2ImConverter>, ) -> Result { let expanded_quadrupole_settings = QuadrupoleSettingsReader::from_splitting( &tdf_sql_reader, splitting_strategy, - im_converter, )?; let reader = Self { expanded_quadrupole_settings, diff --git a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs index 40edfa1..6ad2b6e 100644 --- a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs +++ b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs @@ -98,7 +98,6 @@ impl RawSpectrumReader { frame_reader: FrameReader, acquisition_type: AcquisitionType, splitting_strategy: FrameWindowSplittingStrategy, - converter: Option<&Scan2ImConverter>, ) -> Result { let raw_spectrum_reader: Box = match acquisition_type { @@ -110,7 +109,6 @@ impl RawSpectrumReader { tdf_sql_reader, frame_reader, splitting_strategy, - converter, )?) 
}, acquisition_type => { diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 2c77759..4965161 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -1,7 +1,7 @@ use std::path::Path; use timsrust::{ io::readers::{ - FrameWindowSplittingStrategy, QuadWindowExpansionStrategy, + FrameWindowSplittingConfiguration, QuadWindowExpansionConfiguration, SpectrumProcessingParams, SpectrumReader, SpectrumReaderConfig, }, ms_data::{Precursor, Spectrum}, @@ -151,8 +151,8 @@ fn test_dia_even() { .with_path(&file_path) .with_config(SpectrumReaderConfig { frame_splitting_params: - FrameWindowSplittingStrategy::Quadrupole( - QuadWindowExpansionStrategy::Even(i), + FrameWindowSplittingConfiguration::Quadrupole( + QuadWindowExpansionConfiguration::Even(i), ), spectrum_processing_params: SpectrumProcessingParams::default(), }) @@ -165,7 +165,7 @@ fn test_dia_even() { } #[test] -fn test_dia_uniform() { +fn test_dia_uniform_mobility() { let file_name = "dia_test.d"; let file_path = get_local_directory() .join(file_name) @@ -176,9 +176,12 @@ fn test_dia_uniform() { let spectra = SpectrumReader::build() .with_path(&file_path) .with_config(SpectrumReaderConfig { - frame_splitting_params: FrameWindowSplittingStrategy::Window( - QuadWindowExpansionStrategy::Uniform((i, i)), - ), + frame_splitting_params: + FrameWindowSplittingConfiguration::Window( + QuadWindowExpansionConfiguration::UniformMobility(( + i, i, + )), + ), spectrum_processing_params: SpectrumProcessingParams::default(), }) .finalize() @@ -199,3 +202,32 @@ fn test_dia_uniform() { // mobilities and see if they are within the expected range } } + +#[test] +fn test_dia_uniform_scans() { + let file_name = "dia_test.d"; + let file_path = get_local_directory() + .join(file_name) + .to_str() + .unwrap() + .to_string(); + for i in [50, 100, 200] { + let spectra = SpectrumReader::build() + .with_path(&file_path) + .with_config(SpectrumReaderConfig { + frame_splitting_params: + 
FrameWindowSplittingConfiguration::Window( + QuadWindowExpansionConfiguration::UniformScan((i, i)), + ), + spectrum_processing_params: SpectrumProcessingParams::default(), + }) + .finalize() + .unwrap() + .get_all(); + for f in spectra.iter() { + println!("i={} -> {:?}", i, f.as_ref().unwrap().precursor); + } + + panic!("not implemented"); + } +} From 12ef92b395eb2ef13a3f6bf6b1fbb0a269340223 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Mon, 26 Aug 2024 17:42:52 -0700 Subject: [PATCH 63/69] (feat) Added testing for uniform scan splitting --- src/io/readers/precursor_reader/tdf/dia.rs | 1 - src/io/readers/quad_settings_reader.rs | 5 +++-- src/io/readers/spectrum_reader/tdf/dia.rs | 1 - src/io/readers/spectrum_reader/tdf/raw_spectra.rs | 2 +- tests/spectrum_readers.rs | 14 ++++++++++++-- 5 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index b259f05..435636d 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -1,6 +1,5 @@ use std::path::Path; -use crate::io::readers::quad_settings_reader::FrameWindowSplittingStrategy; use crate::io::readers::FrameWindowSplittingConfiguration; use crate::{ domain_converters::{ diff --git a/src/io/readers/quad_settings_reader.rs b/src/io/readers/quad_settings_reader.rs index ffc22f9..8559122 100644 --- a/src/io/readers/quad_settings_reader.rs +++ b/src/io/readers/quad_settings_reader.rs @@ -282,14 +282,15 @@ fn scan_range_subsplit( }, QuadWindowExpansionStrategy::UniformScan((span, step)) => { let mut curr_start_offset = start; - let mut curr_end_offset = end + span; + let mut curr_end_offset = start + span; let mut out = Vec::new(); + while curr_end_offset < end { out.push((curr_start_offset, curr_end_offset)); curr_start_offset += step; curr_end_offset += step; } - if curr_start_offset > end { + if curr_start_offset < end { out.push((curr_start_offset, end)); } out 
diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 46c26ee..3dad26c 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -1,4 +1,3 @@ -use crate::domain_converters::Scan2ImConverter; use crate::io::readers::quad_settings_reader::FrameWindowSplittingStrategy; use crate::io::readers::FrameReaderError; use crate::{ diff --git a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs index 6ad2b6e..95b4a46 100644 --- a/src/io/readers/spectrum_reader/tdf/raw_spectra.rs +++ b/src/io/readers/spectrum_reader/tdf/raw_spectra.rs @@ -1,7 +1,7 @@ use core::fmt; use crate::{ - domain_converters::{ConvertableDomain, Scan2ImConverter, Tof2MzConverter}, + domain_converters::{ConvertableDomain, Tof2MzConverter}, io::readers::{ file_readers::sql_reader::SqlReader, quad_settings_reader::FrameWindowSplittingStrategy, FrameReader, diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 4965161..6b27e8c 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -211,7 +211,7 @@ fn test_dia_uniform_scans() { .to_str() .unwrap() .to_string(); - for i in [50, 100, 200] { + for i in [20, 100, 200] { let spectra = SpectrumReader::build() .with_path(&file_path) .with_config(SpectrumReaderConfig { @@ -228,6 +228,16 @@ fn test_dia_uniform_scans() { println!("i={} -> {:?}", i, f.as_ref().unwrap().precursor); } - panic!("not implemented"); + // Since there are 709 scans in the test data ... we can expect + // the number of breaks to be (709 / i) + 1 ... if we had a single + // window that spanned the entire scan range. + // ... A more strict test would filter for each frame index and + // within each make sure the number matches the ratio ... here I am + // Just checking the overall number. 
+ const NUM_FRAMES: usize = 4; + const NUM_SCANS: usize = 709; + + assert!(spectra.len() >= (NUM_SCANS / i) as usize + 1); + assert!(spectra.len() < NUM_FRAMES * (NUM_SCANS / i) as usize + 1); } } From f5b18d7f50a3ea5658fd2a363756817dfa4b1b03 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Tue, 27 Aug 2024 16:42:05 +0200 Subject: [PATCH 64/69] DOCS: todo points added --- README.md | 2 ++ src/io/readers/quad_settings_reader.rs | 1 + 2 files changed, 3 insertions(+) diff --git a/README.md b/README.md index 9b01172..8916caf 100644 --- a/README.md +++ b/README.md @@ -50,4 +50,6 @@ TODO * Improve tests * Pase CompressionType1 * Make Path of TimsTOF data into special type +* Single access point for all readers? +* Few unchecked unwraps left * ... diff --git a/src/io/readers/quad_settings_reader.rs b/src/io/readers/quad_settings_reader.rs index 6f5f398..9ffe93d 100644 --- a/src/io/readers/quad_settings_reader.rs +++ b/src/io/readers/quad_settings_reader.rs @@ -13,6 +13,7 @@ pub struct QuadrupoleSettingsReader { } impl QuadrupoleSettingsReader { + // TODO: refactor due to large size pub fn new( path: impl AsRef, ) -> Result, QuadrupoleSettingsReaderError> { From a953a207e6ebd184367b4d935e3cbb1a13141432 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 28 Aug 2024 15:13:58 +0200 Subject: [PATCH 65/69] FEAT: Simplified FrameWindowSplitting params --- src/io/readers/precursor_reader/tdf/dia.rs | 2 +- src/io/readers/quad_settings_reader.rs | 73 ++++++++-------------- src/io/readers/spectrum_reader/tdf.rs | 2 +- tests/spectrum_readers.rs | 19 +++--- 4 files changed, 38 insertions(+), 58 deletions(-) diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index 435636d..722b80c 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -32,7 +32,7 @@ impl DIATDFPrecursorReader { let metadata = MetadataReader::new(&path)?; let rt_converter: Frame2RtConverter = 
metadata.rt_converter; let im_converter: Scan2ImConverter = metadata.im_converter; - let splitting_strategy = splitting_config.finalize(im_converter); + let splitting_strategy = splitting_config.finalize(Some(im_converter)); let expanded_quadrupole_settings = QuadrupoleSettingsReader::from_splitting( &tdf_sql_reader, diff --git a/src/io/readers/quad_settings_reader.rs b/src/io/readers/quad_settings_reader.rs index 022eb11..147ab84 100644 --- a/src/io/readers/quad_settings_reader.rs +++ b/src/io/readers/quad_settings_reader.rs @@ -154,49 +154,16 @@ type ScanSpanStep = (usize, usize); pub enum QuadWindowExpansionStrategy { None, Even(usize), - UniformMobility(MobilitySpanStep, Scan2ImConverter), + UniformMobility(MobilitySpanStep, Option), UniformScan(ScanSpanStep), } -#[derive(Debug, Copy, Clone)] -pub enum QuadWindowExpansionConfiguration { - None, - Even(usize), - UniformMobility(MobilitySpanStep), - UniformScan(ScanSpanStep), -} - -impl Default for QuadWindowExpansionConfiguration { +impl Default for QuadWindowExpansionStrategy { fn default() -> Self { Self::Even(1) } } -impl QuadWindowExpansionConfiguration { - pub fn finalize( - self, - scan_converter: Scan2ImConverter, - ) -> QuadWindowExpansionStrategy { - match self { - QuadWindowExpansionConfiguration::None => { - QuadWindowExpansionStrategy::None - }, - QuadWindowExpansionConfiguration::Even(x) => { - QuadWindowExpansionStrategy::Even(x) - }, - QuadWindowExpansionConfiguration::UniformMobility((span, step)) => { - QuadWindowExpansionStrategy::UniformMobility( - (span, step), - scan_converter, - ) - }, - QuadWindowExpansionConfiguration::UniformScan((span, step)) => { - QuadWindowExpansionStrategy::UniformScan((span, step)) - }, - } - } -} - #[derive(Debug, Clone, Copy)] pub enum FrameWindowSplittingStrategy { Quadrupole(QuadWindowExpansionStrategy), @@ -205,30 +172,43 @@ pub enum FrameWindowSplittingStrategy { #[derive(Debug, Clone, Copy)] pub enum FrameWindowSplittingConfiguration { - 
Quadrupole(QuadWindowExpansionConfiguration), - Window(QuadWindowExpansionConfiguration), + Quadrupole(QuadWindowExpansionStrategy), + Window(QuadWindowExpansionStrategy), } impl Default for FrameWindowSplittingConfiguration { fn default() -> Self { - Self::Quadrupole(QuadWindowExpansionConfiguration::Even(1)) + Self::Quadrupole(QuadWindowExpansionStrategy::Even(1)) } } impl FrameWindowSplittingConfiguration { pub fn finalize( self, - scan_converter: Scan2ImConverter, + scan_converter: Option, ) -> FrameWindowSplittingStrategy { match self { - FrameWindowSplittingConfiguration::Quadrupole(x) => { - FrameWindowSplittingStrategy::Quadrupole( - x.finalize(scan_converter), + Self::Quadrupole(x) => FrameWindowSplittingStrategy::Quadrupole( + Self::update_im_converter(x, scan_converter), + ), + Self::Window(x) => FrameWindowSplittingStrategy::Window( + Self::update_im_converter(x, scan_converter), + ), + } + } + + fn update_im_converter( + quad_strategy: QuadWindowExpansionStrategy, + scan_converter: Option, + ) -> QuadWindowExpansionStrategy { + match quad_strategy { + QuadWindowExpansionStrategy::UniformMobility((span, step), _) => { + QuadWindowExpansionStrategy::UniformMobility( + (span, step), + scan_converter, ) }, - FrameWindowSplittingConfiguration::Window(x) => { - FrameWindowSplittingStrategy::Window(x.finalize(scan_converter)) - }, + _ => quad_strategy.clone(), } } } @@ -257,10 +237,11 @@ fn scan_range_subsplit( }, QuadWindowExpansionStrategy::UniformMobility( (span, step), - converter, + _converter, ) => { // Since scan start < scan end but low scans are high IMs, we need to // subtract instead of adding. 
+ let converter = _converter.unwrap(); // Should always pass if created from FrameWindowConfig let mut curr_start_offset = start.clone(); let mut curr_start_im = converter.convert(curr_start_offset as f64); diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index f166dc4..c230040 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -47,7 +47,7 @@ impl TDFSpectrumReader { let acquisition_type = frame_reader.get_acquisition(); let splitting_strategy = config .frame_splitting_params - .finalize(metadata.im_converter); + .finalize(Some(metadata.im_converter)); let raw_spectrum_reader = RawSpectrumReader::new( &tdf_sql_reader, frame_reader, diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 2b526f5..7ddab80 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -1,12 +1,10 @@ use std::path::Path; #[cfg(feature = "tdf")] -use timsrust::readers::{ - FrameWindowSplittingStrategy, QuadWindowExpansionStrategy, -}; +use timsrust::readers::QuadWindowExpansionStrategy; use timsrust::{ readers::{ - FrameWindowSplittingConfiguration, QuadWindowExpansionConfiguration, - SpectrumProcessingParams, SpectrumReader, SpectrumReaderConfig, + FrameWindowSplittingConfiguration, SpectrumProcessingParams, + SpectrumReader, SpectrumReaderConfig, }, Precursor, Spectrum, }; @@ -159,7 +157,7 @@ fn test_dia_even() { .with_config(SpectrumReaderConfig { frame_splitting_params: FrameWindowSplittingConfiguration::Quadrupole( - QuadWindowExpansionConfiguration::Even(i), + QuadWindowExpansionStrategy::Even(i), ), spectrum_processing_params: SpectrumProcessingParams::default(), }) @@ -186,9 +184,10 @@ fn test_dia_uniform_mobility() { .with_config(SpectrumReaderConfig { frame_splitting_params: FrameWindowSplittingConfiguration::Window( - QuadWindowExpansionConfiguration::UniformMobility(( - i, i, - )), + QuadWindowExpansionStrategy::UniformMobility( + (i, i), + None, + ), ), 
spectrum_processing_params: SpectrumProcessingParams::default(), }) @@ -225,7 +224,7 @@ fn test_dia_uniform_scans() { .with_config(SpectrumReaderConfig { frame_splitting_params: FrameWindowSplittingConfiguration::Window( - QuadWindowExpansionConfiguration::UniformScan((i, i)), + QuadWindowExpansionStrategy::UniformScan((i, i)), ), spectrum_processing_params: SpectrumProcessingParams::default(), }) From 29589e93d31b76f1f04b2ce1941c5f0682135d8c Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 28 Aug 2024 15:29:43 +0200 Subject: [PATCH 66/69] DOCS: readme update --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 8916caf..d0be7dd 100644 --- a/README.md +++ b/README.md @@ -52,4 +52,5 @@ TODO * Make Path of TimsTOF data into special type * Single access point for all readers? * Few unchecked unwraps left +* Queryable data in all dimensions * ... From f55b4c90091ee205451b52e20e4974aec57afbe4 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 28 Aug 2024 15:31:18 +0200 Subject: [PATCH 67/69] DOCS: readme disclaimer --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index d0be7dd..5b0ffac 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,10 @@ A crate to read Bruker TimsTof data. +## Stability + +**NOTE**: TimsRust does not yet have a stable version! Use with caution. 
+ ## Installation Add this crate to your `Cargo.toml`: From 7d3a78daf83925c4f4199d751e88eaf47ad5e3ce Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 28 Aug 2024 15:45:48 +0200 Subject: [PATCH 68/69] FIX: minitdf vs tdf features --- benches/speed_performance.rs | 3 +++ tests/spectrum_readers.rs | 10 +++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/benches/speed_performance.rs b/benches/speed_performance.rs index b0778fb..d1d0865 100644 --- a/benches/speed_performance.rs +++ b/benches/speed_performance.rs @@ -49,6 +49,7 @@ fn criterion_benchmark_dda_frames(c: &mut Criterion) { group.finish(); } +#[cfg(feature = "tdf")] fn criterion_benchmark_dda_spectra(c: &mut Criterion) { // c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20)))); let mut group = c.benchmark_group("sample-size-example"); @@ -101,10 +102,12 @@ fn criterion_benchmark_syp(c: &mut Criterion) { group.finish(); } +#[cfg(feature = "tdf")] criterion_group!( benches, criterion_benchmark_dda_spectra, // criterion_benchmark_dia, // criterion_benchmark_syp ); +#[cfg(feature = "tdf")] criterion_main!(benches); diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 7ddab80..ce188f0 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -1,11 +1,10 @@ use std::path::Path; #[cfg(feature = "tdf")] -use timsrust::readers::QuadWindowExpansionStrategy; +use timsrust::readers::{ + FrameWindowSplittingConfiguration, QuadWindowExpansionStrategy, +}; use timsrust::{ - readers::{ - FrameWindowSplittingConfiguration, SpectrumProcessingParams, - SpectrumReader, SpectrumReaderConfig, - }, + readers::{SpectrumProcessingParams, SpectrumReader, SpectrumReaderConfig}, Precursor, Spectrum, }; @@ -210,6 +209,7 @@ fn test_dia_uniform_mobility() { } } +#[cfg(feature = "tdf")] #[test] fn test_dia_uniform_scans() { let file_name = "dia_test.d"; From 4e108c138e2dff488d286c2c7a5402d9315c5fdc Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 28 Aug 
2024 15:49:46 +0200 Subject: [PATCH 69/69] CHORE: bench update --- benches/speed_performance.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benches/speed_performance.rs b/benches/speed_performance.rs index d1d0865..d9ff504 100644 --- a/benches/speed_performance.rs +++ b/benches/speed_performance.rs @@ -5,7 +5,7 @@ use timsrust::readers::FrameReader; use timsrust::readers::{SpectrumReader, SpectrumReaderConfig}; const DDA_TEST: &str = - "/mnt/c/Users/Sander.Willems/Documents/data/tims05_300SPD/20230505_TIMS05_PaSk_MA_HeLa_6min_ddaP_S1-C10_1_2323.d/"; + "/mnt/d/data/mpib/tims05_300SPD/20230505_TIMS05_PaSk_MA_HeLa_6min_ddaP_S1-C10_1_2323.d/"; const DIA_TEST: &str = "/mnt/c/Users/Sander.Willems/Documents/data/20230505_TIMS05_PaSk_SA_HeLa_6min_diaP_8scans_S1-D3_1_2329.d/"; const SYP_TEST: &str =